opc-agent 4.1.0 → 4.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +20 -20
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +14 -14
  3. package/.github/PULL_REQUEST_TEMPLATE.md +13 -13
  4. package/CHANGELOG.md +48 -48
  5. package/CONTRIBUTING.md +36 -36
  6. package/README.zh-CN.md +497 -497
  7. package/USABILITY-ISSUES.md +73 -0
  8. package/dist/channels/web.js +8 -2
  9. package/dist/channels/wechat.js +6 -6
  10. package/dist/cli.js +200 -85
  11. package/dist/core/runtime.js +37 -15
  12. package/dist/deploy/index.js +56 -56
  13. package/dist/doctor.d.ts +1 -0
  14. package/dist/doctor.js +105 -10
  15. package/dist/memory/deepbrain.d.ts +1 -1
  16. package/dist/memory/deepbrain.js +95 -4
  17. package/dist/scheduler/cron-engine.js +3 -36
  18. package/dist/studio/server.js +30 -1
  19. package/dist/studio-ui/index.html +230 -10
  20. package/dist/ui/components.js +105 -105
  21. package/examples/README.md +22 -22
  22. package/examples/basic-agent.ts +90 -90
  23. package/examples/brain-integration.ts +71 -71
  24. package/examples/multi-channel.ts +74 -74
  25. package/fix-sidebar.mjs +188 -188
  26. package/install.ps1 +154 -154
  27. package/install.sh +164 -164
  28. package/package.json +1 -1
  29. package/scripts/install.ps1 +31 -31
  30. package/scripts/install.sh +40 -40
  31. package/serve-studio.js +13 -13
  32. package/serve-test.js +25 -25
  33. package/src/channels/dingtalk.ts +46 -46
  34. package/src/channels/email.ts +351 -351
  35. package/src/channels/feishu.ts +349 -349
  36. package/src/channels/googlechat.ts +42 -42
  37. package/src/channels/imessage.ts +31 -31
  38. package/src/channels/irc.ts +82 -82
  39. package/src/channels/line.ts +32 -32
  40. package/src/channels/matrix.ts +33 -33
  41. package/src/channels/mattermost.ts +57 -57
  42. package/src/channels/msteams.ts +32 -32
  43. package/src/channels/nostr.ts +32 -32
  44. package/src/channels/qq.ts +33 -33
  45. package/src/channels/signal.ts +32 -32
  46. package/src/channels/sms.ts +33 -33
  47. package/src/channels/telegram.ts +616 -616
  48. package/src/channels/twitch.ts +65 -65
  49. package/src/channels/voice-call.ts +100 -100
  50. package/src/channels/web.ts +8 -2
  51. package/src/channels/websocket.ts +399 -399
  52. package/src/channels/wechat.ts +329 -329
  53. package/src/channels/whatsapp.ts +32 -32
  54. package/src/cli/chat.ts +99 -99
  55. package/src/cli/setup.ts +314 -314
  56. package/src/cli.ts +195 -92
  57. package/src/core/agent.ts +476 -476
  58. package/src/core/api-server.ts +277 -277
  59. package/src/core/audio.ts +98 -98
  60. package/src/core/collaboration.ts +275 -275
  61. package/src/core/context-discovery.ts +85 -85
  62. package/src/core/context-refs.ts +140 -140
  63. package/src/core/gateway.ts +106 -106
  64. package/src/core/heartbeat.ts +51 -51
  65. package/src/core/hooks.ts +105 -105
  66. package/src/core/ide-bridge.ts +133 -133
  67. package/src/core/node-network.ts +86 -86
  68. package/src/core/profiles.ts +122 -122
  69. package/src/core/runtime.ts +25 -0
  70. package/src/core/scheduler.ts +187 -187
  71. package/src/core/session-manager.ts +137 -137
  72. package/src/core/subagent.ts +98 -98
  73. package/src/core/vision.ts +180 -180
  74. package/src/core/workflow-graph.ts +365 -365
  75. package/src/daemon.ts +96 -96
  76. package/src/deploy/index.ts +255 -255
  77. package/src/doctor.ts +98 -11
  78. package/src/eval/index.ts +211 -211
  79. package/src/eval/suites/basic.json +16 -16
  80. package/src/eval/suites/memory.json +12 -12
  81. package/src/eval/suites/safety.json +14 -14
  82. package/src/hub/brain-seed.ts +54 -54
  83. package/src/hub/client.ts +60 -60
  84. package/src/mcp/servers/calculator-mcp.ts +65 -65
  85. package/src/mcp/servers/crypto-mcp.ts +73 -73
  86. package/src/mcp/servers/database-mcp.ts +72 -72
  87. package/src/mcp/servers/datetime-mcp.ts +69 -69
  88. package/src/mcp/servers/filesystem.ts +66 -66
  89. package/src/mcp/servers/github-mcp.ts +58 -58
  90. package/src/mcp/servers/index.ts +63 -63
  91. package/src/mcp/servers/json-mcp.ts +102 -102
  92. package/src/mcp/servers/memory-mcp.ts +56 -56
  93. package/src/mcp/servers/regex-mcp.ts +53 -53
  94. package/src/mcp/servers/web-mcp.ts +49 -49
  95. package/src/memory/context-compressor.ts +189 -189
  96. package/src/memory/deepbrain.ts +99 -5
  97. package/src/memory/seed-loader.ts +212 -212
  98. package/src/memory/user-profiler.ts +215 -215
  99. package/src/plugins/content-filter.ts +23 -23
  100. package/src/plugins/logger.ts +18 -18
  101. package/src/plugins/rate-limiter.ts +38 -38
  102. package/src/protocols/a2a/client.ts +132 -132
  103. package/src/protocols/a2a/index.ts +8 -8
  104. package/src/protocols/a2a/server.ts +333 -333
  105. package/src/protocols/a2a/types.ts +88 -88
  106. package/src/protocols/a2a/utils.ts +50 -50
  107. package/src/protocols/agui/client.ts +83 -83
  108. package/src/protocols/agui/index.ts +4 -4
  109. package/src/protocols/agui/server.ts +218 -218
  110. package/src/protocols/agui/types.ts +153 -153
  111. package/src/protocols/index.ts +2 -2
  112. package/src/protocols/mcp/agent-tools.ts +134 -134
  113. package/src/protocols/mcp/index.ts +8 -8
  114. package/src/protocols/mcp/server.ts +262 -262
  115. package/src/protocols/mcp/types.ts +69 -69
  116. package/src/providers/index.ts +632 -632
  117. package/src/publish/index.ts +376 -376
  118. package/src/scheduler/cron-engine.ts +191 -191
  119. package/src/scheduler/index.ts +2 -2
  120. package/src/schema/oad.ts +217 -217
  121. package/src/security/approval.ts +131 -131
  122. package/src/security/approvals.ts +143 -143
  123. package/src/security/elevated.ts +105 -105
  124. package/src/security/guardrails.ts +248 -248
  125. package/src/security/index.ts +9 -9
  126. package/src/security/keys.ts +87 -87
  127. package/src/security/secrets.ts +129 -129
  128. package/src/skills/builtin/index.ts +408 -408
  129. package/src/skills/marketplace.ts +113 -113
  130. package/src/skills/types.ts +42 -42
  131. package/src/studio/server.ts +31 -1
  132. package/src/studio/templates-data.ts +178 -178
  133. package/src/studio-ui/index.html +230 -10
  134. package/src/telemetry/index.ts +324 -324
  135. package/src/tools/builtin/browser.ts +299 -299
  136. package/src/tools/builtin/datetime.ts +41 -41
  137. package/src/tools/builtin/file.ts +107 -107
  138. package/src/tools/builtin/home-assistant.ts +116 -116
  139. package/src/tools/builtin/rl-tools.ts +243 -243
  140. package/src/tools/builtin/shell.ts +43 -43
  141. package/src/tools/builtin/vision.ts +64 -64
  142. package/src/tools/builtin/web-search.ts +126 -126
  143. package/src/tools/builtin/web.ts +35 -35
  144. package/src/tools/document-processor.ts +213 -213
  145. package/src/tools/image-generator.ts +150 -150
  146. package/src/tools/integrations/calendar.ts +73 -73
  147. package/src/tools/integrations/code-exec.ts +39 -39
  148. package/src/tools/integrations/csv-analyzer.ts +92 -92
  149. package/src/tools/integrations/database.ts +44 -44
  150. package/src/tools/integrations/email-send.ts +76 -76
  151. package/src/tools/integrations/git-tool.ts +42 -42
  152. package/src/tools/integrations/github-tool.ts +76 -76
  153. package/src/tools/integrations/image-gen.ts +56 -56
  154. package/src/tools/integrations/index.ts +92 -92
  155. package/src/tools/integrations/jira.ts +83 -83
  156. package/src/tools/integrations/notion.ts +71 -71
  157. package/src/tools/integrations/npm-tool.ts +48 -48
  158. package/src/tools/integrations/pdf-reader.ts +58 -58
  159. package/src/tools/integrations/slack.ts +65 -65
  160. package/src/tools/integrations/summarizer.ts +49 -49
  161. package/src/tools/integrations/translator.ts +48 -48
  162. package/src/tools/integrations/trello.ts +60 -60
  163. package/src/tools/integrations/vector-search.ts +42 -42
  164. package/src/tools/integrations/web-scraper.ts +47 -47
  165. package/src/tools/integrations/web-search.ts +58 -58
  166. package/src/tools/integrations/webhook.ts +38 -38
  167. package/src/tools/mcp-client.ts +131 -131
  168. package/src/tools/web-scraper.ts +179 -179
  169. package/src/tools/web-search.ts +180 -180
  170. package/src/ui/components.ts +127 -127
  171. package/srv-out.txt +1 -1
  172. package/templates/ecommerce-assistant/README.md +45 -45
  173. package/templates/ecommerce-assistant/oad.yaml +47 -47
  174. package/templates/tech-support/README.md +43 -43
  175. package/templates/tech-support/oad.yaml +45 -45
  176. package/test-agent/Dockerfile +9 -9
  177. package/test-agent/README.md +50 -50
  178. package/test-agent/agent.yaml +23 -23
  179. package/test-agent/docker-compose.yml +11 -11
  180. package/test-agent/oad.yaml +31 -31
  181. package/test-agent/package-lock.json +1492 -1492
  182. package/test-agent/package.json +17 -17
  183. package/test-agent/src/index.ts +24 -24
  184. package/test-agent/src/skills/echo.ts +15 -15
  185. package/test-agent/tsconfig.json +24 -24
  186. package/test-full.js +43 -43
  187. package/test-sidebar.js +22 -22
  188. package/test-studio3.js +75 -75
  189. package/test-studio4.js +41 -41
  190. package/tests/a2a-protocol.test.ts +285 -285
  191. package/tests/agui-protocol.test.ts +246 -246
  192. package/tests/api-server.test.ts +148 -148
  193. package/tests/approvals.test.ts +89 -89
  194. package/tests/audio.test.ts +40 -40
  195. package/tests/brain-seed-extended.test.ts +490 -490
  196. package/tests/brain-seed.test.ts +239 -239
  197. package/tests/browser.test.ts +179 -179
  198. package/tests/channels/discord.test.ts +79 -79
  199. package/tests/channels/email.test.ts +148 -148
  200. package/tests/channels/feishu.test.ts +123 -123
  201. package/tests/channels/telegram.test.ts +129 -129
  202. package/tests/channels/websocket.test.ts +53 -53
  203. package/tests/channels/wechat.test.ts +170 -170
  204. package/tests/channels-extra.test.ts +45 -45
  205. package/tests/chat-cli.test.ts +160 -160
  206. package/tests/cli.test.ts +46 -46
  207. package/tests/context-compressor.test.ts +172 -172
  208. package/tests/context-refs.test.ts +121 -121
  209. package/tests/cron-engine.test.ts +101 -101
  210. package/tests/daemon.test.ts +135 -135
  211. package/tests/deepbrain-wire.test.ts +234 -234
  212. package/tests/deploy-and-dag.test.ts +196 -196
  213. package/tests/doctor.test.ts +38 -38
  214. package/tests/document-processor.test.ts +69 -69
  215. package/tests/e2e-nocode.test.ts +442 -442
  216. package/tests/elevated.test.ts +69 -69
  217. package/tests/eval.test.ts +173 -173
  218. package/tests/gateway.test.ts +63 -63
  219. package/tests/guardrails.test.ts +177 -177
  220. package/tests/home-assistant.test.ts +40 -40
  221. package/tests/hooks.test.ts +79 -79
  222. package/tests/ide-bridge.test.ts +38 -38
  223. package/tests/image-generator.test.ts +84 -84
  224. package/tests/init-role.test.ts +124 -124
  225. package/tests/integrations.test.ts +249 -249
  226. package/tests/mcp-client.test.ts +92 -92
  227. package/tests/mcp-server.test.ts +178 -178
  228. package/tests/mcp-servers.test.ts +260 -260
  229. package/tests/node-network.test.ts +74 -74
  230. package/tests/plugin-a2a-enhanced.test.ts +230 -230
  231. package/tests/profiles.test.ts +61 -61
  232. package/tests/publish.test.ts +231 -231
  233. package/tests/rl-tools.test.ts +93 -93
  234. package/tests/sandbox-manager.test.ts +46 -46
  235. package/tests/scheduler.test.ts +200 -200
  236. package/tests/secrets.test.ts +107 -107
  237. package/tests/security-enhanced.test.ts +233 -233
  238. package/tests/settings-api.test.ts +148 -148
  239. package/tests/setup.test.ts +73 -73
  240. package/tests/subagent.test.ts +193 -193
  241. package/tests/telegram-discord.test.ts +60 -60
  242. package/tests/telemetry.test.ts +186 -186
  243. package/tests/user-profiler.test.ts +169 -169
  244. package/tests/v090-features.test.ts +254 -254
  245. package/tests/vision.test.ts +61 -61
  246. package/tests/voice-call.test.ts +47 -47
  247. package/tests/voice-enhanced.test.ts +169 -169
  248. package/tests/voice-interaction.test.ts +38 -38
  249. package/tests/web-search.test.ts +155 -155
  250. package/tests/workflow-graph.test.ts +279 -279
  251. package/tutorial/customer-service-agent/README.md +612 -612
  252. package/tutorial/customer-service-agent/SOUL.md +26 -26
  253. package/tutorial/customer-service-agent/agent.yaml +63 -63
  254. package/tutorial/customer-service-agent/package.json +19 -19
  255. package/tutorial/customer-service-agent/src/index.ts +69 -69
  256. package/tutorial/customer-service-agent/src/skills/faq.ts +27 -27
  257. package/tutorial/customer-service-agent/src/skills/ticket.ts +22 -22
  258. package/tutorial/customer-service-agent/tsconfig.json +14 -14
@@ -1,69 +1,69 @@
1
- import { describe, it, expect } from 'vitest';
2
- import { ElevatedManager } from '../src/security/elevated';
3
-
4
- describe('ElevatedManager', () => {
5
- it('should default to ask mode', () => {
6
- const mgr = new ElevatedManager();
7
- expect(mgr.getMode()).toBe('ask');
8
- expect(mgr.isElevated()).toBe(false);
9
- mgr.destroy();
10
- });
11
-
12
- it('should elevate and revoke', () => {
13
- const mgr = new ElevatedManager();
14
- expect(mgr.elevate('test')).toBe(true);
15
- expect(mgr.isElevated()).toBe(true);
16
- mgr.revoke();
17
- expect(mgr.isElevated()).toBe(false);
18
- mgr.destroy();
19
- });
20
-
21
- it('should not elevate in off mode', () => {
22
- const mgr = new ElevatedManager({ mode: 'off' });
23
- expect(mgr.elevate()).toBe(false);
24
- expect(mgr.isElevated()).toBe(false);
25
- mgr.destroy();
26
- });
27
-
28
- it('should allow commands in allowedCommands list', () => {
29
- const mgr = new ElevatedManager({ allowedCommands: [/^git\s/] });
30
- expect(mgr.isCommandAllowed('git pull')).toBe(true);
31
- expect(mgr.isCommandAllowed('rm -rf /')).toBe(false);
32
- mgr.destroy();
33
- });
34
-
35
- it('should auto-execute in on mode', () => {
36
- const mgr = new ElevatedManager({ mode: 'on' });
37
- const result = mgr.canExecute('sudo reboot');
38
- expect(result.allowed).toBe(true);
39
- expect(mgr.isElevated()).toBe(true);
40
- mgr.destroy();
41
- });
42
-
43
- it('should require elevation in ask mode when not elevated', () => {
44
- const mgr = new ElevatedManager({ mode: 'ask' });
45
- const result = mgr.canExecute('sudo reboot');
46
- expect(result.allowed).toBe(false);
47
- expect(result.needsElevation).toBe(true);
48
- mgr.destroy();
49
- });
50
-
51
- it('should allow execution in ask mode when elevated', () => {
52
- const mgr = new ElevatedManager({ mode: 'ask' });
53
- mgr.elevate();
54
- const result = mgr.canExecute('sudo reboot');
55
- expect(result.allowed).toBe(true);
56
- mgr.destroy();
57
- });
58
-
59
- it('should keep audit log', () => {
60
- const mgr = new ElevatedManager();
61
- mgr.elevate('testing');
62
- mgr.revoke('done');
63
- const log = mgr.getAuditLog();
64
- expect(log).toHaveLength(2);
65
- expect(log[0].action).toBe('elevate');
66
- expect(log[1].action).toBe('revoke');
67
- mgr.destroy();
68
- });
69
- });
1
+ import { describe, it, expect } from 'vitest';
2
+ import { ElevatedManager } from '../src/security/elevated';
3
+
4
+ describe('ElevatedManager', () => {
5
+ it('should default to ask mode', () => {
6
+ const mgr = new ElevatedManager();
7
+ expect(mgr.getMode()).toBe('ask');
8
+ expect(mgr.isElevated()).toBe(false);
9
+ mgr.destroy();
10
+ });
11
+
12
+ it('should elevate and revoke', () => {
13
+ const mgr = new ElevatedManager();
14
+ expect(mgr.elevate('test')).toBe(true);
15
+ expect(mgr.isElevated()).toBe(true);
16
+ mgr.revoke();
17
+ expect(mgr.isElevated()).toBe(false);
18
+ mgr.destroy();
19
+ });
20
+
21
+ it('should not elevate in off mode', () => {
22
+ const mgr = new ElevatedManager({ mode: 'off' });
23
+ expect(mgr.elevate()).toBe(false);
24
+ expect(mgr.isElevated()).toBe(false);
25
+ mgr.destroy();
26
+ });
27
+
28
+ it('should allow commands in allowedCommands list', () => {
29
+ const mgr = new ElevatedManager({ allowedCommands: [/^git\s/] });
30
+ expect(mgr.isCommandAllowed('git pull')).toBe(true);
31
+ expect(mgr.isCommandAllowed('rm -rf /')).toBe(false);
32
+ mgr.destroy();
33
+ });
34
+
35
+ it('should auto-execute in on mode', () => {
36
+ const mgr = new ElevatedManager({ mode: 'on' });
37
+ const result = mgr.canExecute('sudo reboot');
38
+ expect(result.allowed).toBe(true);
39
+ expect(mgr.isElevated()).toBe(true);
40
+ mgr.destroy();
41
+ });
42
+
43
+ it('should require elevation in ask mode when not elevated', () => {
44
+ const mgr = new ElevatedManager({ mode: 'ask' });
45
+ const result = mgr.canExecute('sudo reboot');
46
+ expect(result.allowed).toBe(false);
47
+ expect(result.needsElevation).toBe(true);
48
+ mgr.destroy();
49
+ });
50
+
51
+ it('should allow execution in ask mode when elevated', () => {
52
+ const mgr = new ElevatedManager({ mode: 'ask' });
53
+ mgr.elevate();
54
+ const result = mgr.canExecute('sudo reboot');
55
+ expect(result.allowed).toBe(true);
56
+ mgr.destroy();
57
+ });
58
+
59
+ it('should keep audit log', () => {
60
+ const mgr = new ElevatedManager();
61
+ mgr.elevate('testing');
62
+ mgr.revoke('done');
63
+ const log = mgr.getAuditLog();
64
+ expect(log).toHaveLength(2);
65
+ expect(log[0].action).toBe('elevate');
66
+ expect(log[1].action).toBe('revoke');
67
+ mgr.destroy();
68
+ });
69
+ });
@@ -1,173 +1,173 @@
1
- import { describe, it, expect } from 'vitest';
2
- import { AgentEvaluator } from '../src/eval';
3
- import type { EvalCase, EvalSuite, EvalReport } from '../src/eval';
4
- import * as path from 'path';
5
- import * as fs from 'fs';
6
- import * as os from 'os';
7
-
8
- // Mock agent
9
- const mockAgent = {
10
- chat: async (input: string) => {
11
- if (!input) return 'Hello! How can I help?';
12
- if (input.includes('capital of France')) return 'The capital of France is Paris.';
13
- if (input.includes('Hello')) return 'Hello there! How can I help you?';
14
- return `Response to: ${input}`;
15
- },
16
- };
17
-
18
- describe('AgentEvaluator', () => {
19
- const evaluator = new AgentEvaluator(mockAgent);
20
-
21
- it('should score exact match correctly', async () => {
22
- const result = await evaluator.evalCase({
23
- id: 'test-1',
24
- input: 'What is the capital of France?',
25
- expectedOutput: 'The capital of France is Paris.',
26
- });
27
- expect(result.scores.exact_match).toBe(1);
28
- expect(result.passed).toBe(true);
29
- });
30
-
31
- it('should score exact match failure', async () => {
32
- const result = await evaluator.evalCase({
33
- id: 'test-2',
34
- input: 'Hello!',
35
- expectedOutput: 'Goodbye!',
36
- });
37
- expect(result.scores.exact_match).toBe(0);
38
- expect(result.passed).toBe(false);
39
- });
40
-
41
- it('should score contains correctly', async () => {
42
- const result = await evaluator.evalCase({
43
- id: 'test-3',
44
- input: 'What is the capital of France?',
45
- expectedContains: ['Paris', 'capital'],
46
- });
47
- expect(result.scores.contains).toBe(1);
48
- expect(result.passed).toBe(true);
49
- });
50
-
51
- it('should score partial contains', async () => {
52
- const result = await evaluator.evalCase({
53
- id: 'test-4',
54
- input: 'What is the capital of France?',
55
- expectedContains: ['Paris', 'London'],
56
- });
57
- expect(result.scores.contains).toBe(0.5);
58
- expect(result.passed).toBe(true); // 0.5 >= 0.5 threshold
59
- });
60
-
61
- it('should score not_contains correctly', async () => {
62
- const result = await evaluator.evalCase({
63
- id: 'test-5',
64
- input: 'Hello!',
65
- expectedNotContains: ['error', 'crash'],
66
- });
67
- expect(result.scores.not_contains).toBe(1);
68
- expect(result.passed).toBe(true);
69
- });
70
-
71
- it('should score not_contains failure', async () => {
72
- const result = await evaluator.evalCase({
73
- id: 'test-6',
74
- input: 'Hello!',
75
- expectedNotContains: ['Hello', 'crash'],
76
- });
77
- expect(result.scores.not_contains).toBe(0.5);
78
- });
79
-
80
- it('should load suite from JSON', () => {
81
- const suitePath = path.join(__dirname, '..', 'src', 'eval', 'suites', 'basic.json');
82
- const suite = AgentEvaluator.loadSuite(suitePath);
83
- expect(suite.name).toBe('basic');
84
- expect(suite.cases.length).toBe(10);
85
- });
86
-
87
- it('should load all built-in suites', () => {
88
- const suites = AgentEvaluator.builtinSuites();
89
- expect(suites.length).toBeGreaterThanOrEqual(3);
90
- const names = suites.map(s => s.name);
91
- expect(names).toContain('basic');
92
- expect(names).toContain('safety');
93
- expect(names).toContain('memory');
94
- });
95
-
96
- it('should have correct case counts for built-in suites', () => {
97
- const suites = AgentEvaluator.builtinSuites();
98
- const basic = suites.find(s => s.name === 'basic');
99
- const safety = suites.find(s => s.name === 'safety');
100
- const memory = suites.find(s => s.name === 'memory');
101
- expect(basic?.caseCount).toBe(10);
102
- expect(safety?.caseCount).toBe(8);
103
- expect(memory?.caseCount).toBe(6);
104
- });
105
-
106
- it('should compare reports and detect regression', () => {
107
- const baseline: EvalReport = {
108
- suite: 'test', timestamp: '', totalCases: 2, passed: 2, failed: 0, passRate: 1, avgLatency: 10, p95Latency: 15, summary: '',
109
- results: [
110
- { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
111
- { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
112
- ],
113
- };
114
- const current: EvalReport = {
115
- suite: 'test', timestamp: '', totalCases: 2, passed: 1, failed: 1, passRate: 0.5, avgLatency: 10, p95Latency: 15, summary: '',
116
- results: [
117
- { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
118
- { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: false },
119
- ],
120
- };
121
- const cmp = AgentEvaluator.compare(baseline, current);
122
- expect(cmp.regressed).toContain('b');
123
- expect(cmp.delta).toBe(-0.5);
124
- });
125
-
126
- it('should compare reports and detect improvement', () => {
127
- const baseline: EvalReport = {
128
- suite: 'test', timestamp: '', totalCases: 2, passed: 1, failed: 1, passRate: 0.5, avgLatency: 10, p95Latency: 15, summary: '',
129
- results: [
130
- { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
131
- { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: false },
132
- ],
133
- };
134
- const current: EvalReport = {
135
- suite: 'test', timestamp: '', totalCases: 2, passed: 2, failed: 0, passRate: 1, avgLatency: 10, p95Latency: 15, summary: '',
136
- results: [
137
- { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
138
- { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
139
- ],
140
- };
141
- const cmp = AgentEvaluator.compare(baseline, current);
142
- expect(cmp.improved).toContain('b');
143
- expect(cmp.delta).toBe(0.5);
144
- });
145
-
146
- it('should save and load report', () => {
147
- const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'eval-'));
148
- const reportPath = path.join(tmpDir, 'report.json');
149
- const report: EvalReport = {
150
- suite: 'test', timestamp: new Date().toISOString(), totalCases: 1, passed: 1, failed: 0, passRate: 1, avgLatency: 5, p95Latency: 5, summary: 'ok',
151
- results: [{ caseId: 'x', input: 'hi', output: 'hello', scores: { latency_ms: 5 }, passed: true }],
152
- };
153
- AgentEvaluator.saveReport(report, reportPath);
154
- const loaded = JSON.parse(fs.readFileSync(reportPath, 'utf-8'));
155
- expect(loaded.suite).toBe('test');
156
- expect(loaded.results.length).toBe(1);
157
- fs.rmSync(tmpDir, { recursive: true });
158
- });
159
-
160
- it('should run evalSuite and produce report', async () => {
161
- const suite: EvalSuite = {
162
- name: 'mini',
163
- cases: [
164
- { id: 't1', input: 'Hello!', expectedContains: ['hello', 'hi'] },
165
- { id: 't2', input: 'What is the capital of France?', expectedContains: ['Paris'] },
166
- ],
167
- };
168
- const report = await evaluator.evalSuite(suite);
169
- expect(report.totalCases).toBe(2);
170
- expect(report.passRate).toBeGreaterThanOrEqual(0);
171
- expect(report.summary).toContain('mini');
172
- });
173
- });
1
+ import { describe, it, expect } from 'vitest';
2
+ import { AgentEvaluator } from '../src/eval';
3
+ import type { EvalCase, EvalSuite, EvalReport } from '../src/eval';
4
+ import * as path from 'path';
5
+ import * as fs from 'fs';
6
+ import * as os from 'os';
7
+
8
+ // Mock agent
9
+ const mockAgent = {
10
+ chat: async (input: string) => {
11
+ if (!input) return 'Hello! How can I help?';
12
+ if (input.includes('capital of France')) return 'The capital of France is Paris.';
13
+ if (input.includes('Hello')) return 'Hello there! How can I help you?';
14
+ return `Response to: ${input}`;
15
+ },
16
+ };
17
+
18
+ describe('AgentEvaluator', () => {
19
+ const evaluator = new AgentEvaluator(mockAgent);
20
+
21
+ it('should score exact match correctly', async () => {
22
+ const result = await evaluator.evalCase({
23
+ id: 'test-1',
24
+ input: 'What is the capital of France?',
25
+ expectedOutput: 'The capital of France is Paris.',
26
+ });
27
+ expect(result.scores.exact_match).toBe(1);
28
+ expect(result.passed).toBe(true);
29
+ });
30
+
31
+ it('should score exact match failure', async () => {
32
+ const result = await evaluator.evalCase({
33
+ id: 'test-2',
34
+ input: 'Hello!',
35
+ expectedOutput: 'Goodbye!',
36
+ });
37
+ expect(result.scores.exact_match).toBe(0);
38
+ expect(result.passed).toBe(false);
39
+ });
40
+
41
+ it('should score contains correctly', async () => {
42
+ const result = await evaluator.evalCase({
43
+ id: 'test-3',
44
+ input: 'What is the capital of France?',
45
+ expectedContains: ['Paris', 'capital'],
46
+ });
47
+ expect(result.scores.contains).toBe(1);
48
+ expect(result.passed).toBe(true);
49
+ });
50
+
51
+ it('should score partial contains', async () => {
52
+ const result = await evaluator.evalCase({
53
+ id: 'test-4',
54
+ input: 'What is the capital of France?',
55
+ expectedContains: ['Paris', 'London'],
56
+ });
57
+ expect(result.scores.contains).toBe(0.5);
58
+ expect(result.passed).toBe(true); // 0.5 >= 0.5 threshold
59
+ });
60
+
61
+ it('should score not_contains correctly', async () => {
62
+ const result = await evaluator.evalCase({
63
+ id: 'test-5',
64
+ input: 'Hello!',
65
+ expectedNotContains: ['error', 'crash'],
66
+ });
67
+ expect(result.scores.not_contains).toBe(1);
68
+ expect(result.passed).toBe(true);
69
+ });
70
+
71
+ it('should score not_contains failure', async () => {
72
+ const result = await evaluator.evalCase({
73
+ id: 'test-6',
74
+ input: 'Hello!',
75
+ expectedNotContains: ['Hello', 'crash'],
76
+ });
77
+ expect(result.scores.not_contains).toBe(0.5);
78
+ });
79
+
80
+ it('should load suite from JSON', () => {
81
+ const suitePath = path.join(__dirname, '..', 'src', 'eval', 'suites', 'basic.json');
82
+ const suite = AgentEvaluator.loadSuite(suitePath);
83
+ expect(suite.name).toBe('basic');
84
+ expect(suite.cases.length).toBe(10);
85
+ });
86
+
87
+ it('should load all built-in suites', () => {
88
+ const suites = AgentEvaluator.builtinSuites();
89
+ expect(suites.length).toBeGreaterThanOrEqual(3);
90
+ const names = suites.map(s => s.name);
91
+ expect(names).toContain('basic');
92
+ expect(names).toContain('safety');
93
+ expect(names).toContain('memory');
94
+ });
95
+
96
+ it('should have correct case counts for built-in suites', () => {
97
+ const suites = AgentEvaluator.builtinSuites();
98
+ const basic = suites.find(s => s.name === 'basic');
99
+ const safety = suites.find(s => s.name === 'safety');
100
+ const memory = suites.find(s => s.name === 'memory');
101
+ expect(basic?.caseCount).toBe(10);
102
+ expect(safety?.caseCount).toBe(8);
103
+ expect(memory?.caseCount).toBe(6);
104
+ });
105
+
106
+ it('should compare reports and detect regression', () => {
107
+ const baseline: EvalReport = {
108
+ suite: 'test', timestamp: '', totalCases: 2, passed: 2, failed: 0, passRate: 1, avgLatency: 10, p95Latency: 15, summary: '',
109
+ results: [
110
+ { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
111
+ { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
112
+ ],
113
+ };
114
+ const current: EvalReport = {
115
+ suite: 'test', timestamp: '', totalCases: 2, passed: 1, failed: 1, passRate: 0.5, avgLatency: 10, p95Latency: 15, summary: '',
116
+ results: [
117
+ { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
118
+ { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: false },
119
+ ],
120
+ };
121
+ const cmp = AgentEvaluator.compare(baseline, current);
122
+ expect(cmp.regressed).toContain('b');
123
+ expect(cmp.delta).toBe(-0.5);
124
+ });
125
+
126
+ it('should compare reports and detect improvement', () => {
127
+ const baseline: EvalReport = {
128
+ suite: 'test', timestamp: '', totalCases: 2, passed: 1, failed: 1, passRate: 0.5, avgLatency: 10, p95Latency: 15, summary: '',
129
+ results: [
130
+ { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
131
+ { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: false },
132
+ ],
133
+ };
134
+ const current: EvalReport = {
135
+ suite: 'test', timestamp: '', totalCases: 2, passed: 2, failed: 0, passRate: 1, avgLatency: 10, p95Latency: 15, summary: '',
136
+ results: [
137
+ { caseId: 'a', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
138
+ { caseId: 'b', input: '', output: '', scores: { latency_ms: 10 }, passed: true },
139
+ ],
140
+ };
141
+ const cmp = AgentEvaluator.compare(baseline, current);
142
+ expect(cmp.improved).toContain('b');
143
+ expect(cmp.delta).toBe(0.5);
144
+ });
145
+
146
+ it('should save and load report', () => {
147
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'eval-'));
148
+ const reportPath = path.join(tmpDir, 'report.json');
149
+ const report: EvalReport = {
150
+ suite: 'test', timestamp: new Date().toISOString(), totalCases: 1, passed: 1, failed: 0, passRate: 1, avgLatency: 5, p95Latency: 5, summary: 'ok',
151
+ results: [{ caseId: 'x', input: 'hi', output: 'hello', scores: { latency_ms: 5 }, passed: true }],
152
+ };
153
+ AgentEvaluator.saveReport(report, reportPath);
154
+ const loaded = JSON.parse(fs.readFileSync(reportPath, 'utf-8'));
155
+ expect(loaded.suite).toBe('test');
156
+ expect(loaded.results.length).toBe(1);
157
+ fs.rmSync(tmpDir, { recursive: true });
158
+ });
159
+
160
+ it('should run evalSuite and produce report', async () => {
161
+ const suite: EvalSuite = {
162
+ name: 'mini',
163
+ cases: [
164
+ { id: 't1', input: 'Hello!', expectedContains: ['hello', 'hi'] },
165
+ { id: 't2', input: 'What is the capital of France?', expectedContains: ['Paris'] },
166
+ ],
167
+ };
168
+ const report = await evaluator.evalSuite(suite);
169
+ expect(report.totalCases).toBe(2);
170
+ expect(report.passRate).toBeGreaterThanOrEqual(0);
171
+ expect(report.summary).toContain('mini');
172
+ });
173
+ });
@@ -1,63 +1,63 @@
1
- import { describe, it, expect, beforeEach } from 'vitest';
2
- import { Gateway } from '../src/core/gateway';
3
-
4
- describe('Gateway', () => {
5
- let gw: Gateway;
6
-
7
- beforeEach(() => {
8
- gw = new Gateway({
9
- port: 3000,
10
- agents: [{ id: 'agent-1', name: 'Test Agent' }],
11
- channels: [{ id: 'ch-1', type: 'web' }],
12
- });
13
- });
14
-
15
- it('should start and stop', async () => {
16
- await gw.start();
17
- expect(gw.getStatus().agents).toBe(1);
18
- await gw.stop();
19
- });
20
-
21
- it('should throw on double start', async () => {
22
- await gw.start();
23
- await expect(gw.start()).rejects.toThrow('already running');
24
- await gw.stop();
25
- });
26
-
27
- it('should route messages', async () => {
28
- await gw.start();
29
- const agentId = await gw.routeMessage({ id: '1', content: 'hi', channel: 'web', timestamp: Date.now() }, 'web');
30
- expect(agentId).toBe('agent-1');
31
- await gw.stop();
32
- });
33
-
34
- it('should add and remove agents', async () => {
35
- gw.addAgent({ id: 'agent-2', name: 'Agent 2' });
36
- expect(gw.getStatus().agents).toBe(2);
37
- gw.removeAgent('agent-2');
38
- expect(gw.getStatus().agents).toBe(1);
39
- });
40
-
41
- it('should throw removing unknown agent', () => {
42
- expect(() => gw.removeAgent('unknown')).toThrow('not found');
43
- });
44
-
45
- it('should track status', async () => {
46
- await gw.start();
47
- const status = gw.getStatus();
48
- expect(status.agents).toBe(1);
49
- expect(status.channels).toBe(1);
50
- expect(status.messagesProcessed).toBe(0);
51
- expect(status.uptime).toBeGreaterThanOrEqual(0);
52
- await gw.stop();
53
- });
54
-
55
- it('should report metrics', async () => {
56
- await gw.start();
57
- await gw.routeMessage({ id: '1', content: 'test', channel: 'web', timestamp: Date.now() }, 'web');
58
- const metrics = gw.getMetrics();
59
- expect(metrics.messagesPerMinute).toBeGreaterThan(0);
60
- expect(metrics.errorRate).toBe(0);
61
- await gw.stop();
62
- });
63
- });
1
+ import { describe, it, expect, beforeEach } from 'vitest';
2
+ import { Gateway } from '../src/core/gateway';
3
+
4
+ describe('Gateway', () => {
5
+ let gw: Gateway;
6
+
7
+ beforeEach(() => {
8
+ gw = new Gateway({
9
+ port: 3000,
10
+ agents: [{ id: 'agent-1', name: 'Test Agent' }],
11
+ channels: [{ id: 'ch-1', type: 'web' }],
12
+ });
13
+ });
14
+
15
+ it('should start and stop', async () => {
16
+ await gw.start();
17
+ expect(gw.getStatus().agents).toBe(1);
18
+ await gw.stop();
19
+ });
20
+
21
+ it('should throw on double start', async () => {
22
+ await gw.start();
23
+ await expect(gw.start()).rejects.toThrow('already running');
24
+ await gw.stop();
25
+ });
26
+
27
+ it('should route messages', async () => {
28
+ await gw.start();
29
+ const agentId = await gw.routeMessage({ id: '1', content: 'hi', channel: 'web', timestamp: Date.now() }, 'web');
30
+ expect(agentId).toBe('agent-1');
31
+ await gw.stop();
32
+ });
33
+
34
+ it('should add and remove agents', async () => {
35
+ gw.addAgent({ id: 'agent-2', name: 'Agent 2' });
36
+ expect(gw.getStatus().agents).toBe(2);
37
+ gw.removeAgent('agent-2');
38
+ expect(gw.getStatus().agents).toBe(1);
39
+ });
40
+
41
+ it('should throw removing unknown agent', () => {
42
+ expect(() => gw.removeAgent('unknown')).toThrow('not found');
43
+ });
44
+
45
+ it('should track status', async () => {
46
+ await gw.start();
47
+ const status = gw.getStatus();
48
+ expect(status.agents).toBe(1);
49
+ expect(status.channels).toBe(1);
50
+ expect(status.messagesProcessed).toBe(0);
51
+ expect(status.uptime).toBeGreaterThanOrEqual(0);
52
+ await gw.stop();
53
+ });
54
+
55
+ it('should report metrics', async () => {
56
+ await gw.start();
57
+ await gw.routeMessage({ id: '1', content: 'test', channel: 'web', timestamp: Date.now() }, 'web');
58
+ const metrics = gw.getMetrics();
59
+ expect(metrics.messagesPerMinute).toBeGreaterThan(0);
60
+ expect(metrics.errorRate).toBe(0);
61
+ await gw.stop();
62
+ });
63
+ });