codex-linux 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. package/.claude/settings.local.json +10 -0
  2. package/.eslintrc.json +27 -0
  3. package/.github/workflows/ci.yml +156 -0
  4. package/.huskyrc +7 -0
  5. package/.lintstagedrc +13 -0
  6. package/.prettierrc +12 -0
  7. package/CLAUDE.md +163 -0
  8. package/DESIGN_SUPERIOR.md +73 -0
  9. package/Dockerfile +64 -0
  10. package/INSTALLATION.md +152 -0
  11. package/LICENSE +21 -0
  12. package/README.md +245 -0
  13. package/assets/skills/code-review/instructions.md +102 -0
  14. package/assets/skills/code-review/skill.yaml +15 -0
  15. package/assets/skills/refactoring/instructions.md +149 -0
  16. package/assets/skills/refactoring/skill.yaml +15 -0
  17. package/assets/skills/testing/skill.yaml +15 -0
  18. package/commitlint.config.js +23 -0
  19. package/dist/main/DatabaseManager.js +763 -0
  20. package/dist/main/DatabaseManager.js.map +1 -0
  21. package/dist/main/SettingsManager.js +61 -0
  22. package/dist/main/SettingsManager.js.map +1 -0
  23. package/dist/main/agents/AgentOrchestrator.js +787 -0
  24. package/dist/main/agents/AgentOrchestrator.js.map +1 -0
  25. package/dist/main/agents/AgentSDK.js +219 -0
  26. package/dist/main/agents/AgentSDK.js.map +1 -0
  27. package/dist/main/agents/AgentTools.js +348 -0
  28. package/dist/main/agents/AgentTools.js.map +1 -0
  29. package/dist/main/agents/CodeIndex.js +233 -0
  30. package/dist/main/agents/CodeIndex.js.map +1 -0
  31. package/dist/main/agents/EmbeddingService.js +80 -0
  32. package/dist/main/agents/EmbeddingService.js.map +1 -0
  33. package/dist/main/agents/NativeToolCalling.js +206 -0
  34. package/dist/main/agents/NativeToolCalling.js.map +1 -0
  35. package/dist/main/api/APIServer.js +278 -0
  36. package/dist/main/api/APIServer.js.map +1 -0
  37. package/dist/main/api/RateLimiter.js +138 -0
  38. package/dist/main/api/RateLimiter.js.map +1 -0
  39. package/dist/main/api/WebSocketManager.js +300 -0
  40. package/dist/main/api/WebSocketManager.js.map +1 -0
  41. package/dist/main/assistant/ContextOptimizer.js +192 -0
  42. package/dist/main/assistant/ContextOptimizer.js.map +1 -0
  43. package/dist/main/assistant/PredictedOutputManager.js +172 -0
  44. package/dist/main/assistant/PredictedOutputManager.js.map +1 -0
  45. package/dist/main/assistant/PromptCacheManager.js +193 -0
  46. package/dist/main/assistant/PromptCacheManager.js.map +1 -0
  47. package/dist/main/assistant/PromptOptimizer.js +626 -0
  48. package/dist/main/assistant/PromptOptimizer.js.map +1 -0
  49. package/dist/main/assistant/SmartCodeAssistant.js +224 -0
  50. package/dist/main/assistant/SmartCodeAssistant.js.map +1 -0
  51. package/dist/main/auth/SessionManager.js +300 -0
  52. package/dist/main/auth/SessionManager.js.map +1 -0
  53. package/dist/main/automations/AdvancedWebhookSystem.js +212 -0
  54. package/dist/main/automations/AdvancedWebhookSystem.js.map +1 -0
  55. package/dist/main/automations/AutomationScheduler.js +269 -0
  56. package/dist/main/automations/AutomationScheduler.js.map +1 -0
  57. package/dist/main/automations/BatchProcessingSystem.js +159 -0
  58. package/dist/main/automations/BatchProcessingSystem.js.map +1 -0
  59. package/dist/main/automations/BrowserAutomationManager.js +195 -0
  60. package/dist/main/automations/BrowserAutomationManager.js.map +1 -0
  61. package/dist/main/automations/GitHubActionsManager.js +129 -0
  62. package/dist/main/automations/GitHubActionsManager.js.map +1 -0
  63. package/dist/main/automations/GitLabCIManager.js +122 -0
  64. package/dist/main/automations/GitLabCIManager.js.map +1 -0
  65. package/dist/main/automations/PriorityQueueManager.js +240 -0
  66. package/dist/main/automations/PriorityQueueManager.js.map +1 -0
  67. package/dist/main/background/BackgroundModeManager.js +117 -0
  68. package/dist/main/background/BackgroundModeManager.js.map +1 -0
  69. package/dist/main/backup/BackupManager.js +254 -0
  70. package/dist/main/backup/BackupManager.js.map +1 -0
  71. package/dist/main/backup/MigrationManager.js +114 -0
  72. package/dist/main/backup/MigrationManager.js.map +1 -0
  73. package/dist/main/commands/SlashCommandManager.js +399 -0
  74. package/dist/main/commands/SlashCommandManager.js.map +1 -0
  75. package/dist/main/config/ClaudeMdParser.js +519 -0
  76. package/dist/main/config/ClaudeMdParser.js.map +1 -0
  77. package/dist/main/config/CustomizationManager.js +381 -0
  78. package/dist/main/config/CustomizationManager.js.map +1 -0
  79. package/dist/main/config/LaunchConfigManager.js +211 -0
  80. package/dist/main/config/LaunchConfigManager.js.map +1 -0
  81. package/dist/main/config/SettingsManager.js +166 -0
  82. package/dist/main/config/SettingsManager.js.map +1 -0
  83. package/dist/main/connectors/ConnectorManager.js +151 -0
  84. package/dist/main/connectors/ConnectorManager.js.map +1 -0
  85. package/dist/main/connectors/DatabaseConnector.js +222 -0
  86. package/dist/main/connectors/DatabaseConnector.js.map +1 -0
  87. package/dist/main/cowork/CoworkManager.js +324 -0
  88. package/dist/main/cowork/CoworkManager.js.map +1 -0
  89. package/dist/main/evals/AgentEvalFramework.js +538 -0
  90. package/dist/main/evals/AgentEvalFramework.js.map +1 -0
  91. package/dist/main/evals/GraderManager.js +285 -0
  92. package/dist/main/evals/GraderManager.js.map +1 -0
  93. package/dist/main/git/GitWorktreeManager.js +214 -0
  94. package/dist/main/git/GitWorktreeManager.js.map +1 -0
  95. package/dist/main/github/GitHubPRMonitor.js +244 -0
  96. package/dist/main/github/GitHubPRMonitor.js.map +1 -0
  97. package/dist/main/ide/ContinueInManager.js +181 -0
  98. package/dist/main/ide/ContinueInManager.js.map +1 -0
  99. package/dist/main/ide/IDEIntegration.js +277 -0
  100. package/dist/main/ide/IDEIntegration.js.map +1 -0
  101. package/dist/main/integrations/LinearManager.js +252 -0
  102. package/dist/main/integrations/LinearManager.js.map +1 -0
  103. package/dist/main/integrations/SlackBotManager.js +247 -0
  104. package/dist/main/integrations/SlackBotManager.js.map +1 -0
  105. package/dist/main/lsp/LSPManager.js +394 -0
  106. package/dist/main/lsp/LSPManager.js.map +1 -0
  107. package/dist/main/main.js +1087 -0
  108. package/dist/main/main.js.map +1 -0
  109. package/dist/main/mcp/MCPConfigurationManager.js +281 -0
  110. package/dist/main/mcp/MCPConfigurationManager.js.map +1 -0
  111. package/dist/main/mcp/MCPManager.js +710 -0
  112. package/dist/main/mcp/MCPManager.js.map +1 -0
  113. package/dist/main/mcp/MCPRegistry.js +272 -0
  114. package/dist/main/mcp/MCPRegistry.js.map +1 -0
  115. package/dist/main/monitoring/ErrorRecoveryManager.js +268 -0
  116. package/dist/main/monitoring/ErrorRecoveryManager.js.map +1 -0
  117. package/dist/main/monitoring/ErrorTracker.js +57 -0
  118. package/dist/main/monitoring/ErrorTracker.js.map +1 -0
  119. package/dist/main/monitoring/MetricsCollector.js +155 -0
  120. package/dist/main/monitoring/MetricsCollector.js.map +1 -0
  121. package/dist/main/monitoring/TraceGradingSystem.js +148 -0
  122. package/dist/main/monitoring/TraceGradingSystem.js.map +1 -0
  123. package/dist/main/notifications/NotificationManager.js +67 -0
  124. package/dist/main/notifications/NotificationManager.js.map +1 -0
  125. package/dist/main/pair/AIPairProgramming.js +200 -0
  126. package/dist/main/pair/AIPairProgramming.js.map +1 -0
  127. package/dist/main/plugins/PluginManager.js +222 -0
  128. package/dist/main/plugins/PluginManager.js.map +1 -0
  129. package/dist/main/plugins/PluginMarketplace.js +237 -0
  130. package/dist/main/plugins/PluginMarketplace.js.map +1 -0
  131. package/dist/main/preload.js +189 -0
  132. package/dist/main/preload.js.map +1 -0
  133. package/dist/main/preview/PreviewSessionManager.js +170 -0
  134. package/dist/main/preview/PreviewSessionManager.js.map +1 -0
  135. package/dist/main/providers/AIProviderManager.js +327 -0
  136. package/dist/main/providers/AIProviderManager.js.map +1 -0
  137. package/dist/main/providers/FineTuningManager.js +276 -0
  138. package/dist/main/providers/FineTuningManager.js.map +1 -0
  139. package/dist/main/providers/FreeModelsProvider.js +1104 -0
  140. package/dist/main/providers/FreeModelsProvider.js.map +1 -0
  141. package/dist/main/realtime/RealtimeManager.js +116 -0
  142. package/dist/main/realtime/RealtimeManager.js.map +1 -0
  143. package/dist/main/remote/CloudEnvironmentManager.js +232 -0
  144. package/dist/main/remote/CloudEnvironmentManager.js.map +1 -0
  145. package/dist/main/remote/RemoteSessionManager.js +255 -0
  146. package/dist/main/remote/RemoteSessionManager.js.map +1 -0
  147. package/dist/main/search/DeepResearchManager.js +335 -0
  148. package/dist/main/search/DeepResearchManager.js.map +1 -0
  149. package/dist/main/search/WebSearchIntegration.js +147 -0
  150. package/dist/main/search/WebSearchIntegration.js.map +1 -0
  151. package/dist/main/security/AdminConsoleManager.js +223 -0
  152. package/dist/main/security/AdminConsoleManager.js.map +1 -0
  153. package/dist/main/security/AuditLogger.js +136 -0
  154. package/dist/main/security/AuditLogger.js.map +1 -0
  155. package/dist/main/security/PermissionManager.js +144 -0
  156. package/dist/main/security/PermissionManager.js.map +1 -0
  157. package/dist/main/security/SSOManager.js +173 -0
  158. package/dist/main/security/SSOManager.js.map +1 -0
  159. package/dist/main/security/SecurityManager.js +152 -0
  160. package/dist/main/security/SecurityManager.js.map +1 -0
  161. package/dist/main/skills/SkillsManager.js +223 -0
  162. package/dist/main/skills/SkillsManager.js.map +1 -0
  163. package/dist/main/ssh/SSHManager.js +65 -0
  164. package/dist/main/ssh/SSHManager.js.map +1 -0
  165. package/dist/main/streaming/StreamingManager.js +225 -0
  166. package/dist/main/streaming/StreamingManager.js.map +1 -0
  167. package/dist/main/sync/CloudSyncManager.js +422 -0
  168. package/dist/main/sync/CloudSyncManager.js.map +1 -0
  169. package/dist/main/types.js +28 -0
  170. package/dist/main/types.js.map +1 -0
  171. package/dist/main/verification/AutoVerifyManager.js +235 -0
  172. package/dist/main/verification/AutoVerifyManager.js.map +1 -0
  173. package/dist/main/vision/ComputerUseManager.js +376 -0
  174. package/dist/main/vision/ComputerUseManager.js.map +1 -0
  175. package/dist/main/vision/ImageVideoGenerationManager.js +401 -0
  176. package/dist/main/vision/ImageVideoGenerationManager.js.map +1 -0
  177. package/dist/main/vision/VisionManager.js +172 -0
  178. package/dist/main/vision/VisionManager.js.map +1 -0
  179. package/dist/renderer/assets/main-DJlZQBCA.js +304 -0
  180. package/dist/renderer/assets/main-N33ZXEr8.css +1 -0
  181. package/dist/renderer/index.html +21 -0
  182. package/dist/renderer/manifest.json +42 -0
  183. package/dist/renderer/sw.ts +109 -0
  184. package/dist/shared/types.js +35 -0
  185. package/dist/shared/types.js.map +1 -0
  186. package/docker-compose.yml +65 -0
  187. package/docs/API.md +307 -0
  188. package/docs/USER_GUIDE.md +476 -0
  189. package/examples/plugins/sample-plugin/package.json +41 -0
  190. package/examples/plugins/sample-plugin/src/index.ts +75 -0
  191. package/index.html +20 -0
  192. package/jest.config.js +39 -0
  193. package/package.json +180 -0
  194. package/packages/cli/package.json +29 -0
  195. package/packages/cli/src/commands/agents.ts +199 -0
  196. package/packages/cli/src/commands/tasks.ts +61 -0
  197. package/packages/cli/src/index.ts +91 -0
  198. package/packages/cli/src/utils/api.ts +45 -0
  199. package/packages/cli/src/utils/config.ts +61 -0
  200. package/packages/npm-installer/bin/codex-linux +126 -0
  201. package/packages/npm-installer/lib/download.js +273 -0
  202. package/packages/npm-installer/package.json +42 -0
  203. package/packages/vscode-extension/package.json +167 -0
  204. package/packages/vscode-extension/src/api.ts +68 -0
  205. package/packages/vscode-extension/src/extension.ts +161 -0
  206. package/packages/vscode-extension/src/panels/chatPanel.ts +265 -0
  207. package/packages/vscode-extension/src/panels/createAgentPanel.ts +227 -0
  208. package/packages/vscode-extension/src/providers/agentsProvider.ts +80 -0
  209. package/postcss.config.js +6 -0
  210. package/public/manifest.json +42 -0
  211. package/public/sw.ts +109 -0
  212. package/scripts/install-dev.sh +103 -0
  213. package/scripts/install.sh +275 -0
  214. package/src/main/DatabaseManager.ts +950 -0
  215. package/src/main/SettingsManager.ts +63 -0
  216. package/src/main/agents/AgentOrchestrator.ts +930 -0
  217. package/src/main/agents/AgentSDK.ts +269 -0
  218. package/src/main/agents/AgentTools.ts +380 -0
  219. package/src/main/agents/CodeIndex.ts +240 -0
  220. package/src/main/agents/EmbeddingService.ts +88 -0
  221. package/src/main/agents/NativeToolCalling.ts +245 -0
  222. package/src/main/api/APIServer.ts +316 -0
  223. package/src/main/api/RateLimiter.ts +165 -0
  224. package/src/main/api/WebSocketManager.ts +398 -0
  225. package/src/main/assistant/ContextOptimizer.ts +214 -0
  226. package/src/main/assistant/PredictedOutputManager.ts +265 -0
  227. package/src/main/assistant/PromptCacheManager.ts +280 -0
  228. package/src/main/assistant/PromptOptimizer.ts +746 -0
  229. package/src/main/assistant/SmartCodeAssistant.ts +234 -0
  230. package/src/main/auth/SessionManager.ts +415 -0
  231. package/src/main/automations/AdvancedWebhookSystem.ts +281 -0
  232. package/src/main/automations/AutomationScheduler.ts +272 -0
  233. package/src/main/automations/BatchProcessingSystem.ts +207 -0
  234. package/src/main/automations/BrowserAutomationManager.ts +203 -0
  235. package/src/main/automations/GitHubActionsManager.ts +151 -0
  236. package/src/main/automations/GitLabCIManager.ts +206 -0
  237. package/src/main/automations/PriorityQueueManager.ts +328 -0
  238. package/src/main/background/BackgroundModeManager.ts +130 -0
  239. package/src/main/backup/BackupManager.ts +287 -0
  240. package/src/main/backup/MigrationManager.ts +132 -0
  241. package/src/main/commands/SlashCommandManager.ts +407 -0
  242. package/src/main/config/ClaudeMdParser.ts +539 -0
  243. package/src/main/config/CustomizationManager.ts +493 -0
  244. package/src/main/config/LaunchConfigManager.ts +212 -0
  245. package/src/main/config/SettingsManager.ts +163 -0
  246. package/src/main/connectors/ConnectorManager.ts +175 -0
  247. package/src/main/connectors/DatabaseConnector.ts +212 -0
  248. package/src/main/cowork/CoworkManager.ts +431 -0
  249. package/src/main/evals/AgentEvalFramework.ts +665 -0
  250. package/src/main/evals/GraderManager.ts +417 -0
  251. package/src/main/git/GitWorktreeManager.ts +211 -0
  252. package/src/main/github/GitHubPRMonitor.ts +317 -0
  253. package/src/main/ide/ContinueInManager.ts +180 -0
  254. package/src/main/ide/IDEIntegration.ts +288 -0
  255. package/src/main/integrations/LinearManager.ts +327 -0
  256. package/src/main/integrations/SlackBotManager.ts +312 -0
  257. package/src/main/lsp/LSPManager.ts +445 -0
  258. package/src/main/main.ts +1221 -0
  259. package/src/main/mcp/MCPConfigurationManager.ts +281 -0
  260. package/src/main/mcp/MCPManager.ts +799 -0
  261. package/src/main/mcp/MCPRegistry.ts +273 -0
  262. package/src/main/monitoring/ErrorRecoveryManager.ts +359 -0
  263. package/src/main/monitoring/ErrorTracker.ts +60 -0
  264. package/src/main/monitoring/MetricsCollector.ts +196 -0
  265. package/src/main/monitoring/TraceGradingSystem.ts +196 -0
  266. package/src/main/notifications/NotificationManager.ts +96 -0
  267. package/src/main/pair/AIPairProgramming.ts +290 -0
  268. package/src/main/plugins/PluginManager.ts +266 -0
  269. package/src/main/plugins/PluginMarketplace.ts +318 -0
  270. package/src/main/preload.ts +215 -0
  271. package/src/main/preview/PreviewSessionManager.ts +186 -0
  272. package/src/main/providers/AIProviderManager.ts +394 -0
  273. package/src/main/providers/FineTuningManager.ts +390 -0
  274. package/src/main/providers/FreeModelsProvider.ts +1156 -0
  275. package/src/main/realtime/RealtimeManager.ts +147 -0
  276. package/src/main/remote/CloudEnvironmentManager.ts +253 -0
  277. package/src/main/remote/RemoteSessionManager.ts +323 -0
  278. package/src/main/search/DeepResearchManager.ts +458 -0
  279. package/src/main/search/WebSearchIntegration.ts +203 -0
  280. package/src/main/security/AdminConsoleManager.ts +244 -0
  281. package/src/main/security/AuditLogger.ts +143 -0
  282. package/src/main/security/PermissionManager.ts +184 -0
  283. package/src/main/security/SSOManager.ts +241 -0
  284. package/src/main/security/SecurityManager.ts +139 -0
  285. package/src/main/skills/SkillsManager.ts +218 -0
  286. package/src/main/ssh/SSHManager.ts +86 -0
  287. package/src/main/streaming/StreamingManager.ts +306 -0
  288. package/src/main/sync/CloudSyncManager.ts +532 -0
  289. package/src/main/verification/AutoVerifyManager.ts +285 -0
  290. package/src/main/vision/ComputerUseManager.ts +475 -0
  291. package/src/main/vision/ImageVideoGenerationManager.ts +526 -0
  292. package/src/main/vision/VisionManager.ts +186 -0
  293. package/src/renderer/App.tsx +314 -0
  294. package/src/renderer/components/AdvancedSettingsPanel.tsx +225 -0
  295. package/src/renderer/components/AgentPanel.tsx +760 -0
  296. package/src/renderer/components/AppPreview.tsx +220 -0
  297. package/src/renderer/components/AuditTrailPanel.tsx +148 -0
  298. package/src/renderer/components/AutomationPanel.tsx +220 -0
  299. package/src/renderer/components/ChatInterface.tsx +595 -0
  300. package/src/renderer/components/ChatTab.tsx +296 -0
  301. package/src/renderer/components/CodeEditor.tsx +257 -0
  302. package/src/renderer/components/CodeReviewPanel.tsx +256 -0
  303. package/src/renderer/components/CodeWorkspace.tsx +192 -0
  304. package/src/renderer/components/CodebaseDashboard.tsx +295 -0
  305. package/src/renderer/components/ComputerUsePanel.tsx +262 -0
  306. package/src/renderer/components/ConnectorsPanel.tsx +471 -0
  307. package/src/renderer/components/ContextMenu.tsx +155 -0
  308. package/src/renderer/components/ContextUsageDisplay.tsx +248 -0
  309. package/src/renderer/components/CoworkPanel.tsx +415 -0
  310. package/src/renderer/components/DiffViewer.tsx +452 -0
  311. package/src/renderer/components/ErrorBoundary.tsx +273 -0
  312. package/src/renderer/components/ExtendedThinkingToggle.tsx +244 -0
  313. package/src/renderer/components/FileAttachments.tsx +247 -0
  314. package/src/renderer/components/FileExplorer.tsx +242 -0
  315. package/src/renderer/components/FileExplorerPanel.tsx +302 -0
  316. package/src/renderer/components/GitPanel.tsx +154 -0
  317. package/src/renderer/components/Header.tsx +113 -0
  318. package/src/renderer/components/MCPPanel.tsx +326 -0
  319. package/src/renderer/components/MentionAutocomplete.tsx +239 -0
  320. package/src/renderer/components/PermissionPanel.tsx +159 -0
  321. package/src/renderer/components/PermissionSelector.tsx +203 -0
  322. package/src/renderer/components/PluginMarketplace.tsx +325 -0
  323. package/src/renderer/components/PromptOptimizerPanel.tsx +399 -0
  324. package/src/renderer/components/SearchPanel.tsx +173 -0
  325. package/src/renderer/components/SearchReplace.tsx +284 -0
  326. package/src/renderer/components/SessionSidebar.tsx +367 -0
  327. package/src/renderer/components/SettingsPanel.tsx +426 -0
  328. package/src/renderer/components/Sidebar.tsx +100 -0
  329. package/src/renderer/components/SkillsPanel.tsx +245 -0
  330. package/src/renderer/components/SplitPane.tsx +173 -0
  331. package/src/renderer/components/Terminal.tsx +190 -0
  332. package/src/renderer/components/VoiceCommand.tsx +129 -0
  333. package/src/renderer/components/WorktreePanel.tsx +163 -0
  334. package/src/renderer/components/ui/AriaComponents.tsx +193 -0
  335. package/src/renderer/components/ui/Button.tsx +68 -0
  336. package/src/renderer/components/ui/Card.tsx +102 -0
  337. package/src/renderer/components/ui/Input.tsx +44 -0
  338. package/src/renderer/components/ui/Skeleton.tsx +55 -0
  339. package/src/renderer/components/ui/VirtualList.tsx +196 -0
  340. package/src/renderer/i18n/I18nProvider.tsx +101 -0
  341. package/src/renderer/i18n/de.ts +161 -0
  342. package/src/renderer/i18n/en.ts +163 -0
  343. package/src/renderer/i18n/es.ts +161 -0
  344. package/src/renderer/i18n/fr.ts +161 -0
  345. package/src/renderer/i18n/index.ts +44 -0
  346. package/src/renderer/index.css +129 -0
  347. package/src/renderer/lib/accessibility.tsx +287 -0
  348. package/src/renderer/lib/hooks.ts +304 -0
  349. package/src/renderer/lib/utils.ts +6 -0
  350. package/src/renderer/main.tsx +25 -0
  351. package/src/renderer/styles/minimalist.css +539 -0
  352. package/src/renderer/sw.ts +180 -0
  353. package/src/renderer/types.d.ts +138 -0
  354. package/src/shared/types.ts +813 -0
  355. package/supabase/schema.sql +234 -0
  356. package/tailwind.config.js +78 -0
  357. package/tests/e2e/package.json +15 -0
  358. package/tests/e2e/playwright.config.ts +31 -0
  359. package/tests/e2e/specs/app.spec.ts +194 -0
  360. package/tests/setup.ts +99 -0
  361. package/tests/unit/AgentOrchestrator.test.ts +274 -0
  362. package/tests/unit/DatabaseManager.test.ts +262 -0
  363. package/tests/unit/GitWorktreeManager.test.ts +150 -0
  364. package/tests/unit/SecurityManager.test.ts +110 -0
  365. package/tsconfig.main.json +22 -0
  366. package/tsconfig.renderer.json +27 -0
  367. package/vite.config.ts +28 -0
@@ -0,0 +1,665 @@
1
+ import { EventEmitter } from 'events';
2
+ import { v4 as uuidv4 } from 'uuid';
3
+ import log from 'electron-log';
4
+ import { AgentOrchestrator } from '../agents/AgentOrchestrator';
5
+ import { AIProviderManager } from '../providers/AIProviderManager';
6
+ import { Agent, AgentMessage } from '../../shared/types';
7
+ import * as fs from 'fs/promises';
8
+ import * as path from 'path';
9
+
10
+ export enum EvalCategory { // String-valued categories used to classify eval test cases (see EvalTestCase.category); each value is the snake_case form of its member name.
11
+ CODE_GENERATION = 'code_generation',
12
+ CODE_REVIEW = 'code_review',
13
+ REFACTORING = 'refactoring',
14
+ DEBUGGING = 'debugging',
15
+ DOCUMENTATION = 'documentation',
16
+ TESTING = 'testing',
17
+ SECURITY = 'security',
18
+ PERFORMANCE = 'performance',
19
+ }
20
+
21
+ export interface EvalTestCase { // One evaluation scenario: a prompt plus the weighted criteria used to judge the result.
22
+ id: string; // e.g. 'codegen-001' in the built-in datasets
23
+ name: string;
24
+ category: EvalCategory;
25
+ description: string;
26
+ prompt: string; // task instruction text (the built-in cases phrase it as a request such as "Write a TypeScript function ...")
27
+ context?: { // optional supporting material accompanying the prompt
28
+ files?: Array<{
29
+ path: string;
30
+ content: string;
31
+ }>;
32
+ codebase?: string; // NOTE(review): meaning not shown here (a path? inline source?) - confirm
33
+ };
34
+ expectedOutput?: string;
35
+ expectedFiles?: Array<{ // expectations about files by path; NOTE(review): where these are checked is not visible here - confirm
36
+ path: string;
37
+ shouldExist: boolean;
38
+ contentPattern?: RegExp; // optional pattern for the file content; NOTE(review): a RegExp does not survive a JSON round-trip
39
+ }>;
40
+ validationCriteria: EvalCriteria[]; // weighted checks for this test case
41
+ timeout: number; // presumably milliseconds (built-in cases use 60000 and 90000) - TODO confirm
42
+ difficulty: 'easy' | 'medium' | 'hard';
43
+ tags: string[];
44
+ }
45
+
46
+ export interface EvalCriteria { // A single weighted pass/fail check evaluated against an EvalResult.
47
+ id: string;
48
+ name: string;
49
+ description: string;
50
+ weight: number; // relative weight; in each built-in test case the weights add up to 1.0
51
+ validator: (result: EvalResult) => boolean | Promise<boolean>; // may be synchronous or asynchronous, so the return value should be awaited
52
+ }
53
+
54
+ export interface EvalResult { // Outcome record for one test case (testCaseId) and one agent (agentId).
55
+ testCaseId: string;
56
+ agentId: string;
57
+ success: boolean;
58
+ score: number;
59
+ maxScore: number;
60
+ duration: number; // NOTE(review): unit is not shown here (likely ms) - confirm
61
+ messages: AgentMessage[]; // the built-in validators join every message's `content` and regex-match the combined text
62
+ criteriaResults: Array<{ // per-criterion outcomes
63
+ criteriaId: string; // presumably matches EvalCriteria.id - confirm
64
+ passed: boolean;
65
+ score: number;
66
+ details?: string;
67
+ }>;
68
+ errors: string[]; // the built-in 'syntax-valid' criterion passes only when this array is empty
69
+ warnings: string[];
70
+ metadata: { // usage details; every field is optional
71
+ tokensUsed?: number;
72
+ modelUsed?: string;
73
+ cost?: number;
74
+ };
75
+ createdAt: Date;
76
+ }
77
+
78
+ export interface EvalDataset { // A named, versioned collection of eval test cases.
79
+ id: string; // e.g. 'code-generation-v1'; the framework stores datasets in a Map under this id
80
+ name: string;
81
+ description: string;
82
+ version: string; // built-in datasets use '1.0.0'
83
+ testCases: EvalTestCase[];
84
+ createdAt: Date;
85
+ updatedAt: Date;
86
+ }
87
+
88
+ export interface EvalRun { // One execution of a dataset under a given agent configuration, with per-test results and an aggregate summary.
89
+ id: string;
90
+ datasetId: string; // presumably an EvalDataset.id - confirm
91
+ agentConfig: { // provider, model and skills selected for the run
92
+ providerId: string;
93
+ model: string;
94
+ skills: string[];
95
+ };
96
+ results: EvalResult[];
97
+ summary: { // aggregate figures; NOTE(review): how these are derived from `results` is not visible here
98
+ totalTests: number;
99
+ passedTests: number;
100
+ failedTests: number;
101
+ averageScore: number;
102
+ totalDuration: number;
103
+ };
104
+ status: 'pending' | 'running' | 'completed' | 'failed';
105
+ startedAt: Date;
106
+ completedAt?: Date; // optional; presumably unset until the run finishes - confirm
107
+ }
108
+
109
+ export class AgentEvalFramework extends EventEmitter {
110
+ private agentOrchestrator: AgentOrchestrator; // injected through the constructor
111
+ private aiProviderManager: AIProviderManager; // injected through the constructor
112
+ private datasets: Map<string, EvalDataset> = new Map(); // datasets keyed by EvalDataset.id
113
+ private activeRuns: Map<string, EvalRun> = new Map(); // presumably keyed by EvalRun.id - confirm
114
+ private runHistory: EvalRun[] = [];
115
+ private datasetsPath: string; // set from the constructor argument (default './eval-datasets')
116
+
117
+ constructor( // Stores the injected collaborators and the datasets path; nothing is loaded here (see initialize()).
118
+ agentOrchestrator: AgentOrchestrator,
119
+ aiProviderManager: AIProviderManager,
120
+ datasetsPath: string = './eval-datasets' // relative default; NOTE(review): presumably resolved against the process working directory - confirm
121
+ ) {
122
+ super(); // initialise the EventEmitter base class
123
+ this.agentOrchestrator = agentOrchestrator;
124
+ this.aiProviderManager = aiProviderManager;
125
+ this.datasetsPath = datasetsPath;
126
+ }
127
+
128
+ async initialize(): Promise<void> { // Loads the built-in datasets, then the custom ones, then logs that the framework is ready.
129
+ await this.loadDefaultDatasets();
130
+ await this.loadCustomDatasets(); // runs after the defaults; NOTE(review): presumably so custom datasets can replace built-ins with the same id - confirm
131
+ log.info('Agent Evaluation Framework initialized');
132
+ }
133
+
134
+ private async loadDefaultDatasets(): Promise<void> {
135
+ // Code Generation Dataset
136
+ const codeGenDataset: EvalDataset = {
137
+ id: 'code-generation-v1',
138
+ name: 'Code Generation Benchmark',
139
+ description: 'Tests agent ability to generate correct and efficient code',
140
+ version: '1.0.0',
141
+ createdAt: new Date(),
142
+ updatedAt: new Date(),
143
+ testCases: [
144
+ {
145
+ id: 'codegen-001',
146
+ name: 'Generate Fibonacci Function',
147
+ category: EvalCategory.CODE_GENERATION,
148
+ description: 'Generate an efficient Fibonacci function',
149
+ prompt: 'Write a TypeScript function that calculates the nth Fibonacci number efficiently. The function should handle edge cases and large inputs (n up to 1000).',
150
+ expectedFiles: [{
151
+ path: 'fibonacci.ts',
152
+ shouldExist: true,
153
+ contentPattern: /function\s+fibonacci|const\s+fibonacci/,
154
+ }],
155
+ validationCriteria: [
156
+ {
157
+ id: 'syntax-valid',
158
+ name: 'Valid TypeScript Syntax',
159
+ description: 'Generated code must be valid TypeScript',
160
+ weight: 0.2,
161
+ validator: (result) => result.errors.length === 0,
162
+ },
163
+ {
164
+ id: 'efficient',
165
+ name: 'Efficient Implementation',
166
+ description: 'Uses memoization or iterative approach, not naive recursion',
167
+ weight: 0.3,
168
+ validator: (result) => {
169
+ const content = result.messages.map(m => m.content).join('');
170
+ return /memo|cache|iterat|loop|for\s*\(|while\s*\(/.test(content);
171
+ },
172
+ },
173
+ {
174
+ id: 'handles-edge-cases',
175
+ name: 'Edge Case Handling',
176
+ description: 'Handles negative inputs and large numbers',
177
+ weight: 0.3,
178
+ validator: (result) => {
179
+ const content = result.messages.map(m => m.content).join('');
180
+ return /if.*n\s*<\s*0|n\s*===\s*0|n\s*===\s*1|throw|error/i.test(content);
181
+ },
182
+ },
183
+ {
184
+ id: 'includes-tests',
185
+ name: 'Includes Test Cases',
186
+ description: 'Provides example usage or test cases',
187
+ weight: 0.2,
188
+ validator: (result) => {
189
+ const content = result.messages.map(m => m.content).join('');
190
+ return /test|example|console\.log|expect/.test(content);
191
+ },
192
+ },
193
+ ],
194
+ timeout: 60000,
195
+ difficulty: 'medium',
196
+ tags: ['typescript', 'algorithms', 'fibonacci'],
197
+ },
198
+ {
199
+ id: 'codegen-002',
200
+ name: 'API Endpoint Implementation',
201
+ category: EvalCategory.CODE_GENERATION,
202
+ description: 'Generate a REST API endpoint with validation',
203
+ prompt: 'Create an Express.js POST endpoint for creating a user. Include input validation, error handling, and proper response formatting.',
204
+ validationCriteria: [
205
+ {
206
+ id: 'express-route',
207
+ name: 'Express Route Definition',
208
+ description: 'Uses app.post() or router.post()',
209
+ weight: 0.2,
210
+ validator: (result) => {
211
+ const content = result.messages.map(m => m.content).join('');
212
+ return /app\.post|router\.post/.test(content);
213
+ },
214
+ },
215
+ {
216
+ id: 'validation',
217
+ name: 'Input Validation',
218
+ description: 'Validates request body',
219
+ weight: 0.3,
220
+ validator: (result) => {
221
+ const content = result.messages.map(m => m.content).join('');
222
+ return /validate|joi|zod|express-validator|req\.body/.test(content);
223
+ },
224
+ },
225
+ {
226
+ id: 'error-handling',
227
+ name: 'Error Handling',
228
+ description: 'Includes try-catch or error middleware',
229
+ weight: 0.3,
230
+ validator: (result) => {
231
+ const content = result.messages.map(m => m.content).join('');
232
+ return /try\s*{|catch|error|Error/.test(content);
233
+ },
234
+ },
235
+ {
236
+ id: 'response-format',
237
+ name: 'Proper Response Format',
238
+ description: 'Returns JSON with status codes',
239
+ weight: 0.2,
240
+ validator: (result) => {
241
+ const content = result.messages.map(m => m.content).join('');
242
+ return /res\.json|res\.status|200|201|400|500/.test(content);
243
+ },
244
+ },
245
+ ],
246
+ timeout: 90000,
247
+ difficulty: 'medium',
248
+ tags: ['express', 'api', 'validation', 'javascript'],
249
+ },
250
+ ],
251
+ };
252
+
253
+ // Code Review Dataset
254
+ const codeReviewDataset: EvalDataset = {
255
+ id: 'code-review-v1',
256
+ name: 'Code Review Benchmark',
257
+ description: 'Tests agent ability to identify issues and suggest improvements',
258
+ version: '1.0.0',
259
+ createdAt: new Date(),
260
+ updatedAt: new Date(),
261
+ testCases: [
262
+ {
263
+ id: 'review-001',
264
+ name: 'Security Issues Detection',
265
+ category: EvalCategory.CODE_REVIEW,
266
+ description: 'Identify security vulnerabilities in code',
267
+ prompt: 'Review this code for security issues:\n\n```javascript\nfunction authenticateUser(username, password) {\n const query = `SELECT * FROM users WHERE username = \'${username}\' AND password = \'${password}\'`;\n return db.query(query);\n}\n```\n\nIdentify all security issues and suggest fixes.',
268
+ context: {
269
+ files: [{
270
+ path: 'auth.js',
271
+ content: `function authenticateUser(username, password) {
272
+ const query = \`SELECT * FROM users WHERE username = '\${username}' AND password = '\${password}'\`;
273
+ return db.query(query);
274
+ }`,
275
+ }],
276
+ },
277
+ validationCriteria: [
278
+ {
279
+ id: 'sql-injection',
280
+ name: 'SQL Injection Detection',
281
+ description: 'Identifies SQL injection vulnerability',
282
+ weight: 0.4,
283
+ validator: (result) => {
284
+ const content = result.messages.map(m => m.content).join('').toLowerCase();
285
+ return /sql injection|sqli|parameterized|prepared statement/.test(content);
286
+ },
287
+ },
288
+ {
289
+ id: 'plaintext-password',
290
+ name: 'Plaintext Password Detection',
291
+ description: 'Identifies plaintext password storage',
292
+ weight: 0.3,
293
+ validator: (result) => {
294
+ const content = result.messages.map(m => m.content).join('').toLowerCase();
295
+ return /plaintext|hash|bcrypt|scrypt|argon2/.test(content);
296
+ },
297
+ },
298
+ {
299
+ id: 'provides-fix',
300
+ name: 'Provides Fix',
301
+ description: 'Provides corrected code',
302
+ weight: 0.3,
303
+ validator: (result) => {
304
+ const content = result.messages.map(m => m.content).join('');
305
+ return /```[\s\S]*?```/.test(content);
306
+ },
307
+ },
308
+ ],
309
+ timeout: 60000,
310
+ difficulty: 'easy',
311
+ tags: ['security', 'sql-injection', 'review'],
312
+ },
313
+ ],
314
+ };
315
+
316
+ // Refactoring Dataset
317
+ const refactoringDataset: EvalDataset = {
318
+ id: 'refactoring-v1',
319
+ name: 'Refactoring Benchmark',
320
+ description: 'Tests agent ability to improve code quality',
321
+ version: '1.0.0',
322
+ createdAt: new Date(),
323
+ updatedAt: new Date(),
324
+ testCases: [
325
+ {
326
+ id: 'refactor-001',
327
+ name: 'Extract Function Refactoring',
328
+ category: EvalCategory.REFACTORING,
329
+ description: 'Refactor duplicated code into a function',
330
+ prompt: 'Refactor this code to eliminate duplication:\n\n```typescript\nfunction processOrders(orders: Order[]) {\n for (const order of orders) {\n if (order.status === "pending") {\n const tax = order.amount * 0.1;\n const total = order.amount + tax;\n order.total = total;\n order.status = "processed";\n }\n }\n \n for (const order of orders) {\n if (order.priority === "high") {\n const tax = order.amount * 0.1;\n const total = order.amount + tax;\n order.total = total;\n order.priority = "processed";\n }\n }\n}\n```',
331
+ validationCriteria: [
332
+ {
333
+ id: 'extracts-function',
334
+ name: 'Extracts Helper Function',
335
+ description: 'Creates a separate function for tax calculation',
336
+ weight: 0.4,
337
+ validator: (result) => {
338
+ const content = result.messages.map(m => m.content).join('');
339
+ return /function\s+\w+.*tax|calculateTax|computeTax/.test(content);
340
+ },
341
+ },
342
+ {
343
+ id: 'removes-duplication',
344
+ name: 'Removes Code Duplication',
345
+ description: 'Tax calculation appears only once',
346
+ weight: 0.4,
347
+ validator: (result) => {
348
+ const content = result.messages.map(m => m.content).join('');
349
+ const matches = content.match(/\*\s*0\.1/g);
350
+ return !matches || matches.length <= 1;
351
+ },
352
+ },
353
+ {
354
+ id: 'maintains-functionality',
355
+ name: 'Maintains Functionality',
356
+ description: 'Refactored code produces same results',
357
+ weight: 0.2,
358
+ validator: () => true, // Would require actual execution
359
+ },
360
+ ],
361
+ timeout: 90000,
362
+ difficulty: 'medium',
363
+ tags: ['refactoring', 'dry', 'typescript'],
364
+ },
365
+ ],
366
+ };
367
+
368
+ this.datasets.set(codeGenDataset.id, codeGenDataset);
369
+ this.datasets.set(codeReviewDataset.id, codeReviewDataset);
370
+ this.datasets.set(refactoringDataset.id, refactoringDataset);
371
+
372
+ log.info(`Loaded ${this.datasets.size} default evaluation datasets`);
373
+ }
374
+
375
/**
 * Loads user-provided datasets from `this.datasetsPath` into `this.datasets`.
 *
 * The directory is created if it does not exist. Every `*.json` file in it is
 * parsed and registered under its `id`, replacing any dataset already
 * registered with the same id. A file that cannot be read, parsed, or that
 * lacks the minimal dataset shape is logged and skipped so that one bad file
 * does not prevent the remaining files from loading.
 *
 * This method never rejects: directory-level failures are logged as well.
 *
 * NOTE(review): JSON cannot carry functions, so criteria loaded this way have
 * no callable `validator`; confirm how custom datasets are expected to be scored.
 */
private async loadCustomDatasets(): Promise<void> {
  try {
    await fs.mkdir(this.datasetsPath, { recursive: true });
    const files = await fs.readdir(this.datasetsPath);

    for (const file of files) {
      if (!file.endsWith('.json')) {
        continue;
      }

      try {
        const content = await fs.readFile(
          path.join(this.datasetsPath, file),
          'utf-8'
        );
        const dataset: EvalDataset = JSON.parse(content);

        // Reject payloads that would otherwise be registered under an
        // `undefined` key or crash later when their test cases are iterated.
        if (
          !dataset ||
          typeof dataset.id !== 'string' ||
          dataset.id.length === 0 ||
          !Array.isArray(dataset.testCases)
        ) {
          throw new Error('Dataset must have a non-empty string "id" and a "testCases" array');
        }

        this.datasets.set(dataset.id, dataset);
        log.info(`Loaded custom dataset: ${dataset.name}`);
      } catch (error) {
        log.error(`Failed to load dataset ${file}:`, error);
      }
    }
  } catch (error) {
    log.error('Failed to load custom datasets:', error);
  }
}
399
+
400
/**
 * Creates an evaluation run for a dataset and starts executing it in the
 * background.
 *
 * The returned run object is registered in `this.activeRuns` and is returned
 * immediately with status `'pending'`; it is mutated in place as execution
 * progresses. Progress is reported through the `run:*` events.
 *
 * @param datasetId - Id of a dataset previously registered in `this.datasets`.
 * @param agentConfig - Agent configuration stored on the run and used for each test.
 * @param options - Optional settings: `testCaseIds` restricts the run to the
 *   listed test cases; `parallel` and `maxParallel` are forwarded to the executor.
 * @returns The newly created run.
 * @throws Error when no dataset with `datasetId` is registered.
 */
async createRun(
  datasetId: string,
  agentConfig: EvalRun['agentConfig'],
  options: {
    testCaseIds?: string[];
    parallel?: boolean;
    maxParallel?: number;
  } = {}
): Promise<EvalRun> {
  const dataset = this.datasets.get(datasetId);
  if (!dataset) {
    throw new Error(`Dataset ${datasetId} not found`);
  }

  const runId = uuidv4();

  // Build the lookup once instead of scanning the id list for every test case.
  const selectedIds = options.testCaseIds ? new Set(options.testCaseIds) : null;
  const testCases = selectedIds
    ? dataset.testCases.filter(tc => selectedIds.has(tc.id))
    : dataset.testCases;

  const run: EvalRun = {
    id: runId,
    datasetId,
    agentConfig,
    results: [],
    summary: {
      totalTests: testCases.length,
      passedTests: 0,
      failedTests: 0,
      averageScore: 0,
      totalDuration: 0,
    },
    status: 'pending',
    startedAt: new Date(),
  };

  this.activeRuns.set(runId, run);
  this.emit('run:created', { runId, datasetId });

  // Start evaluation without awaiting it; failures are handled here so the
  // promise never floats unhandled.
  this.executeRun(run, testCases, options).catch(error => {
    log.error(`Eval run ${runId} failed:`, error);
    run.status = 'failed';
    run.completedAt = new Date();

    // A failed run is finished: move it out of the active set so it does not
    // linger there forever. `delete` returns false when the run was already
    // archived (or cleared), which prevents a duplicate history entry.
    if (this.activeRuns.delete(runId)) {
      this.runHistory.push(run);
    }

    this.emit('run:failed', { runId, error });
  });

  return run;
}
447
+
448
/**
 * Executes every test case of a run, then finalizes the run's summary.
 *
 * With `options.parallel` the test cases are processed in consecutive batches
 * of at most `maxParallel` (default 3); each batch must finish before the next
 * one starts. Otherwise test cases run one at a time. Afterwards the summary
 * is computed, the run is marked `'completed'`, moved from `this.activeRuns`
 * to `this.runHistory`, and `run:completed` is emitted.
 *
 * @param run - Run to execute; mutated in place.
 * @param testCases - Test cases to execute for this run.
 * @param options - Execution mode and batch size.
 */
private async executeRun(
  run: EvalRun,
  testCases: EvalTestCase[],
  options: { parallel?: boolean; maxParallel?: number }
): Promise<void> {
  run.status = 'running';
  this.emit('run:started', { runId: run.id });

  const startTime = Date.now();

  if (options.parallel) {
    // Only a batch size of at least 1 is usable: zero, negative, or NaN values
    // would make chunking loop forever or drop work, so they fall back to the
    // default. Fractions are floored so a batch never exceeds the limit.
    const requested = options.maxParallel;
    const limit = typeof requested === 'number' && requested >= 1
      ? Math.floor(requested)
      : 3;

    for (const chunk of this.chunkArray(testCases, limit)) {
      await Promise.all(
        chunk.map(tc => this.executeTestCase(run, tc))
      );
    }
  } else {
    // Execute sequentially
    for (const testCase of testCases) {
      await this.executeTestCase(run, testCase);
    }
  }

  // Calculate summary
  run.summary.totalDuration = Date.now() - startTime;
  run.summary.passedTests = run.results.filter(r => r.success).length;
  run.summary.failedTests = run.results.filter(r => !r.success).length;

  // A result whose criteria weights sum to 0 contributes 0 instead of 0/0,
  // which would otherwise turn the whole average into NaN.
  const normalizedTotal = run.results.reduce(
    (sum, r) => sum + (r.maxScore > 0 ? r.score / r.maxScore : 0),
    0
  );
  run.summary.averageScore = run.results.length > 0
    ? normalizedTotal / run.results.length * 100
    : 0;

  run.status = 'completed';
  run.completedAt = new Date();

  this.runHistory.push(run);
  this.activeRuns.delete(run.id);

  this.emit('run:completed', { runId: run.id, summary: run.summary });
  log.info(`Eval run ${run.id} completed:`, run.summary);
}
492
+
493
/**
 * Runs a single test case against a temporary agent and scores the response.
 *
 * A fresh agent is created from the run's agent configuration, the test prompt
 * is sent, and the reply is raced against `testCase.timeout` milliseconds.
 * Each validation criterion is then evaluated independently: a passing
 * criterion adds its weight to the score, and a criterion whose validator
 * throws is recorded as failed without aborting the others. The test passes
 * when the score reaches 60% of the summed weights.
 *
 * Execution failures are captured in `result.errors` rather than thrown. The
 * timeout timer is always cleared and the temporary agent is always deleted,
 * including when the prompt times out or fails; a failed deletion is recorded
 * in `result.warnings` so it does not mask the test outcome.
 *
 * @param run - Owning run; the result is appended to `run.results`.
 * @param testCase - Test case to execute.
 * @returns The recorded result, also emitted with `test:completed`.
 */
private async executeTestCase(
  run: EvalRun,
  testCase: EvalTestCase
): Promise<EvalResult> {
  const startTime = Date.now();
  const result: EvalResult = {
    testCaseId: testCase.id,
    agentId: '',
    success: false,
    score: 0,
    maxScore: testCase.validationCriteria.reduce((sum, c) => sum + c.weight, 0),
    duration: 0,
    messages: [],
    criteriaResults: [],
    errors: [],
    warnings: [],
    metadata: {},
    createdAt: new Date(),
  };

  // Tracked outside the try block so the finally block can release them.
  let createdAgentId: string | undefined;
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;

  try {
    // Create temporary agent for evaluation
    const agent = await this.agentOrchestrator.createAgent({
      name: `Eval-${testCase.name}`,
      projectPath: `/tmp/eval-${testCase.id}`,
      providerId: run.agentConfig.providerId,
      model: run.agentConfig.model,
      skills: run.agentConfig.skills,
    });

    createdAgentId = agent.id;
    result.agentId = agent.id;

    // Execute prompt, bounded by the test case timeout
    const timeoutPromise = new Promise((_, reject) => {
      timeoutHandle = setTimeout(() => reject(new Error('Test timeout')), testCase.timeout);
    });

    const messagePromise = this.agentOrchestrator.sendMessage(
      agent.id,
      testCase.prompt
    );

    const response = await Promise.race([messagePromise, timeoutPromise]) as AgentMessage;
    result.messages = [response];

    // Evaluate criteria
    for (const criteria of testCase.validationCriteria) {
      try {
        const passed = await criteria.validator(result);
        const criteriaResult = {
          criteriaId: criteria.id,
          passed: !!passed,
          score: passed ? criteria.weight : 0,
          details: passed ? 'Passed' : 'Failed',
        };
        result.criteriaResults.push(criteriaResult);
        result.score += criteriaResult.score;
      } catch (error) {
        result.criteriaResults.push({
          criteriaId: criteria.id,
          passed: false,
          score: 0,
          details: `Validation error: ${error}`,
        });
      }
    }

    // Check success
    result.success = result.score >= result.maxScore * 0.6; // 60% to pass
    result.duration = Date.now() - startTime;
  } catch (error) {
    result.errors.push(`Test execution failed: ${error}`);
    result.duration = Date.now() - startTime;
  } finally {
    // Clearing an unset handle is a no-op, so no guard is needed. Without
    // this the timer would stay pending for the full timeout after a reply.
    clearTimeout(timeoutHandle);

    // Cleanup runs on every path so timed-out or failed tests do not leak
    // their temporary agent.
    if (createdAgentId !== undefined) {
      try {
        await this.agentOrchestrator.deleteAgent(createdAgentId);
      } catch (error) {
        result.warnings.push(`Agent cleanup failed: ${error}`);
      }
    }
  }

  run.results.push(result);
  this.emit('test:completed', { runId: run.id, testCaseId: testCase.id, result });

  return result;
}
577
+
578
/**
 * Splits an array into consecutive chunks of at most `size` elements.
 *
 * The input array is not modified; the last chunk may be shorter than the
 * others. An empty input yields an empty list of chunks.
 *
 * @param array - Items to split.
 * @param size - Maximum chunk length; must be at least 1. Fractional values
 *   are floored so that no chunk exceeds the requested size.
 * @returns The chunks, in original order.
 * @throws RangeError when `size` is NaN or smaller than 1, because such a
 *   step would never advance through the array (or would silently drop items).
 */
private chunkArray<T>(array: T[], size: number): T[][] {
  // `!(size >= 1)` is also true for NaN, which a plain `size < 1` would miss.
  if (!(size >= 1)) {
    throw new RangeError(`Chunk size must be at least 1, received ${size}`);
  }

  const step = Math.floor(size);
  const chunks: T[][] = [];
  for (let start = 0; start < array.length; start += step) {
    chunks.push(array.slice(start, start + step));
  }
  return chunks;
}
585
+
586
/**
 * Lists every dataset currently registered in `this.datasets`.
 *
 * @returns A newly allocated array of the registered datasets.
 */
getDatasets(): EvalDataset[] {
  const registered = Array.from(this.datasets.values());
  return registered;
}
589
+
590
/**
 * Looks up a single registered dataset.
 *
 * @param datasetId - Id the dataset was registered under.
 * @returns The matching dataset, or `undefined` when none is registered.
 */
getDataset(datasetId: string): EvalDataset | undefined {
  const match = this.datasets.get(datasetId);
  return match;
}
593
+
594
/**
 * Lists the runs currently held in `this.activeRuns`.
 *
 * @returns A newly allocated array of the active runs.
 */
getActiveRuns(): EvalRun[] {
  const inFlight = Array.from(this.activeRuns.values());
  return inFlight;
}
597
+
598
/**
 * Exposes the list of archived runs.
 *
 * @returns The internal `runHistory` array itself (not a copy), so changes
 *   made by the caller are visible to this instance.
 */
getRunHistory(): EvalRun[] {
  const { runHistory } = this;
  return runHistory;
}
601
+
602
/**
 * Registers a dataset in memory and persists it as pretty-printed JSON.
 *
 * The dataset is stored under its `id` in `this.datasets` first, then written
 * to `<datasetsPath>/<id>.json`, creating the directory when needed.
 *
 * @param dataset - Dataset to register and write to disk.
 * @returns A promise that resolves once the file is written; rejects if the
 *   directory cannot be created or the file cannot be written.
 */
async saveDataset(dataset: EvalDataset): Promise<void> {
  this.datasets.set(dataset.id, dataset);

  // Persist alongside the other custom datasets.
  const targetFile = path.join(this.datasetsPath, `${dataset.id}.json`);
  await fs.mkdir(this.datasetsPath, { recursive: true });
  await fs.writeFile(targetFile, JSON.stringify(dataset, null, 2));

  log.info(`Saved dataset: ${dataset.name}`);
}
612
+
613
/**
 * Renders a Markdown report for a run.
 *
 * The run is looked up among the active runs first and then in the run
 * history. The report contains a header, a summary section, and one section
 * per recorded result listing status, score, duration, errors, and the
 * outcome of each criterion. Test case and criterion names come from the
 * run's dataset when it is still registered; otherwise their ids are shown.
 *
 * Percentages whose denominator is 0 (a run with no tests, or a result whose
 * criteria weights sum to 0) are shown as `0.0%` rather than `NaN%`.
 *
 * @param runId - Id of the run to report on.
 * @returns The report as a Markdown string.
 * @throws Error when no run with `runId` is found.
 */
generateReport(runId: string): string {
  const run = this.activeRuns.get(runId) || this.runHistory.find(r => r.id === runId);
  if (!run) {
    throw new Error(`Run ${runId} not found`);
  }

  const dataset = this.datasets.get(run.datasetId);

  // Formats part/whole as a one-decimal percentage, treating an empty whole as 0.
  const percent = (part: number, whole: number): string =>
    (whole > 0 ? (part / whole) * 100 : 0).toFixed(1);

  const { summary } = run;
  const averageScore = Number.isFinite(summary.averageScore) ? summary.averageScore : 0;

  let report = `# Evaluation Report\n\n`;
  report += `**Dataset:** ${dataset?.name || run.datasetId}\n`;
  report += `**Agent:** ${run.agentConfig.providerId} - ${run.agentConfig.model}\n`;
  report += `**Date:** ${run.startedAt.toISOString()}\n\n`;

  report += `## Summary\n\n`;
  report += `- **Total Tests:** ${summary.totalTests}\n`;
  report += `- **Passed:** ${summary.passedTests} (${percent(summary.passedTests, summary.totalTests)}%)\n`;
  report += `- **Failed:** ${summary.failedTests}\n`;
  report += `- **Average Score:** ${averageScore.toFixed(1)}%\n`;
  report += `- **Total Duration:** ${(summary.totalDuration / 1000).toFixed(2)}s\n\n`;

  report += `## Detailed Results\n\n`;

  for (const result of run.results) {
    const testCase = dataset?.testCases.find(tc => tc.id === result.testCaseId);
    report += `### ${testCase?.name || result.testCaseId}\n\n`;
    report += `- **Status:** ${result.success ? '✅ PASSED' : '❌ FAILED'}\n`;
    report += `- **Score:** ${percent(result.score, result.maxScore)}%\n`;
    report += `- **Duration:** ${(result.duration / 1000).toFixed(2)}s\n\n`;

    if (result.errors.length > 0) {
      report += `**Errors:**\n`;
      for (const message of result.errors) {
        report += `- ${message}\n`;
      }
      report += '\n';
    }

    report += `**Criteria Results:**\n`;
    for (const criteriaResult of result.criteriaResults) {
      const criteria = testCase?.validationCriteria.find(c => c.id === criteriaResult.criteriaId);
      report += `- ${criteriaResult.passed ? '✅' : '❌'} ${criteria?.name || criteriaResult.criteriaId} (${criteriaResult.score.toFixed(2)})\n`;
    }
    report += '\n---\n\n';
  }

  return report;
}
658
+
659
/**
 * Releases the framework's in-memory state: forgets every active run and
 * detaches all event listeners registered on this instance.
 *
 * Runs that are still executing are not cancelled here; only the bookkeeping
 * entries are dropped.
 */
cleanup(): void {
  const { activeRuns } = this;
  activeRuns.clear();
  this.removeAllListeners();
}
663
+ }
664
+
665
+ export default AgentEvalFramework;