PraisonAI 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393)
  1. praisonai/__init__.py +54 -0
  2. praisonai/__main__.py +15 -0
  3. praisonai/acp/__init__.py +54 -0
  4. praisonai/acp/config.py +159 -0
  5. praisonai/acp/server.py +587 -0
  6. praisonai/acp/session.py +219 -0
  7. praisonai/adapters/__init__.py +50 -0
  8. praisonai/adapters/readers.py +395 -0
  9. praisonai/adapters/rerankers.py +315 -0
  10. praisonai/adapters/retrievers.py +394 -0
  11. praisonai/adapters/vector_stores.py +409 -0
  12. praisonai/agent_scheduler.py +337 -0
  13. praisonai/agents_generator.py +903 -0
  14. praisonai/api/call.py +292 -0
  15. praisonai/auto.py +1197 -0
  16. praisonai/capabilities/__init__.py +275 -0
  17. praisonai/capabilities/a2a.py +140 -0
  18. praisonai/capabilities/assistants.py +283 -0
  19. praisonai/capabilities/audio.py +320 -0
  20. praisonai/capabilities/batches.py +469 -0
  21. praisonai/capabilities/completions.py +336 -0
  22. praisonai/capabilities/container_files.py +155 -0
  23. praisonai/capabilities/containers.py +93 -0
  24. praisonai/capabilities/embeddings.py +158 -0
  25. praisonai/capabilities/files.py +467 -0
  26. praisonai/capabilities/fine_tuning.py +293 -0
  27. praisonai/capabilities/guardrails.py +182 -0
  28. praisonai/capabilities/images.py +330 -0
  29. praisonai/capabilities/mcp.py +190 -0
  30. praisonai/capabilities/messages.py +270 -0
  31. praisonai/capabilities/moderations.py +154 -0
  32. praisonai/capabilities/ocr.py +217 -0
  33. praisonai/capabilities/passthrough.py +204 -0
  34. praisonai/capabilities/rag.py +207 -0
  35. praisonai/capabilities/realtime.py +160 -0
  36. praisonai/capabilities/rerank.py +165 -0
  37. praisonai/capabilities/responses.py +266 -0
  38. praisonai/capabilities/search.py +109 -0
  39. praisonai/capabilities/skills.py +133 -0
  40. praisonai/capabilities/vector_store_files.py +334 -0
  41. praisonai/capabilities/vector_stores.py +304 -0
  42. praisonai/capabilities/videos.py +141 -0
  43. praisonai/chainlit_ui.py +304 -0
  44. praisonai/chat/__init__.py +106 -0
  45. praisonai/chat/app.py +125 -0
  46. praisonai/cli/__init__.py +26 -0
  47. praisonai/cli/app.py +213 -0
  48. praisonai/cli/commands/__init__.py +75 -0
  49. praisonai/cli/commands/acp.py +70 -0
  50. praisonai/cli/commands/completion.py +333 -0
  51. praisonai/cli/commands/config.py +166 -0
  52. praisonai/cli/commands/debug.py +142 -0
  53. praisonai/cli/commands/diag.py +55 -0
  54. praisonai/cli/commands/doctor.py +166 -0
  55. praisonai/cli/commands/environment.py +179 -0
  56. praisonai/cli/commands/lsp.py +112 -0
  57. praisonai/cli/commands/mcp.py +210 -0
  58. praisonai/cli/commands/profile.py +457 -0
  59. praisonai/cli/commands/run.py +228 -0
  60. praisonai/cli/commands/schedule.py +150 -0
  61. praisonai/cli/commands/serve.py +97 -0
  62. praisonai/cli/commands/session.py +212 -0
  63. praisonai/cli/commands/traces.py +145 -0
  64. praisonai/cli/commands/version.py +101 -0
  65. praisonai/cli/configuration/__init__.py +18 -0
  66. praisonai/cli/configuration/loader.py +353 -0
  67. praisonai/cli/configuration/paths.py +114 -0
  68. praisonai/cli/configuration/schema.py +164 -0
  69. praisonai/cli/features/__init__.py +268 -0
  70. praisonai/cli/features/acp.py +236 -0
  71. praisonai/cli/features/action_orchestrator.py +546 -0
  72. praisonai/cli/features/agent_scheduler.py +773 -0
  73. praisonai/cli/features/agent_tools.py +474 -0
  74. praisonai/cli/features/agents.py +375 -0
  75. praisonai/cli/features/at_mentions.py +471 -0
  76. praisonai/cli/features/auto_memory.py +182 -0
  77. praisonai/cli/features/autonomy_mode.py +490 -0
  78. praisonai/cli/features/background.py +356 -0
  79. praisonai/cli/features/base.py +168 -0
  80. praisonai/cli/features/capabilities.py +1326 -0
  81. praisonai/cli/features/checkpoints.py +338 -0
  82. praisonai/cli/features/code_intelligence.py +652 -0
  83. praisonai/cli/features/compaction.py +294 -0
  84. praisonai/cli/features/compare.py +534 -0
  85. praisonai/cli/features/cost_tracker.py +514 -0
  86. praisonai/cli/features/debug.py +810 -0
  87. praisonai/cli/features/deploy.py +517 -0
  88. praisonai/cli/features/diag.py +289 -0
  89. praisonai/cli/features/doctor/__init__.py +63 -0
  90. praisonai/cli/features/doctor/checks/__init__.py +24 -0
  91. praisonai/cli/features/doctor/checks/acp_checks.py +240 -0
  92. praisonai/cli/features/doctor/checks/config_checks.py +366 -0
  93. praisonai/cli/features/doctor/checks/db_checks.py +366 -0
  94. praisonai/cli/features/doctor/checks/env_checks.py +543 -0
  95. praisonai/cli/features/doctor/checks/lsp_checks.py +199 -0
  96. praisonai/cli/features/doctor/checks/mcp_checks.py +349 -0
  97. praisonai/cli/features/doctor/checks/memory_checks.py +268 -0
  98. praisonai/cli/features/doctor/checks/network_checks.py +251 -0
  99. praisonai/cli/features/doctor/checks/obs_checks.py +328 -0
  100. praisonai/cli/features/doctor/checks/performance_checks.py +235 -0
  101. praisonai/cli/features/doctor/checks/permissions_checks.py +259 -0
  102. praisonai/cli/features/doctor/checks/selftest_checks.py +322 -0
  103. praisonai/cli/features/doctor/checks/serve_checks.py +426 -0
  104. praisonai/cli/features/doctor/checks/skills_checks.py +231 -0
  105. praisonai/cli/features/doctor/checks/tools_checks.py +371 -0
  106. praisonai/cli/features/doctor/engine.py +266 -0
  107. praisonai/cli/features/doctor/formatters.py +310 -0
  108. praisonai/cli/features/doctor/handler.py +397 -0
  109. praisonai/cli/features/doctor/models.py +264 -0
  110. praisonai/cli/features/doctor/registry.py +239 -0
  111. praisonai/cli/features/endpoints.py +1019 -0
  112. praisonai/cli/features/eval.py +560 -0
  113. praisonai/cli/features/external_agents.py +231 -0
  114. praisonai/cli/features/fast_context.py +410 -0
  115. praisonai/cli/features/flow_display.py +566 -0
  116. praisonai/cli/features/git_integration.py +651 -0
  117. praisonai/cli/features/guardrail.py +171 -0
  118. praisonai/cli/features/handoff.py +185 -0
  119. praisonai/cli/features/hooks.py +583 -0
  120. praisonai/cli/features/image.py +384 -0
  121. praisonai/cli/features/interactive_runtime.py +585 -0
  122. praisonai/cli/features/interactive_tools.py +380 -0
  123. praisonai/cli/features/interactive_tui.py +603 -0
  124. praisonai/cli/features/jobs.py +632 -0
  125. praisonai/cli/features/knowledge.py +531 -0
  126. praisonai/cli/features/lite.py +244 -0
  127. praisonai/cli/features/lsp_cli.py +225 -0
  128. praisonai/cli/features/mcp.py +169 -0
  129. praisonai/cli/features/message_queue.py +587 -0
  130. praisonai/cli/features/metrics.py +211 -0
  131. praisonai/cli/features/n8n.py +673 -0
  132. praisonai/cli/features/observability.py +293 -0
  133. praisonai/cli/features/ollama.py +361 -0
  134. praisonai/cli/features/output_style.py +273 -0
  135. praisonai/cli/features/package.py +631 -0
  136. praisonai/cli/features/performance.py +308 -0
  137. praisonai/cli/features/persistence.py +636 -0
  138. praisonai/cli/features/profile.py +226 -0
  139. praisonai/cli/features/profiler/__init__.py +81 -0
  140. praisonai/cli/features/profiler/core.py +558 -0
  141. praisonai/cli/features/profiler/optimizations.py +652 -0
  142. praisonai/cli/features/profiler/suite.py +386 -0
  143. praisonai/cli/features/profiling.py +350 -0
  144. praisonai/cli/features/queue/__init__.py +73 -0
  145. praisonai/cli/features/queue/manager.py +395 -0
  146. praisonai/cli/features/queue/models.py +286 -0
  147. praisonai/cli/features/queue/persistence.py +564 -0
  148. praisonai/cli/features/queue/scheduler.py +484 -0
  149. praisonai/cli/features/queue/worker.py +372 -0
  150. praisonai/cli/features/recipe.py +1723 -0
  151. praisonai/cli/features/recipes.py +449 -0
  152. praisonai/cli/features/registry.py +229 -0
  153. praisonai/cli/features/repo_map.py +860 -0
  154. praisonai/cli/features/router.py +466 -0
  155. praisonai/cli/features/sandbox_executor.py +515 -0
  156. praisonai/cli/features/serve.py +829 -0
  157. praisonai/cli/features/session.py +222 -0
  158. praisonai/cli/features/skills.py +856 -0
  159. praisonai/cli/features/slash_commands.py +650 -0
  160. praisonai/cli/features/telemetry.py +179 -0
  161. praisonai/cli/features/templates.py +1384 -0
  162. praisonai/cli/features/thinking.py +305 -0
  163. praisonai/cli/features/todo.py +334 -0
  164. praisonai/cli/features/tools.py +680 -0
  165. praisonai/cli/features/tui/__init__.py +83 -0
  166. praisonai/cli/features/tui/app.py +580 -0
  167. praisonai/cli/features/tui/cli.py +566 -0
  168. praisonai/cli/features/tui/debug.py +511 -0
  169. praisonai/cli/features/tui/events.py +99 -0
  170. praisonai/cli/features/tui/mock_provider.py +328 -0
  171. praisonai/cli/features/tui/orchestrator.py +652 -0
  172. praisonai/cli/features/tui/screens/__init__.py +50 -0
  173. praisonai/cli/features/tui/screens/main.py +245 -0
  174. praisonai/cli/features/tui/screens/queue.py +174 -0
  175. praisonai/cli/features/tui/screens/session.py +124 -0
  176. praisonai/cli/features/tui/screens/settings.py +148 -0
  177. praisonai/cli/features/tui/widgets/__init__.py +56 -0
  178. praisonai/cli/features/tui/widgets/chat.py +261 -0
  179. praisonai/cli/features/tui/widgets/composer.py +224 -0
  180. praisonai/cli/features/tui/widgets/queue_panel.py +200 -0
  181. praisonai/cli/features/tui/widgets/status.py +167 -0
  182. praisonai/cli/features/tui/widgets/tool_panel.py +248 -0
  183. praisonai/cli/features/workflow.py +720 -0
  184. praisonai/cli/legacy.py +236 -0
  185. praisonai/cli/main.py +5559 -0
  186. praisonai/cli/schedule_cli.py +54 -0
  187. praisonai/cli/state/__init__.py +31 -0
  188. praisonai/cli/state/identifiers.py +161 -0
  189. praisonai/cli/state/sessions.py +313 -0
  190. praisonai/code/__init__.py +93 -0
  191. praisonai/code/agent_tools.py +344 -0
  192. praisonai/code/diff/__init__.py +21 -0
  193. praisonai/code/diff/diff_strategy.py +432 -0
  194. praisonai/code/tools/__init__.py +27 -0
  195. praisonai/code/tools/apply_diff.py +221 -0
  196. praisonai/code/tools/execute_command.py +275 -0
  197. praisonai/code/tools/list_files.py +274 -0
  198. praisonai/code/tools/read_file.py +206 -0
  199. praisonai/code/tools/search_replace.py +248 -0
  200. praisonai/code/tools/write_file.py +217 -0
  201. praisonai/code/utils/__init__.py +46 -0
  202. praisonai/code/utils/file_utils.py +307 -0
  203. praisonai/code/utils/ignore_utils.py +308 -0
  204. praisonai/code/utils/text_utils.py +276 -0
  205. praisonai/db/__init__.py +64 -0
  206. praisonai/db/adapter.py +531 -0
  207. praisonai/deploy/__init__.py +62 -0
  208. praisonai/deploy/api.py +231 -0
  209. praisonai/deploy/docker.py +454 -0
  210. praisonai/deploy/doctor.py +367 -0
  211. praisonai/deploy/main.py +327 -0
  212. praisonai/deploy/models.py +179 -0
  213. praisonai/deploy/providers/__init__.py +33 -0
  214. praisonai/deploy/providers/aws.py +331 -0
  215. praisonai/deploy/providers/azure.py +358 -0
  216. praisonai/deploy/providers/base.py +101 -0
  217. praisonai/deploy/providers/gcp.py +314 -0
  218. praisonai/deploy/schema.py +208 -0
  219. praisonai/deploy.py +185 -0
  220. praisonai/endpoints/__init__.py +53 -0
  221. praisonai/endpoints/a2u_server.py +410 -0
  222. praisonai/endpoints/discovery.py +165 -0
  223. praisonai/endpoints/providers/__init__.py +28 -0
  224. praisonai/endpoints/providers/a2a.py +253 -0
  225. praisonai/endpoints/providers/a2u.py +208 -0
  226. praisonai/endpoints/providers/agents_api.py +171 -0
  227. praisonai/endpoints/providers/base.py +231 -0
  228. praisonai/endpoints/providers/mcp.py +263 -0
  229. praisonai/endpoints/providers/recipe.py +206 -0
  230. praisonai/endpoints/providers/tools_mcp.py +150 -0
  231. praisonai/endpoints/registry.py +131 -0
  232. praisonai/endpoints/server.py +161 -0
  233. praisonai/inbuilt_tools/__init__.py +24 -0
  234. praisonai/inbuilt_tools/autogen_tools.py +117 -0
  235. praisonai/inc/__init__.py +2 -0
  236. praisonai/inc/config.py +96 -0
  237. praisonai/inc/models.py +155 -0
  238. praisonai/integrations/__init__.py +56 -0
  239. praisonai/integrations/base.py +303 -0
  240. praisonai/integrations/claude_code.py +270 -0
  241. praisonai/integrations/codex_cli.py +255 -0
  242. praisonai/integrations/cursor_cli.py +195 -0
  243. praisonai/integrations/gemini_cli.py +222 -0
  244. praisonai/jobs/__init__.py +67 -0
  245. praisonai/jobs/executor.py +425 -0
  246. praisonai/jobs/models.py +230 -0
  247. praisonai/jobs/router.py +314 -0
  248. praisonai/jobs/server.py +186 -0
  249. praisonai/jobs/store.py +203 -0
  250. praisonai/llm/__init__.py +66 -0
  251. praisonai/llm/registry.py +382 -0
  252. praisonai/mcp_server/__init__.py +152 -0
  253. praisonai/mcp_server/adapters/__init__.py +74 -0
  254. praisonai/mcp_server/adapters/agents.py +128 -0
  255. praisonai/mcp_server/adapters/capabilities.py +168 -0
  256. praisonai/mcp_server/adapters/cli_tools.py +568 -0
  257. praisonai/mcp_server/adapters/extended_capabilities.py +462 -0
  258. praisonai/mcp_server/adapters/knowledge.py +93 -0
  259. praisonai/mcp_server/adapters/memory.py +104 -0
  260. praisonai/mcp_server/adapters/prompts.py +306 -0
  261. praisonai/mcp_server/adapters/resources.py +124 -0
  262. praisonai/mcp_server/adapters/tools_bridge.py +280 -0
  263. praisonai/mcp_server/auth/__init__.py +48 -0
  264. praisonai/mcp_server/auth/api_key.py +291 -0
  265. praisonai/mcp_server/auth/oauth.py +460 -0
  266. praisonai/mcp_server/auth/oidc.py +289 -0
  267. praisonai/mcp_server/auth/scopes.py +260 -0
  268. praisonai/mcp_server/cli.py +852 -0
  269. praisonai/mcp_server/elicitation.py +445 -0
  270. praisonai/mcp_server/icons.py +302 -0
  271. praisonai/mcp_server/recipe_adapter.py +573 -0
  272. praisonai/mcp_server/recipe_cli.py +824 -0
  273. praisonai/mcp_server/registry.py +703 -0
  274. praisonai/mcp_server/sampling.py +422 -0
  275. praisonai/mcp_server/server.py +490 -0
  276. praisonai/mcp_server/tasks.py +443 -0
  277. praisonai/mcp_server/transports/__init__.py +18 -0
  278. praisonai/mcp_server/transports/http_stream.py +376 -0
  279. praisonai/mcp_server/transports/stdio.py +132 -0
  280. praisonai/persistence/__init__.py +84 -0
  281. praisonai/persistence/config.py +238 -0
  282. praisonai/persistence/conversation/__init__.py +25 -0
  283. praisonai/persistence/conversation/async_mysql.py +427 -0
  284. praisonai/persistence/conversation/async_postgres.py +410 -0
  285. praisonai/persistence/conversation/async_sqlite.py +371 -0
  286. praisonai/persistence/conversation/base.py +151 -0
  287. praisonai/persistence/conversation/json_store.py +250 -0
  288. praisonai/persistence/conversation/mysql.py +387 -0
  289. praisonai/persistence/conversation/postgres.py +401 -0
  290. praisonai/persistence/conversation/singlestore.py +240 -0
  291. praisonai/persistence/conversation/sqlite.py +341 -0
  292. praisonai/persistence/conversation/supabase.py +203 -0
  293. praisonai/persistence/conversation/surrealdb.py +287 -0
  294. praisonai/persistence/factory.py +301 -0
  295. praisonai/persistence/hooks/__init__.py +18 -0
  296. praisonai/persistence/hooks/agent_hooks.py +297 -0
  297. praisonai/persistence/knowledge/__init__.py +26 -0
  298. praisonai/persistence/knowledge/base.py +144 -0
  299. praisonai/persistence/knowledge/cassandra.py +232 -0
  300. praisonai/persistence/knowledge/chroma.py +295 -0
  301. praisonai/persistence/knowledge/clickhouse.py +242 -0
  302. praisonai/persistence/knowledge/cosmosdb_vector.py +438 -0
  303. praisonai/persistence/knowledge/couchbase.py +286 -0
  304. praisonai/persistence/knowledge/lancedb.py +216 -0
  305. praisonai/persistence/knowledge/langchain_adapter.py +291 -0
  306. praisonai/persistence/knowledge/lightrag_adapter.py +212 -0
  307. praisonai/persistence/knowledge/llamaindex_adapter.py +256 -0
  308. praisonai/persistence/knowledge/milvus.py +277 -0
  309. praisonai/persistence/knowledge/mongodb_vector.py +306 -0
  310. praisonai/persistence/knowledge/pgvector.py +335 -0
  311. praisonai/persistence/knowledge/pinecone.py +253 -0
  312. praisonai/persistence/knowledge/qdrant.py +301 -0
  313. praisonai/persistence/knowledge/redis_vector.py +291 -0
  314. praisonai/persistence/knowledge/singlestore_vector.py +299 -0
  315. praisonai/persistence/knowledge/surrealdb_vector.py +309 -0
  316. praisonai/persistence/knowledge/upstash_vector.py +266 -0
  317. praisonai/persistence/knowledge/weaviate.py +223 -0
  318. praisonai/persistence/migrations/__init__.py +10 -0
  319. praisonai/persistence/migrations/manager.py +251 -0
  320. praisonai/persistence/orchestrator.py +406 -0
  321. praisonai/persistence/state/__init__.py +21 -0
  322. praisonai/persistence/state/async_mongodb.py +200 -0
  323. praisonai/persistence/state/base.py +107 -0
  324. praisonai/persistence/state/dynamodb.py +226 -0
  325. praisonai/persistence/state/firestore.py +175 -0
  326. praisonai/persistence/state/gcs.py +155 -0
  327. praisonai/persistence/state/memory.py +245 -0
  328. praisonai/persistence/state/mongodb.py +158 -0
  329. praisonai/persistence/state/redis.py +190 -0
  330. praisonai/persistence/state/upstash.py +144 -0
  331. praisonai/persistence/tests/__init__.py +3 -0
  332. praisonai/persistence/tests/test_all_backends.py +633 -0
  333. praisonai/profiler.py +1214 -0
  334. praisonai/recipe/__init__.py +134 -0
  335. praisonai/recipe/bridge.py +278 -0
  336. praisonai/recipe/core.py +893 -0
  337. praisonai/recipe/exceptions.py +54 -0
  338. praisonai/recipe/history.py +402 -0
  339. praisonai/recipe/models.py +266 -0
  340. praisonai/recipe/operations.py +440 -0
  341. praisonai/recipe/policy.py +422 -0
  342. praisonai/recipe/registry.py +849 -0
  343. praisonai/recipe/runtime.py +214 -0
  344. praisonai/recipe/security.py +711 -0
  345. praisonai/recipe/serve.py +859 -0
  346. praisonai/recipe/server.py +613 -0
  347. praisonai/scheduler/__init__.py +45 -0
  348. praisonai/scheduler/agent_scheduler.py +552 -0
  349. praisonai/scheduler/base.py +124 -0
  350. praisonai/scheduler/daemon_manager.py +225 -0
  351. praisonai/scheduler/state_manager.py +155 -0
  352. praisonai/scheduler/yaml_loader.py +193 -0
  353. praisonai/scheduler.py +194 -0
  354. praisonai/setup/__init__.py +1 -0
  355. praisonai/setup/build.py +21 -0
  356. praisonai/setup/post_install.py +23 -0
  357. praisonai/setup/setup_conda_env.py +25 -0
  358. praisonai/setup.py +16 -0
  359. praisonai/templates/__init__.py +116 -0
  360. praisonai/templates/cache.py +364 -0
  361. praisonai/templates/dependency_checker.py +358 -0
  362. praisonai/templates/discovery.py +391 -0
  363. praisonai/templates/loader.py +564 -0
  364. praisonai/templates/registry.py +511 -0
  365. praisonai/templates/resolver.py +206 -0
  366. praisonai/templates/security.py +327 -0
  367. praisonai/templates/tool_override.py +498 -0
  368. praisonai/templates/tools_doctor.py +256 -0
  369. praisonai/test.py +105 -0
  370. praisonai/train.py +562 -0
  371. praisonai/train_vision.py +306 -0
  372. praisonai/ui/agents.py +824 -0
  373. praisonai/ui/callbacks.py +57 -0
  374. praisonai/ui/chainlit_compat.py +246 -0
  375. praisonai/ui/chat.py +532 -0
  376. praisonai/ui/code.py +717 -0
  377. praisonai/ui/colab.py +474 -0
  378. praisonai/ui/colab_chainlit.py +81 -0
  379. praisonai/ui/components/aicoder.py +284 -0
  380. praisonai/ui/context.py +283 -0
  381. praisonai/ui/database_config.py +56 -0
  382. praisonai/ui/db.py +294 -0
  383. praisonai/ui/realtime.py +488 -0
  384. praisonai/ui/realtimeclient/__init__.py +756 -0
  385. praisonai/ui/realtimeclient/tools.py +242 -0
  386. praisonai/ui/sql_alchemy.py +710 -0
  387. praisonai/upload_vision.py +140 -0
  388. praisonai/version.py +1 -0
  389. praisonai-3.0.0.dist-info/METADATA +3493 -0
  390. praisonai-3.0.0.dist-info/RECORD +393 -0
  391. praisonai-3.0.0.dist-info/WHEEL +5 -0
  392. praisonai-3.0.0.dist-info/entry_points.txt +4 -0
  393. praisonai-3.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,560 @@
1
+ """
2
+ Evaluation CLI feature for PraisonAI.
3
+
4
+ Provides CLI commands for running agent evaluations.
5
+ """
6
+
7
+ import os
8
+ import json
9
+ import logging
10
+ from typing import Optional, List, Dict, Any
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class EvalHandler:
    """Handler for evaluation CLI commands.

    Every ``run_*`` method builds an evaluator, runs it with
    ``print_summary=True`` and returns ``result.to_dict()``.  Failures are
    reported to the caller as ``{"error": "<message>"}`` rather than raised.
    """

    # Sentinel returned by ``_first_agent`` when the configuration yields no
    # agents.  A dedicated object is used so that any value produced by the
    # generator (even a falsy one inside a list) is still passed through.
    _NO_AGENTS = object()

    def __init__(self, verbose: bool = False):
        """
        Initialize the evaluation handler.

        Args:
            verbose: Enable verbose output
        """
        self.verbose = verbose

    @classmethod
    def _first_agent(cls, generator_cls: Any, agent_file: Optional[str]) -> Any:
        """
        Generate agents from a configuration file and pick the first one.

        Args:
            generator_cls: The ``AgentsGenerator`` class imported by the caller
                (passed in so each caller keeps its own import error handling)
            agent_file: Path handed to ``generator_cls``

        Returns:
            ``agents[0]`` when the generator returns a list, the returned
            object itself otherwise, or ``cls._NO_AGENTS`` when the generator
            returns something falsy.
        """
        agents = generator_cls(agent_file).generate_agents()
        if not agents:
            return cls._NO_AGENTS
        return agents[0] if isinstance(agents, list) else agents

    def run_accuracy(
        self,
        agent_file: Optional[str] = None,
        input_text: str = "",
        expected_output: str = "",
        iterations: int = 1,
        model: Optional[str] = None,
        output_file: Optional[str] = None,
        prompt: Optional[str] = None,
        llm: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Run accuracy evaluation on an agent.

        When ``prompt`` is given it takes precedence over ``agent_file`` and an
        ad-hoc agent is created; otherwise the first agent generated from
        ``agent_file`` is evaluated.

        Args:
            agent_file: Path to agents.yaml file (optional if prompt is provided)
            input_text: Input to provide to the agent
            expected_output: Expected output to compare against
            iterations: Number of evaluation iterations
            model: LLM model for judging
            output_file: Path to save results
            prompt: Direct prompt (alternative to agent_file)
            llm: LLM model for the agent (when using prompt)

        Returns:
            Evaluation result dictionary, or ``{"error": ...}`` on failure
        """
        try:
            from praisonaiagents.eval import AccuracyEvaluator
            from praisonaiagents import Agent
        except ImportError as e:
            logger.error("Failed to import evaluation modules: %s", e)
            return {"error": str(e)}

        try:
            # Create agent either from file or from prompt
            if prompt:
                # Direct prompt mode - create agent on the fly.  The agent
                # model falls back to the judge model, then to a fixed default.
                agent = Agent(
                    name="EvalAgent",
                    role="Assistant",
                    goal="Complete the given task",
                    backstory="You are a helpful assistant.",
                    llm=llm or model or "gpt-4o-mini",
                    verbose=False
                )
                # Use prompt as input if input_text not provided
                if not input_text:
                    input_text = prompt
            elif agent_file:
                # Load from agents.yaml
                try:
                    from praisonai.agents_generator import AgentsGenerator
                    agent = self._first_agent(AgentsGenerator, agent_file)
                except Exception as e:
                    return {"error": f"Failed to load agents from {agent_file}: {e}"}

                if agent is self._NO_AGENTS:
                    return {"error": "No agents found in configuration"}
            else:
                return {"error": "Either --agent or --prompt must be provided"}

            evaluator = AccuracyEvaluator(
                agent=agent,
                input_text=input_text,
                expected_output=expected_output,
                num_iterations=iterations,
                model=model,
                save_results_path=output_file,
                verbose=self.verbose
            )

            result = evaluator.run(print_summary=True)
            return result.to_dict()

        except Exception as e:
            logger.error("Accuracy evaluation failed: %s", e)
            return {"error": str(e)}

    def run_performance(
        self,
        agent_file: str,
        input_text: str = "Hello",
        iterations: int = 10,
        warmup: int = 2,
        track_memory: bool = True,
        output_file: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Run performance evaluation on an agent.

        Args:
            agent_file: Path to agents.yaml file
            input_text: Input to provide to the agent
            iterations: Number of benchmark iterations
            warmup: Number of warmup runs
            track_memory: Whether to track memory usage
            output_file: Path to save results

        Returns:
            Evaluation result dictionary, or ``{"error": ...}`` on failure
        """
        try:
            from praisonaiagents.eval import PerformanceEvaluator
            from praisonai.agents_generator import AgentsGenerator
        except ImportError as e:
            logger.error("Failed to import evaluation modules: %s", e)
            return {"error": str(e)}

        try:
            agent = self._first_agent(AgentsGenerator, agent_file)
            if agent is self._NO_AGENTS:
                return {"error": "No agents found in configuration"}

            evaluator = PerformanceEvaluator(
                agent=agent,
                input_text=input_text,
                num_iterations=iterations,
                warmup_runs=warmup,
                track_memory=track_memory,
                save_results_path=output_file,
                verbose=self.verbose
            )

            result = evaluator.run(print_summary=True)
            return result.to_dict()

        except Exception as e:
            logger.error("Performance evaluation failed: %s", e)
            return {"error": str(e)}

    def run_reliability(
        self,
        agent_file: str,
        input_text: str,
        expected_tools: List[str],
        forbidden_tools: Optional[List[str]] = None,
        output_file: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Run reliability evaluation on an agent.

        Args:
            agent_file: Path to agents.yaml file
            input_text: Input to provide to the agent
            expected_tools: List of tools that should be called
            forbidden_tools: List of tools that should NOT be called
            output_file: Path to save results

        Returns:
            Evaluation result dictionary, or ``{"error": ...}`` on failure
        """
        try:
            from praisonaiagents.eval import ReliabilityEvaluator
            from praisonai.agents_generator import AgentsGenerator
        except ImportError as e:
            logger.error("Failed to import evaluation modules: %s", e)
            return {"error": str(e)}

        try:
            agent = self._first_agent(AgentsGenerator, agent_file)
            if agent is self._NO_AGENTS:
                return {"error": "No agents found in configuration"}

            evaluator = ReliabilityEvaluator(
                agent=agent,
                input_text=input_text,
                expected_tools=expected_tools,
                forbidden_tools=forbidden_tools,
                save_results_path=output_file,
                verbose=self.verbose
            )

            result = evaluator.run(print_summary=True)
            return result.to_dict()

        except Exception as e:
            logger.error("Reliability evaluation failed: %s", e)
            return {"error": str(e)}

    def run_criteria(
        self,
        agent_file: str,
        input_text: str,
        criteria: str,
        scoring_type: str = "numeric",
        threshold: float = 7.0,
        iterations: int = 1,
        model: Optional[str] = None,
        output_file: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Run criteria-based evaluation on an agent.

        Args:
            agent_file: Path to agents.yaml file
            input_text: Input to provide to the agent
            criteria: Criteria to evaluate against
            scoring_type: "numeric" or "binary"
            threshold: Score threshold for passing (numeric mode)
            iterations: Number of evaluation iterations
            model: LLM model for judging
            output_file: Path to save results

        Returns:
            Evaluation result dictionary, or ``{"error": ...}`` on failure
        """
        try:
            from praisonaiagents.eval import CriteriaEvaluator
            from praisonai.agents_generator import AgentsGenerator
        except ImportError as e:
            logger.error("Failed to import evaluation modules: %s", e)
            return {"error": str(e)}

        try:
            agent = self._first_agent(AgentsGenerator, agent_file)
            if agent is self._NO_AGENTS:
                return {"error": "No agents found in configuration"}

            evaluator = CriteriaEvaluator(
                criteria=criteria,
                agent=agent,
                input_text=input_text,
                scoring_type=scoring_type,
                threshold=threshold,
                num_iterations=iterations,
                model=model,
                save_results_path=output_file,
                verbose=self.verbose
            )

            result = evaluator.run(print_summary=True)
            return result.to_dict()

        except Exception as e:
            logger.error("Criteria evaluation failed: %s", e)
            return {"error": str(e)}

    def run_batch(
        self,
        agent_file: str,
        test_file: str,
        eval_type: str = "accuracy",
        output_file: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Run batch evaluation from a test file.

        The test file must hold a JSON array of objects.  Keys read per case:
        ``input`` (both types); ``expected`` and ``iterations`` for accuracy;
        ``criteria``, ``scoring_type``, ``threshold`` and ``iterations`` for
        criteria.  A failing or malformed case is recorded with an ``error``
        result and does not stop the remaining cases.

        Args:
            agent_file: Path to agents.yaml file
            test_file: Path to JSON test file with test cases
            eval_type: Type of evaluation ("accuracy", "criteria")
            output_file: Path to save results

        Returns:
            Batch evaluation results with ``total_tests``, ``eval_type`` and
            ``results`` keys, or ``{"error": ...}`` when the test file cannot
            be loaded or is not a JSON array
        """
        try:
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(test_file, 'r', encoding='utf-8') as f:
                test_cases = json.load(f)
        except Exception as e:
            return {"error": f"Failed to load test file: {e}"}

        # Iterating a JSON object would yield its keys (strings) and crash on
        # the first ``.get`` call, so reject anything that is not an array.
        if not isinstance(test_cases, list):
            return {"error": "Test file must contain a JSON array of test cases"}

        total = len(test_cases)
        results = []
        for number, test_case in enumerate(test_cases, start=1):
            if self.verbose:
                print(f"Running test case {number}/{total}")

            if not isinstance(test_case, dict):
                # Record the malformed entry and keep going with the rest.
                results.append({
                    "test_case": number,
                    "input": "",
                    "result": {"error": f"Test case {number} must be a JSON object"}
                })
                continue

            if eval_type == "accuracy":
                result = self.run_accuracy(
                    agent_file=agent_file,
                    input_text=test_case.get("input", ""),
                    expected_output=test_case.get("expected", ""),
                    iterations=test_case.get("iterations", 1)
                )
            elif eval_type == "criteria":
                result = self.run_criteria(
                    agent_file=agent_file,
                    input_text=test_case.get("input", ""),
                    criteria=test_case.get("criteria", ""),
                    scoring_type=test_case.get("scoring_type", "numeric"),
                    threshold=test_case.get("threshold", 7.0),
                    # Honour per-case iterations, matching the accuracy branch.
                    iterations=test_case.get("iterations", 1)
                )
            else:
                result = {"error": f"Unknown eval type: {eval_type}"}

            results.append({
                "test_case": number,
                "input": test_case.get("input", ""),
                "result": result
            })

        batch_result = {
            "total_tests": total,
            "eval_type": eval_type,
            "results": results
        }

        if output_file:
            # Saving is best-effort: a write failure is logged, not returned.
            try:
                with open(output_file, 'w', encoding='utf-8') as f:
                    json.dump(batch_result, f, indent=2)
            except Exception as e:
                logger.warning("Failed to save batch results: %s", e)

        return batch_result
348
+
349
+
350
def handle_eval_command(args) -> int:
    """
    Handle the eval CLI command.

    Args:
        args: Command line arguments. Either a raw argv-style list (parsed
            here using the parsers registered by
            ``add_eval_parser_subcommands``) or an already-parsed namespace.

    Returns:
        Exit code: 0 on success (including when help is printed), 1 on an
        argument parsing error, an unknown evaluation type, or when the
        evaluation result contains an ``'error'`` key.
    """
    import argparse
    import os

    def _split_csv(value):
        """Split a comma-separated option into stripped, non-empty names."""
        if not value:
            return []
        return [name.strip() for name in value.split(',') if name.strip()]

    # If args is a list, parse it first
    if isinstance(args, list):
        parser = argparse.ArgumentParser(prog="praisonai eval")
        subparsers = parser.add_subparsers(dest='eval_type')
        add_eval_parser_subcommands(subparsers)

        try:
            args = parser.parse_args(args)
        except SystemExit as exc:
            # argparse raises SystemExit(0) after printing --help; only a
            # non-zero status is a genuine parse failure.
            return 0 if exc.code in (0, None) else 1

        # No subcommand given: show usage plus a couple of examples.
        if not args.eval_type:
            parser.print_help()
            print("\n[bold]Examples:[/bold]")
            print("  praisonai eval accuracy --prompt \"What is 2+2?\" --expected \"4\"")
            print("  praisonai eval performance --agent agents.yaml --input \"Hello\"")
            return 0

    handler = EvalHandler(verbose=getattr(args, 'verbose', False))

    eval_type = getattr(args, 'eval_type', 'accuracy')
    agent_file = getattr(args, 'agent', None)
    output_file = getattr(args, 'output', None)
    prompt = getattr(args, 'prompt', None)
    llm = getattr(args, 'llm', None)

    # If no agent file and no prompt, check if agents.yaml exists
    if not agent_file and not prompt and os.path.exists('agents.yaml'):
        agent_file = 'agents.yaml'

    if eval_type == 'accuracy':
        result = handler.run_accuracy(
            agent_file=agent_file,
            input_text=getattr(args, 'input', ''),
            expected_output=getattr(args, 'expected', ''),
            iterations=getattr(args, 'iterations', 1),
            model=getattr(args, 'model', None),
            output_file=output_file,
            prompt=prompt,
            llm=llm
        )
    elif eval_type == 'performance':
        result = handler.run_performance(
            agent_file=agent_file,
            input_text=getattr(args, 'input', 'Hello'),
            iterations=getattr(args, 'iterations', 10),
            warmup=getattr(args, 'warmup', 2),
            track_memory=getattr(args, 'memory', True),
            output_file=output_file
        )
    elif eval_type == 'reliability':
        # Tool lists arrive as comma-separated strings. Normalise them so
        # "a, b" and a missing/empty value do not produce bogus entries
        # such as " b" or "".
        expected_tools = _split_csv(getattr(args, 'expected_tools', None))
        # An absent or empty forbidden list is passed on as None.
        forbidden_tools = _split_csv(getattr(args, 'forbidden_tools', None)) or None

        result = handler.run_reliability(
            agent_file=agent_file,
            input_text=getattr(args, 'input', ''),
            expected_tools=expected_tools,
            forbidden_tools=forbidden_tools,
            output_file=output_file
        )
    elif eval_type == 'criteria':
        result = handler.run_criteria(
            agent_file=agent_file,
            input_text=getattr(args, 'input', ''),
            criteria=getattr(args, 'criteria', ''),
            scoring_type=getattr(args, 'scoring', 'numeric'),
            threshold=getattr(args, 'threshold', 7.0),
            iterations=getattr(args, 'iterations', 1),
            model=getattr(args, 'model', None),
            output_file=output_file
        )
    elif eval_type == 'batch':
        result = handler.run_batch(
            agent_file=agent_file,
            test_file=getattr(args, 'test_file', ''),
            eval_type=getattr(args, 'batch_type', 'accuracy'),
            output_file=output_file
        )
    else:
        print(f"Unknown evaluation type: {eval_type}")
        return 1

    if 'error' in result:
        print(f"Error: {result['error']}")
        return 1
    elif not getattr(args, 'quiet', False):
        print(json.dumps(result, indent=2))

    return 0
455
+
456
+
457
def add_eval_parser_subcommands(subparsers) -> None:
    """
    Add eval subcommand parsers to an existing subparsers object.

    Registers every evaluation type that ``handle_eval_command`` dispatches
    on: accuracy, performance, reliability, criteria and batch.

    Args:
        subparsers: Subparsers action (as returned by
            ``ArgumentParser.add_subparsers``) to register the commands on.
    """

    def _add_common_options(cmd_parser) -> None:
        """Register the output/verbosity flags shared by all subcommands."""
        cmd_parser.add_argument('--output', '-o', help='Output file')
        cmd_parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
        cmd_parser.add_argument('--quiet', '-q', action='store_true', help='Suppress JSON output')

    accuracy_parser = subparsers.add_parser('accuracy', help='Run accuracy evaluation')
    accuracy_parser.add_argument('--agent', '-a', help='Agent config file (optional if --prompt used)')
    accuracy_parser.add_argument('--prompt', '-p', type=str, help='Direct prompt (alternative to --agent)')
    accuracy_parser.add_argument('--llm', help='LLM model for agent (when using --prompt)')
    accuracy_parser.add_argument('--input', '-i', help='Input text (defaults to --prompt if not provided)')
    accuracy_parser.add_argument('--expected', '-e', required=True, help='Expected output')
    accuracy_parser.add_argument('--iterations', '-n', type=int, default=1, help='Number of iterations')
    accuracy_parser.add_argument('--model', '-m', help='Judge model')
    _add_common_options(accuracy_parser)

    perf_parser = subparsers.add_parser('performance', help='Run performance evaluation')
    perf_parser.add_argument('--agent', '-a', default='agents.yaml', help='Agent config file')
    perf_parser.add_argument('--input', '-i', default='Hello', help='Input text')
    perf_parser.add_argument('--iterations', '-n', type=int, default=10, help='Number of iterations')
    perf_parser.add_argument('--warmup', '-w', type=int, default=2, help='Warmup runs')
    perf_parser.add_argument('--memory', action='store_true', default=True, help='Track memory')
    # --memory defaults to True, so on its own it can never be switched off;
    # --no-memory writes False to the same destination.
    perf_parser.add_argument('--no-memory', dest='memory', action='store_false', help='Disable memory tracking')
    _add_common_options(perf_parser)

    rel_parser = subparsers.add_parser('reliability', help='Run reliability evaluation')
    rel_parser.add_argument('--agent', '-a', default='agents.yaml', help='Agent config file')
    rel_parser.add_argument('--input', '-i', required=True, help='Input text')
    rel_parser.add_argument('--expected-tools', '-t', required=True, help='Expected tools (comma-separated)')
    rel_parser.add_argument('--forbidden-tools', '-f', help='Forbidden tools (comma-separated)')
    _add_common_options(rel_parser)

    criteria_parser = subparsers.add_parser('criteria', help='Run criteria-based evaluation')
    criteria_parser.add_argument('--agent', '-a', default='agents.yaml', help='Agent config file')
    criteria_parser.add_argument('--input', '-i', required=True, help='Input text')
    criteria_parser.add_argument('--criteria', '-c', required=True, help='Evaluation criteria')
    criteria_parser.add_argument('--scoring', '-s', choices=['numeric', 'binary'], default='numeric', help='Scoring type')
    criteria_parser.add_argument('--threshold', type=float, default=7.0, help='Pass threshold')
    criteria_parser.add_argument('--iterations', '-n', type=int, default=1, help='Number of iterations')
    criteria_parser.add_argument('--model', '-m', help='Judge model')
    _add_common_options(criteria_parser)

    batch_parser = subparsers.add_parser('batch', help='Run batch evaluation from test file')
    batch_parser.add_argument('--agent', '-a', default='agents.yaml', help='Agent config file')
    batch_parser.add_argument('--test-file', '-t', required=True, help='JSON test file')
    batch_parser.add_argument('--batch-type', '-b', choices=['accuracy', 'criteria'], default='accuracy', help='Evaluation type')
    _add_common_options(batch_parser)
480
+
481
+
482
def add_eval_parser(subparsers) -> None:
    """
    Add eval subcommand parser.

    Registers an ``eval`` command on ``subparsers`` with five nested
    subcommands (accuracy, performance, reliability, criteria, batch). The
    selected subcommand name is stored in the ``eval_type`` attribute.

    Args:
        subparsers: Argument parser subparsers
    """

    def _add_common_options(cmd_parser) -> None:
        """Register the output/verbosity flags shared by all subcommands."""
        cmd_parser.add_argument('--output', '-o', help='Output file')
        cmd_parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
        cmd_parser.add_argument('--quiet', '-q', action='store_true', help='Suppress JSON output')

    eval_parser = subparsers.add_parser(
        'eval',
        help='Run agent evaluations'
    )

    eval_subparsers = eval_parser.add_subparsers(dest='eval_type')

    accuracy_parser = eval_subparsers.add_parser(
        'accuracy',
        help='Run accuracy evaluation'
    )
    accuracy_parser.add_argument('--agent', '-a', help='Agent config file (optional if --prompt used)')
    accuracy_parser.add_argument('--prompt', '-p', help='Direct prompt (alternative to --agent)')
    accuracy_parser.add_argument('--llm', help='LLM model for agent (when using --prompt)')
    accuracy_parser.add_argument('--input', '-i', help='Input text (defaults to --prompt if not provided)')
    accuracy_parser.add_argument('--expected', '-e', required=True, help='Expected output')
    accuracy_parser.add_argument('--iterations', '-n', type=int, default=1, help='Number of iterations')
    accuracy_parser.add_argument('--model', '-m', help='Judge model')
    _add_common_options(accuracy_parser)

    perf_parser = eval_subparsers.add_parser(
        'performance',
        help='Run performance evaluation'
    )
    perf_parser.add_argument('--agent', '-a', default='agents.yaml', help='Agent config file')
    perf_parser.add_argument('--input', '-i', default='Hello', help='Input text')
    perf_parser.add_argument('--iterations', '-n', type=int, default=10, help='Number of iterations')
    perf_parser.add_argument('--warmup', '-w', type=int, default=2, help='Warmup runs')
    perf_parser.add_argument('--memory', action='store_true', default=True, help='Track memory')
    # --memory defaults to True, so on its own it can never be switched off;
    # --no-memory writes False to the same destination.
    perf_parser.add_argument('--no-memory', dest='memory', action='store_false', help='Disable memory tracking')
    _add_common_options(perf_parser)

    rel_parser = eval_subparsers.add_parser(
        'reliability',
        help='Run reliability evaluation'
    )
    rel_parser.add_argument('--agent', '-a', default='agents.yaml', help='Agent config file')
    rel_parser.add_argument('--input', '-i', required=True, help='Input text')
    rel_parser.add_argument('--expected-tools', '-t', required=True, help='Expected tools (comma-separated)')
    rel_parser.add_argument('--forbidden-tools', '-f', help='Forbidden tools (comma-separated)')
    _add_common_options(rel_parser)

    criteria_parser = eval_subparsers.add_parser(
        'criteria',
        help='Run criteria-based evaluation'
    )
    criteria_parser.add_argument('--agent', '-a', default='agents.yaml', help='Agent config file')
    criteria_parser.add_argument('--input', '-i', required=True, help='Input text')
    criteria_parser.add_argument('--criteria', '-c', required=True, help='Evaluation criteria')
    criteria_parser.add_argument('--scoring', '-s', choices=['numeric', 'binary'], default='numeric', help='Scoring type')
    criteria_parser.add_argument('--threshold', type=float, default=7.0, help='Pass threshold')
    criteria_parser.add_argument('--iterations', '-n', type=int, default=1, help='Number of iterations')
    criteria_parser.add_argument('--model', '-m', help='Judge model')
    _add_common_options(criteria_parser)

    batch_parser = eval_subparsers.add_parser(
        'batch',
        help='Run batch evaluation from test file'
    )
    batch_parser.add_argument('--agent', '-a', default='agents.yaml', help='Agent config file')
    batch_parser.add_argument('--test-file', '-t', required=True, help='JSON test file')
    batch_parser.add_argument('--batch-type', '-b', choices=['accuracy', 'criteria'], default='accuracy', help='Evaluation type')
    _add_common_options(batch_parser)