flock-core 0.5.0b28__py3-none-any.whl → 0.5.0b51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flock-core might be problematic. Click here for more details.

Files changed (469)
  1. flock/__init__.py +12 -217
  2. flock/agent.py +678 -0
  3. flock/api/themes.py +71 -0
  4. flock/artifacts.py +79 -0
  5. flock/cli.py +75 -0
  6. flock/components.py +173 -0
  7. flock/dashboard/__init__.py +28 -0
  8. flock/dashboard/collector.py +283 -0
  9. flock/dashboard/events.py +182 -0
  10. flock/dashboard/launcher.py +230 -0
  11. flock/dashboard/service.py +537 -0
  12. flock/dashboard/websocket.py +235 -0
  13. flock/engines/__init__.py +6 -0
  14. flock/engines/dspy_engine.py +856 -0
  15. flock/examples.py +128 -0
  16. flock/frontend/README.md +678 -0
  17. flock/frontend/docs/DESIGN_SYSTEM.md +1980 -0
  18. flock/frontend/index.html +12 -0
  19. flock/frontend/package-lock.json +4347 -0
  20. flock/frontend/package.json +48 -0
  21. flock/frontend/src/App.tsx +79 -0
  22. flock/frontend/src/__tests__/e2e/critical-scenarios.test.tsx +587 -0
  23. flock/frontend/src/__tests__/integration/filtering-e2e.test.tsx +387 -0
  24. flock/frontend/src/__tests__/integration/graph-rendering.test.tsx +640 -0
  25. flock/frontend/src/__tests__/integration/indexeddb-persistence.test.tsx +699 -0
  26. flock/frontend/src/components/common/BuildInfo.tsx +39 -0
  27. flock/frontend/src/components/common/EmptyState.module.css +115 -0
  28. flock/frontend/src/components/common/EmptyState.tsx +128 -0
  29. flock/frontend/src/components/common/ErrorBoundary.module.css +169 -0
  30. flock/frontend/src/components/common/ErrorBoundary.tsx +118 -0
  31. flock/frontend/src/components/common/KeyboardShortcutsDialog.css +251 -0
  32. flock/frontend/src/components/common/KeyboardShortcutsDialog.tsx +151 -0
  33. flock/frontend/src/components/common/LoadingSpinner.module.css +97 -0
  34. flock/frontend/src/components/common/LoadingSpinner.tsx +29 -0
  35. flock/frontend/src/components/controls/PublishControl.css +547 -0
  36. flock/frontend/src/components/controls/PublishControl.test.tsx +543 -0
  37. flock/frontend/src/components/controls/PublishControl.tsx +432 -0
  38. flock/frontend/src/components/details/DetailWindowContainer.tsx +62 -0
  39. flock/frontend/src/components/details/LiveOutputTab.test.tsx +792 -0
  40. flock/frontend/src/components/details/LiveOutputTab.tsx +220 -0
  41. flock/frontend/src/components/details/MessageHistoryTab.tsx +299 -0
  42. flock/frontend/src/components/details/NodeDetailWindow.test.tsx +501 -0
  43. flock/frontend/src/components/details/NodeDetailWindow.tsx +218 -0
  44. flock/frontend/src/components/details/RunStatusTab.tsx +307 -0
  45. flock/frontend/src/components/details/tabs.test.tsx +1015 -0
  46. flock/frontend/src/components/filters/CorrelationIDFilter.module.css +102 -0
  47. flock/frontend/src/components/filters/CorrelationIDFilter.test.tsx +197 -0
  48. flock/frontend/src/components/filters/CorrelationIDFilter.tsx +121 -0
  49. flock/frontend/src/components/filters/FilterBar.module.css +29 -0
  50. flock/frontend/src/components/filters/FilterBar.test.tsx +133 -0
  51. flock/frontend/src/components/filters/FilterBar.tsx +33 -0
  52. flock/frontend/src/components/filters/FilterPills.module.css +79 -0
  53. flock/frontend/src/components/filters/FilterPills.test.tsx +173 -0
  54. flock/frontend/src/components/filters/FilterPills.tsx +67 -0
  55. flock/frontend/src/components/filters/TimeRangeFilter.module.css +91 -0
  56. flock/frontend/src/components/filters/TimeRangeFilter.test.tsx +154 -0
  57. flock/frontend/src/components/filters/TimeRangeFilter.tsx +105 -0
  58. flock/frontend/src/components/graph/AgentNode.test.tsx +75 -0
  59. flock/frontend/src/components/graph/AgentNode.tsx +322 -0
  60. flock/frontend/src/components/graph/GraphCanvas.tsx +406 -0
  61. flock/frontend/src/components/graph/MessageFlowEdge.tsx +128 -0
  62. flock/frontend/src/components/graph/MessageNode.test.tsx +62 -0
  63. flock/frontend/src/components/graph/MessageNode.tsx +116 -0
  64. flock/frontend/src/components/graph/MiniMap.tsx +47 -0
  65. flock/frontend/src/components/graph/TransformEdge.tsx +123 -0
  66. flock/frontend/src/components/layout/DashboardLayout.css +407 -0
  67. flock/frontend/src/components/layout/DashboardLayout.tsx +300 -0
  68. flock/frontend/src/components/layout/Header.module.css +88 -0
  69. flock/frontend/src/components/layout/Header.tsx +52 -0
  70. flock/frontend/src/components/modules/EventLogModule.test.tsx +401 -0
  71. flock/frontend/src/components/modules/EventLogModule.tsx +396 -0
  72. flock/frontend/src/components/modules/EventLogModuleWrapper.tsx +17 -0
  73. flock/frontend/src/components/modules/ModuleRegistry.test.ts +333 -0
  74. flock/frontend/src/components/modules/ModuleRegistry.ts +85 -0
  75. flock/frontend/src/components/modules/ModuleWindow.tsx +155 -0
  76. flock/frontend/src/components/modules/registerModules.ts +20 -0
  77. flock/frontend/src/components/settings/AdvancedSettings.tsx +175 -0
  78. flock/frontend/src/components/settings/AppearanceSettings.tsx +185 -0
  79. flock/frontend/src/components/settings/GraphSettings.tsx +110 -0
  80. flock/frontend/src/components/settings/SettingsPanel.css +327 -0
  81. flock/frontend/src/components/settings/SettingsPanel.tsx +131 -0
  82. flock/frontend/src/components/settings/ThemeSelector.tsx +298 -0
  83. flock/frontend/src/hooks/useKeyboardShortcuts.ts +148 -0
  84. flock/frontend/src/hooks/useModulePersistence.test.ts +442 -0
  85. flock/frontend/src/hooks/useModulePersistence.ts +154 -0
  86. flock/frontend/src/hooks/useModules.ts +139 -0
  87. flock/frontend/src/hooks/usePersistence.ts +139 -0
  88. flock/frontend/src/main.tsx +13 -0
  89. flock/frontend/src/services/api.ts +213 -0
  90. flock/frontend/src/services/indexeddb.test.ts +793 -0
  91. flock/frontend/src/services/indexeddb.ts +794 -0
  92. flock/frontend/src/services/layout.test.ts +437 -0
  93. flock/frontend/src/services/layout.ts +146 -0
  94. flock/frontend/src/services/themeApplicator.ts +140 -0
  95. flock/frontend/src/services/themeService.ts +77 -0
  96. flock/frontend/src/services/websocket.test.ts +595 -0
  97. flock/frontend/src/services/websocket.ts +685 -0
  98. flock/frontend/src/store/filterStore.test.ts +242 -0
  99. flock/frontend/src/store/filterStore.ts +103 -0
  100. flock/frontend/src/store/graphStore.test.ts +186 -0
  101. flock/frontend/src/store/graphStore.ts +414 -0
  102. flock/frontend/src/store/moduleStore.test.ts +253 -0
  103. flock/frontend/src/store/moduleStore.ts +57 -0
  104. flock/frontend/src/store/settingsStore.ts +188 -0
  105. flock/frontend/src/store/streamStore.ts +68 -0
  106. flock/frontend/src/store/uiStore.test.ts +54 -0
  107. flock/frontend/src/store/uiStore.ts +110 -0
  108. flock/frontend/src/store/wsStore.ts +34 -0
  109. flock/frontend/src/styles/index.css +15 -0
  110. flock/frontend/src/styles/scrollbar.css +47 -0
  111. flock/frontend/src/styles/variables.css +488 -0
  112. flock/frontend/src/test/setup.ts +1 -0
  113. flock/frontend/src/types/filters.ts +14 -0
  114. flock/frontend/src/types/graph.ts +55 -0
  115. flock/frontend/src/types/modules.ts +7 -0
  116. flock/frontend/src/types/theme.ts +55 -0
  117. flock/frontend/src/utils/mockData.ts +85 -0
  118. flock/frontend/src/utils/performance.ts +16 -0
  119. flock/frontend/src/utils/transforms.test.ts +860 -0
  120. flock/frontend/src/utils/transforms.ts +323 -0
  121. flock/frontend/src/vite-env.d.ts +17 -0
  122. flock/frontend/tsconfig.json +27 -0
  123. flock/frontend/tsconfig.node.json +11 -0
  124. flock/frontend/vite.config.ts +25 -0
  125. flock/frontend/vitest.config.ts +11 -0
  126. flock/{core/util → helper}/cli_helper.py +4 -3
  127. flock/{core/logging → logging}/__init__.py +2 -3
  128. flock/{core/logging → logging}/formatters/enum_builder.py +3 -4
  129. flock/{core/logging → logging}/formatters/theme_builder.py +19 -44
  130. flock/{core/logging → logging}/formatters/themed_formatter.py +69 -115
  131. flock/{core/logging → logging}/logging.py +77 -61
  132. flock/{core/logging → logging}/telemetry.py +20 -26
  133. flock/{core/logging → logging}/telemetry_exporter/base_exporter.py +2 -2
  134. flock/{core/logging → logging}/telemetry_exporter/file_exporter.py +6 -9
  135. flock/{core/logging → logging}/telemetry_exporter/sqlite_exporter.py +2 -3
  136. flock/{core/logging → logging}/trace_and_logged.py +20 -24
  137. flock/mcp/__init__.py +91 -0
  138. flock/{core/mcp/mcp_client.py → mcp/client.py} +103 -154
  139. flock/{core/mcp/mcp_config.py → mcp/config.py} +62 -117
  140. flock/mcp/manager.py +255 -0
  141. flock/mcp/servers/sse/__init__.py +1 -1
  142. flock/mcp/servers/sse/flock_sse_server.py +11 -53
  143. flock/mcp/servers/stdio/__init__.py +1 -1
  144. flock/mcp/servers/stdio/flock_stdio_server.py +8 -48
  145. flock/mcp/servers/streamable_http/flock_streamable_http_server.py +17 -62
  146. flock/mcp/servers/websockets/flock_websocket_server.py +7 -40
  147. flock/{core/mcp/flock_mcp_tool.py → mcp/tool.py} +16 -26
  148. flock/mcp/types/__init__.py +42 -0
  149. flock/{core/mcp → mcp}/types/callbacks.py +9 -15
  150. flock/{core/mcp → mcp}/types/factories.py +7 -6
  151. flock/{core/mcp → mcp}/types/handlers.py +13 -18
  152. flock/{core/mcp → mcp}/types/types.py +70 -74
  153. flock/{core/mcp → mcp}/util/helpers.py +1 -1
  154. flock/orchestrator.py +645 -0
  155. flock/registry.py +148 -0
  156. flock/runtime.py +262 -0
  157. flock/service.py +140 -0
  158. flock/store.py +69 -0
  159. flock/subscription.py +111 -0
  160. flock/themes/andromeda.toml +1 -1
  161. flock/themes/apple-system-colors.toml +1 -1
  162. flock/themes/arcoiris.toml +1 -1
  163. flock/themes/atomonelight.toml +1 -1
  164. flock/themes/ayu copy.toml +1 -1
  165. flock/themes/ayu-light.toml +1 -1
  166. flock/themes/belafonte-day.toml +1 -1
  167. flock/themes/belafonte-night.toml +1 -1
  168. flock/themes/blulocodark.toml +1 -1
  169. flock/themes/breeze.toml +1 -1
  170. flock/themes/broadcast.toml +1 -1
  171. flock/themes/brogrammer.toml +1 -1
  172. flock/themes/builtin-dark.toml +1 -1
  173. flock/themes/builtin-pastel-dark.toml +1 -1
  174. flock/themes/catppuccin-latte.toml +1 -1
  175. flock/themes/catppuccin-macchiato.toml +1 -1
  176. flock/themes/catppuccin-mocha.toml +1 -1
  177. flock/themes/cga.toml +1 -1
  178. flock/themes/chalk.toml +1 -1
  179. flock/themes/ciapre.toml +1 -1
  180. flock/themes/coffee-theme.toml +1 -1
  181. flock/themes/cyberpunkscarletprotocol.toml +1 -1
  182. flock/themes/dark+.toml +1 -1
  183. flock/themes/darkermatrix.toml +1 -1
  184. flock/themes/darkside.toml +1 -1
  185. flock/themes/desert.toml +1 -1
  186. flock/themes/django.toml +1 -1
  187. flock/themes/djangosmooth.toml +1 -1
  188. flock/themes/doomone.toml +1 -1
  189. flock/themes/dotgov.toml +1 -1
  190. flock/themes/dracula+.toml +1 -1
  191. flock/themes/duckbones.toml +1 -1
  192. flock/themes/encom.toml +1 -1
  193. flock/themes/espresso.toml +1 -1
  194. flock/themes/everblush.toml +1 -1
  195. flock/themes/fairyfloss.toml +1 -1
  196. flock/themes/fideloper.toml +1 -1
  197. flock/themes/fishtank.toml +1 -1
  198. flock/themes/flexoki-light.toml +1 -1
  199. flock/themes/floraverse.toml +1 -1
  200. flock/themes/framer.toml +1 -1
  201. flock/themes/galizur.toml +1 -1
  202. flock/themes/github.toml +1 -1
  203. flock/themes/grass.toml +1 -1
  204. flock/themes/grey-green.toml +1 -1
  205. flock/themes/gruvboxlight.toml +1 -1
  206. flock/themes/guezwhoz.toml +1 -1
  207. flock/themes/harper.toml +1 -1
  208. flock/themes/hax0r-blue.toml +1 -1
  209. flock/themes/hopscotch.256.toml +1 -1
  210. flock/themes/ic-green-ppl.toml +1 -1
  211. flock/themes/iceberg-dark.toml +1 -1
  212. flock/themes/japanesque.toml +1 -1
  213. flock/themes/jubi.toml +1 -1
  214. flock/themes/kibble.toml +1 -1
  215. flock/themes/kolorit.toml +1 -1
  216. flock/themes/kurokula.toml +1 -1
  217. flock/themes/materialdesigncolors.toml +1 -1
  218. flock/themes/matrix.toml +1 -1
  219. flock/themes/mellifluous.toml +1 -1
  220. flock/themes/midnight-in-mojave.toml +1 -1
  221. flock/themes/monokai-remastered.toml +1 -1
  222. flock/themes/monokai-soda.toml +1 -1
  223. flock/themes/neon.toml +1 -1
  224. flock/themes/neopolitan.toml +1 -1
  225. flock/themes/nord-light.toml +1 -1
  226. flock/themes/ocean.toml +1 -1
  227. flock/themes/onehalfdark.toml +1 -1
  228. flock/themes/onehalflight.toml +1 -1
  229. flock/themes/palenighthc.toml +1 -1
  230. flock/themes/paulmillr.toml +1 -1
  231. flock/themes/pencildark.toml +1 -1
  232. flock/themes/pnevma.toml +1 -1
  233. flock/themes/purple-rain.toml +1 -1
  234. flock/themes/purplepeter.toml +1 -1
  235. flock/themes/raycast-dark.toml +1 -1
  236. flock/themes/red-sands.toml +1 -1
  237. flock/themes/relaxed.toml +1 -1
  238. flock/themes/retro.toml +1 -1
  239. flock/themes/rose-pine.toml +1 -1
  240. flock/themes/royal.toml +1 -1
  241. flock/themes/ryuuko.toml +1 -1
  242. flock/themes/sakura.toml +1 -1
  243. flock/themes/scarlet-protocol.toml +1 -1
  244. flock/themes/seoulbones-dark.toml +1 -1
  245. flock/themes/shades-of-purple.toml +1 -1
  246. flock/themes/smyck.toml +1 -1
  247. flock/themes/softserver.toml +1 -1
  248. flock/themes/solarized-darcula.toml +1 -1
  249. flock/themes/square.toml +1 -1
  250. flock/themes/sugarplum.toml +1 -1
  251. flock/themes/thayer-bright.toml +1 -1
  252. flock/themes/tokyonight.toml +1 -1
  253. flock/themes/tomorrow.toml +1 -1
  254. flock/themes/ubuntu.toml +1 -1
  255. flock/themes/ultradark.toml +1 -1
  256. flock/themes/ultraviolent.toml +1 -1
  257. flock/themes/unikitty.toml +1 -1
  258. flock/themes/urple.toml +1 -1
  259. flock/themes/vesper.toml +1 -1
  260. flock/themes/vimbones.toml +1 -1
  261. flock/themes/wildcherry.toml +1 -1
  262. flock/themes/wilmersdorf.toml +1 -1
  263. flock/themes/wryan.toml +1 -1
  264. flock/themes/xcodedarkhc.toml +1 -1
  265. flock/themes/xcodelight.toml +1 -1
  266. flock/themes/zenbones-light.toml +1 -1
  267. flock/themes/zenwritten-dark.toml +1 -1
  268. flock/utilities.py +301 -0
  269. flock/{components/utility → utility}/output_utility_component.py +68 -53
  270. flock/visibility.py +107 -0
  271. flock_core-0.5.0b51.dist-info/METADATA +747 -0
  272. flock_core-0.5.0b51.dist-info/RECORD +508 -0
  273. flock_core-0.5.0b51.dist-info/entry_points.txt +2 -0
  274. {flock_core-0.5.0b28.dist-info → flock_core-0.5.0b51.dist-info}/licenses/LICENSE +1 -1
  275. flock/adapter/__init__.py +0 -14
  276. flock/adapter/azure_adapter.py +0 -68
  277. flock/adapter/chroma_adapter.py +0 -73
  278. flock/adapter/faiss_adapter.py +0 -97
  279. flock/adapter/pinecone_adapter.py +0 -51
  280. flock/adapter/vector_base.py +0 -47
  281. flock/cli/assets/release_notes.md +0 -140
  282. flock/cli/config.py +0 -8
  283. flock/cli/constants.py +0 -36
  284. flock/cli/create_agent.py +0 -1
  285. flock/cli/create_flock.py +0 -280
  286. flock/cli/execute_flock.py +0 -620
  287. flock/cli/load_agent.py +0 -1
  288. flock/cli/load_examples.py +0 -1
  289. flock/cli/load_flock.py +0 -192
  290. flock/cli/load_release_notes.py +0 -20
  291. flock/cli/loaded_flock_cli.py +0 -254
  292. flock/cli/manage_agents.py +0 -459
  293. flock/cli/registry_management.py +0 -889
  294. flock/cli/runner.py +0 -41
  295. flock/cli/settings.py +0 -857
  296. flock/cli/utils.py +0 -135
  297. flock/cli/view_results.py +0 -29
  298. flock/cli/yaml_editor.py +0 -396
  299. flock/components/__init__.py +0 -30
  300. flock/components/evaluation/__init__.py +0 -9
  301. flock/components/evaluation/declarative_evaluation_component.py +0 -606
  302. flock/components/routing/__init__.py +0 -15
  303. flock/components/routing/conditional_routing_component.py +0 -494
  304. flock/components/routing/default_routing_component.py +0 -103
  305. flock/components/routing/llm_routing_component.py +0 -206
  306. flock/components/utility/__init__.py +0 -22
  307. flock/components/utility/example_utility_component.py +0 -250
  308. flock/components/utility/feedback_utility_component.py +0 -206
  309. flock/components/utility/memory_utility_component.py +0 -550
  310. flock/components/utility/metrics_utility_component.py +0 -700
  311. flock/config.py +0 -61
  312. flock/core/__init__.py +0 -110
  313. flock/core/agent/__init__.py +0 -16
  314. flock/core/agent/default_agent.py +0 -216
  315. flock/core/agent/flock_agent_components.py +0 -104
  316. flock/core/agent/flock_agent_execution.py +0 -101
  317. flock/core/agent/flock_agent_integration.py +0 -260
  318. flock/core/agent/flock_agent_lifecycle.py +0 -186
  319. flock/core/agent/flock_agent_serialization.py +0 -381
  320. flock/core/api/__init__.py +0 -10
  321. flock/core/api/custom_endpoint.py +0 -45
  322. flock/core/api/endpoints.py +0 -254
  323. flock/core/api/main.py +0 -162
  324. flock/core/api/models.py +0 -97
  325. flock/core/api/run_store.py +0 -224
  326. flock/core/api/runner.py +0 -44
  327. flock/core/api/service.py +0 -214
  328. flock/core/component/__init__.py +0 -15
  329. flock/core/component/agent_component_base.py +0 -309
  330. flock/core/component/evaluation_component.py +0 -62
  331. flock/core/component/routing_component.py +0 -74
  332. flock/core/component/utility_component.py +0 -69
  333. flock/core/config/flock_agent_config.py +0 -58
  334. flock/core/config/scheduled_agent_config.py +0 -40
  335. flock/core/context/context.py +0 -213
  336. flock/core/context/context_manager.py +0 -37
  337. flock/core/context/context_vars.py +0 -10
  338. flock/core/evaluation/utils.py +0 -396
  339. flock/core/execution/batch_executor.py +0 -369
  340. flock/core/execution/evaluation_executor.py +0 -438
  341. flock/core/execution/local_executor.py +0 -31
  342. flock/core/execution/opik_executor.py +0 -103
  343. flock/core/execution/temporal_executor.py +0 -164
  344. flock/core/flock.py +0 -634
  345. flock/core/flock_agent.py +0 -336
  346. flock/core/flock_factory.py +0 -613
  347. flock/core/flock_scheduler.py +0 -166
  348. flock/core/flock_server_manager.py +0 -136
  349. flock/core/interpreter/python_interpreter.py +0 -689
  350. flock/core/mcp/__init__.py +0 -1
  351. flock/core/mcp/flock_mcp_server.py +0 -680
  352. flock/core/mcp/mcp_client_manager.py +0 -201
  353. flock/core/mcp/types/__init__.py +0 -1
  354. flock/core/mixin/dspy_integration.py +0 -403
  355. flock/core/mixin/prompt_parser.py +0 -125
  356. flock/core/orchestration/__init__.py +0 -15
  357. flock/core/orchestration/flock_batch_processor.py +0 -94
  358. flock/core/orchestration/flock_evaluator.py +0 -113
  359. flock/core/orchestration/flock_execution.py +0 -295
  360. flock/core/orchestration/flock_initialization.py +0 -149
  361. flock/core/orchestration/flock_server_manager.py +0 -67
  362. flock/core/orchestration/flock_web_server.py +0 -117
  363. flock/core/registry/__init__.py +0 -45
  364. flock/core/registry/agent_registry.py +0 -69
  365. flock/core/registry/callable_registry.py +0 -139
  366. flock/core/registry/component_discovery.py +0 -142
  367. flock/core/registry/component_registry.py +0 -64
  368. flock/core/registry/config_mapping.py +0 -64
  369. flock/core/registry/decorators.py +0 -137
  370. flock/core/registry/registry_hub.py +0 -205
  371. flock/core/registry/server_registry.py +0 -57
  372. flock/core/registry/type_registry.py +0 -86
  373. flock/core/serialization/__init__.py +0 -13
  374. flock/core/serialization/callable_registry.py +0 -52
  375. flock/core/serialization/flock_serializer.py +0 -832
  376. flock/core/serialization/json_encoder.py +0 -41
  377. flock/core/serialization/secure_serializer.py +0 -175
  378. flock/core/serialization/serializable.py +0 -342
  379. flock/core/serialization/serialization_utils.py +0 -412
  380. flock/core/util/file_path_utils.py +0 -223
  381. flock/core/util/hydrator.py +0 -309
  382. flock/core/util/input_resolver.py +0 -164
  383. flock/core/util/loader.py +0 -59
  384. flock/core/util/splitter.py +0 -219
  385. flock/di.py +0 -27
  386. flock/platform/docker_tools.py +0 -49
  387. flock/platform/jaeger_install.py +0 -86
  388. flock/webapp/__init__.py +0 -1
  389. flock/webapp/app/__init__.py +0 -0
  390. flock/webapp/app/api/__init__.py +0 -0
  391. flock/webapp/app/api/agent_management.py +0 -241
  392. flock/webapp/app/api/execution.py +0 -709
  393. flock/webapp/app/api/flock_management.py +0 -129
  394. flock/webapp/app/api/registry_viewer.py +0 -30
  395. flock/webapp/app/chat.py +0 -665
  396. flock/webapp/app/config.py +0 -104
  397. flock/webapp/app/dependencies.py +0 -117
  398. flock/webapp/app/main.py +0 -1070
  399. flock/webapp/app/middleware.py +0 -113
  400. flock/webapp/app/models_ui.py +0 -7
  401. flock/webapp/app/services/__init__.py +0 -0
  402. flock/webapp/app/services/feedback_file_service.py +0 -363
  403. flock/webapp/app/services/flock_service.py +0 -337
  404. flock/webapp/app/services/sharing_models.py +0 -81
  405. flock/webapp/app/services/sharing_store.py +0 -762
  406. flock/webapp/app/templates/theme_mapper.html +0 -326
  407. flock/webapp/app/theme_mapper.py +0 -812
  408. flock/webapp/app/utils.py +0 -85
  409. flock/webapp/run.py +0 -215
  410. flock/webapp/static/css/chat.css +0 -301
  411. flock/webapp/static/css/components.css +0 -167
  412. flock/webapp/static/css/header.css +0 -39
  413. flock/webapp/static/css/layout.css +0 -46
  414. flock/webapp/static/css/sidebar.css +0 -127
  415. flock/webapp/static/css/two-pane.css +0 -48
  416. flock/webapp/templates/base.html +0 -200
  417. flock/webapp/templates/chat.html +0 -152
  418. flock/webapp/templates/chat_settings.html +0 -19
  419. flock/webapp/templates/flock_editor.html +0 -16
  420. flock/webapp/templates/index.html +0 -12
  421. flock/webapp/templates/partials/_agent_detail_form.html +0 -93
  422. flock/webapp/templates/partials/_agent_list.html +0 -18
  423. flock/webapp/templates/partials/_agent_manager_view.html +0 -51
  424. flock/webapp/templates/partials/_agent_tools_checklist.html +0 -14
  425. flock/webapp/templates/partials/_chat_container.html +0 -15
  426. flock/webapp/templates/partials/_chat_messages.html +0 -57
  427. flock/webapp/templates/partials/_chat_settings_form.html +0 -85
  428. flock/webapp/templates/partials/_create_flock_form.html +0 -50
  429. flock/webapp/templates/partials/_dashboard_flock_detail.html +0 -17
  430. flock/webapp/templates/partials/_dashboard_flock_file_list.html +0 -16
  431. flock/webapp/templates/partials/_dashboard_flock_properties_preview.html +0 -28
  432. flock/webapp/templates/partials/_dashboard_upload_flock_form.html +0 -16
  433. flock/webapp/templates/partials/_dynamic_input_form_content.html +0 -22
  434. flock/webapp/templates/partials/_env_vars_table.html +0 -23
  435. flock/webapp/templates/partials/_execution_form.html +0 -118
  436. flock/webapp/templates/partials/_execution_view_container.html +0 -28
  437. flock/webapp/templates/partials/_flock_file_list.html +0 -23
  438. flock/webapp/templates/partials/_flock_properties_form.html +0 -52
  439. flock/webapp/templates/partials/_flock_upload_form.html +0 -16
  440. flock/webapp/templates/partials/_header_flock_status.html +0 -5
  441. flock/webapp/templates/partials/_load_manager_view.html +0 -49
  442. flock/webapp/templates/partials/_registry_table.html +0 -25
  443. flock/webapp/templates/partials/_registry_viewer_content.html +0 -70
  444. flock/webapp/templates/partials/_results_display.html +0 -78
  445. flock/webapp/templates/partials/_settings_env_content.html +0 -9
  446. flock/webapp/templates/partials/_settings_theme_content.html +0 -14
  447. flock/webapp/templates/partials/_settings_view.html +0 -36
  448. flock/webapp/templates/partials/_share_chat_link_snippet.html +0 -11
  449. flock/webapp/templates/partials/_share_link_snippet.html +0 -35
  450. flock/webapp/templates/partials/_sidebar.html +0 -74
  451. flock/webapp/templates/partials/_streaming_results_container.html +0 -195
  452. flock/webapp/templates/partials/_structured_data_view.html +0 -40
  453. flock/webapp/templates/partials/_theme_preview.html +0 -36
  454. flock/webapp/templates/registry_viewer.html +0 -84
  455. flock/webapp/templates/shared_run_page.html +0 -140
  456. flock/workflow/__init__.py +0 -0
  457. flock/workflow/activities.py +0 -196
  458. flock/workflow/agent_activities.py +0 -24
  459. flock/workflow/agent_execution_activity.py +0 -202
  460. flock/workflow/flock_workflow.py +0 -214
  461. flock/workflow/temporal_config.py +0 -96
  462. flock/workflow/temporal_setup.py +0 -68
  463. flock_core-0.5.0b28.dist-info/METADATA +0 -274
  464. flock_core-0.5.0b28.dist-info/RECORD +0 -561
  465. flock_core-0.5.0b28.dist-info/entry_points.txt +0 -2
  466. /flock/{core/logging → logging}/formatters/themes.py +0 -0
  467. /flock/{core/logging → logging}/span_middleware/baggage_span_processor.py +0 -0
  468. /flock/{core/mcp → mcp}/util/__init__.py +0 -0
  469. {flock_core-0.5.0b28.dist-info → flock_core-0.5.0b51.dist-info}/WHEEL +0 -0
@@ -1,213 +0,0 @@
1
- import uuid
2
- from dataclasses import asdict
3
- from datetime import datetime
4
- from typing import Any, Literal
5
-
6
- from opentelemetry import trace
7
- from pydantic import BaseModel, Field
8
-
9
- from flock.core.context.context_vars import FLOCK_LAST_AGENT, FLOCK_LAST_RESULT
10
- from flock.core.logging.logging import get_logger
11
- from flock.core.serialization.serializable import Serializable
12
-
13
- logger = get_logger("context")
14
- tracer = trace.get_tracer(__name__)
15
-
16
-
17
- class AgentRunRecord(BaseModel):
18
- id: str = Field(default="")
19
- agent: str = Field(default="")
20
- data: dict[str, Any] = Field(default_factory=dict)
21
- timestamp: str = Field(default="")
22
- hand_off: dict | None = Field(default_factory=dict)
23
- called_from: str | None = Field(default=None)
24
-
25
-
26
- class AgentDefinition(BaseModel):
27
- agent_type: str = Field(default="")
28
- agent_name: str = Field(default="")
29
- agent_data: dict = Field(default_factory=dict)
30
- serializer: Literal["json", "cloudpickle", "msgpack"] = Field(
31
- default="cloudpickle"
32
- )
33
-
34
-
35
- class FlockContext(Serializable, BaseModel):
36
- state: dict[str, Any] = Field(default_factory=dict)
37
- history: list[AgentRunRecord] = Field(default_factory=list)
38
- agent_definitions: dict[str, AgentDefinition] = Field(default_factory=dict)
39
- run_id: str = Field(default="")
40
- workflow_id: str = Field(default="")
41
- workflow_timestamp: str = Field(default="")
42
-
43
- def record(
44
- self,
45
- agent_name: str,
46
- data: dict[str, Any],
47
- timestamp: str,
48
- hand_off: str,
49
- called_from: str,
50
- ) -> None:
51
- record = AgentRunRecord(
52
- id=agent_name + "_" + uuid.uuid4().hex[:4],
53
- agent=agent_name,
54
- data=data.copy(),
55
- timestamp=timestamp,
56
- hand_off=hand_off,
57
- called_from=called_from,
58
- )
59
- self.history.append(record)
60
- for key, value in data.items():
61
- self.set_variable(f"{agent_name}.{key}", value)
62
- self.set_variable(FLOCK_LAST_RESULT, data)
63
- self.set_variable(FLOCK_LAST_AGENT, agent_name)
64
- logger.info(
65
- f"Agent run recorded - run_id '{record.id}'",
66
- agent=agent_name,
67
- timestamp=timestamp,
68
- data=data,
69
- )
70
- current_span = trace.get_current_span()
71
- if current_span.get_span_context().is_valid:
72
- current_span.add_event(
73
- "record",
74
- attributes={"agent": agent_name, "timestamp": timestamp},
75
- )
76
-
77
- def get_variable(self, key: str, default: Any = None) -> Any:
78
- return self.state.get(key, default)
79
-
80
- def set_variable(self, key: str, value: Any) -> None:
81
- old_value = self.state.get(key)
82
- self.state[key] = value
83
- if old_value != value:
84
- escaped_value = str(value).replace("{", "{{").replace("}", "}}")
85
-
86
- logger.info(
87
- "Context variable updated - {} -> {}",
88
- key,
89
- escaped_value, # Arguments in order
90
- )
91
-
92
- current_span = trace.get_current_span()
93
- if current_span.get_span_context().is_valid:
94
- current_span.add_event(
95
- "set_variable",
96
- attributes={
97
- "key": key,
98
- "old": str(old_value),
99
- "new": str(value),
100
- },
101
- )
102
-
103
- def deepcopy(self) -> "FlockContext":
104
- return FlockContext.from_dict(self.to_dict())
105
-
106
- def get_agent_history(self, agent_name: str) -> list[AgentRunRecord]:
107
- return [record for record in self.history if record.agent == agent_name]
108
-
109
- def next_input_for(self, agent) -> Any:
110
- try:
111
- if hasattr(agent, "input") and isinstance(agent.input, str):
112
- keys = [k.strip() for k in agent.input.split(",") if k.strip()]
113
- if len(keys) == 1:
114
- return self.get_variable(keys[0])
115
- else:
116
- return {key: self.get_variable(key) for key in keys}
117
- else:
118
- return self.get_variable("init_input")
119
- except Exception as e:
120
- logger.error(
121
- "Error getting next input for agent",
122
- agent=agent.name,
123
- error=str(e),
124
- )
125
- raise
126
-
127
- def get_most_recent_value(self, variable_name: str) -> Any:
128
- for history_record in reversed(self.history):
129
- if variable_name in history_record.data:
130
- return history_record.data[variable_name]
131
-
132
- def get_agent_definition(self, agent_name: str) -> AgentDefinition | None:
133
- return self.agent_definitions.get(agent_name)
134
-
135
- def get_last_agent_name(self) -> str | None:
136
- """Returns the name of the agent from the most recent history record."""
137
- if not self.history:
138
- return None
139
- last_record = self.history[-1]
140
- # The 'called_from' field in the *next* record is the previous agent.
141
- # However, to get the name of the *last executed agent*, we look at the 'agent' field.
142
- return last_record.agent
143
-
144
- def add_agent_definition(
145
- self, agent_type: type, agent_name: str, agent_data: Any
146
- ) -> None:
147
- definition = AgentDefinition(
148
- agent_type=agent_type.__name__,
149
- agent_name=agent_name,
150
- agent_data=agent_data,
151
- )
152
- self.agent_definitions[agent_name] = definition
153
-
154
- # Use the reactive setter for dict-like access.
155
- def __getitem__(self, key: str) -> Any:
156
- return self.get_variable(key)
157
-
158
- def __setitem__(self, key: str, value: Any) -> None:
159
- self.set_variable(key, value)
160
-
161
- def to_dict(self) -> dict[str, Any]:
162
- def convert(obj):
163
- if isinstance(obj, datetime):
164
- return obj.isoformat()
165
- if hasattr(obj, "__dataclass_fields__"):
166
- return asdict(
167
- obj, dict_factory=lambda x: {k: convert(v) for k, v in x}
168
- )
169
- return obj
170
-
171
- return convert(asdict(self))
172
-
173
- @classmethod
174
- def from_dict(cls, data: dict[str, Any]) -> "FlockContext":
175
- def convert(obj):
176
- if isinstance(obj, dict):
177
- if "timestamp" in obj:
178
- return AgentRunRecord(
179
- **{
180
- **obj,
181
- "timestamp": obj["timestamp"]
182
- ,
183
- }
184
- )
185
- if "agent_type" in obj:
186
- return AgentDefinition(**obj)
187
- return {k: convert(v) for k, v in obj.items()}
188
- if isinstance(obj, list):
189
- return [convert(v) for v in obj]
190
- return obj
191
-
192
- converted = convert(data)
193
- return cls(**converted)
194
-
195
- def resolve(self, svc_type):
196
- """Resolve a service from the request-scoped DI container if present.
197
-
198
- The bootstrap code is expected to store the active `ServiceProvider` from
199
- `wd.di` in the context variable key ``di.container``. This helper
200
- provides a convenient façade so that Flock components can simply call
201
- ``context.resolve(SomeType)`` regardless of whether a container is
202
- available. When the container is missing or the service cannot be
203
- resolved, ``None`` is returned instead of raising to keep backward
204
- compatibility.
205
- """
206
- container = self.get_variable("di.container")
207
- if container is None:
208
- return None
209
- try:
210
- return container.get_service(svc_type)
211
- except Exception:
212
- # Service not registered or other resolution error – fall back to None
213
- return None
@@ -1,37 +0,0 @@
1
- """Module for managing the FlockContext."""
2
-
3
- from flock.core.context.context import FlockContext
4
- from flock.core.context.context_vars import (
5
- FLOCK_CURRENT_AGENT,
6
- FLOCK_INITIAL_INPUT,
7
- FLOCK_LOCAL_DEBUG,
8
- FLOCK_MODEL,
9
- FLOCK_RUN_ID,
10
- )
11
-
12
-
13
def initialize_context(
    context: FlockContext,
    agent_name: str,
    input_data: dict,
    run_id: str,
    local_debug: bool,
    model: str,
) -> None:
    """Seed a FlockContext with the standard variables before an agent runs.

    Args:
        context: The FlockContext instance to populate.
        agent_name: Name of the agent about to run.
        input_data: Inputs for the agent. Each entry is stored on its own
            under a ``"flock."``-prefixed key, and the whole dict is also
            stored under ``FLOCK_INITIAL_INPUT``.
        run_id: Unique identifier for the run; written both to
            ``context.run_id`` and to the ``FLOCK_RUN_ID`` variable.
        local_debug: Whether local debugging is enabled.
        model: Model identifier stored under ``FLOCK_MODEL``.
    """
    store = context.set_variable

    store(FLOCK_CURRENT_AGENT, agent_name)
    for input_name, input_value in input_data.items():
        store("flock." + input_name, input_value)
    store(FLOCK_INITIAL_INPUT, input_data)
    store(FLOCK_LOCAL_DEBUG, local_debug)
    context.run_id = run_id
    store(FLOCK_RUN_ID, run_id)
    store(FLOCK_MODEL, model)
@@ -1,10 +0,0 @@
1
"""Context variables for Flock."""

# String keys used to store well-known values in a context.
# All of them share the "flock." prefix.
FLOCK_CURRENT_AGENT = "flock.current_agent"
FLOCK_INITIAL_INPUT = "flock.initial_input"
FLOCK_LOCAL_DEBUG = "flock.local_debug"
FLOCK_RUN_ID = "flock.run_id"
# NOTE(review): presumably written after each agent finishes - confirm
# against the code that sets these two keys.
FLOCK_LAST_AGENT = "flock.last_agent"
FLOCK_LAST_RESULT = "flock.last_result"
FLOCK_MODEL = "flock.model"
# NOTE(review): looks like a flag that suppresses output during batch
# runs - confirm against its readers.
FLOCK_BATCH_SILENT_MODE = "flock.batch_silent"
@@ -1,396 +0,0 @@
1
- # src/flock/core/util/evaluation_helpers.py
2
- import inspect
3
- import sys
4
- from collections.abc import Callable
5
- from pathlib import Path
6
- from typing import Any, Union
7
-
8
- import pandas as pd
9
- from box import Box
10
- from datasets import (
11
- Dataset as HFDataset,
12
- get_dataset_config_names,
13
- load_dataset,
14
- )
15
- from opik import Opik
16
- from opik.evaluation import evaluate
17
-
18
- from flock.core.flock import Flock
19
- from flock.core.flock_agent import FlockAgent
20
-
21
- # Legacy FlockEvaluator import removed
22
- from flock.core.logging.logging import get_logger
23
-
24
- # Potentially import metrics libraries like rouge_score, nltk, sentence_transformers
25
-
26
- logger_helpers = get_logger("util.evaluation")
27
-
28
-
29
def evaluate_with_opik(
    dataset: str | Path | list[dict[str, Any]] | pd.DataFrame | HFDataset,
    dataset_name: str,
    experiment_name: str,
    start_agent: FlockAgent | str,
    input_mapping: dict[str, str],
    answer_mapping: dict[str, str],
    metrics: list[
        Union[
            str,
            Callable[[Any, Any], bool | float | dict[str, Any]],
            "FlockAgent",
            "FlockEvaluator",
        ]
    ],
    model: str = "azure/gpt-4.1",
):
    """Run an Opik experiment that scores a Flock agent against a dataset.

    The dataset is normalised to a DataFrame, uploaded to the Opik dataset
    called ``dataset_name`` (created if missing), and each item is then
    run through ``start_agent`` on a single shared ``Flock`` instance.

    Args:
        dataset: Anything accepted by ``normalize_dataset``.
        dataset_name: Name of the Opik dataset to get or create.
        experiment_name: Name passed to Opik's ``evaluate``.
        start_agent: Agent added to the shared Flock and run for each item.
        input_mapping: Maps dataset column -> agent input name.
        answer_mapping: Maps dataset reference column -> agent output
            field. Only the first entry is used.
        metrics: Scoring metrics forwarded to Opik as ``scoring_metrics``.
        model: Model identifier for the shared Flock instance. Defaults to
            the previously hard-coded ``"azure/gpt-4.1"``.

    Returns:
        Whatever ``opik.evaluation.evaluate`` returns for the experiment.
    """
    # "FlockEvaluator" is a string forward reference in the signature: its
    # import was removed from this module, and a bare name would raise
    # NameError as soon as the function is defined.
    df = normalize_dataset(dataset)
    client = Opik()
    opik_dataset = client.get_or_create_dataset(name=dataset_name)

    opik_dataset.insert_from_pandas(dataframe=df, ignore_keys=["source"])

    # One Flock instance is shared by every task invocation instead of
    # building a new one per dataset item.
    shared_flock = Flock(name="opik_eval", model=model, show_flock_banner=False)
    shared_flock.add_agent(start_agent)

    def evaluation_task(dataset_item):
        agent_input = {
            value: dataset_item[key] for key, value in input_mapping.items()
        }

        result_flock = shared_flock.run(
            agent=start_agent, input=agent_input, box_result=False
        )

        # Only the first answer mapping is evaluated.
        key = next(iter(answer_mapping.keys()))
        reference = dataset_item[key]
        answer = result_flock.get(answer_mapping[key], "No answer found")

        return {
            "input": agent_input,
            "output": answer,
            "reference": reference,
        }

    eval_results = evaluate(
        experiment_name=experiment_name,
        dataset=opik_dataset,
        task=evaluation_task,
        scoring_metrics=metrics,
    )
    # Previously the result was computed and then silently dropped.
    return eval_results
85
-
86
-
87
def load_and_merge_all_configs(dataset_name: str) -> pd.DataFrame:
    """Load every config and split of a dataset into one DataFrame.

    Each split is converted to pandas and tagged with two extra columns,
    ``config`` and ``split``, before all frames are concatenated with a
    fresh index. The head of the merged frame is logged.
    """
    frames = []

    for config_name in get_dataset_config_names(dataset_name):
        splits = load_dataset(dataset_name, config_name)
        for split_label, split_data in splits.items():
            frame = split_data.to_pandas()
            frame["config"] = config_name
            frame["split"] = split_label
            frames.append(frame)

    combined = pd.concat(frames, ignore_index=True)
    logger_helpers.info(f"merged_df.head(): {combined.head()}")
    return combined
102
-
103
-
104
def import_hf_dataset_to_opik(dataset_name: str) -> pd.DataFrame:
    """Merge all configs/splits of a dataset and upload them to Opik.

    The merged DataFrame is profiled in the log (type, shape, per-split
    counts, config names), inserted into the Opik dataset of the same
    name (created if missing, ignoring the ``source`` key), and returned.
    """
    merged = load_and_merge_all_configs(dataset_name)

    logger_helpers.info(f"type(df): {type(merged)}")
    logger_helpers.info(f"df.shape: {merged.shape}")
    logger_helpers.info(
        f"df['split'].value_counts(): {merged['split'].value_counts()}"
    )
    logger_helpers.info(f"df['config'].unique(): {merged['config'].unique()}")

    opik_client = Opik()
    target = opik_client.get_or_create_dataset(name=dataset_name)
    target.insert_from_pandas(dataframe=merged, ignore_keys=["source"])
    return merged
119
-
120
-
121
- def normalize_dataset(dataset: Any) -> pd.DataFrame:
122
- """Converts various dataset formats into a pandas DataFrame."""
123
- if isinstance(dataset, pd.DataFrame):
124
- return dataset.copy()
125
- elif isinstance(dataset, str | Path):
126
- path = Path(dataset)
127
- if not path.exists():
128
- try:
129
- return load_and_merge_all_configs(dataset)
130
- except Exception as e:
131
- raise FileNotFoundError(
132
- f"Dataset file not found: {path}"
133
- ) from e
134
- if path.suffix.lower() == ".csv":
135
- return pd.read_csv(path)
136
- # Add support for json, jsonl etc. if needed
137
- else:
138
- raise ValueError(
139
- f"Unsupported file type for dataset: {path.suffix}"
140
- )
141
- elif isinstance(dataset, list):
142
- if not dataset or not isinstance(dataset[0], dict):
143
- raise ValueError("Dataset list must contain dictionaries.")
144
- return pd.DataFrame(dataset)
145
- elif "datasets" in sys.modules and isinstance(
146
- dataset, sys.modules["datasets"].Dataset
147
- ):
148
- # Requires 'datasets' library to be installed
149
- return dataset.to_pandas()
150
- else:
151
- raise TypeError(f"Unsupported dataset type: {type(dataset)}")
152
-
153
-
154
def extract_value_by_dot_notation(data: dict | Box, key: str) -> Any:
    """Look up a nested value using a dotted key path such as ``"a.b.c"``.

    Returns ``None`` when ``key`` is empty, when a segment is missing (or
    maps to ``None``), or when an intermediate value is neither a dict nor
    a Box and so cannot be traversed further.
    """
    if not key:
        return None

    current = data
    try:
        for segment in key.split("."):
            # List index handling (e.g. 'results[0].field') is not supported.
            if not isinstance(current, (dict, Box)):
                return None
            current = current.get(segment)
            if current is None:
                return None
    except (KeyError, IndexError, AttributeError):
        return None
    return current
174
-
175
-
176
def calculate_evaluation_metrics(
    metrics: list[Union[str, Callable, "FlockAgent", "FlockEvaluator"]],
    metric_configs: dict[str, dict[str, Any]],
    predicted_answers: dict[str, Any],
    expected_answers: dict[str, Any],
    agent_inputs: dict[str, Any],  # For context
    agent_output: Any,  # For context
) -> dict[str, Any]:
    """Compute every requested metric for one evaluation item.

    Metric kinds:
        * ``str``: a built-in metric. It is applied to the predicted and
          expected values stored under that same name; if either is
          missing a warning is logged and ``None`` is recorded.
        * callable: called with the full predicted/expected dicts plus its
          config; ``agent_inputs`` / ``agent_output`` are passed too when
          the signature names them.
        * objects whose type name contains ``FlockAgent`` or
          ``FlockEvaluator``: not implemented, a marker string is recorded.
        * anything else: warned about and skipped.

    Dict results are flattened into ``"<metric>_<sub_key>"`` entries. Any
    exception is logged and recorded as ``"[Error: ...]"`` for the metric.
    """
    scores = {}
    for metric in metrics:
        label = ""
        outcome = None
        try:
            if isinstance(metric, str):
                label = metric
                has_pair = (
                    label in predicted_answers and label in expected_answers
                )
                if not has_pair:
                    logger_helpers.warning(
                        f"Could not find matching predicted/expected values for metric '{label}' based on answer_mapping keys."
                    )
                    outcome = None
                else:
                    guess = predicted_answers[label]
                    truth = expected_answers[label]
                    scorer = _get_metric_function(label)
                    options = metric_configs.get(label, {})
                    outcome = scorer(guess, truth, **options)

            elif isinstance(metric, Callable):
                label = getattr(metric, "__name__", "custom_function")
                options = metric_configs.get(label, {})
                # Hand over run context only if the function asks for it.
                accepted = inspect.signature(metric).parameters
                extra = options.copy()
                for arg_name, arg_value in (
                    ("agent_inputs", agent_inputs),
                    ("agent_output", agent_output),
                ):
                    if arg_name in accepted:
                        extra[arg_name] = arg_value
                outcome = metric(predicted_answers, expected_answers, **extra)

            else:
                # Matched by type name so the classes need not be imported.
                kind = str(type(metric))
                if "FlockAgent" in kind:
                    label = getattr(metric, "name", "judge_agent")
                    # Looked up for parity with the future judge call.
                    metric_configs.get(label, {})
                    logger_helpers.warning(
                        f"Agent-based metric '{label}' execution not implemented in this sketch."
                    )
                    outcome = "[Agent Judge Not Implemented]"
                elif "FlockEvaluator" in kind:
                    label = getattr(metric, "name", "judge_evaluator")
                    metric_configs.get(label, {})
                    logger_helpers.warning(
                        f"Evaluator-based metric '{label}' execution not implemented in this sketch."
                    )
                    outcome = "[Evaluator Judge Not Implemented]"
                else:
                    logger_helpers.warning(
                        f"Unsupported metric type: {type(metric)}"
                    )
                    continue

            # Dict-valued metrics are flattened into one entry per sub-key.
            if isinstance(outcome, dict):
                for part, part_value in outcome.items():
                    scores[f"{label}_{part}"] = part_value
            else:
                scores[label] = outcome

        except Exception as e:
            logger_helpers.error(f"Error calculating metric '{label}': {e}")
            scores[label] = f"[Error: {e}]"

    return scores
272
-
273
-
274
def _rouge_type_for(metric_name: str) -> str:
    """Translate a metric name like ``rouge_l`` into a rouge-score type key.

    Underscores are dropped and the ``L`` of the longest-common-subsequence
    variants is upper-cased, giving ``rouge1``, ``rouge2``, ``rougeL`` or
    ``rougeLsum``. Other suffixes are passed through unchanged.
    """
    suffix = metric_name[len("rouge"):].replace("_", "")
    if suffix.lower() in ("l", "lsum"):
        suffix = "L" + suffix[1:].lower()
    return "rouge" + suffix


def _get_metric_function(metric_name: str) -> Callable:
    """Map a built-in metric name to its implementation.

    Optional third-party libraries are imported lazily. If one is missing,
    a warning is logged and a stub that always returns ``None`` is handed
    back instead.

    Args:
        metric_name: ``exact_match``, ``fuzzy_match``, any name starting
            with ``rouge`` (e.g. ``rouge_1``, ``rouge_2``, ``rouge_l``),
            ``semantic_similarity`` or ``llm_judge``.

    Returns:
        A callable ``(pred, act, **kw)`` producing the metric value.

    Raises:
        ValueError: ``metric_name`` is not a known built-in metric.
    """
    if metric_name == "exact_match":
        return lambda pred, act, **kw: str(pred).strip() == str(act).strip()
    elif metric_name == "fuzzy_match":
        try:
            from thefuzz import fuzz

            return (
                lambda pred, act, threshold=85, **kw: fuzz.ratio(
                    str(pred), str(act)
                )
                >= threshold
            )
        except ImportError:
            logger_helpers.warning(
                "fuzzy_match requires 'thefuzz': pip install thefuzz[speedup]"
            )
            return lambda p, a, **kw: None
    elif metric_name.startswith("rouge"):  # rouge_1, rouge_2, rouge_l
        try:
            from rouge_score import rouge_scorer

            # rouge-score type keys are case-sensitive ("rougeL", not
            # "rougel"), so stripping underscores alone is not enough.
            # Computed once here rather than on every scoring call.
            rouge_type = _rouge_type_for(metric_name)
            scorer = rouge_scorer.RougeScorer([rouge_type], use_stemmer=True)

            def calculate_rouge(pred, act, score_type="fmeasure", **kw):
                scores = scorer.score(str(act), str(pred))
                return scores[rouge_type]._asdict().get(score_type, 0.0)

            return calculate_rouge
        except ImportError:
            logger_helpers.warning(
                "rouge requires 'rouge-score': pip install rouge-score"
            )
            return lambda p, a, **kw: None
    elif metric_name == "semantic_similarity":
        try:
            from sentence_transformers import SentenceTransformer, util

            # NOTE(review): the model is constructed on every lookup of
            # this metric; consider caching it or passing it via config.
            model = SentenceTransformer("all-MiniLM-L6-v2")

            def calculate_similarity(pred, act, **kw):
                emb1 = model.encode(str(pred), convert_to_tensor=True)
                emb2 = model.encode(str(act), convert_to_tensor=True)
                return util.pytorch_cos_sim(emb1, emb2).item()

            return calculate_similarity
        except ImportError:
            logger_helpers.warning(
                "semantic_similarity requires 'sentence-transformers': pip install sentence-transformers"
            )
            return lambda p, a, **kw: None
    # Add bleu, f1 etc.
    elif metric_name == "llm_judge":
        # Judge metrics are dispatched by type in
        # calculate_evaluation_metrics; this placeholder only exists for
        # callers that map by string first.
        return lambda p, a, **kw: "[LLM Judge Not Implemented Directly]"
    else:
        raise ValueError(f"Unknown built-in metric: {metric_name}")
341
-
342
-
343
def aggregate_results(results_list: list[dict[str, Any]]) -> dict[str, Any]:
    """Summarise per-item evaluation results.

    Counts items with a truthy ``"error"`` entry and gathers every numeric
    or boolean value found under each item's ``"metrics"`` mapping. A
    metric whose collected values are all booleans is reported as
    ``{"accuracy": ...}``; any other metric as ``{"mean": ..., "count": ...}``.

    Returns:
        ``{"total_items": int, "errors": int, "metrics_summary": dict}``.
    """
    total = len(results_list)
    failures = 0
    collected: dict[str, list[float | bool]] = {}

    for entry in results_list:
        if entry.get("error"):
            failures += 1
        for metric_name, metric_value in entry.get("metrics", {}).items():
            # Non-numeric values (strings, None, dicts) are not aggregated.
            if isinstance(metric_value, (float, int, bool)):
                collected.setdefault(metric_name, []).append(metric_value)

    overview = {}
    for metric_name, samples in collected.items():
        if all(isinstance(sample, bool) for sample in samples):
            overview[metric_name] = {"accuracy": sum(samples) / len(samples)}
        else:
            # Everything collected above is int/float (bool included), so
            # the remaining case is always numeric.
            overview[metric_name] = {
                "mean": sum(samples) / len(samples),
                "count": len(samples),
            }

    return {
        "total_items": total,
        "errors": failures,
        "metrics_summary": overview,
    }
379
-
380
-
381
- # --- Placeholder for async judge execution ---
382
- # Need to run these within the main async context or manage loops carefully
383
async def _run_judge_agent(judge_agent, predicted, expected, config):
    """Placeholder for running an agent as a judge metric.

    Not implemented: every argument is ignored and a fixed marker string
    is returned.
    """
    # Intended shape (not yet wired up):
    #   judge_input = {"prediction": ..., "reference": ..., "criteria": ...}
    #   judge_result = await judge_agent.run_async(judge_input)
    #   return judge_result  # or extract a specific score/judgement
    return "[Agent Judge Not Implemented]"
389
-
390
-
391
async def _run_judge_evaluator(judge_evaluator, predicted, expected, config):
    """Placeholder for running an evaluator as a judge metric.

    Not implemented: every argument is ignored and a fixed marker string
    is returned.
    """
    # Intended shape (not yet wired up):
    #   judge_input = {"prediction": ..., "reference": ..., **config}
    #   judge_result = await judge_evaluator.evaluate(None, judge_input, [])
    #   return judge_result  # or extract a specific score/judgement
    return "[Evaluator Judge Not Implemented]"