flock-core 0.5.0b28__py3-none-any.whl → 0.5.56b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flock-core might be problematic. Click here for more details.

Files changed (359) hide show
  1. flock/__init__.py +12 -217
  2. flock/agent.py +678 -0
  3. flock/api/themes.py +71 -0
  4. flock/artifacts.py +79 -0
  5. flock/cli.py +75 -0
  6. flock/components.py +173 -0
  7. flock/dashboard/__init__.py +28 -0
  8. flock/dashboard/collector.py +283 -0
  9. flock/dashboard/events.py +182 -0
  10. flock/dashboard/launcher.py +230 -0
  11. flock/dashboard/service.py +537 -0
  12. flock/dashboard/websocket.py +235 -0
  13. flock/engines/__init__.py +6 -0
  14. flock/engines/dspy_engine.py +856 -0
  15. flock/examples.py +128 -0
  16. flock/{core/util → helper}/cli_helper.py +4 -3
  17. flock/{core/logging → logging}/__init__.py +2 -3
  18. flock/{core/logging → logging}/formatters/enum_builder.py +3 -4
  19. flock/{core/logging → logging}/formatters/theme_builder.py +19 -44
  20. flock/{core/logging → logging}/formatters/themed_formatter.py +69 -115
  21. flock/{core/logging → logging}/logging.py +77 -61
  22. flock/{core/logging → logging}/telemetry.py +20 -26
  23. flock/{core/logging → logging}/telemetry_exporter/base_exporter.py +2 -2
  24. flock/{core/logging → logging}/telemetry_exporter/file_exporter.py +6 -9
  25. flock/{core/logging → logging}/telemetry_exporter/sqlite_exporter.py +2 -3
  26. flock/{core/logging → logging}/trace_and_logged.py +20 -24
  27. flock/mcp/__init__.py +91 -0
  28. flock/{core/mcp/mcp_client.py → mcp/client.py} +103 -154
  29. flock/{core/mcp/mcp_config.py → mcp/config.py} +62 -117
  30. flock/mcp/manager.py +255 -0
  31. flock/mcp/servers/sse/__init__.py +1 -1
  32. flock/mcp/servers/sse/flock_sse_server.py +11 -53
  33. flock/mcp/servers/stdio/__init__.py +1 -1
  34. flock/mcp/servers/stdio/flock_stdio_server.py +8 -48
  35. flock/mcp/servers/streamable_http/flock_streamable_http_server.py +17 -62
  36. flock/mcp/servers/websockets/flock_websocket_server.py +7 -40
  37. flock/{core/mcp/flock_mcp_tool.py → mcp/tool.py} +16 -26
  38. flock/mcp/types/__init__.py +42 -0
  39. flock/{core/mcp → mcp}/types/callbacks.py +9 -15
  40. flock/{core/mcp → mcp}/types/factories.py +7 -6
  41. flock/{core/mcp → mcp}/types/handlers.py +13 -18
  42. flock/{core/mcp → mcp}/types/types.py +70 -74
  43. flock/{core/mcp → mcp}/util/helpers.py +1 -1
  44. flock/orchestrator.py +645 -0
  45. flock/registry.py +148 -0
  46. flock/runtime.py +262 -0
  47. flock/service.py +140 -0
  48. flock/store.py +69 -0
  49. flock/subscription.py +111 -0
  50. flock/themes/andromeda.toml +1 -1
  51. flock/themes/apple-system-colors.toml +1 -1
  52. flock/themes/arcoiris.toml +1 -1
  53. flock/themes/atomonelight.toml +1 -1
  54. flock/themes/ayu copy.toml +1 -1
  55. flock/themes/ayu-light.toml +1 -1
  56. flock/themes/belafonte-day.toml +1 -1
  57. flock/themes/belafonte-night.toml +1 -1
  58. flock/themes/blulocodark.toml +1 -1
  59. flock/themes/breeze.toml +1 -1
  60. flock/themes/broadcast.toml +1 -1
  61. flock/themes/brogrammer.toml +1 -1
  62. flock/themes/builtin-dark.toml +1 -1
  63. flock/themes/builtin-pastel-dark.toml +1 -1
  64. flock/themes/catppuccin-latte.toml +1 -1
  65. flock/themes/catppuccin-macchiato.toml +1 -1
  66. flock/themes/catppuccin-mocha.toml +1 -1
  67. flock/themes/cga.toml +1 -1
  68. flock/themes/chalk.toml +1 -1
  69. flock/themes/ciapre.toml +1 -1
  70. flock/themes/coffee-theme.toml +1 -1
  71. flock/themes/cyberpunkscarletprotocol.toml +1 -1
  72. flock/themes/dark+.toml +1 -1
  73. flock/themes/darkermatrix.toml +1 -1
  74. flock/themes/darkside.toml +1 -1
  75. flock/themes/desert.toml +1 -1
  76. flock/themes/django.toml +1 -1
  77. flock/themes/djangosmooth.toml +1 -1
  78. flock/themes/doomone.toml +1 -1
  79. flock/themes/dotgov.toml +1 -1
  80. flock/themes/dracula+.toml +1 -1
  81. flock/themes/duckbones.toml +1 -1
  82. flock/themes/encom.toml +1 -1
  83. flock/themes/espresso.toml +1 -1
  84. flock/themes/everblush.toml +1 -1
  85. flock/themes/fairyfloss.toml +1 -1
  86. flock/themes/fideloper.toml +1 -1
  87. flock/themes/fishtank.toml +1 -1
  88. flock/themes/flexoki-light.toml +1 -1
  89. flock/themes/floraverse.toml +1 -1
  90. flock/themes/framer.toml +1 -1
  91. flock/themes/galizur.toml +1 -1
  92. flock/themes/github.toml +1 -1
  93. flock/themes/grass.toml +1 -1
  94. flock/themes/grey-green.toml +1 -1
  95. flock/themes/gruvboxlight.toml +1 -1
  96. flock/themes/guezwhoz.toml +1 -1
  97. flock/themes/harper.toml +1 -1
  98. flock/themes/hax0r-blue.toml +1 -1
  99. flock/themes/hopscotch.256.toml +1 -1
  100. flock/themes/ic-green-ppl.toml +1 -1
  101. flock/themes/iceberg-dark.toml +1 -1
  102. flock/themes/japanesque.toml +1 -1
  103. flock/themes/jubi.toml +1 -1
  104. flock/themes/kibble.toml +1 -1
  105. flock/themes/kolorit.toml +1 -1
  106. flock/themes/kurokula.toml +1 -1
  107. flock/themes/materialdesigncolors.toml +1 -1
  108. flock/themes/matrix.toml +1 -1
  109. flock/themes/mellifluous.toml +1 -1
  110. flock/themes/midnight-in-mojave.toml +1 -1
  111. flock/themes/monokai-remastered.toml +1 -1
  112. flock/themes/monokai-soda.toml +1 -1
  113. flock/themes/neon.toml +1 -1
  114. flock/themes/neopolitan.toml +1 -1
  115. flock/themes/nord-light.toml +1 -1
  116. flock/themes/ocean.toml +1 -1
  117. flock/themes/onehalfdark.toml +1 -1
  118. flock/themes/onehalflight.toml +1 -1
  119. flock/themes/palenighthc.toml +1 -1
  120. flock/themes/paulmillr.toml +1 -1
  121. flock/themes/pencildark.toml +1 -1
  122. flock/themes/pnevma.toml +1 -1
  123. flock/themes/purple-rain.toml +1 -1
  124. flock/themes/purplepeter.toml +1 -1
  125. flock/themes/raycast-dark.toml +1 -1
  126. flock/themes/red-sands.toml +1 -1
  127. flock/themes/relaxed.toml +1 -1
  128. flock/themes/retro.toml +1 -1
  129. flock/themes/rose-pine.toml +1 -1
  130. flock/themes/royal.toml +1 -1
  131. flock/themes/ryuuko.toml +1 -1
  132. flock/themes/sakura.toml +1 -1
  133. flock/themes/scarlet-protocol.toml +1 -1
  134. flock/themes/seoulbones-dark.toml +1 -1
  135. flock/themes/shades-of-purple.toml +1 -1
  136. flock/themes/smyck.toml +1 -1
  137. flock/themes/softserver.toml +1 -1
  138. flock/themes/solarized-darcula.toml +1 -1
  139. flock/themes/square.toml +1 -1
  140. flock/themes/sugarplum.toml +1 -1
  141. flock/themes/thayer-bright.toml +1 -1
  142. flock/themes/tokyonight.toml +1 -1
  143. flock/themes/tomorrow.toml +1 -1
  144. flock/themes/ubuntu.toml +1 -1
  145. flock/themes/ultradark.toml +1 -1
  146. flock/themes/ultraviolent.toml +1 -1
  147. flock/themes/unikitty.toml +1 -1
  148. flock/themes/urple.toml +1 -1
  149. flock/themes/vesper.toml +1 -1
  150. flock/themes/vimbones.toml +1 -1
  151. flock/themes/wildcherry.toml +1 -1
  152. flock/themes/wilmersdorf.toml +1 -1
  153. flock/themes/wryan.toml +1 -1
  154. flock/themes/xcodedarkhc.toml +1 -1
  155. flock/themes/xcodelight.toml +1 -1
  156. flock/themes/zenbones-light.toml +1 -1
  157. flock/themes/zenwritten-dark.toml +1 -1
  158. flock/utilities.py +301 -0
  159. flock/{components/utility → utility}/output_utility_component.py +68 -53
  160. flock/visibility.py +107 -0
  161. flock_core-0.5.56b0.dist-info/METADATA +747 -0
  162. flock_core-0.5.56b0.dist-info/RECORD +398 -0
  163. flock_core-0.5.56b0.dist-info/entry_points.txt +2 -0
  164. {flock_core-0.5.0b28.dist-info → flock_core-0.5.56b0.dist-info}/licenses/LICENSE +1 -1
  165. flock/adapter/__init__.py +0 -14
  166. flock/adapter/azure_adapter.py +0 -68
  167. flock/adapter/chroma_adapter.py +0 -73
  168. flock/adapter/faiss_adapter.py +0 -97
  169. flock/adapter/pinecone_adapter.py +0 -51
  170. flock/adapter/vector_base.py +0 -47
  171. flock/cli/assets/release_notes.md +0 -140
  172. flock/cli/config.py +0 -8
  173. flock/cli/constants.py +0 -36
  174. flock/cli/create_agent.py +0 -1
  175. flock/cli/create_flock.py +0 -280
  176. flock/cli/execute_flock.py +0 -620
  177. flock/cli/load_agent.py +0 -1
  178. flock/cli/load_examples.py +0 -1
  179. flock/cli/load_flock.py +0 -192
  180. flock/cli/load_release_notes.py +0 -20
  181. flock/cli/loaded_flock_cli.py +0 -254
  182. flock/cli/manage_agents.py +0 -459
  183. flock/cli/registry_management.py +0 -889
  184. flock/cli/runner.py +0 -41
  185. flock/cli/settings.py +0 -857
  186. flock/cli/utils.py +0 -135
  187. flock/cli/view_results.py +0 -29
  188. flock/cli/yaml_editor.py +0 -396
  189. flock/components/__init__.py +0 -30
  190. flock/components/evaluation/__init__.py +0 -9
  191. flock/components/evaluation/declarative_evaluation_component.py +0 -606
  192. flock/components/routing/__init__.py +0 -15
  193. flock/components/routing/conditional_routing_component.py +0 -494
  194. flock/components/routing/default_routing_component.py +0 -103
  195. flock/components/routing/llm_routing_component.py +0 -206
  196. flock/components/utility/__init__.py +0 -22
  197. flock/components/utility/example_utility_component.py +0 -250
  198. flock/components/utility/feedback_utility_component.py +0 -206
  199. flock/components/utility/memory_utility_component.py +0 -550
  200. flock/components/utility/metrics_utility_component.py +0 -700
  201. flock/config.py +0 -61
  202. flock/core/__init__.py +0 -110
  203. flock/core/agent/__init__.py +0 -16
  204. flock/core/agent/default_agent.py +0 -216
  205. flock/core/agent/flock_agent_components.py +0 -104
  206. flock/core/agent/flock_agent_execution.py +0 -101
  207. flock/core/agent/flock_agent_integration.py +0 -260
  208. flock/core/agent/flock_agent_lifecycle.py +0 -186
  209. flock/core/agent/flock_agent_serialization.py +0 -381
  210. flock/core/api/__init__.py +0 -10
  211. flock/core/api/custom_endpoint.py +0 -45
  212. flock/core/api/endpoints.py +0 -254
  213. flock/core/api/main.py +0 -162
  214. flock/core/api/models.py +0 -97
  215. flock/core/api/run_store.py +0 -224
  216. flock/core/api/runner.py +0 -44
  217. flock/core/api/service.py +0 -214
  218. flock/core/component/__init__.py +0 -15
  219. flock/core/component/agent_component_base.py +0 -309
  220. flock/core/component/evaluation_component.py +0 -62
  221. flock/core/component/routing_component.py +0 -74
  222. flock/core/component/utility_component.py +0 -69
  223. flock/core/config/flock_agent_config.py +0 -58
  224. flock/core/config/scheduled_agent_config.py +0 -40
  225. flock/core/context/context.py +0 -213
  226. flock/core/context/context_manager.py +0 -37
  227. flock/core/context/context_vars.py +0 -10
  228. flock/core/evaluation/utils.py +0 -396
  229. flock/core/execution/batch_executor.py +0 -369
  230. flock/core/execution/evaluation_executor.py +0 -438
  231. flock/core/execution/local_executor.py +0 -31
  232. flock/core/execution/opik_executor.py +0 -103
  233. flock/core/execution/temporal_executor.py +0 -164
  234. flock/core/flock.py +0 -634
  235. flock/core/flock_agent.py +0 -336
  236. flock/core/flock_factory.py +0 -613
  237. flock/core/flock_scheduler.py +0 -166
  238. flock/core/flock_server_manager.py +0 -136
  239. flock/core/interpreter/python_interpreter.py +0 -689
  240. flock/core/mcp/__init__.py +0 -1
  241. flock/core/mcp/flock_mcp_server.py +0 -680
  242. flock/core/mcp/mcp_client_manager.py +0 -201
  243. flock/core/mcp/types/__init__.py +0 -1
  244. flock/core/mixin/dspy_integration.py +0 -403
  245. flock/core/mixin/prompt_parser.py +0 -125
  246. flock/core/orchestration/__init__.py +0 -15
  247. flock/core/orchestration/flock_batch_processor.py +0 -94
  248. flock/core/orchestration/flock_evaluator.py +0 -113
  249. flock/core/orchestration/flock_execution.py +0 -295
  250. flock/core/orchestration/flock_initialization.py +0 -149
  251. flock/core/orchestration/flock_server_manager.py +0 -67
  252. flock/core/orchestration/flock_web_server.py +0 -117
  253. flock/core/registry/__init__.py +0 -45
  254. flock/core/registry/agent_registry.py +0 -69
  255. flock/core/registry/callable_registry.py +0 -139
  256. flock/core/registry/component_discovery.py +0 -142
  257. flock/core/registry/component_registry.py +0 -64
  258. flock/core/registry/config_mapping.py +0 -64
  259. flock/core/registry/decorators.py +0 -137
  260. flock/core/registry/registry_hub.py +0 -205
  261. flock/core/registry/server_registry.py +0 -57
  262. flock/core/registry/type_registry.py +0 -86
  263. flock/core/serialization/__init__.py +0 -13
  264. flock/core/serialization/callable_registry.py +0 -52
  265. flock/core/serialization/flock_serializer.py +0 -832
  266. flock/core/serialization/json_encoder.py +0 -41
  267. flock/core/serialization/secure_serializer.py +0 -175
  268. flock/core/serialization/serializable.py +0 -342
  269. flock/core/serialization/serialization_utils.py +0 -412
  270. flock/core/util/file_path_utils.py +0 -223
  271. flock/core/util/hydrator.py +0 -309
  272. flock/core/util/input_resolver.py +0 -164
  273. flock/core/util/loader.py +0 -59
  274. flock/core/util/splitter.py +0 -219
  275. flock/di.py +0 -27
  276. flock/platform/docker_tools.py +0 -49
  277. flock/platform/jaeger_install.py +0 -86
  278. flock/webapp/__init__.py +0 -1
  279. flock/webapp/app/__init__.py +0 -0
  280. flock/webapp/app/api/__init__.py +0 -0
  281. flock/webapp/app/api/agent_management.py +0 -241
  282. flock/webapp/app/api/execution.py +0 -709
  283. flock/webapp/app/api/flock_management.py +0 -129
  284. flock/webapp/app/api/registry_viewer.py +0 -30
  285. flock/webapp/app/chat.py +0 -665
  286. flock/webapp/app/config.py +0 -104
  287. flock/webapp/app/dependencies.py +0 -117
  288. flock/webapp/app/main.py +0 -1070
  289. flock/webapp/app/middleware.py +0 -113
  290. flock/webapp/app/models_ui.py +0 -7
  291. flock/webapp/app/services/__init__.py +0 -0
  292. flock/webapp/app/services/feedback_file_service.py +0 -363
  293. flock/webapp/app/services/flock_service.py +0 -337
  294. flock/webapp/app/services/sharing_models.py +0 -81
  295. flock/webapp/app/services/sharing_store.py +0 -762
  296. flock/webapp/app/templates/theme_mapper.html +0 -326
  297. flock/webapp/app/theme_mapper.py +0 -812
  298. flock/webapp/app/utils.py +0 -85
  299. flock/webapp/run.py +0 -215
  300. flock/webapp/static/css/chat.css +0 -301
  301. flock/webapp/static/css/components.css +0 -167
  302. flock/webapp/static/css/header.css +0 -39
  303. flock/webapp/static/css/layout.css +0 -46
  304. flock/webapp/static/css/sidebar.css +0 -127
  305. flock/webapp/static/css/two-pane.css +0 -48
  306. flock/webapp/templates/base.html +0 -200
  307. flock/webapp/templates/chat.html +0 -152
  308. flock/webapp/templates/chat_settings.html +0 -19
  309. flock/webapp/templates/flock_editor.html +0 -16
  310. flock/webapp/templates/index.html +0 -12
  311. flock/webapp/templates/partials/_agent_detail_form.html +0 -93
  312. flock/webapp/templates/partials/_agent_list.html +0 -18
  313. flock/webapp/templates/partials/_agent_manager_view.html +0 -51
  314. flock/webapp/templates/partials/_agent_tools_checklist.html +0 -14
  315. flock/webapp/templates/partials/_chat_container.html +0 -15
  316. flock/webapp/templates/partials/_chat_messages.html +0 -57
  317. flock/webapp/templates/partials/_chat_settings_form.html +0 -85
  318. flock/webapp/templates/partials/_create_flock_form.html +0 -50
  319. flock/webapp/templates/partials/_dashboard_flock_detail.html +0 -17
  320. flock/webapp/templates/partials/_dashboard_flock_file_list.html +0 -16
  321. flock/webapp/templates/partials/_dashboard_flock_properties_preview.html +0 -28
  322. flock/webapp/templates/partials/_dashboard_upload_flock_form.html +0 -16
  323. flock/webapp/templates/partials/_dynamic_input_form_content.html +0 -22
  324. flock/webapp/templates/partials/_env_vars_table.html +0 -23
  325. flock/webapp/templates/partials/_execution_form.html +0 -118
  326. flock/webapp/templates/partials/_execution_view_container.html +0 -28
  327. flock/webapp/templates/partials/_flock_file_list.html +0 -23
  328. flock/webapp/templates/partials/_flock_properties_form.html +0 -52
  329. flock/webapp/templates/partials/_flock_upload_form.html +0 -16
  330. flock/webapp/templates/partials/_header_flock_status.html +0 -5
  331. flock/webapp/templates/partials/_load_manager_view.html +0 -49
  332. flock/webapp/templates/partials/_registry_table.html +0 -25
  333. flock/webapp/templates/partials/_registry_viewer_content.html +0 -70
  334. flock/webapp/templates/partials/_results_display.html +0 -78
  335. flock/webapp/templates/partials/_settings_env_content.html +0 -9
  336. flock/webapp/templates/partials/_settings_theme_content.html +0 -14
  337. flock/webapp/templates/partials/_settings_view.html +0 -36
  338. flock/webapp/templates/partials/_share_chat_link_snippet.html +0 -11
  339. flock/webapp/templates/partials/_share_link_snippet.html +0 -35
  340. flock/webapp/templates/partials/_sidebar.html +0 -74
  341. flock/webapp/templates/partials/_streaming_results_container.html +0 -195
  342. flock/webapp/templates/partials/_structured_data_view.html +0 -40
  343. flock/webapp/templates/partials/_theme_preview.html +0 -36
  344. flock/webapp/templates/registry_viewer.html +0 -84
  345. flock/webapp/templates/shared_run_page.html +0 -140
  346. flock/workflow/__init__.py +0 -0
  347. flock/workflow/activities.py +0 -196
  348. flock/workflow/agent_activities.py +0 -24
  349. flock/workflow/agent_execution_activity.py +0 -202
  350. flock/workflow/flock_workflow.py +0 -214
  351. flock/workflow/temporal_config.py +0 -96
  352. flock/workflow/temporal_setup.py +0 -68
  353. flock_core-0.5.0b28.dist-info/METADATA +0 -274
  354. flock_core-0.5.0b28.dist-info/RECORD +0 -561
  355. flock_core-0.5.0b28.dist-info/entry_points.txt +0 -2
  356. /flock/{core/logging → logging}/formatters/themes.py +0 -0
  357. /flock/{core/logging → logging}/span_middleware/baggage_span_processor.py +0 -0
  358. /flock/{core/mcp → mcp}/util/__init__.py +0 -0
  359. {flock_core-0.5.0b28.dist-info → flock_core-0.5.56b0.dist-info}/WHEEL +0 -0
@@ -1,438 +0,0 @@
1
- # src/flock/core/execution/evaluation_processor.py
2
- """Contains the EvaluationProcessor class responsible for evaluating Flock agents
3
- against datasets using various metrics.
4
- """
5
-
6
- import asyncio
7
- import json
8
- from collections.abc import Callable
9
- from pathlib import Path
10
- from typing import (
11
- TYPE_CHECKING,
12
- Any,
13
- Literal,
14
- Union,
15
- )
16
-
17
- from pandas import DataFrame
18
-
19
- # Conditional pandas import
20
- try:
21
- import pandas as pd
22
-
23
- PANDAS_AVAILABLE = True
24
- except ImportError:
25
- pd = None # type: ignore
26
- PANDAS_AVAILABLE = False
27
-
28
- # Box for results
29
- from box import Box
30
- from datasets import Dataset as HFDataset
31
-
32
- from flock.core.evaluation.utils import (
33
- aggregate_results,
34
- calculate_evaluation_metrics,
35
- extract_value_by_dot_notation,
36
- normalize_dataset,
37
- # Import metric calculation/aggregation helpers
38
- )
39
-
40
- # Flock core imports
41
- from flock.core.logging.logging import get_logger
42
-
43
- if TYPE_CHECKING:
44
- from flock.core.flock import Flock
45
- from flock.core.flock_agent import FlockAgent
46
- # Legacy FlockEvaluator import removed
47
- # Conditional types
48
-
49
-
50
- logger = get_logger("execution.evaluation")
51
-
52
-
53
- class EvaluationExecutor:
54
- """Handles the evaluation of Flock agents against datasets."""
55
-
56
- def __init__(self, flock_instance: "Flock"):
57
- """Initializes the EvaluationProcessor.
58
-
59
- Args:
60
- flock_instance: The Flock instance this processor will use.
61
- """
62
- self.flock = flock_instance
63
-
64
- async def evaluate_async(
65
- self,
66
- dataset: str | Path | list[dict[str, Any]] | DataFrame | HFDataset,
67
- start_agent: Union["FlockAgent", str],
68
- input_mapping: dict[str, str],
69
- answer_mapping: dict[str, str],
70
- metrics: list[
71
- Union[
72
- str,
73
- Callable[[Any, Any], bool | float | dict[str, Any]],
74
- "FlockAgent",
75
- "FlockEvaluator",
76
- ]
77
- ],
78
- metric_configs: dict[str, dict[str, Any]] | None = None,
79
- static_inputs: dict[str, Any] | None = None,
80
- parallel: bool = True,
81
- max_workers: int = 5,
82
- use_temporal: bool | None = None,
83
- error_handling: Literal["raise", "skip", "log"] = "log",
84
- output_file: str | Path | None = None,
85
- return_dataframe: bool = True,
86
- silent_mode: bool = False,
87
- metadata_columns: list[str] | None = None, # Columns to pass through
88
- # dataset_split: Optional[str] = None # TODO: Add split support in normalize_dataset
89
- ) -> DataFrame | list[dict[str, Any]]:
90
- """Evaluates the Flock's performance against a dataset asynchronously."""
91
- effective_use_temporal = (
92
- use_temporal
93
- if use_temporal is not None
94
- else self.flock.enable_temporal
95
- )
96
- exec_mode = (
97
- "Temporal"
98
- if effective_use_temporal
99
- else ("Parallel Local" if parallel else "Sequential Local")
100
- )
101
- start_agent_name = (
102
- start_agent.name if hasattr(start_agent, "name") else start_agent
103
- )
104
- logger.info(
105
- f"Starting evaluation for agent '{start_agent_name}'. Execution: {exec_mode}, Silent: {silent_mode}"
106
- )
107
-
108
- # --- 1. Normalize Dataset ---
109
- try:
110
- df = normalize_dataset(dataset) # Uses helper
111
- if df is None or df.empty:
112
- raise ValueError(
113
- "Provided dataset is empty or could not be normalized."
114
- )
115
- logger.info(f"Normalized dataset with {len(df)} items.")
116
- except Exception as e:
117
- logger.error(
118
- f"Failed to load or normalize dataset: {e}", exc_info=True
119
- )
120
- raise ValueError(f"Dataset processing failed: {e}") from e
121
-
122
- # --- 2. Prepare Batch Items ---
123
- batch_items = []
124
- required_input_cols = list(input_mapping.keys())
125
- required_answer_cols = list(answer_mapping.values())
126
- required_metadata_cols = metadata_columns or []
127
- all_required_cols = set(
128
- required_input_cols + required_answer_cols + required_metadata_cols
129
- )
130
-
131
- missing_cols = all_required_cols - set(df.columns)
132
- if missing_cols:
133
- raise ValueError(
134
- f"Dataset missing required columns: {', '.join(missing_cols)}"
135
- )
136
-
137
- for index, row in df.iterrows():
138
- agent_input = {
139
- agent_key: row[df_col]
140
- for df_col, agent_key in input_mapping.items()
141
- }
142
- expected_answers = {
143
- agent_out_key: row[answer_col]
144
- for agent_out_key, answer_col in answer_mapping.items()
145
- }
146
- metadata = {col: row[col] for col in required_metadata_cols}
147
- batch_items.append(
148
- {
149
- "_original_index": index, # Store original DF index
150
- "_agent_input": agent_input,
151
- "_expected_answers": expected_answers,
152
- "_metadata": metadata,
153
- }
154
- )
155
-
156
- if not batch_items:
157
- logger.warning("No items prepared for evaluation.")
158
- return pd.DataFrame() if return_dataframe else []
159
-
160
- # --- 3. Execute Workers ---
161
- results_dict = {} # Store results keyed by original index
162
- tasks = []
163
- semaphore = asyncio.Semaphore(
164
- max_workers if parallel and not effective_use_temporal else 1
165
- )
166
-
167
- # --- Worker Function ---
168
- async def evaluate_worker(item_index: int, item_data: dict[str, Any]):
169
- nonlocal results_dict
170
- original_index = item_data["_original_index"]
171
- item_result_details = {
172
- "index": original_index, # Use original index in result
173
- "inputs": item_data["_agent_input"],
174
- "expected_answers": item_data["_expected_answers"],
175
- "agent_output": None,
176
- "metrics": {},
177
- "error": None,
178
- **(item_data["_metadata"]), # Include pass-through metadata
179
- }
180
- agent_inputs_with_static = {
181
- **(static_inputs or {}),
182
- **item_data["_agent_input"],
183
- }
184
-
185
- async with semaphore: # Acquire semaphore
186
- run_desc = f"Evaluation item (original index: {original_index})"
187
- logger.debug(f"{run_desc} starting.")
188
- try:
189
- # Run the agent/flock for this item
190
- agent_output = await self.flock.run_async(
191
- agent=start_agent, # Name or instance
192
- input=agent_inputs_with_static,
193
- box_result=True, # Use Box for easier access via dot notation
194
- # context=... # Assuming isolated context for now
195
- )
196
- item_result_details["agent_output"] = (
197
- agent_output # Store Box or dict
198
- )
199
-
200
- # Extract predicted values based on answer_mapping
201
- predicted_answers = {}
202
- for agent_out_key in answer_mapping:
203
- # Use helper to handle dot notation
204
- predicted_answers[agent_out_key] = (
205
- extract_value_by_dot_notation(
206
- agent_output, agent_out_key
207
- )
208
- )
209
-
210
- # Calculate metrics using helper
211
- item_result_details["metrics"] = (
212
- calculate_evaluation_metrics(
213
- metrics=metrics,
214
- metric_configs=metric_configs or {},
215
- predicted_answers=predicted_answers,
216
- expected_answers=item_data["_expected_answers"],
217
- agent_inputs=agent_inputs_with_static, # Pass context if needed
218
- agent_output=agent_output, # Pass context if needed
219
- )
220
- )
221
- logger.debug(f"{run_desc} finished successfully.")
222
-
223
- except Exception as e:
224
- logger.warning(
225
- f"Error processing item {original_index}: {e}"
226
- )
227
- item_result_details["error"] = str(e)
228
- if error_handling == "raise":
229
- raise # Re-raise to stop processing (if parallel, stops gather)
230
- elif error_handling == "skip":
231
- item_result_details["_skip"] = (
232
- True # Mark for filtering
233
- )
234
-
235
- # Store result associated with original index
236
- results_dict[original_index] = item_result_details
237
-
238
- # Update progress bar if applicable (inside the worker is okay)
239
- if progress_context:
240
- progress.update(progress_task_id, advance=1)
241
-
242
- # --- Setup Progress Bar if Silent ---
243
- progress_context = None
244
- progress_task_id = None
245
- if silent_mode:
246
- from rich.progress import (
247
- BarColumn,
248
- Progress,
249
- SpinnerColumn,
250
- TextColumn,
251
- TimeElapsedColumn,
252
- )
253
-
254
- progress = Progress(
255
- SpinnerColumn(),
256
- TextColumn("[progress.description]{task.description}"),
257
- BarColumn(),
258
- TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
259
- TextColumn("({task.completed}/{task.total})"),
260
- TimeElapsedColumn(),
261
- )
262
- progress_context = progress
263
- progress_task_id = progress.add_task(
264
- f"Evaluating {len(batch_items)} items...",
265
- total=len(batch_items),
266
- )
267
- progress.start()
268
-
269
- # --- Execute Tasks ---
270
- try:
271
- if effective_use_temporal:
272
- # TODO: Implement parallel Temporal evaluation
273
- logger.info(
274
- "Running evaluation using Temporal (executing sequentially for now)..."
275
- )
276
- for i, item_data in enumerate(batch_items):
277
- await evaluate_worker(i, item_data) # Pass sequential index
278
- elif parallel:
279
- logger.info(
280
- f"Running evaluation in parallel with max_workers={max_workers}..."
281
- )
282
- for i, item_data in enumerate(batch_items):
283
- # Pass sequential index i, and the item_data which contains original_index
284
- tasks.append(
285
- asyncio.create_task(evaluate_worker(i, item_data))
286
- )
287
- await asyncio.gather(*tasks)
288
- else: # Sequential Local
289
- logger.info("Running evaluation sequentially...")
290
- for i, item_data in enumerate(batch_items):
291
- await evaluate_worker(i, item_data)
292
-
293
- logger.info("Evaluation execution finished.")
294
-
295
- except Exception as batch_error:
296
- logger.error(
297
- f"Evaluation stopped due to an error in one of the items: {batch_error}"
298
- )
299
- if (
300
- not error_handling == "skip"
301
- ): # If skipping, we continue; otherwise, re-raise if required
302
- if error_handling == "raise":
303
- raise
304
- finally:
305
- if progress_context:
306
- progress.stop()
307
-
308
- # --- 4. Process Results ---
309
- # Reconstruct results list based on original order and filtering
310
- final_results_list = []
311
- for idx in df.index: # Iterate through original DataFrame index
312
- res = results_dict.get(idx)
313
- if res:
314
- if error_handling == "skip" and res.get("_skip"):
315
- continue # Skip items marked for skipping
316
- # Remove internal skip flag if present
317
- res.pop("_skip", None)
318
- final_results_list.append(res)
319
-
320
- # Calculate aggregate summary using helper
321
- summary = aggregate_results(final_results_list)
322
- logger.info(
323
- "Evaluation Summary:", extra=summary
324
- ) # Log summary automatically
325
-
326
- # --- 5. Save and Return ---
327
- if output_file:
328
- output_path = Path(output_file)
329
- output_path.parent.mkdir(parents=True, exist_ok=True)
330
- try:
331
- results_df = pd.DataFrame(final_results_list)
332
- # Handle complex objects before saving
333
- if "agent_output" in results_df.columns:
334
- results_df["agent_output"] = results_df[
335
- "agent_output"
336
- ].apply(lambda x: x.to_dict() if isinstance(x, Box) else x)
337
- if (
338
- "expected_answers" in results_df.columns
339
- ): # Flatten dicts for CSV
340
- results_df = pd.concat(
341
- [
342
- results_df.drop(["expected_answers"], axis=1),
343
- pd.json_normalize(
344
- results_df["expected_answers"]
345
- ).add_prefix("expected_"),
346
- ],
347
- axis=1,
348
- )
349
- if "metrics" in results_df.columns: # Flatten dicts for CSV
350
- results_df = pd.concat(
351
- [
352
- results_df.drop(["metrics"], axis=1),
353
- pd.json_normalize(results_df["metrics"]).add_prefix(
354
- "metric_"
355
- ),
356
- ],
357
- axis=1,
358
- )
359
- if "inputs" in results_df.columns: # Flatten dicts for CSV
360
- results_df = pd.concat(
361
- [
362
- results_df.drop(["inputs"], axis=1),
363
- pd.json_normalize(results_df["inputs"]).add_prefix(
364
- "input_"
365
- ),
366
- ],
367
- axis=1,
368
- )
369
-
370
- # Convert lists/dicts in metadata columns for CSV saving
371
- for col in metadata_columns or []:
372
- if col in results_df.columns:
373
- # Check if column contains lists/dicts before converting
374
- if (
375
- results_df[col]
376
- .apply(lambda x: isinstance(x, (list, dict)))
377
- .any()
378
- ):
379
- results_df[col] = results_df[col].apply(json.dumps)
380
-
381
- if output_path.suffix.lower() == ".csv":
382
- results_df.to_csv(output_path, index=False)
383
- elif output_path.suffix.lower() == ".json":
384
- # Save list of dicts directly (before potential DataFrame manipulation)
385
- # Need to handle non-serializable types like Box
386
- serializable_results = []
387
- for res_dict in final_results_list:
388
- if "agent_output" in res_dict and isinstance(
389
- res_dict["agent_output"], Box
390
- ):
391
- res_dict["agent_output"] = res_dict[
392
- "agent_output"
393
- ].to_dict()
394
- serializable_results.append(res_dict)
395
- with open(output_path, "w", encoding="utf-8") as f:
396
- json.dump(
397
- serializable_results, f, indent=2, default=str
398
- ) # Use default=str for safety
399
- else:
400
- logger.warning(
401
- f"Unsupported output file format: {output_path.suffix}. Use .csv or .json."
402
- )
403
- logger.info(
404
- f"Detailed evaluation results saved to {output_path}"
405
- )
406
- except Exception as e:
407
- logger.error(
408
- f"Failed to save evaluation results to {output_file}: {e}",
409
- exc_info=True,
410
- )
411
-
412
- if return_dataframe:
413
- if not PANDAS_AVAILABLE:
414
- logger.error(
415
- "Cannot return DataFrame: pandas library not installed."
416
- )
417
- return final_results_list # Fallback to list
418
- # Ensure DataFrame is created if not done for saving
419
- if "results_df" not in locals():
420
- results_df = pd.DataFrame(final_results_list)
421
- # Convert Box if needed
422
- if "agent_output" in results_df.columns:
423
- results_df["agent_output"] = results_df[
424
- "agent_output"
425
- ].apply(lambda x: x.to_dict() if isinstance(x, Box) else x)
426
- return results_df
427
- else:
428
- # Ensure Box objects are converted if returning list
429
- final_list = []
430
- for res_dict in final_results_list:
431
- if "agent_output" in res_dict and isinstance(
432
- res_dict["agent_output"], Box
433
- ):
434
- res_dict["agent_output"] = res_dict[
435
- "agent_output"
436
- ].to_dict()
437
- final_list.append(res_dict)
438
- return final_list
@@ -1,31 +0,0 @@
1
- # src/flock/core/execution/local_executor.py
2
- from flock.core.context.context import FlockContext
3
- from flock.core.logging.logging import get_logger
4
- from flock.workflow.activities import (
5
- run_agent, # This should be the local activity function
6
- )
7
-
8
- logger = get_logger("flock")
9
-
10
-
11
- async def run_local_workflow(
12
- context: FlockContext, box_result: bool = True
13
- ) -> dict:
14
- """Execute the agent workflow locally (for debugging).
15
-
16
- Args:
17
- context: The FlockContext instance with state and history.
18
- output_formatter: Formatter options for displaying results.
19
- box_result: If True, wraps the result in a Box for nicer display.
20
-
21
- Returns:
22
- A dictionary containing the workflow result.
23
- """
24
- logger.info("Running local debug workflow")
25
- result = await run_agent(context)
26
- if box_result:
27
- from box import Box
28
-
29
- logger.debug("Boxing result")
30
- return Box(result)
31
- return result
@@ -1,103 +0,0 @@
1
- # src/flock/core/execution/evaluation_processor.py
2
- """Contains the EvaluationProcessor class responsible for evaluating Flock agents
3
- against datasets using various metrics.
4
- """
5
-
6
- from pathlib import Path
7
- from typing import (
8
- TYPE_CHECKING,
9
- Any,
10
- Union,
11
- )
12
-
13
- from opik import Opik
14
- from pandas import DataFrame
15
-
16
- # Conditional pandas import
17
- try:
18
- import pandas as pd
19
-
20
- PANDAS_AVAILABLE = True
21
- except ImportError:
22
- pd = None # type: ignore
23
- PANDAS_AVAILABLE = False
24
-
25
- # Box for results
26
- from datasets import Dataset as HFDataset
27
-
28
- from flock.core.evaluation.utils import (
29
- normalize_dataset,
30
- # Import metric calculation/aggregation helpers
31
- )
32
-
33
- # Flock core imports
34
- from flock.core.logging.logging import get_logger
35
-
36
- if TYPE_CHECKING:
37
- from flock.core.flock import Flock
38
- from flock.core.flock_agent import FlockAgent
39
- # Conditional types
40
-
41
-
42
- logger = get_logger("execution.opik")
43
-
44
-
45
- class OpikExecutor:
46
- """Handles the evaluation of Flock agents against datasets."""
47
-
48
- def __init__(self, flock_instance: "Flock"):
49
- """Initializes the EvaluationProcessor.
50
-
51
- Args:
52
- flock_instance: The Flock instance this processor will use.
53
- """
54
- self.flock = flock_instance
55
-
56
- async def evaluate_with_opik(
57
- self,
58
- dataset: str | Path | list[dict[str, Any]] | DataFrame | HFDataset,
59
- start_agent: Union["FlockAgent", str],
60
- input_mapping: dict[str, str],
61
- answer_mapping: dict[str, str],) -> DataFrame | list[dict[str, Any]]:
62
- """Evaluates the Flock's performance against a dataset asynchronously."""
63
- logger.info(f"Evaluating Flock's performance against dataset: {dataset}")
64
-
65
- # Evaluation task
66
- def evaluation_task(dataset_item):
67
- flock_result = self.flock.run(agent=start_agent, input=dataset_item, box_result=False)
68
-
69
- result = {
70
- "input": dataset_item.get("test"),
71
- "output": flock_result.get("answer"),
72
- "context": ["placeholder string"]
73
- }
74
-
75
- return result
76
-
77
- start_agent_name = (
78
- start_agent.name if hasattr(start_agent, "name") else start_agent
79
- )
80
- dataset_name = str(dataset)
81
-
82
- # --- 1. Normalize Dataset ---
83
- try:
84
- df = normalize_dataset(dataset) # Uses helper
85
- if df is None or df.empty:
86
- raise ValueError(
87
- "Provided dataset is empty or could not be normalized."
88
- )
89
- logger.info(f"Normalized dataset with {len(df)} items.")
90
- except Exception as e:
91
- logger.error(
92
- f"Failed to load or normalize dataset: {e}", exc_info=True
93
- )
94
- raise ValueError(f"Dataset processing failed: {e}") from e
95
-
96
- logger.info(f"type(df): {type(df)}") # ➜ <class 'pandas.core.frame.DataFrame'>
97
- logger.info(f"df.shape: {df.shape}") # e.g. (123456, N_COLUMNS+2)
98
- logger.info(f"df['split'].value_counts(): {df['split'].value_counts()}")
99
- logger.info(f"df['config'].unique(): {df['config'].unique()}")
100
- client = Opik()
101
- dataset = client.get_or_create_dataset(name=dataset_name)
102
- dataset.insert_from_pandas(dataframe=df, ignore_keys=["source"])
103
- logger.info(f"Imported dataset to Opik")