flock-core 0.5.0b28__py3-none-any.whl → 0.5.56b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flock-core might be problematic.

Files changed (359)
  1. flock/__init__.py +12 -217
  2. flock/agent.py +678 -0
  3. flock/api/themes.py +71 -0
  4. flock/artifacts.py +79 -0
  5. flock/cli.py +75 -0
  6. flock/components.py +173 -0
  7. flock/dashboard/__init__.py +28 -0
  8. flock/dashboard/collector.py +283 -0
  9. flock/dashboard/events.py +182 -0
  10. flock/dashboard/launcher.py +230 -0
  11. flock/dashboard/service.py +537 -0
  12. flock/dashboard/websocket.py +235 -0
  13. flock/engines/__init__.py +6 -0
  14. flock/engines/dspy_engine.py +856 -0
  15. flock/examples.py +128 -0
  16. flock/{core/util → helper}/cli_helper.py +4 -3
  17. flock/{core/logging → logging}/__init__.py +2 -3
  18. flock/{core/logging → logging}/formatters/enum_builder.py +3 -4
  19. flock/{core/logging → logging}/formatters/theme_builder.py +19 -44
  20. flock/{core/logging → logging}/formatters/themed_formatter.py +69 -115
  21. flock/{core/logging → logging}/logging.py +77 -61
  22. flock/{core/logging → logging}/telemetry.py +20 -26
  23. flock/{core/logging → logging}/telemetry_exporter/base_exporter.py +2 -2
  24. flock/{core/logging → logging}/telemetry_exporter/file_exporter.py +6 -9
  25. flock/{core/logging → logging}/telemetry_exporter/sqlite_exporter.py +2 -3
  26. flock/{core/logging → logging}/trace_and_logged.py +20 -24
  27. flock/mcp/__init__.py +91 -0
  28. flock/{core/mcp/mcp_client.py → mcp/client.py} +103 -154
  29. flock/{core/mcp/mcp_config.py → mcp/config.py} +62 -117
  30. flock/mcp/manager.py +255 -0
  31. flock/mcp/servers/sse/__init__.py +1 -1
  32. flock/mcp/servers/sse/flock_sse_server.py +11 -53
  33. flock/mcp/servers/stdio/__init__.py +1 -1
  34. flock/mcp/servers/stdio/flock_stdio_server.py +8 -48
  35. flock/mcp/servers/streamable_http/flock_streamable_http_server.py +17 -62
  36. flock/mcp/servers/websockets/flock_websocket_server.py +7 -40
  37. flock/{core/mcp/flock_mcp_tool.py → mcp/tool.py} +16 -26
  38. flock/mcp/types/__init__.py +42 -0
  39. flock/{core/mcp → mcp}/types/callbacks.py +9 -15
  40. flock/{core/mcp → mcp}/types/factories.py +7 -6
  41. flock/{core/mcp → mcp}/types/handlers.py +13 -18
  42. flock/{core/mcp → mcp}/types/types.py +70 -74
  43. flock/{core/mcp → mcp}/util/helpers.py +1 -1
  44. flock/orchestrator.py +645 -0
  45. flock/registry.py +148 -0
  46. flock/runtime.py +262 -0
  47. flock/service.py +140 -0
  48. flock/store.py +69 -0
  49. flock/subscription.py +111 -0
  50. flock/themes/andromeda.toml +1 -1
  51. flock/themes/apple-system-colors.toml +1 -1
  52. flock/themes/arcoiris.toml +1 -1
  53. flock/themes/atomonelight.toml +1 -1
  54. flock/themes/ayu copy.toml +1 -1
  55. flock/themes/ayu-light.toml +1 -1
  56. flock/themes/belafonte-day.toml +1 -1
  57. flock/themes/belafonte-night.toml +1 -1
  58. flock/themes/blulocodark.toml +1 -1
  59. flock/themes/breeze.toml +1 -1
  60. flock/themes/broadcast.toml +1 -1
  61. flock/themes/brogrammer.toml +1 -1
  62. flock/themes/builtin-dark.toml +1 -1
  63. flock/themes/builtin-pastel-dark.toml +1 -1
  64. flock/themes/catppuccin-latte.toml +1 -1
  65. flock/themes/catppuccin-macchiato.toml +1 -1
  66. flock/themes/catppuccin-mocha.toml +1 -1
  67. flock/themes/cga.toml +1 -1
  68. flock/themes/chalk.toml +1 -1
  69. flock/themes/ciapre.toml +1 -1
  70. flock/themes/coffee-theme.toml +1 -1
  71. flock/themes/cyberpunkscarletprotocol.toml +1 -1
  72. flock/themes/dark+.toml +1 -1
  73. flock/themes/darkermatrix.toml +1 -1
  74. flock/themes/darkside.toml +1 -1
  75. flock/themes/desert.toml +1 -1
  76. flock/themes/django.toml +1 -1
  77. flock/themes/djangosmooth.toml +1 -1
  78. flock/themes/doomone.toml +1 -1
  79. flock/themes/dotgov.toml +1 -1
  80. flock/themes/dracula+.toml +1 -1
  81. flock/themes/duckbones.toml +1 -1
  82. flock/themes/encom.toml +1 -1
  83. flock/themes/espresso.toml +1 -1
  84. flock/themes/everblush.toml +1 -1
  85. flock/themes/fairyfloss.toml +1 -1
  86. flock/themes/fideloper.toml +1 -1
  87. flock/themes/fishtank.toml +1 -1
  88. flock/themes/flexoki-light.toml +1 -1
  89. flock/themes/floraverse.toml +1 -1
  90. flock/themes/framer.toml +1 -1
  91. flock/themes/galizur.toml +1 -1
  92. flock/themes/github.toml +1 -1
  93. flock/themes/grass.toml +1 -1
  94. flock/themes/grey-green.toml +1 -1
  95. flock/themes/gruvboxlight.toml +1 -1
  96. flock/themes/guezwhoz.toml +1 -1
  97. flock/themes/harper.toml +1 -1
  98. flock/themes/hax0r-blue.toml +1 -1
  99. flock/themes/hopscotch.256.toml +1 -1
  100. flock/themes/ic-green-ppl.toml +1 -1
  101. flock/themes/iceberg-dark.toml +1 -1
  102. flock/themes/japanesque.toml +1 -1
  103. flock/themes/jubi.toml +1 -1
  104. flock/themes/kibble.toml +1 -1
  105. flock/themes/kolorit.toml +1 -1
  106. flock/themes/kurokula.toml +1 -1
  107. flock/themes/materialdesigncolors.toml +1 -1
  108. flock/themes/matrix.toml +1 -1
  109. flock/themes/mellifluous.toml +1 -1
  110. flock/themes/midnight-in-mojave.toml +1 -1
  111. flock/themes/monokai-remastered.toml +1 -1
  112. flock/themes/monokai-soda.toml +1 -1
  113. flock/themes/neon.toml +1 -1
  114. flock/themes/neopolitan.toml +1 -1
  115. flock/themes/nord-light.toml +1 -1
  116. flock/themes/ocean.toml +1 -1
  117. flock/themes/onehalfdark.toml +1 -1
  118. flock/themes/onehalflight.toml +1 -1
  119. flock/themes/palenighthc.toml +1 -1
  120. flock/themes/paulmillr.toml +1 -1
  121. flock/themes/pencildark.toml +1 -1
  122. flock/themes/pnevma.toml +1 -1
  123. flock/themes/purple-rain.toml +1 -1
  124. flock/themes/purplepeter.toml +1 -1
  125. flock/themes/raycast-dark.toml +1 -1
  126. flock/themes/red-sands.toml +1 -1
  127. flock/themes/relaxed.toml +1 -1
  128. flock/themes/retro.toml +1 -1
  129. flock/themes/rose-pine.toml +1 -1
  130. flock/themes/royal.toml +1 -1
  131. flock/themes/ryuuko.toml +1 -1
  132. flock/themes/sakura.toml +1 -1
  133. flock/themes/scarlet-protocol.toml +1 -1
  134. flock/themes/seoulbones-dark.toml +1 -1
  135. flock/themes/shades-of-purple.toml +1 -1
  136. flock/themes/smyck.toml +1 -1
  137. flock/themes/softserver.toml +1 -1
  138. flock/themes/solarized-darcula.toml +1 -1
  139. flock/themes/square.toml +1 -1
  140. flock/themes/sugarplum.toml +1 -1
  141. flock/themes/thayer-bright.toml +1 -1
  142. flock/themes/tokyonight.toml +1 -1
  143. flock/themes/tomorrow.toml +1 -1
  144. flock/themes/ubuntu.toml +1 -1
  145. flock/themes/ultradark.toml +1 -1
  146. flock/themes/ultraviolent.toml +1 -1
  147. flock/themes/unikitty.toml +1 -1
  148. flock/themes/urple.toml +1 -1
  149. flock/themes/vesper.toml +1 -1
  150. flock/themes/vimbones.toml +1 -1
  151. flock/themes/wildcherry.toml +1 -1
  152. flock/themes/wilmersdorf.toml +1 -1
  153. flock/themes/wryan.toml +1 -1
  154. flock/themes/xcodedarkhc.toml +1 -1
  155. flock/themes/xcodelight.toml +1 -1
  156. flock/themes/zenbones-light.toml +1 -1
  157. flock/themes/zenwritten-dark.toml +1 -1
  158. flock/utilities.py +301 -0
  159. flock/{components/utility → utility}/output_utility_component.py +68 -53
  160. flock/visibility.py +107 -0
  161. flock_core-0.5.56b0.dist-info/METADATA +747 -0
  162. flock_core-0.5.56b0.dist-info/RECORD +398 -0
  163. flock_core-0.5.56b0.dist-info/entry_points.txt +2 -0
  164. {flock_core-0.5.0b28.dist-info → flock_core-0.5.56b0.dist-info}/licenses/LICENSE +1 -1
  165. flock/adapter/__init__.py +0 -14
  166. flock/adapter/azure_adapter.py +0 -68
  167. flock/adapter/chroma_adapter.py +0 -73
  168. flock/adapter/faiss_adapter.py +0 -97
  169. flock/adapter/pinecone_adapter.py +0 -51
  170. flock/adapter/vector_base.py +0 -47
  171. flock/cli/assets/release_notes.md +0 -140
  172. flock/cli/config.py +0 -8
  173. flock/cli/constants.py +0 -36
  174. flock/cli/create_agent.py +0 -1
  175. flock/cli/create_flock.py +0 -280
  176. flock/cli/execute_flock.py +0 -620
  177. flock/cli/load_agent.py +0 -1
  178. flock/cli/load_examples.py +0 -1
  179. flock/cli/load_flock.py +0 -192
  180. flock/cli/load_release_notes.py +0 -20
  181. flock/cli/loaded_flock_cli.py +0 -254
  182. flock/cli/manage_agents.py +0 -459
  183. flock/cli/registry_management.py +0 -889
  184. flock/cli/runner.py +0 -41
  185. flock/cli/settings.py +0 -857
  186. flock/cli/utils.py +0 -135
  187. flock/cli/view_results.py +0 -29
  188. flock/cli/yaml_editor.py +0 -396
  189. flock/components/__init__.py +0 -30
  190. flock/components/evaluation/__init__.py +0 -9
  191. flock/components/evaluation/declarative_evaluation_component.py +0 -606
  192. flock/components/routing/__init__.py +0 -15
  193. flock/components/routing/conditional_routing_component.py +0 -494
  194. flock/components/routing/default_routing_component.py +0 -103
  195. flock/components/routing/llm_routing_component.py +0 -206
  196. flock/components/utility/__init__.py +0 -22
  197. flock/components/utility/example_utility_component.py +0 -250
  198. flock/components/utility/feedback_utility_component.py +0 -206
  199. flock/components/utility/memory_utility_component.py +0 -550
  200. flock/components/utility/metrics_utility_component.py +0 -700
  201. flock/config.py +0 -61
  202. flock/core/__init__.py +0 -110
  203. flock/core/agent/__init__.py +0 -16
  204. flock/core/agent/default_agent.py +0 -216
  205. flock/core/agent/flock_agent_components.py +0 -104
  206. flock/core/agent/flock_agent_execution.py +0 -101
  207. flock/core/agent/flock_agent_integration.py +0 -260
  208. flock/core/agent/flock_agent_lifecycle.py +0 -186
  209. flock/core/agent/flock_agent_serialization.py +0 -381
  210. flock/core/api/__init__.py +0 -10
  211. flock/core/api/custom_endpoint.py +0 -45
  212. flock/core/api/endpoints.py +0 -254
  213. flock/core/api/main.py +0 -162
  214. flock/core/api/models.py +0 -97
  215. flock/core/api/run_store.py +0 -224
  216. flock/core/api/runner.py +0 -44
  217. flock/core/api/service.py +0 -214
  218. flock/core/component/__init__.py +0 -15
  219. flock/core/component/agent_component_base.py +0 -309
  220. flock/core/component/evaluation_component.py +0 -62
  221. flock/core/component/routing_component.py +0 -74
  222. flock/core/component/utility_component.py +0 -69
  223. flock/core/config/flock_agent_config.py +0 -58
  224. flock/core/config/scheduled_agent_config.py +0 -40
  225. flock/core/context/context.py +0 -213
  226. flock/core/context/context_manager.py +0 -37
  227. flock/core/context/context_vars.py +0 -10
  228. flock/core/evaluation/utils.py +0 -396
  229. flock/core/execution/batch_executor.py +0 -369
  230. flock/core/execution/evaluation_executor.py +0 -438
  231. flock/core/execution/local_executor.py +0 -31
  232. flock/core/execution/opik_executor.py +0 -103
  233. flock/core/execution/temporal_executor.py +0 -164
  234. flock/core/flock.py +0 -634
  235. flock/core/flock_agent.py +0 -336
  236. flock/core/flock_factory.py +0 -613
  237. flock/core/flock_scheduler.py +0 -166
  238. flock/core/flock_server_manager.py +0 -136
  239. flock/core/interpreter/python_interpreter.py +0 -689
  240. flock/core/mcp/__init__.py +0 -1
  241. flock/core/mcp/flock_mcp_server.py +0 -680
  242. flock/core/mcp/mcp_client_manager.py +0 -201
  243. flock/core/mcp/types/__init__.py +0 -1
  244. flock/core/mixin/dspy_integration.py +0 -403
  245. flock/core/mixin/prompt_parser.py +0 -125
  246. flock/core/orchestration/__init__.py +0 -15
  247. flock/core/orchestration/flock_batch_processor.py +0 -94
  248. flock/core/orchestration/flock_evaluator.py +0 -113
  249. flock/core/orchestration/flock_execution.py +0 -295
  250. flock/core/orchestration/flock_initialization.py +0 -149
  251. flock/core/orchestration/flock_server_manager.py +0 -67
  252. flock/core/orchestration/flock_web_server.py +0 -117
  253. flock/core/registry/__init__.py +0 -45
  254. flock/core/registry/agent_registry.py +0 -69
  255. flock/core/registry/callable_registry.py +0 -139
  256. flock/core/registry/component_discovery.py +0 -142
  257. flock/core/registry/component_registry.py +0 -64
  258. flock/core/registry/config_mapping.py +0 -64
  259. flock/core/registry/decorators.py +0 -137
  260. flock/core/registry/registry_hub.py +0 -205
  261. flock/core/registry/server_registry.py +0 -57
  262. flock/core/registry/type_registry.py +0 -86
  263. flock/core/serialization/__init__.py +0 -13
  264. flock/core/serialization/callable_registry.py +0 -52
  265. flock/core/serialization/flock_serializer.py +0 -832
  266. flock/core/serialization/json_encoder.py +0 -41
  267. flock/core/serialization/secure_serializer.py +0 -175
  268. flock/core/serialization/serializable.py +0 -342
  269. flock/core/serialization/serialization_utils.py +0 -412
  270. flock/core/util/file_path_utils.py +0 -223
  271. flock/core/util/hydrator.py +0 -309
  272. flock/core/util/input_resolver.py +0 -164
  273. flock/core/util/loader.py +0 -59
  274. flock/core/util/splitter.py +0 -219
  275. flock/di.py +0 -27
  276. flock/platform/docker_tools.py +0 -49
  277. flock/platform/jaeger_install.py +0 -86
  278. flock/webapp/__init__.py +0 -1
  279. flock/webapp/app/__init__.py +0 -0
  280. flock/webapp/app/api/__init__.py +0 -0
  281. flock/webapp/app/api/agent_management.py +0 -241
  282. flock/webapp/app/api/execution.py +0 -709
  283. flock/webapp/app/api/flock_management.py +0 -129
  284. flock/webapp/app/api/registry_viewer.py +0 -30
  285. flock/webapp/app/chat.py +0 -665
  286. flock/webapp/app/config.py +0 -104
  287. flock/webapp/app/dependencies.py +0 -117
  288. flock/webapp/app/main.py +0 -1070
  289. flock/webapp/app/middleware.py +0 -113
  290. flock/webapp/app/models_ui.py +0 -7
  291. flock/webapp/app/services/__init__.py +0 -0
  292. flock/webapp/app/services/feedback_file_service.py +0 -363
  293. flock/webapp/app/services/flock_service.py +0 -337
  294. flock/webapp/app/services/sharing_models.py +0 -81
  295. flock/webapp/app/services/sharing_store.py +0 -762
  296. flock/webapp/app/templates/theme_mapper.html +0 -326
  297. flock/webapp/app/theme_mapper.py +0 -812
  298. flock/webapp/app/utils.py +0 -85
  299. flock/webapp/run.py +0 -215
  300. flock/webapp/static/css/chat.css +0 -301
  301. flock/webapp/static/css/components.css +0 -167
  302. flock/webapp/static/css/header.css +0 -39
  303. flock/webapp/static/css/layout.css +0 -46
  304. flock/webapp/static/css/sidebar.css +0 -127
  305. flock/webapp/static/css/two-pane.css +0 -48
  306. flock/webapp/templates/base.html +0 -200
  307. flock/webapp/templates/chat.html +0 -152
  308. flock/webapp/templates/chat_settings.html +0 -19
  309. flock/webapp/templates/flock_editor.html +0 -16
  310. flock/webapp/templates/index.html +0 -12
  311. flock/webapp/templates/partials/_agent_detail_form.html +0 -93
  312. flock/webapp/templates/partials/_agent_list.html +0 -18
  313. flock/webapp/templates/partials/_agent_manager_view.html +0 -51
  314. flock/webapp/templates/partials/_agent_tools_checklist.html +0 -14
  315. flock/webapp/templates/partials/_chat_container.html +0 -15
  316. flock/webapp/templates/partials/_chat_messages.html +0 -57
  317. flock/webapp/templates/partials/_chat_settings_form.html +0 -85
  318. flock/webapp/templates/partials/_create_flock_form.html +0 -50
  319. flock/webapp/templates/partials/_dashboard_flock_detail.html +0 -17
  320. flock/webapp/templates/partials/_dashboard_flock_file_list.html +0 -16
  321. flock/webapp/templates/partials/_dashboard_flock_properties_preview.html +0 -28
  322. flock/webapp/templates/partials/_dashboard_upload_flock_form.html +0 -16
  323. flock/webapp/templates/partials/_dynamic_input_form_content.html +0 -22
  324. flock/webapp/templates/partials/_env_vars_table.html +0 -23
  325. flock/webapp/templates/partials/_execution_form.html +0 -118
  326. flock/webapp/templates/partials/_execution_view_container.html +0 -28
  327. flock/webapp/templates/partials/_flock_file_list.html +0 -23
  328. flock/webapp/templates/partials/_flock_properties_form.html +0 -52
  329. flock/webapp/templates/partials/_flock_upload_form.html +0 -16
  330. flock/webapp/templates/partials/_header_flock_status.html +0 -5
  331. flock/webapp/templates/partials/_load_manager_view.html +0 -49
  332. flock/webapp/templates/partials/_registry_table.html +0 -25
  333. flock/webapp/templates/partials/_registry_viewer_content.html +0 -70
  334. flock/webapp/templates/partials/_results_display.html +0 -78
  335. flock/webapp/templates/partials/_settings_env_content.html +0 -9
  336. flock/webapp/templates/partials/_settings_theme_content.html +0 -14
  337. flock/webapp/templates/partials/_settings_view.html +0 -36
  338. flock/webapp/templates/partials/_share_chat_link_snippet.html +0 -11
  339. flock/webapp/templates/partials/_share_link_snippet.html +0 -35
  340. flock/webapp/templates/partials/_sidebar.html +0 -74
  341. flock/webapp/templates/partials/_streaming_results_container.html +0 -195
  342. flock/webapp/templates/partials/_structured_data_view.html +0 -40
  343. flock/webapp/templates/partials/_theme_preview.html +0 -36
  344. flock/webapp/templates/registry_viewer.html +0 -84
  345. flock/webapp/templates/shared_run_page.html +0 -140
  346. flock/workflow/__init__.py +0 -0
  347. flock/workflow/activities.py +0 -196
  348. flock/workflow/agent_activities.py +0 -24
  349. flock/workflow/agent_execution_activity.py +0 -202
  350. flock/workflow/flock_workflow.py +0 -214
  351. flock/workflow/temporal_config.py +0 -96
  352. flock/workflow/temporal_setup.py +0 -68
  353. flock_core-0.5.0b28.dist-info/METADATA +0 -274
  354. flock_core-0.5.0b28.dist-info/RECORD +0 -561
  355. flock_core-0.5.0b28.dist-info/entry_points.txt +0 -2
  356. /flock/{core/logging → logging}/formatters/themes.py +0 -0
  357. /flock/{core/logging → logging}/span_middleware/baggage_span_processor.py +0 -0
  358. /flock/{core/mcp → mcp}/util/__init__.py +0 -0
  359. {flock_core-0.5.0b28.dist-info → flock_core-0.5.56b0.dist-info}/WHEEL +0 -0
@@ -1,213 +0,0 @@
1
- import uuid
2
- from dataclasses import asdict
3
- from datetime import datetime
4
- from typing import Any, Literal
5
-
6
- from opentelemetry import trace
7
- from pydantic import BaseModel, Field
8
-
9
- from flock.core.context.context_vars import FLOCK_LAST_AGENT, FLOCK_LAST_RESULT
10
- from flock.core.logging.logging import get_logger
11
- from flock.core.serialization.serializable import Serializable
12
-
13
- logger = get_logger("context")
14
- tracer = trace.get_tracer(__name__)
15
-
16
-
17
- class AgentRunRecord(BaseModel):
18
- id: str = Field(default="")
19
- agent: str = Field(default="")
20
- data: dict[str, Any] = Field(default_factory=dict)
21
- timestamp: str = Field(default="")
22
- hand_off: dict | None = Field(default_factory=dict)
23
- called_from: str | None = Field(default=None)
24
-
25
-
26
- class AgentDefinition(BaseModel):
27
- agent_type: str = Field(default="")
28
- agent_name: str = Field(default="")
29
- agent_data: dict = Field(default_factory=dict)
30
- serializer: Literal["json", "cloudpickle", "msgpack"] = Field(
31
- default="cloudpickle"
32
- )
33
-
34
-
35
- class FlockContext(Serializable, BaseModel):
36
- state: dict[str, Any] = Field(default_factory=dict)
37
- history: list[AgentRunRecord] = Field(default_factory=list)
38
- agent_definitions: dict[str, AgentDefinition] = Field(default_factory=dict)
39
- run_id: str = Field(default="")
40
- workflow_id: str = Field(default="")
41
- workflow_timestamp: str = Field(default="")
42
-
43
- def record(
44
- self,
45
- agent_name: str,
46
- data: dict[str, Any],
47
- timestamp: str,
48
- hand_off: str,
49
- called_from: str,
50
- ) -> None:
51
- record = AgentRunRecord(
52
- id=agent_name + "_" + uuid.uuid4().hex[:4],
53
- agent=agent_name,
54
- data=data.copy(),
55
- timestamp=timestamp,
56
- hand_off=hand_off,
57
- called_from=called_from,
58
- )
59
- self.history.append(record)
60
- for key, value in data.items():
61
- self.set_variable(f"{agent_name}.{key}", value)
62
- self.set_variable(FLOCK_LAST_RESULT, data)
63
- self.set_variable(FLOCK_LAST_AGENT, agent_name)
64
- logger.info(
65
- f"Agent run recorded - run_id '{record.id}'",
66
- agent=agent_name,
67
- timestamp=timestamp,
68
- data=data,
69
- )
70
- current_span = trace.get_current_span()
71
- if current_span.get_span_context().is_valid:
72
- current_span.add_event(
73
- "record",
74
- attributes={"agent": agent_name, "timestamp": timestamp},
75
- )
76
-
77
- def get_variable(self, key: str, default: Any = None) -> Any:
78
- return self.state.get(key, default)
79
-
80
- def set_variable(self, key: str, value: Any) -> None:
81
- old_value = self.state.get(key)
82
- self.state[key] = value
83
- if old_value != value:
84
- escaped_value = str(value).replace("{", "{{").replace("}", "}}")
85
-
86
- logger.info(
87
- "Context variable updated - {} -> {}",
88
- key,
89
- escaped_value, # Arguments in order
90
- )
91
-
92
- current_span = trace.get_current_span()
93
- if current_span.get_span_context().is_valid:
94
- current_span.add_event(
95
- "set_variable",
96
- attributes={
97
- "key": key,
98
- "old": str(old_value),
99
- "new": str(value),
100
- },
101
- )
102
-
103
- def deepcopy(self) -> "FlockContext":
104
- return FlockContext.from_dict(self.to_dict())
105
-
106
- def get_agent_history(self, agent_name: str) -> list[AgentRunRecord]:
107
- return [record for record in self.history if record.agent == agent_name]
108
-
109
- def next_input_for(self, agent) -> Any:
110
- try:
111
- if hasattr(agent, "input") and isinstance(agent.input, str):
112
- keys = [k.strip() for k in agent.input.split(",") if k.strip()]
113
- if len(keys) == 1:
114
- return self.get_variable(keys[0])
115
- else:
116
- return {key: self.get_variable(key) for key in keys}
117
- else:
118
- return self.get_variable("init_input")
119
- except Exception as e:
120
- logger.error(
121
- "Error getting next input for agent",
122
- agent=agent.name,
123
- error=str(e),
124
- )
125
- raise
126
-
127
- def get_most_recent_value(self, variable_name: str) -> Any:
128
- for history_record in reversed(self.history):
129
- if variable_name in history_record.data:
130
- return history_record.data[variable_name]
131
-
132
- def get_agent_definition(self, agent_name: str) -> AgentDefinition | None:
133
- return self.agent_definitions.get(agent_name)
134
-
135
- def get_last_agent_name(self) -> str | None:
136
- """Returns the name of the agent from the most recent history record."""
137
- if not self.history:
138
- return None
139
- last_record = self.history[-1]
140
- # The 'called_from' field in the *next* record is the previous agent.
141
- # However, to get the name of the *last executed agent*, we look at the 'agent' field.
142
- return last_record.agent
143
-
144
- def add_agent_definition(
145
- self, agent_type: type, agent_name: str, agent_data: Any
146
- ) -> None:
147
- definition = AgentDefinition(
148
- agent_type=agent_type.__name__,
149
- agent_name=agent_name,
150
- agent_data=agent_data,
151
- )
152
- self.agent_definitions[agent_name] = definition
153
-
154
- # Use the reactive setter for dict-like access.
155
- def __getitem__(self, key: str) -> Any:
156
- return self.get_variable(key)
157
-
158
- def __setitem__(self, key: str, value: Any) -> None:
159
- self.set_variable(key, value)
160
-
161
- def to_dict(self) -> dict[str, Any]:
162
- def convert(obj):
163
- if isinstance(obj, datetime):
164
- return obj.isoformat()
165
- if hasattr(obj, "__dataclass_fields__"):
166
- return asdict(
167
- obj, dict_factory=lambda x: {k: convert(v) for k, v in x}
168
- )
169
- return obj
170
-
171
- return convert(asdict(self))
172
-
173
- @classmethod
174
- def from_dict(cls, data: dict[str, Any]) -> "FlockContext":
175
- def convert(obj):
176
- if isinstance(obj, dict):
177
- if "timestamp" in obj:
178
- return AgentRunRecord(
179
- **{
180
- **obj,
181
- "timestamp": obj["timestamp"]
182
- ,
183
- }
184
- )
185
- if "agent_type" in obj:
186
- return AgentDefinition(**obj)
187
- return {k: convert(v) for k, v in obj.items()}
188
- if isinstance(obj, list):
189
- return [convert(v) for v in obj]
190
- return obj
191
-
192
- converted = convert(data)
193
- return cls(**converted)
194
-
195
- def resolve(self, svc_type):
196
- """Resolve a service from the request-scoped DI container if present.
197
-
198
- The bootstrap code is expected to store the active `ServiceProvider` from
199
- `wd.di` in the context variable key ``di.container``. This helper
200
- provides a convenient façade so that Flock components can simply call
201
- ``context.resolve(SomeType)`` regardless of whether a container is
202
- available. When the container is missing or the service cannot be
203
- resolved, ``None`` is returned instead of raising to keep backward
204
- compatibility.
205
- """
206
- container = self.get_variable("di.container")
207
- if container is None:
208
- return None
209
- try:
210
- return container.get_service(svc_type)
211
- except Exception:
212
- # Service not registered or other resolution error – fall back to None
213
- return None
@@ -1,37 +0,0 @@
1
- """Module for managing the FlockContext."""
2
-
3
- from flock.core.context.context import FlockContext
4
- from flock.core.context.context_vars import (
5
- FLOCK_CURRENT_AGENT,
6
- FLOCK_INITIAL_INPUT,
7
- FLOCK_LOCAL_DEBUG,
8
- FLOCK_MODEL,
9
- FLOCK_RUN_ID,
10
- )
11
-
12
-
13
- def initialize_context(
14
- context: FlockContext,
15
- agent_name: str,
16
- input_data: dict,
17
- run_id: str,
18
- local_debug: bool,
19
- model: str,
20
- ) -> None:
21
- """Initialize the FlockContext with standard variables before running an agent.
22
-
23
- Args:
24
- context: The FlockContext instance.
25
- agent_name: The name of the current agent.
26
- input_data: A dictionary of inputs for the agent.
27
- run_id: A unique identifier for the run.
28
- local_debug: Flag indicating whether local debugging is enabled.
29
- """
30
- context.set_variable(FLOCK_CURRENT_AGENT, agent_name)
31
- for key, value in input_data.items():
32
- context.set_variable("flock." + key, value)
33
- context.set_variable(FLOCK_INITIAL_INPUT, input_data)
34
- context.set_variable(FLOCK_LOCAL_DEBUG, local_debug)
35
- context.run_id = run_id
36
- context.set_variable(FLOCK_RUN_ID, run_id)
37
- context.set_variable(FLOCK_MODEL, model)
@@ -1,10 +0,0 @@
1
- """Context variables for Flock."""
2
-
3
- FLOCK_CURRENT_AGENT = "flock.current_agent"
4
- FLOCK_INITIAL_INPUT = "flock.initial_input"
5
- FLOCK_LOCAL_DEBUG = "flock.local_debug"
6
- FLOCK_RUN_ID = "flock.run_id"
7
- FLOCK_LAST_AGENT = "flock.last_agent"
8
- FLOCK_LAST_RESULT = "flock.last_result"
9
- FLOCK_MODEL = "flock.model"
10
- FLOCK_BATCH_SILENT_MODE = "flock.batch_silent"
@@ -1,396 +0,0 @@
1
- # src/flock/core/util/evaluation_helpers.py
2
- import inspect
3
- import sys
4
- from collections.abc import Callable
5
- from pathlib import Path
6
- from typing import Any, Union
7
-
8
- import pandas as pd
9
- from box import Box
10
- from datasets import (
11
- Dataset as HFDataset,
12
- get_dataset_config_names,
13
- load_dataset,
14
- )
15
- from opik import Opik
16
- from opik.evaluation import evaluate
17
-
18
- from flock.core.flock import Flock
19
- from flock.core.flock_agent import FlockAgent
20
-
21
- # Legacy FlockEvaluator import removed
22
- from flock.core.logging.logging import get_logger
23
-
24
- # Potentially import metrics libraries like rouge_score, nltk, sentence_transformers
25
-
26
- logger_helpers = get_logger("util.evaluation")
27
-
28
-
29
- def evaluate_with_opik(
30
- dataset: str | Path | list[dict[str, Any]] | pd.DataFrame | HFDataset,
31
- dataset_name: str,
32
- experiment_name: str,
33
- start_agent: FlockAgent | str,
34
- input_mapping: dict[str, str],
35
- answer_mapping: dict[str, str],
36
- metrics: list[
37
- str
38
- | Callable[[Any, Any], bool | float | dict[str, Any]]
39
- | FlockAgent
40
- | FlockEvaluator
41
- ],
42
- ):
43
- df = normalize_dataset(dataset)
44
- client = Opik()
45
- dataset = client.get_or_create_dataset(name=dataset_name)
46
-
47
- dataset.insert_from_pandas(dataframe=df, ignore_keys=["source"])
48
-
49
- # Create a single Flock instance outside the task function
50
- shared_flock = Flock(
51
- name="opik_eval", model="azure/gpt-4.1", show_flock_banner=False
52
- )
53
- shared_flock.add_agent(start_agent)
54
-
55
- def evaluation_task(dataset_item):
56
- agent_input = {
57
- value: dataset_item[key] for key, value in input_mapping.items()
58
- }
59
-
60
- # Use the shared Flock instance instead of creating a new one
61
- result_flock = shared_flock.run(
62
- agent=start_agent, input=agent_input, box_result=False
63
- )
64
-
65
- # agent_output = result_flock.get(answer_mapping[key], "No answer found")
66
-
67
- key = next(iter(answer_mapping.keys()))
68
- reference = dataset_item[key]
69
- answer = result_flock.get(answer_mapping[key], "No answer found")
70
-
71
- result = {
72
- "input": agent_input,
73
- "output": answer,
74
- "reference": reference,
75
- }
76
-
77
- return result
78
-
79
- eval_results = evaluate(
80
- experiment_name=experiment_name,
81
- dataset=dataset,
82
- task=evaluation_task,
83
- scoring_metrics=metrics,
84
- )
85
-
86
-
87
- def load_and_merge_all_configs(dataset_name: str) -> pd.DataFrame:
88
- all_configs = get_dataset_config_names(dataset_name)
89
- all_dfs = []
90
-
91
- for config in all_configs:
92
- dataset_dict = load_dataset(dataset_name, config)
93
- for split_name, split_dataset in dataset_dict.items():
94
- df = split_dataset.to_pandas()
95
- df["config"] = config
96
- df["split"] = split_name
97
- all_dfs.append(df)
98
-
99
- merged_df = pd.concat(all_dfs, ignore_index=True)
100
- logger_helpers.info(f"merged_df.head(): {merged_df.head()}")
101
- return merged_df
102
-
103
-
104
- def import_hf_dataset_to_opik(dataset_name: str) -> pd.DataFrame:
105
- df = load_and_merge_all_configs(dataset_name)
106
- logger_helpers.info(
107
- f"type(df): {type(df)}"
108
- ) # ➜ <class 'pandas.core.frame.DataFrame'>
109
- logger_helpers.info(f"df.shape: {df.shape}") # e.g. (123456, N_COLUMNS+2)
110
- logger_helpers.info(
111
- f"df['split'].value_counts(): {df['split'].value_counts()}"
112
- )
113
- logger_helpers.info(f"df['config'].unique(): {df['config'].unique()}")
114
- client = Opik()
115
- dataset = client.get_or_create_dataset(name=dataset_name)
116
-
117
- dataset.insert_from_pandas(dataframe=df, ignore_keys=["source"])
118
- return df
119
-
120
-
121
- def normalize_dataset(dataset: Any) -> pd.DataFrame:
122
- """Converts various dataset formats into a pandas DataFrame."""
123
- if isinstance(dataset, pd.DataFrame):
124
- return dataset.copy()
125
- elif isinstance(dataset, str | Path):
126
- path = Path(dataset)
127
- if not path.exists():
128
- try:
129
- return load_and_merge_all_configs(dataset)
130
- except Exception as e:
131
- raise FileNotFoundError(
132
- f"Dataset file not found: {path}"
133
- ) from e
134
- if path.suffix.lower() == ".csv":
135
- return pd.read_csv(path)
136
- # Add support for json, jsonl etc. if needed
137
- else:
138
- raise ValueError(
139
- f"Unsupported file type for dataset: {path.suffix}"
140
- )
141
- elif isinstance(dataset, list):
142
- if not dataset or not isinstance(dataset[0], dict):
143
- raise ValueError("Dataset list must contain dictionaries.")
144
- return pd.DataFrame(dataset)
145
- elif "datasets" in sys.modules and isinstance(
146
- dataset, sys.modules["datasets"].Dataset
147
- ):
148
- # Requires 'datasets' library to be installed
149
- return dataset.to_pandas()
150
- else:
151
- raise TypeError(f"Unsupported dataset type: {type(dataset)}")
152
-
153
-
154
def extract_value_by_dot_notation(data: dict | Box, key: str) -> Any:
    """Look up a nested value in a dict or Box using a dotted key path.

    Args:
        data: Root mapping to search.
        key: Path such as ``"a.b.c"``; each segment is one ``.get`` lookup.

    Returns:
        The value at the end of the path, or ``None`` when the key is
        empty, a segment is missing (or maps to ``None``), or an
        intermediate value is neither a dict nor a Box.
    """
    if not key:
        return None

    segments = key.split(".")
    current = data
    try:
        for segment in segments:
            if not isinstance(current, (dict, Box)):
                # Cannot descend into a non-mapping value.
                # List indices (e.g. 'results.0.field') are not supported.
                return None
            current = current.get(segment)
            if current is None:
                # Segment absent at this level.
                return None
    except (KeyError, IndexError, AttributeError):
        return None
    return current
174
-
175
-
176
def calculate_evaluation_metrics(
    metrics: list[Union[str, Callable, "FlockAgent", "FlockEvaluator"]],
    metric_configs: dict[str, dict[str, Any]],
    predicted_answers: dict[str, Any],
    expected_answers: dict[str, Any],
    agent_inputs: dict[str, Any],  # For context
    agent_output: Any,  # For context
) -> dict[str, Any]:
    """Compute every requested metric for one evaluation item.

    Metric kinds:
        * ``str`` - name of a built-in metric. It must also be a key of
          both ``predicted_answers`` and ``expected_answers``; otherwise a
          warning is logged and ``None`` is recorded.
        * callable - invoked as ``metric(predicted_answers,
          expected_answers, **config)``. ``agent_inputs`` / ``agent_output``
          are added as keyword arguments when the signature names them.
        * objects whose type name contains ``FlockAgent`` or
          ``FlockEvaluator`` - not implemented; a placeholder string is
          recorded.
        * anything else - warned about and skipped.

    Args:
        metrics: Metrics to evaluate.
        metric_configs: Per-metric keyword options, keyed by metric name.
        predicted_answers: Predicted values keyed by answer name.
        expected_answers: Reference values keyed by answer name.
        agent_inputs: Inputs given to the agent (context for callables).
        agent_output: Raw agent output (context for callables).

    Returns:
        Mapping of metric name to result. A dict result is flattened into
        ``"{metric}_{sub_key}"`` entries. A metric that raises is recorded
        as ``"[Error: ...]"``.
    """
    scores = {}
    for metric in metrics:
        label = ""
        outcome = None
        try:
            if isinstance(metric, str):
                label = metric
                # Simple case: metric name matches an answer_mapping key.
                has_pair = (
                    label in predicted_answers and label in expected_answers
                )
                if not has_pair:
                    logger_helpers.warning(
                        f"Could not find matching predicted/expected values for metric '{label}' based on answer_mapping keys."
                    )
                    outcome = None  # Or some error indicator
                else:
                    predicted = predicted_answers[label]
                    expected = expected_answers[label]
                    scorer = _get_metric_function(label)
                    options = metric_configs.get(label, {})
                    outcome = scorer(predicted, expected, **options)

            elif isinstance(metric, Callable):
                label = getattr(metric, "__name__", "custom_function")
                options = metric_configs.get(label, {})
                # The custom function receives the full answer dicts and
                # picks what it needs; context is passed only on request.
                accepted = inspect.signature(metric).parameters
                extra = options.copy()
                context = (
                    ("agent_inputs", agent_inputs),
                    ("agent_output", agent_output),
                )
                for ctx_name, ctx_value in context:
                    if ctx_name in accepted:
                        extra[ctx_name] = ctx_value
                outcome = metric(predicted_answers, expected_answers, **extra)

            else:
                # Match on the type's text to avoid a hard import.
                type_text = str(type(metric))
                if "FlockAgent" in type_text:
                    label = getattr(metric, "name", "judge_agent")
                    # Looked up as in the other branches; not consumed yet.
                    options = metric_configs.get(label, {})
                    # Running the judge agent needs an async context.
                    logger_helpers.warning(
                        f"Agent-based metric '{label}' execution not implemented in this sketch."
                    )
                    outcome = "[Agent Judge Not Implemented]"
                elif "FlockEvaluator" in type_text:
                    label = getattr(metric, "name", "judge_evaluator")
                    # Looked up as in the other branches; not consumed yet.
                    options = metric_configs.get(label, {})
                    # Running the evaluator needs an async context.
                    logger_helpers.warning(
                        f"Evaluator-based metric '{label}' execution not implemented in this sketch."
                    )
                    outcome = "[Evaluator Judge Not Implemented]"
                else:
                    logger_helpers.warning(
                        f"Unsupported metric type: {type(metric)}"
                    )
                    continue

            # Flatten dict results into one entry per sub-key.
            if isinstance(outcome, dict):
                for suffix, value in outcome.items():
                    scores[f"{label}_{suffix}"] = value
            else:
                scores[label] = outcome

        except Exception as e:
            logger_helpers.error(f"Error calculating metric '{label}': {e}")
            scores[label] = f"[Error: {e}]"

    return scores
272
-
273
-
274
def _get_metric_function(metric_name: str) -> Callable:
    """Return the scoring callable for a built-in metric name.

    Every returned callable takes ``(predicted, expected, **options)``.
    Optional libraries are imported lazily; when one is missing a warning
    is logged and a callable that always returns ``None`` is handed back.

    Supported names:
        * ``exact_match`` - stripped string equality (bool).
        * ``fuzzy_match`` - ``thefuzz`` ratio >= ``threshold`` (default 85).
        * ``rouge*`` - e.g. ``rouge_1``, ``rouge_2``, ``rouge_l``; the
          ``score_type`` option (default ``"fmeasure"``) selects the field.
        * ``semantic_similarity`` - cosine similarity of sentence
          embeddings from the ``all-MiniLM-L6-v2`` model.
        * ``llm_judge`` - placeholder returning a fixed marker string.

    Args:
        metric_name: Name of the built-in metric.

    Returns:
        The callable implementing the metric.

    Raises:
        ValueError: ``metric_name`` is not a known built-in metric.
    """
    if metric_name == "exact_match":
        return lambda pred, act, **kw: str(pred).strip() == str(act).strip()

    if metric_name == "fuzzy_match":
        try:
            from thefuzz import fuzz
        except ImportError:
            logger_helpers.warning(
                "fuzzy_match requires 'thefuzz': pip install thefuzz[speedup]"
            )
            return lambda p, a, **kw: None
        return (
            lambda pred, act, threshold=85, **kw: fuzz.ratio(
                str(pred), str(act)
            )
            >= threshold
        )

    if metric_name.startswith("rouge"):  # rouge_1, rouge_2, rouge_l
        try:
            from rouge_score import rouge_scorer
        except ImportError:
            logger_helpers.warning(
                "rouge requires 'rouge-score': pip install rouge-score"
            )
            return lambda p, a, **kw: None

        # rouge-score type names are case-sensitive: the longest-common-
        # subsequence variants are 'rougeL' / 'rougeLsum'. Stripping the
        # underscore alone turns 'rouge_l' into the invalid 'rougel'.
        rouge_type = metric_name.replace("_", "")
        variant = rouge_type[len("rouge"):]
        if variant.lower() in ("l", "lsum"):
            rouge_type = "rougeL" + variant[1:].lower()

        scorer = rouge_scorer.RougeScorer([rouge_type], use_stemmer=True)

        def calculate_rouge(pred, act, score_type="fmeasure", **kw):
            # RougeScorer.score takes (target, prediction).
            scores = scorer.score(str(act), str(pred))
            return scores[rouge_type]._asdict().get(score_type, 0.0)

        return calculate_rouge

    if metric_name == "semantic_similarity":
        try:
            from sentence_transformers import SentenceTransformer, util
        except ImportError:
            logger_helpers.warning(
                "semantic_similarity requires 'sentence-transformers': pip install sentence-transformers"
            )
            return lambda p, a, **kw: None

        # This lookup runs once per evaluated item, so keep the model on
        # the function object instead of rebuilding it on every call.
        model = getattr(_get_metric_function, "_similarity_model", None)
        if model is None:
            model = SentenceTransformer("all-MiniLM-L6-v2")
            _get_metric_function._similarity_model = model

        def calculate_similarity(pred, act, **kw):
            emb1 = model.encode(str(pred), convert_to_tensor=True)
            emb2 = model.encode(str(act), convert_to_tensor=True)
            return util.pytorch_cos_sim(emb1, emb2).item()

        return calculate_similarity

    # Add bleu, f1 etc.
    if metric_name == "llm_judge":
        # Judge metrics are dispatched by type in the caller; this is only
        # a placeholder for the case where the name is mapped first.
        return lambda p, a, **kw: "[LLM Judge Not Implemented Directly]"

    raise ValueError(f"Unknown built-in metric: {metric_name}")
341
-
342
-
343
def aggregate_results(results_list: list[dict[str, Any]]) -> dict[str, Any]:
    """Summarise per-item evaluation results.

    Args:
        results_list: One dict per evaluated item. A truthy ``"error"``
            entry counts as an error; ``"metrics"`` maps metric names to
            values.

    Returns:
        A dict with ``total_items``, ``errors`` and ``metrics_summary``.
        Only int, float and bool metric values are aggregated: a metric
        whose values are all bools reports ``accuracy``; any other
        numeric metric reports ``mean`` and ``count``.
    """
    total = len(results_list)
    failures = 0
    collected = {}

    for entry in results_list:
        if entry.get("error"):
            failures += 1
        for metric, score in entry.get("metrics", {}).items():
            # Only aggregate numerics/bools; strings etc. are ignored.
            if isinstance(score, (float, int, bool)):
                collected.setdefault(metric, []).append(score)

    per_metric = {}
    for metric, scores in collected.items():
        # Each list holds at least one int/float/bool by construction.
        size = len(scores)
        if all(isinstance(score, bool) for score in scores):
            per_metric[metric] = {"accuracy": sum(scores) / size}
        else:
            per_metric[metric] = {
                "mean": sum(scores) / size,
                "count": size,
                # Add min, max, stddev if needed
            }

    return {
        "total_items": total,
        "errors": failures,
        "metrics_summary": per_metric,
    }
379
-
380
-
381
- # --- Placeholder for async judge execution ---
382
- # Need to run these within the main async context or manage loops carefully
383
async def _run_judge_agent(judge_agent, predicted, expected, config):
    """Placeholder coroutine for an agent-based judge metric.

    Not implemented: every argument is ignored and a fixed marker string
    is returned.

    Sketch of the intended flow (TODO):
        1. Build the judge input from the agent's signature, e.g.
           ``{"prediction": ..., "reference": ..., "criteria": ...}``.
        2. ``judge_result = await judge_agent.run_async(judge_input)``.
        3. Return the result, or a score/judgement extracted from it.
    """
    placeholder = "[Agent Judge Not Implemented]"
    return placeholder
389
-
390
-
391
async def _run_judge_evaluator(judge_evaluator, predicted, expected, config):
    """Placeholder coroutine for an evaluator-based judge metric.

    Not implemented: every argument is ignored and a fixed marker string
    is returned.

    Sketch of the intended flow (TODO):
        1. Build the judge input, e.g.
           ``{"prediction": ..., "reference": ..., **config}``.
        2. ``judge_result = await judge_evaluator.evaluate(None,
           judge_input, [])`` (the agent argument might not be needed).
        3. Return the result, or a score/judgement extracted from it.
    """
    placeholder = "[Evaluator Judge Not Implemented]"
    return placeholder