flock-core: flock_core-0.4.543-py3-none-any.whl → flock_core-0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flock-core might be problematic. Click here for more details.

Files changed (501)
  1. flock/__init__.py +12 -217
  2. flock/agent.py +1079 -0
  3. flock/api/themes.py +71 -0
  4. flock/artifacts.py +86 -0
  5. flock/cli.py +147 -0
  6. flock/components.py +189 -0
  7. flock/dashboard/__init__.py +30 -0
  8. flock/dashboard/collector.py +559 -0
  9. flock/dashboard/events.py +188 -0
  10. flock/dashboard/graph_builder.py +563 -0
  11. flock/dashboard/launcher.py +235 -0
  12. flock/dashboard/models/graph.py +156 -0
  13. flock/dashboard/service.py +991 -0
  14. flock/dashboard/static_v2/assets/index-DFRnI_mt.js +111 -0
  15. flock/dashboard/static_v2/assets/index-fPLNdmp1.css +1 -0
  16. flock/dashboard/static_v2/index.html +13 -0
  17. flock/dashboard/websocket.py +246 -0
  18. flock/engines/__init__.py +6 -0
  19. flock/engines/dspy_engine.py +932 -0
  20. flock/examples.py +131 -0
  21. flock/frontend/README.md +778 -0
  22. flock/frontend/docs/DESIGN_SYSTEM.md +1980 -0
  23. flock/frontend/index.html +12 -0
  24. flock/frontend/package-lock.json +4337 -0
  25. flock/frontend/package.json +48 -0
  26. flock/frontend/src/App.tsx +139 -0
  27. flock/frontend/src/__tests__/integration/graph-snapshot.test.tsx +647 -0
  28. flock/frontend/src/__tests__/integration/indexeddb-persistence.test.tsx +699 -0
  29. flock/frontend/src/components/common/BuildInfo.tsx +39 -0
  30. flock/frontend/src/components/common/EmptyState.module.css +115 -0
  31. flock/frontend/src/components/common/EmptyState.tsx +128 -0
  32. flock/frontend/src/components/common/ErrorBoundary.module.css +169 -0
  33. flock/frontend/src/components/common/ErrorBoundary.tsx +118 -0
  34. flock/frontend/src/components/common/KeyboardShortcutsDialog.css +251 -0
  35. flock/frontend/src/components/common/KeyboardShortcutsDialog.tsx +151 -0
  36. flock/frontend/src/components/common/LoadingSpinner.module.css +97 -0
  37. flock/frontend/src/components/common/LoadingSpinner.tsx +29 -0
  38. flock/frontend/src/components/controls/PublishControl.css +547 -0
  39. flock/frontend/src/components/controls/PublishControl.test.tsx +543 -0
  40. flock/frontend/src/components/controls/PublishControl.tsx +432 -0
  41. flock/frontend/src/components/details/DetailWindowContainer.tsx +58 -0
  42. flock/frontend/src/components/details/LiveOutputTab.test.tsx +792 -0
  43. flock/frontend/src/components/details/LiveOutputTab.tsx +220 -0
  44. flock/frontend/src/components/details/MessageDetailWindow.tsx +439 -0
  45. flock/frontend/src/components/details/MessageHistoryTab.tsx +374 -0
  46. flock/frontend/src/components/details/NodeDetailWindow.test.tsx +501 -0
  47. flock/frontend/src/components/details/NodeDetailWindow.tsx +218 -0
  48. flock/frontend/src/components/details/RunStatusTab.tsx +348 -0
  49. flock/frontend/src/components/details/tabs.test.tsx +1015 -0
  50. flock/frontend/src/components/filters/ArtifactTypeFilter.tsx +21 -0
  51. flock/frontend/src/components/filters/CorrelationIDFilter.module.css +102 -0
  52. flock/frontend/src/components/filters/CorrelationIDFilter.test.tsx +197 -0
  53. flock/frontend/src/components/filters/CorrelationIDFilter.tsx +121 -0
  54. flock/frontend/src/components/filters/FilterFlyout.module.css +104 -0
  55. flock/frontend/src/components/filters/FilterFlyout.tsx +80 -0
  56. flock/frontend/src/components/filters/FilterPills.module.css +220 -0
  57. flock/frontend/src/components/filters/FilterPills.test.tsx +189 -0
  58. flock/frontend/src/components/filters/FilterPills.tsx +143 -0
  59. flock/frontend/src/components/filters/ProducerFilter.tsx +21 -0
  60. flock/frontend/src/components/filters/SavedFiltersControl.module.css +60 -0
  61. flock/frontend/src/components/filters/SavedFiltersControl.test.tsx +158 -0
  62. flock/frontend/src/components/filters/SavedFiltersControl.tsx +159 -0
  63. flock/frontend/src/components/filters/TagFilter.tsx +21 -0
  64. flock/frontend/src/components/filters/TimeRangeFilter.module.css +115 -0
  65. flock/frontend/src/components/filters/TimeRangeFilter.test.tsx +154 -0
  66. flock/frontend/src/components/filters/TimeRangeFilter.tsx +110 -0
  67. flock/frontend/src/components/filters/VisibilityFilter.tsx +21 -0
  68. flock/frontend/src/components/graph/AgentNode.test.tsx +77 -0
  69. flock/frontend/src/components/graph/AgentNode.tsx +324 -0
  70. flock/frontend/src/components/graph/GraphCanvas.tsx +613 -0
  71. flock/frontend/src/components/graph/MessageFlowEdge.tsx +128 -0
  72. flock/frontend/src/components/graph/MessageNode.test.tsx +64 -0
  73. flock/frontend/src/components/graph/MessageNode.tsx +129 -0
  74. flock/frontend/src/components/graph/MiniMap.tsx +47 -0
  75. flock/frontend/src/components/graph/TransformEdge.tsx +123 -0
  76. flock/frontend/src/components/layout/DashboardLayout.css +420 -0
  77. flock/frontend/src/components/layout/DashboardLayout.tsx +287 -0
  78. flock/frontend/src/components/layout/Header.module.css +88 -0
  79. flock/frontend/src/components/layout/Header.tsx +52 -0
  80. flock/frontend/src/components/modules/HistoricalArtifactsModule.module.css +288 -0
  81. flock/frontend/src/components/modules/HistoricalArtifactsModule.tsx +450 -0
  82. flock/frontend/src/components/modules/HistoricalArtifactsModuleWrapper.tsx +13 -0
  83. flock/frontend/src/components/modules/JsonAttributeRenderer.tsx +140 -0
  84. flock/frontend/src/components/modules/ModuleRegistry.test.ts +333 -0
  85. flock/frontend/src/components/modules/ModuleRegistry.ts +93 -0
  86. flock/frontend/src/components/modules/ModuleWindow.tsx +223 -0
  87. flock/frontend/src/components/modules/TraceModuleJaeger.tsx +1971 -0
  88. flock/frontend/src/components/modules/TraceModuleJaegerWrapper.tsx +13 -0
  89. flock/frontend/src/components/modules/registerModules.ts +29 -0
  90. flock/frontend/src/components/settings/AdvancedSettings.tsx +175 -0
  91. flock/frontend/src/components/settings/AppearanceSettings.tsx +185 -0
  92. flock/frontend/src/components/settings/GraphSettings.tsx +110 -0
  93. flock/frontend/src/components/settings/MultiSelect.tsx +235 -0
  94. flock/frontend/src/components/settings/SettingsPanel.css +327 -0
  95. flock/frontend/src/components/settings/SettingsPanel.tsx +131 -0
  96. flock/frontend/src/components/settings/ThemeSelector.tsx +298 -0
  97. flock/frontend/src/components/settings/TracingSettings.tsx +404 -0
  98. flock/frontend/src/hooks/useKeyboardShortcuts.ts +148 -0
  99. flock/frontend/src/hooks/useModulePersistence.test.ts +442 -0
  100. flock/frontend/src/hooks/useModulePersistence.ts +154 -0
  101. flock/frontend/src/hooks/useModules.ts +157 -0
  102. flock/frontend/src/hooks/usePersistence.ts +141 -0
  103. flock/frontend/src/main.tsx +13 -0
  104. flock/frontend/src/services/api.ts +337 -0
  105. flock/frontend/src/services/graphService.test.ts +330 -0
  106. flock/frontend/src/services/graphService.ts +75 -0
  107. flock/frontend/src/services/indexeddb.test.ts +793 -0
  108. flock/frontend/src/services/indexeddb.ts +848 -0
  109. flock/frontend/src/services/layout.test.ts +437 -0
  110. flock/frontend/src/services/layout.ts +357 -0
  111. flock/frontend/src/services/themeApplicator.ts +140 -0
  112. flock/frontend/src/services/themeService.ts +77 -0
  113. flock/frontend/src/services/websocket.ts +650 -0
  114. flock/frontend/src/store/filterStore.test.ts +250 -0
  115. flock/frontend/src/store/filterStore.ts +272 -0
  116. flock/frontend/src/store/graphStore.test.ts +570 -0
  117. flock/frontend/src/store/graphStore.ts +462 -0
  118. flock/frontend/src/store/moduleStore.test.ts +253 -0
  119. flock/frontend/src/store/moduleStore.ts +75 -0
  120. flock/frontend/src/store/settingsStore.ts +188 -0
  121. flock/frontend/src/store/streamStore.ts +68 -0
  122. flock/frontend/src/store/uiStore.test.ts +54 -0
  123. flock/frontend/src/store/uiStore.ts +122 -0
  124. flock/frontend/src/store/wsStore.ts +34 -0
  125. flock/frontend/src/styles/index.css +15 -0
  126. flock/frontend/src/styles/scrollbar.css +47 -0
  127. flock/frontend/src/styles/variables.css +488 -0
  128. flock/frontend/src/test/setup.ts +1 -0
  129. flock/frontend/src/types/filters.ts +47 -0
  130. flock/frontend/src/types/graph.ts +95 -0
  131. flock/frontend/src/types/modules.ts +10 -0
  132. flock/frontend/src/types/theme.ts +55 -0
  133. flock/frontend/src/utils/artifacts.ts +24 -0
  134. flock/frontend/src/utils/mockData.ts +98 -0
  135. flock/frontend/src/utils/performance.ts +16 -0
  136. flock/frontend/src/vite-env.d.ts +17 -0
  137. flock/frontend/tsconfig.json +27 -0
  138. flock/frontend/tsconfig.node.json +11 -0
  139. flock/frontend/vite.config.ts +25 -0
  140. flock/frontend/vitest.config.ts +11 -0
  141. flock/{core/util → helper}/cli_helper.py +9 -5
  142. flock/{core/logging → logging}/__init__.py +2 -3
  143. flock/logging/auto_trace.py +159 -0
  144. flock/{core/logging → logging}/formatters/enum_builder.py +3 -4
  145. flock/{core/logging → logging}/formatters/theme_builder.py +19 -44
  146. flock/{core/logging → logging}/formatters/themed_formatter.py +69 -107
  147. flock/{core/logging → logging}/logging.py +78 -61
  148. flock/{core/logging → logging}/telemetry.py +66 -26
  149. flock/{core/logging → logging}/telemetry_exporter/base_exporter.py +2 -2
  150. flock/logging/telemetry_exporter/duckdb_exporter.py +216 -0
  151. flock/{core/logging → logging}/telemetry_exporter/file_exporter.py +13 -10
  152. flock/{core/logging → logging}/telemetry_exporter/sqlite_exporter.py +2 -3
  153. flock/logging/trace_and_logged.py +304 -0
  154. flock/mcp/__init__.py +91 -0
  155. flock/{core/mcp/mcp_client.py → mcp/client.py} +131 -158
  156. flock/{core/mcp/mcp_config.py → mcp/config.py} +86 -132
  157. flock/mcp/manager.py +286 -0
  158. flock/mcp/servers/sse/__init__.py +1 -1
  159. flock/mcp/servers/sse/flock_sse_server.py +16 -58
  160. flock/mcp/servers/stdio/__init__.py +1 -1
  161. flock/mcp/servers/stdio/flock_stdio_server.py +13 -53
  162. flock/mcp/servers/streamable_http/flock_streamable_http_server.py +22 -67
  163. flock/mcp/servers/websockets/flock_websocket_server.py +12 -45
  164. flock/{core/mcp/flock_mcp_tool_base.py → mcp/tool.py} +24 -78
  165. flock/mcp/types/__init__.py +42 -0
  166. flock/{core/mcp → mcp}/types/callbacks.py +12 -15
  167. flock/{core/mcp → mcp}/types/factories.py +7 -6
  168. flock/{core/mcp → mcp}/types/handlers.py +13 -18
  169. flock/{core/mcp → mcp}/types/types.py +70 -74
  170. flock/{core/mcp → mcp}/util/helpers.py +3 -3
  171. flock/orchestrator.py +970 -0
  172. flock/registry.py +148 -0
  173. flock/runtime.py +262 -0
  174. flock/service.py +277 -0
  175. flock/store.py +1214 -0
  176. flock/subscription.py +111 -0
  177. flock/themes/andromeda.toml +1 -1
  178. flock/themes/apple-system-colors.toml +1 -1
  179. flock/themes/arcoiris.toml +1 -1
  180. flock/themes/atomonelight.toml +1 -1
  181. flock/themes/ayu copy.toml +1 -1
  182. flock/themes/ayu-light.toml +1 -1
  183. flock/themes/belafonte-day.toml +1 -1
  184. flock/themes/belafonte-night.toml +1 -1
  185. flock/themes/blulocodark.toml +1 -1
  186. flock/themes/breeze.toml +1 -1
  187. flock/themes/broadcast.toml +1 -1
  188. flock/themes/brogrammer.toml +1 -1
  189. flock/themes/builtin-dark.toml +1 -1
  190. flock/themes/builtin-pastel-dark.toml +1 -1
  191. flock/themes/catppuccin-latte.toml +1 -1
  192. flock/themes/catppuccin-macchiato.toml +1 -1
  193. flock/themes/catppuccin-mocha.toml +1 -1
  194. flock/themes/cga.toml +1 -1
  195. flock/themes/chalk.toml +1 -1
  196. flock/themes/ciapre.toml +1 -1
  197. flock/themes/coffee-theme.toml +1 -1
  198. flock/themes/cyberpunkscarletprotocol.toml +1 -1
  199. flock/themes/dark+.toml +1 -1
  200. flock/themes/darkermatrix.toml +1 -1
  201. flock/themes/darkmatrix.toml +2 -2
  202. flock/themes/darkside.toml +1 -1
  203. flock/themes/deep.toml +2 -2
  204. flock/themes/desert.toml +1 -1
  205. flock/themes/django.toml +1 -1
  206. flock/themes/djangosmooth.toml +1 -1
  207. flock/themes/doomone.toml +1 -1
  208. flock/themes/dotgov.toml +1 -1
  209. flock/themes/dracula+.toml +1 -1
  210. flock/themes/duckbones.toml +1 -1
  211. flock/themes/encom.toml +1 -1
  212. flock/themes/espresso.toml +1 -1
  213. flock/themes/everblush.toml +1 -1
  214. flock/themes/fairyfloss.toml +1 -1
  215. flock/themes/fideloper.toml +1 -1
  216. flock/themes/fishtank.toml +1 -1
  217. flock/themes/flexoki-light.toml +1 -1
  218. flock/themes/floraverse.toml +1 -1
  219. flock/themes/framer.toml +1 -1
  220. flock/themes/galizur.toml +1 -1
  221. flock/themes/github.toml +1 -1
  222. flock/themes/grass.toml +1 -1
  223. flock/themes/grey-green.toml +1 -1
  224. flock/themes/gruvboxlight.toml +1 -1
  225. flock/themes/guezwhoz.toml +1 -1
  226. flock/themes/harper.toml +1 -1
  227. flock/themes/hax0r-blue.toml +1 -1
  228. flock/themes/hopscotch.256.toml +1 -1
  229. flock/themes/ic-green-ppl.toml +1 -1
  230. flock/themes/iceberg-dark.toml +1 -1
  231. flock/themes/japanesque.toml +1 -1
  232. flock/themes/jubi.toml +1 -1
  233. flock/themes/kibble.toml +1 -1
  234. flock/themes/kolorit.toml +1 -1
  235. flock/themes/kurokula.toml +1 -1
  236. flock/themes/materialdesigncolors.toml +1 -1
  237. flock/themes/matrix.toml +1 -1
  238. flock/themes/mellifluous.toml +1 -1
  239. flock/themes/midnight-in-mojave.toml +1 -1
  240. flock/themes/monokai-remastered.toml +1 -1
  241. flock/themes/monokai-soda.toml +1 -1
  242. flock/themes/neon.toml +1 -1
  243. flock/themes/neopolitan.toml +5 -5
  244. flock/themes/nord-light.toml +1 -1
  245. flock/themes/ocean.toml +1 -1
  246. flock/themes/onehalfdark.toml +1 -1
  247. flock/themes/onehalflight.toml +1 -1
  248. flock/themes/palenighthc.toml +1 -1
  249. flock/themes/paulmillr.toml +1 -1
  250. flock/themes/pencildark.toml +1 -1
  251. flock/themes/pnevma.toml +1 -1
  252. flock/themes/purple-rain.toml +1 -1
  253. flock/themes/purplepeter.toml +1 -1
  254. flock/themes/raycast-dark.toml +1 -1
  255. flock/themes/red-sands.toml +1 -1
  256. flock/themes/relaxed.toml +1 -1
  257. flock/themes/retro.toml +1 -1
  258. flock/themes/rose-pine.toml +1 -1
  259. flock/themes/royal.toml +1 -1
  260. flock/themes/ryuuko.toml +1 -1
  261. flock/themes/sakura.toml +1 -1
  262. flock/themes/scarlet-protocol.toml +1 -1
  263. flock/themes/seoulbones-dark.toml +1 -1
  264. flock/themes/shades-of-purple.toml +1 -1
  265. flock/themes/smyck.toml +1 -1
  266. flock/themes/softserver.toml +1 -1
  267. flock/themes/solarized-darcula.toml +1 -1
  268. flock/themes/square.toml +1 -1
  269. flock/themes/sugarplum.toml +1 -1
  270. flock/themes/thayer-bright.toml +1 -1
  271. flock/themes/tokyonight.toml +1 -1
  272. flock/themes/tomorrow.toml +1 -1
  273. flock/themes/ubuntu.toml +1 -1
  274. flock/themes/ultradark.toml +1 -1
  275. flock/themes/ultraviolent.toml +1 -1
  276. flock/themes/unikitty.toml +1 -1
  277. flock/themes/urple.toml +1 -1
  278. flock/themes/vesper.toml +1 -1
  279. flock/themes/vimbones.toml +1 -1
  280. flock/themes/wildcherry.toml +1 -1
  281. flock/themes/wilmersdorf.toml +1 -1
  282. flock/themes/wryan.toml +1 -1
  283. flock/themes/xcodedarkhc.toml +1 -1
  284. flock/themes/xcodelight.toml +1 -1
  285. flock/themes/zenbones-light.toml +1 -1
  286. flock/themes/zenwritten-dark.toml +1 -1
  287. flock/utilities.py +301 -0
  288. flock/utility/output_utility_component.py +226 -0
  289. flock/visibility.py +107 -0
  290. flock_core-0.5.0.dist-info/METADATA +964 -0
  291. flock_core-0.5.0.dist-info/RECORD +525 -0
  292. flock_core-0.5.0.dist-info/entry_points.txt +2 -0
  293. {flock_core-0.4.543.dist-info → flock_core-0.5.0.dist-info}/licenses/LICENSE +1 -1
  294. flock/adapter/__init__.py +0 -14
  295. flock/adapter/azure_adapter.py +0 -68
  296. flock/adapter/chroma_adapter.py +0 -73
  297. flock/adapter/faiss_adapter.py +0 -97
  298. flock/adapter/pinecone_adapter.py +0 -51
  299. flock/adapter/vector_base.py +0 -47
  300. flock/cli/assets/release_notes.md +0 -140
  301. flock/cli/config.py +0 -8
  302. flock/cli/constants.py +0 -36
  303. flock/cli/create_agent.py +0 -1
  304. flock/cli/create_flock.py +0 -280
  305. flock/cli/execute_flock.py +0 -620
  306. flock/cli/load_agent.py +0 -1
  307. flock/cli/load_examples.py +0 -1
  308. flock/cli/load_flock.py +0 -192
  309. flock/cli/load_release_notes.py +0 -20
  310. flock/cli/loaded_flock_cli.py +0 -254
  311. flock/cli/manage_agents.py +0 -459
  312. flock/cli/registry_management.py +0 -889
  313. flock/cli/runner.py +0 -41
  314. flock/cli/settings.py +0 -857
  315. flock/cli/utils.py +0 -135
  316. flock/cli/view_results.py +0 -29
  317. flock/cli/yaml_editor.py +0 -396
  318. flock/config.py +0 -56
  319. flock/core/__init__.py +0 -44
  320. flock/core/api/__init__.py +0 -10
  321. flock/core/api/custom_endpoint.py +0 -45
  322. flock/core/api/endpoints.py +0 -262
  323. flock/core/api/main.py +0 -162
  324. flock/core/api/models.py +0 -101
  325. flock/core/api/run_store.py +0 -224
  326. flock/core/api/runner.py +0 -44
  327. flock/core/api/service.py +0 -214
  328. flock/core/config/flock_agent_config.py +0 -11
  329. flock/core/config/scheduled_agent_config.py +0 -40
  330. flock/core/context/context.py +0 -214
  331. flock/core/context/context_manager.py +0 -40
  332. flock/core/context/context_vars.py +0 -11
  333. flock/core/evaluation/utils.py +0 -395
  334. flock/core/execution/batch_executor.py +0 -369
  335. flock/core/execution/evaluation_executor.py +0 -438
  336. flock/core/execution/local_executor.py +0 -31
  337. flock/core/execution/opik_executor.py +0 -103
  338. flock/core/execution/temporal_executor.py +0 -166
  339. flock/core/flock.py +0 -1003
  340. flock/core/flock_agent.py +0 -1258
  341. flock/core/flock_evaluator.py +0 -60
  342. flock/core/flock_factory.py +0 -513
  343. flock/core/flock_module.py +0 -207
  344. flock/core/flock_registry.py +0 -702
  345. flock/core/flock_router.py +0 -83
  346. flock/core/flock_scheduler.py +0 -166
  347. flock/core/flock_server_manager.py +0 -136
  348. flock/core/interpreter/python_interpreter.py +0 -689
  349. flock/core/logging/live_capture.py +0 -137
  350. flock/core/logging/trace_and_logged.py +0 -59
  351. flock/core/mcp/__init__.py +0 -1
  352. flock/core/mcp/flock_mcp_server.py +0 -640
  353. flock/core/mcp/mcp_client_manager.py +0 -201
  354. flock/core/mcp/types/__init__.py +0 -1
  355. flock/core/mixin/dspy_integration.py +0 -445
  356. flock/core/mixin/prompt_parser.py +0 -125
  357. flock/core/serialization/__init__.py +0 -13
  358. flock/core/serialization/callable_registry.py +0 -52
  359. flock/core/serialization/flock_serializer.py +0 -854
  360. flock/core/serialization/json_encoder.py +0 -41
  361. flock/core/serialization/secure_serializer.py +0 -175
  362. flock/core/serialization/serializable.py +0 -342
  363. flock/core/serialization/serialization_utils.py +0 -409
  364. flock/core/util/file_path_utils.py +0 -223
  365. flock/core/util/hydrator.py +0 -309
  366. flock/core/util/input_resolver.py +0 -141
  367. flock/core/util/loader.py +0 -59
  368. flock/core/util/splitter.py +0 -219
  369. flock/di.py +0 -41
  370. flock/evaluators/__init__.py +0 -1
  371. flock/evaluators/declarative/__init__.py +0 -1
  372. flock/evaluators/declarative/declarative_evaluator.py +0 -217
  373. flock/evaluators/memory/memory_evaluator.py +0 -90
  374. flock/evaluators/test/test_case_evaluator.py +0 -38
  375. flock/evaluators/zep/zep_evaluator.py +0 -59
  376. flock/modules/__init__.py +0 -1
  377. flock/modules/assertion/__init__.py +0 -1
  378. flock/modules/assertion/assertion_module.py +0 -286
  379. flock/modules/callback/__init__.py +0 -1
  380. flock/modules/callback/callback_module.py +0 -91
  381. flock/modules/enterprise_memory/README.md +0 -99
  382. flock/modules/enterprise_memory/enterprise_memory_module.py +0 -526
  383. flock/modules/mem0/__init__.py +0 -1
  384. flock/modules/mem0/mem0_module.py +0 -126
  385. flock/modules/mem0_async/__init__.py +0 -1
  386. flock/modules/mem0_async/async_mem0_module.py +0 -126
  387. flock/modules/memory/__init__.py +0 -1
  388. flock/modules/memory/memory_module.py +0 -429
  389. flock/modules/memory/memory_parser.py +0 -125
  390. flock/modules/memory/memory_storage.py +0 -736
  391. flock/modules/output/__init__.py +0 -1
  392. flock/modules/output/output_module.py +0 -196
  393. flock/modules/performance/__init__.py +0 -1
  394. flock/modules/performance/metrics_module.py +0 -678
  395. flock/modules/zep/__init__.py +0 -1
  396. flock/modules/zep/zep_module.py +0 -192
  397. flock/platform/docker_tools.py +0 -49
  398. flock/platform/jaeger_install.py +0 -86
  399. flock/routers/__init__.py +0 -1
  400. flock/routers/agent/__init__.py +0 -1
  401. flock/routers/agent/agent_router.py +0 -236
  402. flock/routers/agent/handoff_agent.py +0 -58
  403. flock/routers/conditional/conditional_router.py +0 -486
  404. flock/routers/default/__init__.py +0 -1
  405. flock/routers/default/default_router.py +0 -80
  406. flock/routers/feedback/feedback_router.py +0 -114
  407. flock/routers/list_generator/list_generator_router.py +0 -166
  408. flock/routers/llm/__init__.py +0 -1
  409. flock/routers/llm/llm_router.py +0 -365
  410. flock/tools/__init__.py +0 -0
  411. flock/tools/azure_tools.py +0 -781
  412. flock/tools/code_tools.py +0 -167
  413. flock/tools/file_tools.py +0 -149
  414. flock/tools/github_tools.py +0 -157
  415. flock/tools/markdown_tools.py +0 -205
  416. flock/tools/system_tools.py +0 -9
  417. flock/tools/text_tools.py +0 -810
  418. flock/tools/web_tools.py +0 -92
  419. flock/tools/zendesk_tools.py +0 -501
  420. flock/webapp/__init__.py +0 -1
  421. flock/webapp/app/__init__.py +0 -0
  422. flock/webapp/app/api/__init__.py +0 -0
  423. flock/webapp/app/api/agent_management.py +0 -237
  424. flock/webapp/app/api/execution.py +0 -503
  425. flock/webapp/app/api/flock_management.py +0 -125
  426. flock/webapp/app/api/registry_viewer.py +0 -29
  427. flock/webapp/app/chat.py +0 -662
  428. flock/webapp/app/config.py +0 -104
  429. flock/webapp/app/dependencies.py +0 -117
  430. flock/webapp/app/main.py +0 -1086
  431. flock/webapp/app/middleware.py +0 -113
  432. flock/webapp/app/models_ui.py +0 -7
  433. flock/webapp/app/services/__init__.py +0 -0
  434. flock/webapp/app/services/feedback_file_service.py +0 -363
  435. flock/webapp/app/services/flock_service.py +0 -345
  436. flock/webapp/app/services/sharing_models.py +0 -81
  437. flock/webapp/app/services/sharing_store.py +0 -597
  438. flock/webapp/app/templates/theme_mapper.html +0 -326
  439. flock/webapp/app/theme_mapper.py +0 -811
  440. flock/webapp/app/utils.py +0 -85
  441. flock/webapp/run.py +0 -219
  442. flock/webapp/static/css/chat.css +0 -301
  443. flock/webapp/static/css/components.css +0 -167
  444. flock/webapp/static/css/header.css +0 -39
  445. flock/webapp/static/css/layout.css +0 -281
  446. flock/webapp/static/css/sidebar.css +0 -127
  447. flock/webapp/static/css/two-pane.css +0 -48
  448. flock/webapp/templates/base.html +0 -389
  449. flock/webapp/templates/chat.html +0 -152
  450. flock/webapp/templates/chat_settings.html +0 -19
  451. flock/webapp/templates/flock_editor.html +0 -16
  452. flock/webapp/templates/index.html +0 -12
  453. flock/webapp/templates/partials/_agent_detail_form.html +0 -93
  454. flock/webapp/templates/partials/_agent_list.html +0 -18
  455. flock/webapp/templates/partials/_agent_manager_view.html +0 -51
  456. flock/webapp/templates/partials/_agent_tools_checklist.html +0 -14
  457. flock/webapp/templates/partials/_chat_container.html +0 -15
  458. flock/webapp/templates/partials/_chat_messages.html +0 -57
  459. flock/webapp/templates/partials/_chat_settings_form.html +0 -85
  460. flock/webapp/templates/partials/_create_flock_form.html +0 -50
  461. flock/webapp/templates/partials/_dashboard_flock_detail.html +0 -17
  462. flock/webapp/templates/partials/_dashboard_flock_file_list.html +0 -16
  463. flock/webapp/templates/partials/_dashboard_flock_properties_preview.html +0 -28
  464. flock/webapp/templates/partials/_dashboard_upload_flock_form.html +0 -16
  465. flock/webapp/templates/partials/_dynamic_input_form_content.html +0 -22
  466. flock/webapp/templates/partials/_env_vars_table.html +0 -23
  467. flock/webapp/templates/partials/_execution_form.html +0 -127
  468. flock/webapp/templates/partials/_execution_view_container.html +0 -28
  469. flock/webapp/templates/partials/_flock_file_list.html +0 -23
  470. flock/webapp/templates/partials/_flock_properties_form.html +0 -52
  471. flock/webapp/templates/partials/_flock_upload_form.html +0 -16
  472. flock/webapp/templates/partials/_header_flock_status.html +0 -5
  473. flock/webapp/templates/partials/_live_logs.html +0 -13
  474. flock/webapp/templates/partials/_load_manager_view.html +0 -49
  475. flock/webapp/templates/partials/_registry_table.html +0 -25
  476. flock/webapp/templates/partials/_registry_viewer_content.html +0 -70
  477. flock/webapp/templates/partials/_results_display.html +0 -78
  478. flock/webapp/templates/partials/_settings_env_content.html +0 -9
  479. flock/webapp/templates/partials/_settings_theme_content.html +0 -14
  480. flock/webapp/templates/partials/_settings_view.html +0 -36
  481. flock/webapp/templates/partials/_share_chat_link_snippet.html +0 -11
  482. flock/webapp/templates/partials/_share_link_snippet.html +0 -35
  483. flock/webapp/templates/partials/_sidebar.html +0 -74
  484. flock/webapp/templates/partials/_structured_data_view.html +0 -40
  485. flock/webapp/templates/partials/_theme_preview.html +0 -36
  486. flock/webapp/templates/registry_viewer.html +0 -84
  487. flock/webapp/templates/shared_run_page.html +0 -140
  488. flock/workflow/__init__.py +0 -0
  489. flock/workflow/activities.py +0 -237
  490. flock/workflow/agent_activities.py +0 -24
  491. flock/workflow/agent_execution_activity.py +0 -240
  492. flock/workflow/flock_workflow.py +0 -225
  493. flock/workflow/temporal_config.py +0 -96
  494. flock/workflow/temporal_setup.py +0 -60
  495. flock_core-0.4.543.dist-info/METADATA +0 -676
  496. flock_core-0.4.543.dist-info/RECORD +0 -572
  497. flock_core-0.4.543.dist-info/entry_points.txt +0 -2
  498. flock/{core/logging → logging}/formatters/themes.py +0 -0
  499. flock/{core/logging → logging}/span_middleware/baggage_span_processor.py +0 -0
  500. flock/{core/mcp → mcp}/util/__init__.py +0 -0
  501. {flock_core-0.4.543.dist-info → flock_core-0.5.0.dist-info}/WHEEL +0 -0
@@ -1,438 +0,0 @@
1
- # src/flock/core/execution/evaluation_processor.py
2
- """Contains the EvaluationProcessor class responsible for evaluating Flock agents
3
- against datasets using various metrics.
4
- """
5
-
6
- import asyncio
7
- import json
8
- from collections.abc import Callable
9
- from pathlib import Path
10
- from typing import (
11
- TYPE_CHECKING,
12
- Any,
13
- Literal,
14
- Union,
15
- )
16
-
17
- from pandas import DataFrame
18
-
19
- # Conditional pandas import
20
- try:
21
- import pandas as pd
22
-
23
- PANDAS_AVAILABLE = True
24
- except ImportError:
25
- pd = None # type: ignore
26
- PANDAS_AVAILABLE = False
27
-
28
- # Box for results
29
- from box import Box
30
- from datasets import Dataset as HFDataset
31
-
32
- from flock.core.evaluation.utils import (
33
- aggregate_results,
34
- calculate_evaluation_metrics,
35
- extract_value_by_dot_notation,
36
- normalize_dataset,
37
- # Import metric calculation/aggregation helpers
38
- )
39
-
40
- # Flock core imports
41
- from flock.core.logging.logging import get_logger
42
-
43
- if TYPE_CHECKING:
44
- from flock.core.flock import Flock
45
- from flock.core.flock_agent import FlockAgent
46
- from flock.core.flock_evaluator import FlockEvaluator
47
- # Conditional types
48
-
49
-
50
- logger = get_logger("execution.evaluation")
51
-
52
-
53
- class EvaluationExecutor:
54
- """Handles the evaluation of Flock agents against datasets."""
55
-
56
- def __init__(self, flock_instance: "Flock"):
57
- """Initializes the EvaluationProcessor.
58
-
59
- Args:
60
- flock_instance: The Flock instance this processor will use.
61
- """
62
- self.flock = flock_instance
63
-
64
- async def evaluate_async(
65
- self,
66
- dataset: str | Path | list[dict[str, Any]] | DataFrame | HFDataset,
67
- start_agent: Union["FlockAgent", str],
68
- input_mapping: dict[str, str],
69
- answer_mapping: dict[str, str],
70
- metrics: list[
71
- Union[
72
- str,
73
- Callable[[Any, Any], bool | float | dict[str, Any]],
74
- "FlockAgent",
75
- "FlockEvaluator",
76
- ]
77
- ],
78
- metric_configs: dict[str, dict[str, Any]] | None = None,
79
- static_inputs: dict[str, Any] | None = None,
80
- parallel: bool = True,
81
- max_workers: int = 5,
82
- use_temporal: bool | None = None,
83
- error_handling: Literal["raise", "skip", "log"] = "log",
84
- output_file: str | Path | None = None,
85
- return_dataframe: bool = True,
86
- silent_mode: bool = False,
87
- metadata_columns: list[str] | None = None, # Columns to pass through
88
- # dataset_split: Optional[str] = None # TODO: Add split support in normalize_dataset
89
- ) -> DataFrame | list[dict[str, Any]]:
90
- """Evaluates the Flock's performance against a dataset asynchronously."""
91
- effective_use_temporal = (
92
- use_temporal
93
- if use_temporal is not None
94
- else self.flock.enable_temporal
95
- )
96
- exec_mode = (
97
- "Temporal"
98
- if effective_use_temporal
99
- else ("Parallel Local" if parallel else "Sequential Local")
100
- )
101
- start_agent_name = (
102
- start_agent.name if hasattr(start_agent, "name") else start_agent
103
- )
104
- logger.info(
105
- f"Starting evaluation for agent '{start_agent_name}'. Execution: {exec_mode}, Silent: {silent_mode}"
106
- )
107
-
108
- # --- 1. Normalize Dataset ---
109
- try:
110
- df = normalize_dataset(dataset) # Uses helper
111
- if df is None or df.empty:
112
- raise ValueError(
113
- "Provided dataset is empty or could not be normalized."
114
- )
115
- logger.info(f"Normalized dataset with {len(df)} items.")
116
- except Exception as e:
117
- logger.error(
118
- f"Failed to load or normalize dataset: {e}", exc_info=True
119
- )
120
- raise ValueError(f"Dataset processing failed: {e}") from e
121
-
122
- # --- 2. Prepare Batch Items ---
123
- batch_items = []
124
- required_input_cols = list(input_mapping.keys())
125
- required_answer_cols = list(answer_mapping.values())
126
- required_metadata_cols = metadata_columns or []
127
- all_required_cols = set(
128
- required_input_cols + required_answer_cols + required_metadata_cols
129
- )
130
-
131
- missing_cols = all_required_cols - set(df.columns)
132
- if missing_cols:
133
- raise ValueError(
134
- f"Dataset missing required columns: {', '.join(missing_cols)}"
135
- )
136
-
137
- for index, row in df.iterrows():
138
- agent_input = {
139
- agent_key: row[df_col]
140
- for df_col, agent_key in input_mapping.items()
141
- }
142
- expected_answers = {
143
- agent_out_key: row[answer_col]
144
- for agent_out_key, answer_col in answer_mapping.items()
145
- }
146
- metadata = {col: row[col] for col in required_metadata_cols}
147
- batch_items.append(
148
- {
149
- "_original_index": index, # Store original DF index
150
- "_agent_input": agent_input,
151
- "_expected_answers": expected_answers,
152
- "_metadata": metadata,
153
- }
154
- )
155
-
156
- if not batch_items:
157
- logger.warning("No items prepared for evaluation.")
158
- return pd.DataFrame() if return_dataframe else []
159
-
160
- # --- 3. Execute Workers ---
161
- results_dict = {} # Store results keyed by original index
162
- tasks = []
163
- semaphore = asyncio.Semaphore(
164
- max_workers if parallel and not effective_use_temporal else 1
165
- )
166
-
167
- # --- Worker Function ---
168
- async def evaluate_worker(item_index: int, item_data: dict[str, Any]):
169
- nonlocal results_dict
170
- original_index = item_data["_original_index"]
171
- item_result_details = {
172
- "index": original_index, # Use original index in result
173
- "inputs": item_data["_agent_input"],
174
- "expected_answers": item_data["_expected_answers"],
175
- "agent_output": None,
176
- "metrics": {},
177
- "error": None,
178
- **(item_data["_metadata"]), # Include pass-through metadata
179
- }
180
- agent_inputs_with_static = {
181
- **(static_inputs or {}),
182
- **item_data["_agent_input"],
183
- }
184
-
185
- async with semaphore: # Acquire semaphore
186
- run_desc = f"Evaluation item (original index: {original_index})"
187
- logger.debug(f"{run_desc} starting.")
188
- try:
189
- # Run the agent/flock for this item
190
- agent_output = await self.flock.run_async(
191
- start_agent=start_agent, # Name or instance
192
- input=agent_inputs_with_static,
193
- box_result=True, # Use Box for easier access via dot notation
194
- # context=... # Assuming isolated context for now
195
- )
196
- item_result_details["agent_output"] = (
197
- agent_output # Store Box or dict
198
- )
199
-
200
- # Extract predicted values based on answer_mapping
201
- predicted_answers = {}
202
- for agent_out_key in answer_mapping:
203
- # Use helper to handle dot notation
204
- predicted_answers[agent_out_key] = (
205
- extract_value_by_dot_notation(
206
- agent_output, agent_out_key
207
- )
208
- )
209
-
210
- # Calculate metrics using helper
211
- item_result_details["metrics"] = (
212
- calculate_evaluation_metrics(
213
- metrics=metrics,
214
- metric_configs=metric_configs or {},
215
- predicted_answers=predicted_answers,
216
- expected_answers=item_data["_expected_answers"],
217
- agent_inputs=agent_inputs_with_static, # Pass context if needed
218
- agent_output=agent_output, # Pass context if needed
219
- )
220
- )
221
- logger.debug(f"{run_desc} finished successfully.")
222
-
223
- except Exception as e:
224
- logger.warning(
225
- f"Error processing item {original_index}: {e}"
226
- )
227
- item_result_details["error"] = str(e)
228
- if error_handling == "raise":
229
- raise # Re-raise to stop processing (if parallel, stops gather)
230
- elif error_handling == "skip":
231
- item_result_details["_skip"] = (
232
- True # Mark for filtering
233
- )
234
-
235
- # Store result associated with original index
236
- results_dict[original_index] = item_result_details
237
-
238
- # Update progress bar if applicable (inside the worker is okay)
239
- if progress_context:
240
- progress.update(progress_task_id, advance=1)
241
-
242
- # --- Setup Progress Bar if Silent ---
243
- progress_context = None
244
- progress_task_id = None
245
- if silent_mode:
246
- from rich.progress import (
247
- BarColumn,
248
- Progress,
249
- SpinnerColumn,
250
- TextColumn,
251
- TimeElapsedColumn,
252
- )
253
-
254
- progress = Progress(
255
- SpinnerColumn(),
256
- TextColumn("[progress.description]{task.description}"),
257
- BarColumn(),
258
- TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
259
- TextColumn("({task.completed}/{task.total})"),
260
- TimeElapsedColumn(),
261
- )
262
- progress_context = progress
263
- progress_task_id = progress.add_task(
264
- f"Evaluating {len(batch_items)} items...",
265
- total=len(batch_items),
266
- )
267
- progress.start()
268
-
269
- # --- Execute Tasks ---
270
- try:
271
- if effective_use_temporal:
272
- # TODO: Implement parallel Temporal evaluation
273
- logger.info(
274
- "Running evaluation using Temporal (executing sequentially for now)..."
275
- )
276
- for i, item_data in enumerate(batch_items):
277
- await evaluate_worker(i, item_data) # Pass sequential index
278
- elif parallel:
279
- logger.info(
280
- f"Running evaluation in parallel with max_workers={max_workers}..."
281
- )
282
- for i, item_data in enumerate(batch_items):
283
- # Pass sequential index i, and the item_data which contains original_index
284
- tasks.append(
285
- asyncio.create_task(evaluate_worker(i, item_data))
286
- )
287
- await asyncio.gather(*tasks)
288
- else: # Sequential Local
289
- logger.info("Running evaluation sequentially...")
290
- for i, item_data in enumerate(batch_items):
291
- await evaluate_worker(i, item_data)
292
-
293
- logger.info("Evaluation execution finished.")
294
-
295
- except Exception as batch_error:
296
- logger.error(
297
- f"Evaluation stopped due to an error in one of the items: {batch_error}"
298
- )
299
- if (
300
- not error_handling == "skip"
301
- ): # If skipping, we continue; otherwise, re-raise if required
302
- if error_handling == "raise":
303
- raise
304
- finally:
305
- if progress_context:
306
- progress.stop()
307
-
308
- # --- 4. Process Results ---
309
- # Reconstruct results list based on original order and filtering
310
- final_results_list = []
311
- for idx in df.index: # Iterate through original DataFrame index
312
- res = results_dict.get(idx)
313
- if res:
314
- if error_handling == "skip" and res.get("_skip"):
315
- continue # Skip items marked for skipping
316
- # Remove internal skip flag if present
317
- res.pop("_skip", None)
318
- final_results_list.append(res)
319
-
320
- # Calculate aggregate summary using helper
321
- summary = aggregate_results(final_results_list)
322
- logger.info(
323
- "Evaluation Summary:", extra=summary
324
- ) # Log summary automatically
325
-
326
- # --- 5. Save and Return ---
327
- if output_file:
328
- output_path = Path(output_file)
329
- output_path.parent.mkdir(parents=True, exist_ok=True)
330
- try:
331
- results_df = pd.DataFrame(final_results_list)
332
- # Handle complex objects before saving
333
- if "agent_output" in results_df.columns:
334
- results_df["agent_output"] = results_df[
335
- "agent_output"
336
- ].apply(lambda x: x.to_dict() if isinstance(x, Box) else x)
337
- if (
338
- "expected_answers" in results_df.columns
339
- ): # Flatten dicts for CSV
340
- results_df = pd.concat(
341
- [
342
- results_df.drop(["expected_answers"], axis=1),
343
- pd.json_normalize(
344
- results_df["expected_answers"]
345
- ).add_prefix("expected_"),
346
- ],
347
- axis=1,
348
- )
349
- if "metrics" in results_df.columns: # Flatten dicts for CSV
350
- results_df = pd.concat(
351
- [
352
- results_df.drop(["metrics"], axis=1),
353
- pd.json_normalize(results_df["metrics"]).add_prefix(
354
- "metric_"
355
- ),
356
- ],
357
- axis=1,
358
- )
359
- if "inputs" in results_df.columns: # Flatten dicts for CSV
360
- results_df = pd.concat(
361
- [
362
- results_df.drop(["inputs"], axis=1),
363
- pd.json_normalize(results_df["inputs"]).add_prefix(
364
- "input_"
365
- ),
366
- ],
367
- axis=1,
368
- )
369
-
370
- # Convert lists/dicts in metadata columns for CSV saving
371
- for col in metadata_columns or []:
372
- if col in results_df.columns:
373
- # Check if column contains lists/dicts before converting
374
- if (
375
- results_df[col]
376
- .apply(lambda x: isinstance(x, (list, dict)))
377
- .any()
378
- ):
379
- results_df[col] = results_df[col].apply(json.dumps)
380
-
381
- if output_path.suffix.lower() == ".csv":
382
- results_df.to_csv(output_path, index=False)
383
- elif output_path.suffix.lower() == ".json":
384
- # Save list of dicts directly (before potential DataFrame manipulation)
385
- # Need to handle non-serializable types like Box
386
- serializable_results = []
387
- for res_dict in final_results_list:
388
- if "agent_output" in res_dict and isinstance(
389
- res_dict["agent_output"], Box
390
- ):
391
- res_dict["agent_output"] = res_dict[
392
- "agent_output"
393
- ].to_dict()
394
- serializable_results.append(res_dict)
395
- with open(output_path, "w", encoding="utf-8") as f:
396
- json.dump(
397
- serializable_results, f, indent=2, default=str
398
- ) # Use default=str for safety
399
- else:
400
- logger.warning(
401
- f"Unsupported output file format: {output_path.suffix}. Use .csv or .json."
402
- )
403
- logger.info(
404
- f"Detailed evaluation results saved to {output_path}"
405
- )
406
- except Exception as e:
407
- logger.error(
408
- f"Failed to save evaluation results to {output_file}: {e}",
409
- exc_info=True,
410
- )
411
-
412
- if return_dataframe:
413
- if not PANDAS_AVAILABLE:
414
- logger.error(
415
- "Cannot return DataFrame: pandas library not installed."
416
- )
417
- return final_results_list # Fallback to list
418
- # Ensure DataFrame is created if not done for saving
419
- if "results_df" not in locals():
420
- results_df = pd.DataFrame(final_results_list)
421
- # Convert Box if needed
422
- if "agent_output" in results_df.columns:
423
- results_df["agent_output"] = results_df[
424
- "agent_output"
425
- ].apply(lambda x: x.to_dict() if isinstance(x, Box) else x)
426
- return results_df
427
- else:
428
- # Ensure Box objects are converted if returning list
429
- final_list = []
430
- for res_dict in final_results_list:
431
- if "agent_output" in res_dict and isinstance(
432
- res_dict["agent_output"], Box
433
- ):
434
- res_dict["agent_output"] = res_dict[
435
- "agent_output"
436
- ].to_dict()
437
- final_list.append(res_dict)
438
- return final_list
@@ -1,31 +0,0 @@
1
- # src/your_package/core/execution/local_executor.py
2
- from flock.core.context.context import FlockContext
3
- from flock.core.logging.logging import get_logger
4
- from flock.workflow.activities import (
5
- run_agent, # This should be the local activity function
6
- )
7
-
8
- logger = get_logger("flock")
9
-
10
-
11
- async def run_local_workflow(
12
- context: FlockContext, box_result: bool = True
13
- ) -> dict:
14
- """Execute the agent workflow locally (for debugging).
15
-
16
- Args:
17
- context: The FlockContext instance with state and history.
18
- output_formatter: Formatter options for displaying results.
19
- box_result: If True, wraps the result in a Box for nicer display.
20
-
21
- Returns:
22
- A dictionary containing the workflow result.
23
- """
24
- logger.info("Running local debug workflow")
25
- result = await run_agent(context)
26
- if box_result:
27
- from box import Box
28
-
29
- logger.debug("Boxing result")
30
- return Box(result)
31
- return result
@@ -1,103 +0,0 @@
1
- # src/flock/core/execution/evaluation_processor.py
2
- """Contains the EvaluationProcessor class responsible for evaluating Flock agents
3
- against datasets using various metrics.
4
- """
5
-
6
- from pathlib import Path
7
- from typing import (
8
- TYPE_CHECKING,
9
- Any,
10
- Union,
11
- )
12
-
13
- from opik import Opik
14
- from pandas import DataFrame
15
-
16
- # Conditional pandas import
17
- try:
18
- import pandas as pd
19
-
20
- PANDAS_AVAILABLE = True
21
- except ImportError:
22
- pd = None # type: ignore
23
- PANDAS_AVAILABLE = False
24
-
25
- # Box for results
26
- from datasets import Dataset as HFDataset
27
-
28
- from flock.core.evaluation.utils import (
29
- normalize_dataset,
30
- # Import metric calculation/aggregation helpers
31
- )
32
-
33
- # Flock core imports
34
- from flock.core.logging.logging import get_logger
35
-
36
- if TYPE_CHECKING:
37
- from flock.core.flock import Flock
38
- from flock.core.flock_agent import FlockAgent
39
- # Conditional types
40
-
41
-
42
- logger = get_logger("execution.opik")
43
-
44
-
45
- class OpikExecutor:
46
- """Handles the evaluation of Flock agents against datasets."""
47
-
48
- def __init__(self, flock_instance: "Flock"):
49
- """Initializes the EvaluationProcessor.
50
-
51
- Args:
52
- flock_instance: The Flock instance this processor will use.
53
- """
54
- self.flock = flock_instance
55
-
56
- async def evaluate_with_opik(
57
- self,
58
- dataset: str | Path | list[dict[str, Any]] | DataFrame | HFDataset,
59
- start_agent: Union["FlockAgent", str],
60
- input_mapping: dict[str, str],
61
- answer_mapping: dict[str, str],) -> DataFrame | list[dict[str, Any]]:
62
- """Evaluates the Flock's performance against a dataset asynchronously."""
63
- logger.info(f"Evaluating Flock's performance against dataset: {dataset}")
64
-
65
- # Evaluation task
66
- def evaluation_task(dataset_item):
67
- flock_result = self.flock.run(start_agent=start_agent, input=dataset_item, box_result=False)
68
-
69
- result = {
70
- "input": dataset_item.get("test"),
71
- "output": flock_result.get("answer"),
72
- "context": ["placeholder string"]
73
- }
74
-
75
- return result
76
-
77
- start_agent_name = (
78
- start_agent.name if hasattr(start_agent, "name") else start_agent
79
- )
80
- dataset_name = str(dataset)
81
-
82
- # --- 1. Normalize Dataset ---
83
- try:
84
- df = normalize_dataset(dataset) # Uses helper
85
- if df is None or df.empty:
86
- raise ValueError(
87
- "Provided dataset is empty or could not be normalized."
88
- )
89
- logger.info(f"Normalized dataset with {len(df)} items.")
90
- except Exception as e:
91
- logger.error(
92
- f"Failed to load or normalize dataset: {e}", exc_info=True
93
- )
94
- raise ValueError(f"Dataset processing failed: {e}") from e
95
-
96
- logger.info(f"type(df): {type(df)}") # ➜ <class 'pandas.core.frame.DataFrame'>
97
- logger.info(f"df.shape: {df.shape}") # e.g. (123456, N_COLUMNS+2)
98
- logger.info(f"df['split'].value_counts(): {df['split'].value_counts()}")
99
- logger.info(f"df['config'].unique(): {df['config'].unique()}")
100
- client = Opik()
101
- dataset = client.get_or_create_dataset(name=dataset_name)
102
- dataset.insert_from_pandas(dataframe=df, ignore_keys=["source"])
103
- logger.info(f"Imported dataset to Opik")