flock-core 0.4.542__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flock-core might be problematic. Click here for more details.

Files changed (501) hide show
  1. flock/__init__.py +12 -217
  2. flock/agent.py +1079 -0
  3. flock/api/themes.py +71 -0
  4. flock/artifacts.py +86 -0
  5. flock/cli.py +147 -0
  6. flock/components.py +189 -0
  7. flock/dashboard/__init__.py +30 -0
  8. flock/dashboard/collector.py +559 -0
  9. flock/dashboard/events.py +188 -0
  10. flock/dashboard/graph_builder.py +563 -0
  11. flock/dashboard/launcher.py +235 -0
  12. flock/dashboard/models/graph.py +156 -0
  13. flock/dashboard/service.py +991 -0
  14. flock/dashboard/static_v2/assets/index-DFRnI_mt.js +111 -0
  15. flock/dashboard/static_v2/assets/index-fPLNdmp1.css +1 -0
  16. flock/dashboard/static_v2/index.html +13 -0
  17. flock/dashboard/websocket.py +246 -0
  18. flock/engines/__init__.py +6 -0
  19. flock/engines/dspy_engine.py +932 -0
  20. flock/examples.py +131 -0
  21. flock/frontend/README.md +778 -0
  22. flock/frontend/docs/DESIGN_SYSTEM.md +1980 -0
  23. flock/frontend/index.html +12 -0
  24. flock/frontend/package-lock.json +4337 -0
  25. flock/frontend/package.json +48 -0
  26. flock/frontend/src/App.tsx +139 -0
  27. flock/frontend/src/__tests__/integration/graph-snapshot.test.tsx +647 -0
  28. flock/frontend/src/__tests__/integration/indexeddb-persistence.test.tsx +699 -0
  29. flock/frontend/src/components/common/BuildInfo.tsx +39 -0
  30. flock/frontend/src/components/common/EmptyState.module.css +115 -0
  31. flock/frontend/src/components/common/EmptyState.tsx +128 -0
  32. flock/frontend/src/components/common/ErrorBoundary.module.css +169 -0
  33. flock/frontend/src/components/common/ErrorBoundary.tsx +118 -0
  34. flock/frontend/src/components/common/KeyboardShortcutsDialog.css +251 -0
  35. flock/frontend/src/components/common/KeyboardShortcutsDialog.tsx +151 -0
  36. flock/frontend/src/components/common/LoadingSpinner.module.css +97 -0
  37. flock/frontend/src/components/common/LoadingSpinner.tsx +29 -0
  38. flock/frontend/src/components/controls/PublishControl.css +547 -0
  39. flock/frontend/src/components/controls/PublishControl.test.tsx +543 -0
  40. flock/frontend/src/components/controls/PublishControl.tsx +432 -0
  41. flock/frontend/src/components/details/DetailWindowContainer.tsx +58 -0
  42. flock/frontend/src/components/details/LiveOutputTab.test.tsx +792 -0
  43. flock/frontend/src/components/details/LiveOutputTab.tsx +220 -0
  44. flock/frontend/src/components/details/MessageDetailWindow.tsx +439 -0
  45. flock/frontend/src/components/details/MessageHistoryTab.tsx +374 -0
  46. flock/frontend/src/components/details/NodeDetailWindow.test.tsx +501 -0
  47. flock/frontend/src/components/details/NodeDetailWindow.tsx +218 -0
  48. flock/frontend/src/components/details/RunStatusTab.tsx +348 -0
  49. flock/frontend/src/components/details/tabs.test.tsx +1015 -0
  50. flock/frontend/src/components/filters/ArtifactTypeFilter.tsx +21 -0
  51. flock/frontend/src/components/filters/CorrelationIDFilter.module.css +102 -0
  52. flock/frontend/src/components/filters/CorrelationIDFilter.test.tsx +197 -0
  53. flock/frontend/src/components/filters/CorrelationIDFilter.tsx +121 -0
  54. flock/frontend/src/components/filters/FilterFlyout.module.css +104 -0
  55. flock/frontend/src/components/filters/FilterFlyout.tsx +80 -0
  56. flock/frontend/src/components/filters/FilterPills.module.css +220 -0
  57. flock/frontend/src/components/filters/FilterPills.test.tsx +189 -0
  58. flock/frontend/src/components/filters/FilterPills.tsx +143 -0
  59. flock/frontend/src/components/filters/ProducerFilter.tsx +21 -0
  60. flock/frontend/src/components/filters/SavedFiltersControl.module.css +60 -0
  61. flock/frontend/src/components/filters/SavedFiltersControl.test.tsx +158 -0
  62. flock/frontend/src/components/filters/SavedFiltersControl.tsx +159 -0
  63. flock/frontend/src/components/filters/TagFilter.tsx +21 -0
  64. flock/frontend/src/components/filters/TimeRangeFilter.module.css +115 -0
  65. flock/frontend/src/components/filters/TimeRangeFilter.test.tsx +154 -0
  66. flock/frontend/src/components/filters/TimeRangeFilter.tsx +110 -0
  67. flock/frontend/src/components/filters/VisibilityFilter.tsx +21 -0
  68. flock/frontend/src/components/graph/AgentNode.test.tsx +77 -0
  69. flock/frontend/src/components/graph/AgentNode.tsx +324 -0
  70. flock/frontend/src/components/graph/GraphCanvas.tsx +613 -0
  71. flock/frontend/src/components/graph/MessageFlowEdge.tsx +128 -0
  72. flock/frontend/src/components/graph/MessageNode.test.tsx +64 -0
  73. flock/frontend/src/components/graph/MessageNode.tsx +129 -0
  74. flock/frontend/src/components/graph/MiniMap.tsx +47 -0
  75. flock/frontend/src/components/graph/TransformEdge.tsx +123 -0
  76. flock/frontend/src/components/layout/DashboardLayout.css +420 -0
  77. flock/frontend/src/components/layout/DashboardLayout.tsx +287 -0
  78. flock/frontend/src/components/layout/Header.module.css +88 -0
  79. flock/frontend/src/components/layout/Header.tsx +52 -0
  80. flock/frontend/src/components/modules/HistoricalArtifactsModule.module.css +288 -0
  81. flock/frontend/src/components/modules/HistoricalArtifactsModule.tsx +450 -0
  82. flock/frontend/src/components/modules/HistoricalArtifactsModuleWrapper.tsx +13 -0
  83. flock/frontend/src/components/modules/JsonAttributeRenderer.tsx +140 -0
  84. flock/frontend/src/components/modules/ModuleRegistry.test.ts +333 -0
  85. flock/frontend/src/components/modules/ModuleRegistry.ts +93 -0
  86. flock/frontend/src/components/modules/ModuleWindow.tsx +223 -0
  87. flock/frontend/src/components/modules/TraceModuleJaeger.tsx +1971 -0
  88. flock/frontend/src/components/modules/TraceModuleJaegerWrapper.tsx +13 -0
  89. flock/frontend/src/components/modules/registerModules.ts +29 -0
  90. flock/frontend/src/components/settings/AdvancedSettings.tsx +175 -0
  91. flock/frontend/src/components/settings/AppearanceSettings.tsx +185 -0
  92. flock/frontend/src/components/settings/GraphSettings.tsx +110 -0
  93. flock/frontend/src/components/settings/MultiSelect.tsx +235 -0
  94. flock/frontend/src/components/settings/SettingsPanel.css +327 -0
  95. flock/frontend/src/components/settings/SettingsPanel.tsx +131 -0
  96. flock/frontend/src/components/settings/ThemeSelector.tsx +298 -0
  97. flock/frontend/src/components/settings/TracingSettings.tsx +404 -0
  98. flock/frontend/src/hooks/useKeyboardShortcuts.ts +148 -0
  99. flock/frontend/src/hooks/useModulePersistence.test.ts +442 -0
  100. flock/frontend/src/hooks/useModulePersistence.ts +154 -0
  101. flock/frontend/src/hooks/useModules.ts +157 -0
  102. flock/frontend/src/hooks/usePersistence.ts +141 -0
  103. flock/frontend/src/main.tsx +13 -0
  104. flock/frontend/src/services/api.ts +337 -0
  105. flock/frontend/src/services/graphService.test.ts +330 -0
  106. flock/frontend/src/services/graphService.ts +75 -0
  107. flock/frontend/src/services/indexeddb.test.ts +793 -0
  108. flock/frontend/src/services/indexeddb.ts +848 -0
  109. flock/frontend/src/services/layout.test.ts +437 -0
  110. flock/frontend/src/services/layout.ts +357 -0
  111. flock/frontend/src/services/themeApplicator.ts +140 -0
  112. flock/frontend/src/services/themeService.ts +77 -0
  113. flock/frontend/src/services/websocket.ts +650 -0
  114. flock/frontend/src/store/filterStore.test.ts +250 -0
  115. flock/frontend/src/store/filterStore.ts +272 -0
  116. flock/frontend/src/store/graphStore.test.ts +570 -0
  117. flock/frontend/src/store/graphStore.ts +462 -0
  118. flock/frontend/src/store/moduleStore.test.ts +253 -0
  119. flock/frontend/src/store/moduleStore.ts +75 -0
  120. flock/frontend/src/store/settingsStore.ts +188 -0
  121. flock/frontend/src/store/streamStore.ts +68 -0
  122. flock/frontend/src/store/uiStore.test.ts +54 -0
  123. flock/frontend/src/store/uiStore.ts +122 -0
  124. flock/frontend/src/store/wsStore.ts +34 -0
  125. flock/frontend/src/styles/index.css +15 -0
  126. flock/frontend/src/styles/scrollbar.css +47 -0
  127. flock/frontend/src/styles/variables.css +488 -0
  128. flock/frontend/src/test/setup.ts +1 -0
  129. flock/frontend/src/types/filters.ts +47 -0
  130. flock/frontend/src/types/graph.ts +95 -0
  131. flock/frontend/src/types/modules.ts +10 -0
  132. flock/frontend/src/types/theme.ts +55 -0
  133. flock/frontend/src/utils/artifacts.ts +24 -0
  134. flock/frontend/src/utils/mockData.ts +98 -0
  135. flock/frontend/src/utils/performance.ts +16 -0
  136. flock/frontend/src/vite-env.d.ts +17 -0
  137. flock/frontend/tsconfig.json +27 -0
  138. flock/frontend/tsconfig.node.json +11 -0
  139. flock/frontend/vite.config.ts +25 -0
  140. flock/frontend/vitest.config.ts +11 -0
  141. flock/{core/util → helper}/cli_helper.py +9 -5
  142. flock/{core/logging → logging}/__init__.py +2 -3
  143. flock/logging/auto_trace.py +159 -0
  144. flock/{core/logging → logging}/formatters/enum_builder.py +3 -4
  145. flock/{core/logging → logging}/formatters/theme_builder.py +19 -44
  146. flock/{core/logging → logging}/formatters/themed_formatter.py +69 -107
  147. flock/{core/logging → logging}/logging.py +78 -61
  148. flock/{core/logging → logging}/telemetry.py +66 -26
  149. flock/{core/logging → logging}/telemetry_exporter/base_exporter.py +2 -2
  150. flock/logging/telemetry_exporter/duckdb_exporter.py +216 -0
  151. flock/{core/logging → logging}/telemetry_exporter/file_exporter.py +13 -10
  152. flock/{core/logging → logging}/telemetry_exporter/sqlite_exporter.py +2 -3
  153. flock/logging/trace_and_logged.py +304 -0
  154. flock/mcp/__init__.py +91 -0
  155. flock/{core/mcp/mcp_client.py → mcp/client.py} +131 -158
  156. flock/{core/mcp/mcp_config.py → mcp/config.py} +86 -132
  157. flock/mcp/manager.py +286 -0
  158. flock/mcp/servers/sse/__init__.py +1 -1
  159. flock/mcp/servers/sse/flock_sse_server.py +16 -58
  160. flock/mcp/servers/stdio/__init__.py +1 -1
  161. flock/mcp/servers/stdio/flock_stdio_server.py +13 -53
  162. flock/mcp/servers/streamable_http/flock_streamable_http_server.py +22 -67
  163. flock/mcp/servers/websockets/flock_websocket_server.py +12 -45
  164. flock/{core/mcp/flock_mcp_tool_base.py → mcp/tool.py} +24 -78
  165. flock/mcp/types/__init__.py +42 -0
  166. flock/{core/mcp → mcp}/types/callbacks.py +12 -15
  167. flock/{core/mcp → mcp}/types/factories.py +7 -6
  168. flock/{core/mcp → mcp}/types/handlers.py +13 -18
  169. flock/{core/mcp → mcp}/types/types.py +70 -74
  170. flock/{core/mcp → mcp}/util/helpers.py +3 -3
  171. flock/orchestrator.py +970 -0
  172. flock/registry.py +148 -0
  173. flock/runtime.py +262 -0
  174. flock/service.py +277 -0
  175. flock/store.py +1214 -0
  176. flock/subscription.py +111 -0
  177. flock/themes/andromeda.toml +1 -1
  178. flock/themes/apple-system-colors.toml +1 -1
  179. flock/themes/arcoiris.toml +1 -1
  180. flock/themes/atomonelight.toml +1 -1
  181. flock/themes/ayu copy.toml +1 -1
  182. flock/themes/ayu-light.toml +1 -1
  183. flock/themes/belafonte-day.toml +1 -1
  184. flock/themes/belafonte-night.toml +1 -1
  185. flock/themes/blulocodark.toml +1 -1
  186. flock/themes/breeze.toml +1 -1
  187. flock/themes/broadcast.toml +1 -1
  188. flock/themes/brogrammer.toml +1 -1
  189. flock/themes/builtin-dark.toml +1 -1
  190. flock/themes/builtin-pastel-dark.toml +1 -1
  191. flock/themes/catppuccin-latte.toml +1 -1
  192. flock/themes/catppuccin-macchiato.toml +1 -1
  193. flock/themes/catppuccin-mocha.toml +1 -1
  194. flock/themes/cga.toml +1 -1
  195. flock/themes/chalk.toml +1 -1
  196. flock/themes/ciapre.toml +1 -1
  197. flock/themes/coffee-theme.toml +1 -1
  198. flock/themes/cyberpunkscarletprotocol.toml +1 -1
  199. flock/themes/dark+.toml +1 -1
  200. flock/themes/darkermatrix.toml +1 -1
  201. flock/themes/darkmatrix.toml +2 -2
  202. flock/themes/darkside.toml +1 -1
  203. flock/themes/deep.toml +2 -2
  204. flock/themes/desert.toml +1 -1
  205. flock/themes/django.toml +1 -1
  206. flock/themes/djangosmooth.toml +1 -1
  207. flock/themes/doomone.toml +1 -1
  208. flock/themes/dotgov.toml +1 -1
  209. flock/themes/dracula+.toml +1 -1
  210. flock/themes/duckbones.toml +1 -1
  211. flock/themes/encom.toml +1 -1
  212. flock/themes/espresso.toml +1 -1
  213. flock/themes/everblush.toml +1 -1
  214. flock/themes/fairyfloss.toml +1 -1
  215. flock/themes/fideloper.toml +1 -1
  216. flock/themes/fishtank.toml +1 -1
  217. flock/themes/flexoki-light.toml +1 -1
  218. flock/themes/floraverse.toml +1 -1
  219. flock/themes/framer.toml +1 -1
  220. flock/themes/galizur.toml +1 -1
  221. flock/themes/github.toml +1 -1
  222. flock/themes/grass.toml +1 -1
  223. flock/themes/grey-green.toml +1 -1
  224. flock/themes/gruvboxlight.toml +1 -1
  225. flock/themes/guezwhoz.toml +1 -1
  226. flock/themes/harper.toml +1 -1
  227. flock/themes/hax0r-blue.toml +1 -1
  228. flock/themes/hopscotch.256.toml +1 -1
  229. flock/themes/ic-green-ppl.toml +1 -1
  230. flock/themes/iceberg-dark.toml +1 -1
  231. flock/themes/japanesque.toml +1 -1
  232. flock/themes/jubi.toml +1 -1
  233. flock/themes/kibble.toml +1 -1
  234. flock/themes/kolorit.toml +1 -1
  235. flock/themes/kurokula.toml +1 -1
  236. flock/themes/materialdesigncolors.toml +1 -1
  237. flock/themes/matrix.toml +1 -1
  238. flock/themes/mellifluous.toml +1 -1
  239. flock/themes/midnight-in-mojave.toml +1 -1
  240. flock/themes/monokai-remastered.toml +1 -1
  241. flock/themes/monokai-soda.toml +1 -1
  242. flock/themes/neon.toml +1 -1
  243. flock/themes/neopolitan.toml +5 -5
  244. flock/themes/nord-light.toml +1 -1
  245. flock/themes/ocean.toml +1 -1
  246. flock/themes/onehalfdark.toml +1 -1
  247. flock/themes/onehalflight.toml +1 -1
  248. flock/themes/palenighthc.toml +1 -1
  249. flock/themes/paulmillr.toml +1 -1
  250. flock/themes/pencildark.toml +1 -1
  251. flock/themes/pnevma.toml +1 -1
  252. flock/themes/purple-rain.toml +1 -1
  253. flock/themes/purplepeter.toml +1 -1
  254. flock/themes/raycast-dark.toml +1 -1
  255. flock/themes/red-sands.toml +1 -1
  256. flock/themes/relaxed.toml +1 -1
  257. flock/themes/retro.toml +1 -1
  258. flock/themes/rose-pine.toml +1 -1
  259. flock/themes/royal.toml +1 -1
  260. flock/themes/ryuuko.toml +1 -1
  261. flock/themes/sakura.toml +1 -1
  262. flock/themes/scarlet-protocol.toml +1 -1
  263. flock/themes/seoulbones-dark.toml +1 -1
  264. flock/themes/shades-of-purple.toml +1 -1
  265. flock/themes/smyck.toml +1 -1
  266. flock/themes/softserver.toml +1 -1
  267. flock/themes/solarized-darcula.toml +1 -1
  268. flock/themes/square.toml +1 -1
  269. flock/themes/sugarplum.toml +1 -1
  270. flock/themes/thayer-bright.toml +1 -1
  271. flock/themes/tokyonight.toml +1 -1
  272. flock/themes/tomorrow.toml +1 -1
  273. flock/themes/ubuntu.toml +1 -1
  274. flock/themes/ultradark.toml +1 -1
  275. flock/themes/ultraviolent.toml +1 -1
  276. flock/themes/unikitty.toml +1 -1
  277. flock/themes/urple.toml +1 -1
  278. flock/themes/vesper.toml +1 -1
  279. flock/themes/vimbones.toml +1 -1
  280. flock/themes/wildcherry.toml +1 -1
  281. flock/themes/wilmersdorf.toml +1 -1
  282. flock/themes/wryan.toml +1 -1
  283. flock/themes/xcodedarkhc.toml +1 -1
  284. flock/themes/xcodelight.toml +1 -1
  285. flock/themes/zenbones-light.toml +1 -1
  286. flock/themes/zenwritten-dark.toml +1 -1
  287. flock/utilities.py +301 -0
  288. flock/utility/output_utility_component.py +226 -0
  289. flock/visibility.py +107 -0
  290. flock_core-0.5.0.dist-info/METADATA +964 -0
  291. flock_core-0.5.0.dist-info/RECORD +525 -0
  292. flock_core-0.5.0.dist-info/entry_points.txt +2 -0
  293. {flock_core-0.4.542.dist-info → flock_core-0.5.0.dist-info}/licenses/LICENSE +1 -1
  294. flock/adapter/__init__.py +0 -14
  295. flock/adapter/azure_adapter.py +0 -68
  296. flock/adapter/chroma_adapter.py +0 -73
  297. flock/adapter/faiss_adapter.py +0 -97
  298. flock/adapter/pinecone_adapter.py +0 -51
  299. flock/adapter/vector_base.py +0 -47
  300. flock/cli/assets/release_notes.md +0 -140
  301. flock/cli/config.py +0 -8
  302. flock/cli/constants.py +0 -36
  303. flock/cli/create_agent.py +0 -1
  304. flock/cli/create_flock.py +0 -280
  305. flock/cli/execute_flock.py +0 -620
  306. flock/cli/load_agent.py +0 -1
  307. flock/cli/load_examples.py +0 -1
  308. flock/cli/load_flock.py +0 -192
  309. flock/cli/load_release_notes.py +0 -20
  310. flock/cli/loaded_flock_cli.py +0 -254
  311. flock/cli/manage_agents.py +0 -459
  312. flock/cli/registry_management.py +0 -889
  313. flock/cli/runner.py +0 -41
  314. flock/cli/settings.py +0 -857
  315. flock/cli/utils.py +0 -135
  316. flock/cli/view_results.py +0 -29
  317. flock/cli/yaml_editor.py +0 -396
  318. flock/config.py +0 -56
  319. flock/core/__init__.py +0 -44
  320. flock/core/api/__init__.py +0 -10
  321. flock/core/api/custom_endpoint.py +0 -45
  322. flock/core/api/endpoints.py +0 -262
  323. flock/core/api/main.py +0 -162
  324. flock/core/api/models.py +0 -101
  325. flock/core/api/run_store.py +0 -224
  326. flock/core/api/runner.py +0 -44
  327. flock/core/api/service.py +0 -214
  328. flock/core/config/flock_agent_config.py +0 -11
  329. flock/core/config/scheduled_agent_config.py +0 -40
  330. flock/core/context/context.py +0 -214
  331. flock/core/context/context_manager.py +0 -40
  332. flock/core/context/context_vars.py +0 -11
  333. flock/core/evaluation/utils.py +0 -395
  334. flock/core/execution/batch_executor.py +0 -369
  335. flock/core/execution/evaluation_executor.py +0 -438
  336. flock/core/execution/local_executor.py +0 -31
  337. flock/core/execution/opik_executor.py +0 -103
  338. flock/core/execution/temporal_executor.py +0 -166
  339. flock/core/flock.py +0 -1003
  340. flock/core/flock_agent.py +0 -1258
  341. flock/core/flock_evaluator.py +0 -60
  342. flock/core/flock_factory.py +0 -513
  343. flock/core/flock_module.py +0 -207
  344. flock/core/flock_registry.py +0 -702
  345. flock/core/flock_router.py +0 -83
  346. flock/core/flock_scheduler.py +0 -166
  347. flock/core/flock_server_manager.py +0 -136
  348. flock/core/interpreter/python_interpreter.py +0 -689
  349. flock/core/logging/live_capture.py +0 -137
  350. flock/core/logging/trace_and_logged.py +0 -59
  351. flock/core/mcp/__init__.py +0 -1
  352. flock/core/mcp/flock_mcp_server.py +0 -640
  353. flock/core/mcp/mcp_client_manager.py +0 -201
  354. flock/core/mcp/types/__init__.py +0 -1
  355. flock/core/mixin/dspy_integration.py +0 -445
  356. flock/core/mixin/prompt_parser.py +0 -125
  357. flock/core/serialization/__init__.py +0 -13
  358. flock/core/serialization/callable_registry.py +0 -52
  359. flock/core/serialization/flock_serializer.py +0 -854
  360. flock/core/serialization/json_encoder.py +0 -41
  361. flock/core/serialization/secure_serializer.py +0 -175
  362. flock/core/serialization/serializable.py +0 -342
  363. flock/core/serialization/serialization_utils.py +0 -409
  364. flock/core/util/file_path_utils.py +0 -223
  365. flock/core/util/hydrator.py +0 -309
  366. flock/core/util/input_resolver.py +0 -141
  367. flock/core/util/loader.py +0 -59
  368. flock/core/util/splitter.py +0 -219
  369. flock/di.py +0 -41
  370. flock/evaluators/__init__.py +0 -1
  371. flock/evaluators/declarative/__init__.py +0 -1
  372. flock/evaluators/declarative/declarative_evaluator.py +0 -217
  373. flock/evaluators/memory/memory_evaluator.py +0 -90
  374. flock/evaluators/test/test_case_evaluator.py +0 -38
  375. flock/evaluators/zep/zep_evaluator.py +0 -59
  376. flock/modules/__init__.py +0 -1
  377. flock/modules/assertion/__init__.py +0 -1
  378. flock/modules/assertion/assertion_module.py +0 -286
  379. flock/modules/callback/__init__.py +0 -1
  380. flock/modules/callback/callback_module.py +0 -91
  381. flock/modules/enterprise_memory/README.md +0 -99
  382. flock/modules/enterprise_memory/enterprise_memory_module.py +0 -526
  383. flock/modules/mem0/__init__.py +0 -1
  384. flock/modules/mem0/mem0_module.py +0 -126
  385. flock/modules/mem0_async/__init__.py +0 -1
  386. flock/modules/mem0_async/async_mem0_module.py +0 -126
  387. flock/modules/memory/__init__.py +0 -1
  388. flock/modules/memory/memory_module.py +0 -429
  389. flock/modules/memory/memory_parser.py +0 -125
  390. flock/modules/memory/memory_storage.py +0 -736
  391. flock/modules/output/__init__.py +0 -1
  392. flock/modules/output/output_module.py +0 -196
  393. flock/modules/performance/__init__.py +0 -1
  394. flock/modules/performance/metrics_module.py +0 -678
  395. flock/modules/zep/__init__.py +0 -1
  396. flock/modules/zep/zep_module.py +0 -192
  397. flock/platform/docker_tools.py +0 -49
  398. flock/platform/jaeger_install.py +0 -86
  399. flock/routers/__init__.py +0 -1
  400. flock/routers/agent/__init__.py +0 -1
  401. flock/routers/agent/agent_router.py +0 -236
  402. flock/routers/agent/handoff_agent.py +0 -58
  403. flock/routers/conditional/conditional_router.py +0 -486
  404. flock/routers/default/__init__.py +0 -1
  405. flock/routers/default/default_router.py +0 -80
  406. flock/routers/feedback/feedback_router.py +0 -114
  407. flock/routers/list_generator/list_generator_router.py +0 -166
  408. flock/routers/llm/__init__.py +0 -1
  409. flock/routers/llm/llm_router.py +0 -365
  410. flock/tools/__init__.py +0 -0
  411. flock/tools/azure_tools.py +0 -781
  412. flock/tools/code_tools.py +0 -167
  413. flock/tools/file_tools.py +0 -149
  414. flock/tools/github_tools.py +0 -157
  415. flock/tools/markdown_tools.py +0 -205
  416. flock/tools/system_tools.py +0 -9
  417. flock/tools/text_tools.py +0 -810
  418. flock/tools/web_tools.py +0 -92
  419. flock/tools/zendesk_tools.py +0 -501
  420. flock/webapp/__init__.py +0 -1
  421. flock/webapp/app/__init__.py +0 -0
  422. flock/webapp/app/api/__init__.py +0 -0
  423. flock/webapp/app/api/agent_management.py +0 -237
  424. flock/webapp/app/api/execution.py +0 -503
  425. flock/webapp/app/api/flock_management.py +0 -125
  426. flock/webapp/app/api/registry_viewer.py +0 -29
  427. flock/webapp/app/chat.py +0 -662
  428. flock/webapp/app/config.py +0 -104
  429. flock/webapp/app/dependencies.py +0 -117
  430. flock/webapp/app/main.py +0 -1086
  431. flock/webapp/app/middleware.py +0 -113
  432. flock/webapp/app/models_ui.py +0 -7
  433. flock/webapp/app/services/__init__.py +0 -0
  434. flock/webapp/app/services/feedback_file_service.py +0 -363
  435. flock/webapp/app/services/flock_service.py +0 -345
  436. flock/webapp/app/services/sharing_models.py +0 -81
  437. flock/webapp/app/services/sharing_store.py +0 -597
  438. flock/webapp/app/templates/theme_mapper.html +0 -326
  439. flock/webapp/app/theme_mapper.py +0 -811
  440. flock/webapp/app/utils.py +0 -85
  441. flock/webapp/run.py +0 -219
  442. flock/webapp/static/css/chat.css +0 -301
  443. flock/webapp/static/css/components.css +0 -167
  444. flock/webapp/static/css/header.css +0 -39
  445. flock/webapp/static/css/layout.css +0 -281
  446. flock/webapp/static/css/sidebar.css +0 -127
  447. flock/webapp/static/css/two-pane.css +0 -48
  448. flock/webapp/templates/base.html +0 -389
  449. flock/webapp/templates/chat.html +0 -152
  450. flock/webapp/templates/chat_settings.html +0 -19
  451. flock/webapp/templates/flock_editor.html +0 -16
  452. flock/webapp/templates/index.html +0 -12
  453. flock/webapp/templates/partials/_agent_detail_form.html +0 -93
  454. flock/webapp/templates/partials/_agent_list.html +0 -18
  455. flock/webapp/templates/partials/_agent_manager_view.html +0 -51
  456. flock/webapp/templates/partials/_agent_tools_checklist.html +0 -14
  457. flock/webapp/templates/partials/_chat_container.html +0 -15
  458. flock/webapp/templates/partials/_chat_messages.html +0 -57
  459. flock/webapp/templates/partials/_chat_settings_form.html +0 -85
  460. flock/webapp/templates/partials/_create_flock_form.html +0 -50
  461. flock/webapp/templates/partials/_dashboard_flock_detail.html +0 -17
  462. flock/webapp/templates/partials/_dashboard_flock_file_list.html +0 -16
  463. flock/webapp/templates/partials/_dashboard_flock_properties_preview.html +0 -28
  464. flock/webapp/templates/partials/_dashboard_upload_flock_form.html +0 -16
  465. flock/webapp/templates/partials/_dynamic_input_form_content.html +0 -22
  466. flock/webapp/templates/partials/_env_vars_table.html +0 -23
  467. flock/webapp/templates/partials/_execution_form.html +0 -127
  468. flock/webapp/templates/partials/_execution_view_container.html +0 -28
  469. flock/webapp/templates/partials/_flock_file_list.html +0 -23
  470. flock/webapp/templates/partials/_flock_properties_form.html +0 -52
  471. flock/webapp/templates/partials/_flock_upload_form.html +0 -16
  472. flock/webapp/templates/partials/_header_flock_status.html +0 -5
  473. flock/webapp/templates/partials/_live_logs.html +0 -13
  474. flock/webapp/templates/partials/_load_manager_view.html +0 -49
  475. flock/webapp/templates/partials/_registry_table.html +0 -25
  476. flock/webapp/templates/partials/_registry_viewer_content.html +0 -70
  477. flock/webapp/templates/partials/_results_display.html +0 -78
  478. flock/webapp/templates/partials/_settings_env_content.html +0 -9
  479. flock/webapp/templates/partials/_settings_theme_content.html +0 -14
  480. flock/webapp/templates/partials/_settings_view.html +0 -36
  481. flock/webapp/templates/partials/_share_chat_link_snippet.html +0 -11
  482. flock/webapp/templates/partials/_share_link_snippet.html +0 -35
  483. flock/webapp/templates/partials/_sidebar.html +0 -74
  484. flock/webapp/templates/partials/_structured_data_view.html +0 -40
  485. flock/webapp/templates/partials/_theme_preview.html +0 -36
  486. flock/webapp/templates/registry_viewer.html +0 -84
  487. flock/webapp/templates/shared_run_page.html +0 -140
  488. flock/workflow/__init__.py +0 -0
  489. flock/workflow/activities.py +0 -237
  490. flock/workflow/agent_activities.py +0 -24
  491. flock/workflow/agent_execution_activity.py +0 -240
  492. flock/workflow/flock_workflow.py +0 -225
  493. flock/workflow/temporal_config.py +0 -96
  494. flock/workflow/temporal_setup.py +0 -60
  495. flock_core-0.4.542.dist-info/METADATA +0 -676
  496. flock_core-0.4.542.dist-info/RECORD +0 -572
  497. flock_core-0.4.542.dist-info/entry_points.txt +0 -2
  498. /flock/{core/logging → logging}/formatters/themes.py +0 -0
  499. /flock/{core/logging → logging}/span_middleware/baggage_span_processor.py +0 -0
  500. /flock/{core/mcp → mcp}/util/__init__.py +0 -0
  501. {flock_core-0.4.542.dist-info → flock_core-0.5.0.dist-info}/WHEEL +0 -0
@@ -1,214 +0,0 @@
1
- import uuid
2
- from dataclasses import asdict
3
- from datetime import datetime
4
- from typing import Any, Literal
5
-
6
- from opentelemetry import trace
7
- from pydantic import BaseModel, Field
8
-
9
- from flock.core.context.context_vars import FLOCK_LAST_AGENT, FLOCK_LAST_RESULT
10
- from flock.core.logging.logging import get_logger
11
- from flock.core.serialization.serializable import Serializable
12
-
13
# Logger for this module, registered under the "context" name.
logger = get_logger("context")

# OpenTelemetry tracer keyed by this module's import path.
tracer = trace.get_tracer(__name__)
15
-
16
-
17
class AgentRunRecord(BaseModel):
    """One entry in a context's run history: which agent ran and what it produced."""

    # Identifier of this history entry (FlockContext.record builds it as
    # "<agent_name>_<4 hex chars>").
    id: str = ""
    # Name of the agent that ran.
    agent: str = ""
    # Data recorded for the run.
    data: dict[str, Any] = Field(default_factory=dict)
    # Timestamp of the run, kept as a plain string.
    timestamp: str = ""
    # Hand-off information attached to the run; defaults to an empty dict.
    hand_off: dict | None = Field(default_factory=dict)
    # Origin of the call, if any was recorded.
    called_from: str | None = None
24
-
25
-
26
class AgentDefinition(BaseModel):
    """Serializable description of an agent registered on a context."""

    # Class name of the agent (FlockContext.add_agent_definition stores
    # ``agent_type.__name__`` here).
    agent_type: str = ""
    # Name under which the agent is registered.
    agent_name: str = ""
    # Payload describing the agent.
    agent_data: dict = Field(default_factory=dict)
    # Serialization format associated with the payload.
    serializer: Literal["json", "cloudpickle", "msgpack"] = "cloudpickle"
33
-
34
-
35
class FlockContext(Serializable, BaseModel):
    """Shared state container handed from agent to agent during a run.

    Holds a free-form variable store (``state``), the chronological list of
    agent runs (``history``), the registered agent definitions, and the
    run/workflow identifiers.  Variables are read and written through
    ``get_variable`` / ``set_variable`` or with ``context[key]`` syntax.
    """

    # Free-form variable store keyed by strings such as "<agent>.<field>".
    state: dict[str, Any] = Field(default_factory=dict)
    # Agent runs in the order they were recorded.
    history: list[AgentRunRecord] = Field(default_factory=list)
    # Registered agent definitions keyed by agent name.
    agent_definitions: dict[str, AgentDefinition] = Field(default_factory=dict)
    run_id: str = Field(default="")
    workflow_id: str = Field(default="")
    workflow_timestamp: str = Field(default="")

    def record(
        self,
        agent_name: str,
        data: dict[str, Any],
        timestamp: str,
        hand_off: dict | None,
        called_from: str | None,
    ) -> None:
        """Append an agent run to the history and publish its data as variables.

        Every ``data`` item is stored as ``"<agent_name>.<key>"``; the whole
        ``data`` dict and the agent name are also stored under
        ``FLOCK_LAST_RESULT`` and ``FLOCK_LAST_AGENT``.

        Args:
            agent_name: Name of the agent that ran.
            data: Result data of the run (a shallow copy is stored in history).
            timestamp: Timestamp string of the run.
            hand_off: Hand-off information; must match the ``dict | None``
                field of ``AgentRunRecord``.
            called_from: Origin of the call, if known.
        """
        run_record = AgentRunRecord(
            id=agent_name + "_" + uuid.uuid4().hex[:4],
            agent=agent_name,
            data=data.copy(),
            timestamp=timestamp,
            hand_off=hand_off,
            called_from=called_from,
        )
        self.history.append(run_record)
        for key, value in data.items():
            self.set_variable(f"{agent_name}.{key}", value)
        self.set_variable(FLOCK_LAST_RESULT, data)
        self.set_variable(FLOCK_LAST_AGENT, agent_name)
        logger.info(
            f"Agent run recorded - run_id '{run_record.id}'",
            agent=agent_name,
            timestamp=timestamp,
            data=data,
        )
        current_span = trace.get_current_span()
        if current_span.get_span_context().is_valid:
            current_span.add_event(
                "record",
                attributes={"agent": agent_name, "timestamp": timestamp},
            )

    def get_variable(self, key: str, default: Any = None) -> Any:
        """Return the variable stored under ``key``, or ``default`` if absent."""
        return self.state.get(key, default)

    def set_variable(self, key: str, value: Any) -> None:
        """Store ``value`` under ``key``; log and trace the update if it changed."""
        old_value = self.state.get(key)
        self.state[key] = value
        if old_value != value:
            # Braces are doubled so the value cannot be mistaken for a "{}"
            # placeholder of the log message template.
            escaped_value = str(value).replace("{", "{{").replace("}", "}}")

            logger.info(
                "Context variable updated - {} -> {}",
                key,
                escaped_value,  # Arguments in order
            )

            current_span = trace.get_current_span()
            if current_span.get_span_context().is_valid:
                current_span.add_event(
                    "set_variable",
                    attributes={
                        "key": key,
                        "old": str(old_value),
                        "new": str(value),
                    },
                )

    def deepcopy(self) -> "FlockContext":
        """Return a copy built by round-tripping through ``to_dict``/``from_dict``."""
        return FlockContext.from_dict(self.to_dict())

    def get_agent_history(self, agent_name: str) -> list[AgentRunRecord]:
        """Return all history records produced by ``agent_name``, oldest first."""
        return [record for record in self.history if record.agent == agent_name]

    def next_input_for(self, agent) -> Any:
        """Resolve the input for ``agent`` from the stored variables.

        When ``agent.input`` is a comma-separated string of keys, a single key
        yields that variable's value and several keys yield a ``{key: value}``
        dict.  Otherwise the ``"init_input"`` variable is returned.
        """
        try:
            if hasattr(agent, "input") and isinstance(agent.input, str):
                keys = [k.strip() for k in agent.input.split(",") if k.strip()]
                if len(keys) == 1:
                    return self.get_variable(keys[0])
                else:
                    return {key: self.get_variable(key) for key in keys}
            else:
                return self.get_variable("init_input")
        except Exception as e:
            logger.error(
                "Error getting next input for agent",
                agent=agent.name,
                error=str(e),
            )
            raise

    def get_most_recent_value(self, variable_name: str) -> Any:
        """Return ``variable_name`` from the newest history record that has it.

        Returns ``None`` when no record contains the variable.
        """
        for history_record in reversed(self.history):
            if variable_name in history_record.data:
                return history_record.data[variable_name]
        return None

    def get_agent_definition(self, agent_name: str) -> AgentDefinition | None:
        """Return the definition registered for ``agent_name``, if any."""
        return self.agent_definitions.get(agent_name)

    def get_last_agent_name(self) -> str | None:
        """Returns the name of the agent from the most recent history record."""
        if not self.history:
            return None
        last_record = self.history[-1]
        # The 'called_from' field in the *next* record is the previous agent.
        # However, to get the name of the *last executed agent*, we look at the 'agent' field.
        return last_record.agent

    def add_agent_definition(
        self, agent_type: type, agent_name: str, agent_data: Any
    ) -> None:
        """Register (or replace) the definition stored under ``agent_name``."""
        definition = AgentDefinition(
            agent_type=agent_type.__name__,
            agent_name=agent_name,
            agent_data=agent_data,
        )
        self.agent_definitions[agent_name] = definition

    # Use the reactive setter for dict-like access.
    def __getitem__(self, key: str) -> Any:
        return self.get_variable(key)

    def __setitem__(self, key: str, value: Any) -> None:
        self.set_variable(key, value)

    def to_dict(self) -> dict[str, Any]:
        """Return the context as a plain dict with datetimes as ISO strings.

        The model is dumped through pydantic rather than
        ``dataclasses.asdict``, which only accepts dataclass instances.
        Nested dicts and lists are walked so ``datetime`` values and dataclass
        instances inside ``state`` are converted as well.
        """

        def convert(obj: Any) -> Any:
            if isinstance(obj, datetime):
                return obj.isoformat()
            if isinstance(obj, dict):
                return {k: convert(v) for k, v in obj.items()}
            if isinstance(obj, list):
                return [convert(v) for v in obj]
            if hasattr(obj, "__dataclass_fields__") and not isinstance(obj, type):
                return convert(asdict(obj))
            return obj

        # ``model_dump`` is the pydantic v2 name; fall back to v1's ``dict``.
        dump = getattr(self, "model_dump", None) or self.dict
        return convert(dump())

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "FlockContext":
        """Build a context from a dict such as the one produced by ``to_dict``.

        Nested history records and agent definitions are built by pydantic
        from the field types, so dicts inside ``state`` that merely contain a
        ``"timestamp"`` or ``"agent_type"`` key are left untouched.
        """
        return cls(**data)

    def resolve(self, svc_type):
        """Resolve a service from the request-scoped DI container if present.

        The bootstrap code is expected to store the active `ServiceProvider` from
        `wd.di` in the context variable key ``di.container``.  This helper
        provides a convenient façade so that Flock components can simply call
        ``context.resolve(SomeType)`` regardless of whether a container is
        available.  When the container is missing or the service cannot be
        resolved, ``None`` is returned instead of raising to keep backward
        compatibility.
        """
        container = self.get_variable("di.container")
        if container is None:
            return None
        try:
            return container.get_service(svc_type)
        except Exception:
            # Service not registered or other resolution error – fall back to None
            return None
@@ -1,40 +0,0 @@
1
- """Module for managing the FlockContext."""
2
-
3
- from flock.core.context.context import FlockContext
4
- from flock.core.context.context_vars import (
5
- FLOCK_CURRENT_AGENT,
6
- FLOCK_INITIAL_INPUT,
7
- FLOCK_LOCAL_DEBUG,
8
- FLOCK_MODEL,
9
- FLOCK_RUN_ID,
10
- FLOCK_USE_PRODUCTION_TOOLS,
11
- )
12
-
13
-
14
def initialize_context(
    context: FlockContext,
    agent_name: str,
    input_data: dict,
    run_id: str,
    local_debug: bool,
    model: str,
    use_production_tools: bool,
) -> None:
    """Seed a FlockContext with the standard variables for an agent run.

    Args:
        context: The FlockContext instance to populate.
        agent_name: Stored under ``FLOCK_CURRENT_AGENT``.
        input_data: Agent inputs. Each entry is stored under
            ``"flock." + key`` and the whole dict under
            ``FLOCK_INITIAL_INPUT``.
        run_id: Stored both as ``context.run_id`` and under ``FLOCK_RUN_ID``.
        local_debug: Stored under ``FLOCK_LOCAL_DEBUG``.
        model: Stored under ``FLOCK_MODEL``.
        use_production_tools: Stored under ``FLOCK_USE_PRODUCTION_TOOLS``.
    """
    store = context.set_variable

    store(FLOCK_CURRENT_AGENT, agent_name)

    # Expose every input field individually under the "flock." namespace,
    # then keep the untouched input dict as a whole.
    for field_name, field_value in input_data.items():
        store("flock." + field_name, field_value)
    store(FLOCK_INITIAL_INPUT, input_data)

    store(FLOCK_LOCAL_DEBUG, local_debug)

    # The run id is recorded twice: as an attribute and as a variable.
    context.run_id = run_id
    store(FLOCK_RUN_ID, run_id)

    store(FLOCK_MODEL, model)
    store(FLOCK_USE_PRODUCTION_TOOLS, use_production_tools)
@@ -1,11 +0,0 @@
1
- """Context variables for Flock."""
2
-
3
# String keys used to address well-known entries in the FlockContext
# variable store. All of them live under the "flock." prefix.

# Name of the agent being run (set by initialize_context).
FLOCK_CURRENT_AGENT = "flock.current_agent"
# The complete input dict handed to the run (set by initialize_context).
FLOCK_INITIAL_INPUT = "flock.initial_input"
# Local-debug flag for the run (set by initialize_context).
FLOCK_LOCAL_DEBUG = "flock.local_debug"
# Identifier of the run; mirrors context.run_id (set by initialize_context).
FLOCK_RUN_ID = "flock.run_id"
# Presumably the previously executed agent — TODO confirm against the writer.
FLOCK_LAST_AGENT = "flock.last_agent"
# Presumably the previous agent's result — TODO confirm against the writer.
FLOCK_LAST_RESULT = "flock.last_result"
# Model identifier for the run (set by initialize_context).
FLOCK_MODEL = "flock.model"
# NOTE(review): looks like a flag silencing output in batch runs — confirm.
FLOCK_BATCH_SILENT_MODE = "flock.batch_silent"
# Whether production tools should be used (set by initialize_context).
FLOCK_USE_PRODUCTION_TOOLS = "flock.use_production_tools"
@@ -1,395 +0,0 @@
1
- # src/flock/core/util/evaluation_helpers.py
2
- import inspect
3
- import sys
4
- from collections.abc import Callable
5
- from pathlib import Path
6
- from typing import Any, Union
7
-
8
- import pandas as pd
9
- from box import Box
10
- from datasets import (
11
- Dataset as HFDataset,
12
- get_dataset_config_names,
13
- load_dataset,
14
- )
15
- from opik import Opik
16
- from opik.evaluation import evaluate
17
-
18
- from flock.core.flock import Flock
19
- from flock.core.flock_agent import FlockAgent
20
- from flock.core.flock_evaluator import FlockEvaluator
21
- from flock.core.logging.logging import get_logger
22
-
23
- # Potentially import metrics libraries like rouge_score, nltk, sentence_transformers
24
-
25
- logger_helpers = get_logger("util.evaluation")
26
-
27
-
28
def evaluate_with_opik(
    dataset: str | Path | list[dict[str, Any]] | pd.DataFrame | HFDataset,
    dataset_name: str,
    experiment_name: str,
    start_agent: FlockAgent | str,
    input_mapping: dict[str, str],
    answer_mapping: dict[str, str],
    metrics: list[
        str
        | Callable[[Any, Any], bool | float | dict[str, Any]]
        | FlockAgent
        | FlockEvaluator
    ],
):
    """Run an Opik evaluation experiment with a Flock agent as the task.

    The dataset is normalized to a DataFrame, uploaded to the Opik dataset
    ``dataset_name`` and evaluated item by item with a single shared Flock
    instance.

    Args:
        dataset: Anything ``normalize_dataset`` accepts.
        dataset_name: Name of the Opik dataset to get or create.
        experiment_name: Name of the Opik experiment.
        start_agent: Agent added to the Flock and used as the start agent.
        input_mapping: Maps dataset column -> agent input field.
        answer_mapping: Maps dataset reference column -> agent output field.
            Only the first entry is used.
        metrics: Passed to Opik as ``scoring_metrics``.

    Returns:
        Whatever ``opik.evaluation.evaluate`` returns. Previously the result
        was computed and silently dropped.

    Raises:
        ValueError: If ``answer_mapping`` is empty. Failing here avoids
            uploading data and running agents only to crash inside each task.
    """
    if not answer_mapping:
        raise ValueError(
            "answer_mapping must contain at least one "
            "dataset-column -> output-field entry."
        )
    # Resolved once instead of on every dataset item.
    reference_column, output_field = next(iter(answer_mapping.items()))

    df = normalize_dataset(dataset)
    client = Opik()
    # Separate name: the `dataset` argument is no longer shadowed.
    opik_dataset = client.get_or_create_dataset(name=dataset_name)
    opik_dataset.insert_from_pandas(dataframe=df, ignore_keys=["source"])

    # One Flock instance shared by all task invocations.
    # NOTE(review): the model is hard-coded; confirm this is intended.
    shared_flock = Flock(
        name="opik_eval", model="azure/gpt-4.1", show_flock_banner=False
    )
    shared_flock.add_agent(start_agent)

    def evaluation_task(dataset_item):
        agent_input = {
            agent_field: dataset_item[column]
            for column, agent_field in input_mapping.items()
        }
        run_output = shared_flock.run(
            start_agent=start_agent, input=agent_input, box_result=False
        )
        reference = dataset_item[reference_column]
        answer = run_output.get(output_field, "No answer found")
        return {
            "input": agent_input,
            "output": answer,
            "reference": reference,
        }

    return evaluate(
        experiment_name=experiment_name,
        dataset=opik_dataset,
        task=evaluation_task,
        scoring_metrics=metrics,
    )
84
-
85
-
86
def load_and_merge_all_configs(dataset_name: str) -> pd.DataFrame:
    """Load every config and split of a dataset into one DataFrame.

    Each split is converted with ``to_pandas`` and tagged with two extra
    columns, ``config`` and ``split``, before all frames are concatenated
    with a fresh index. The head of the merged frame is logged.

    Args:
        dataset_name: Name passed to ``get_dataset_config_names`` and
            ``load_dataset``.

    Returns:
        The concatenated DataFrame.
    """
    frames = []
    for config_name in get_dataset_config_names(dataset_name):
        splits = load_dataset(dataset_name, config_name)
        for split_name, split_data in splits.items():
            frame = split_data.to_pandas()
            # Record where each row came from.
            frame["config"] = config_name
            frame["split"] = split_name
            frames.append(frame)

    combined = pd.concat(frames, ignore_index=True)
    logger_helpers.info(f"merged_df.head(): {combined.head()}")
    return combined
101
-
102
-
103
def import_hf_dataset_to_opik(dataset_name: str) -> pd.DataFrame:
    """Merge all configs/splits of a dataset and insert them into Opik.

    The merged frame's type, shape, per-split row counts and distinct
    configs are logged, then the frame is inserted into the Opik dataset
    named ``dataset_name`` (created if missing), ignoring the ``source`` key.

    Args:
        dataset_name: Used both to load the data and to name the Opik dataset.

    Returns:
        The merged DataFrame that was inserted.
    """
    merged = load_and_merge_all_configs(dataset_name)

    log = logger_helpers.info
    log(f"type(df): {type(merged)}")
    log(f"df.shape: {merged.shape}")
    log(f"df['split'].value_counts(): {merged['split'].value_counts()}")
    log(f"df['config'].unique(): {merged['config'].unique()}")

    opik_dataset = Opik().get_or_create_dataset(name=dataset_name)
    opik_dataset.insert_from_pandas(dataframe=merged, ignore_keys=["source"])
    return merged
118
-
119
-
120
- def normalize_dataset(dataset: Any) -> pd.DataFrame:
121
- """Converts various dataset formats into a pandas DataFrame."""
122
- if isinstance(dataset, pd.DataFrame):
123
- return dataset.copy()
124
- elif isinstance(dataset, str | Path):
125
- path = Path(dataset)
126
- if not path.exists():
127
- try:
128
- return load_and_merge_all_configs(dataset)
129
- except Exception as e:
130
- raise FileNotFoundError(
131
- f"Dataset file not found: {path}"
132
- ) from e
133
- if path.suffix.lower() == ".csv":
134
- return pd.read_csv(path)
135
- # Add support for json, jsonl etc. if needed
136
- else:
137
- raise ValueError(
138
- f"Unsupported file type for dataset: {path.suffix}"
139
- )
140
- elif isinstance(dataset, list):
141
- if not dataset or not isinstance(dataset[0], dict):
142
- raise ValueError("Dataset list must contain dictionaries.")
143
- return pd.DataFrame(dataset)
144
- elif "datasets" in sys.modules and isinstance(
145
- dataset, sys.modules["datasets"].Dataset
146
- ):
147
- # Requires 'datasets' library to be installed
148
- return dataset.to_pandas()
149
- else:
150
- raise TypeError(f"Unsupported dataset type: {type(dataset)}")
151
-
152
-
153
def extract_value_by_dot_notation(data: dict | Box, key: str) -> Any:
    """Fetch a nested value from a dict/Box using a dotted path.

    Args:
        data: The mapping to traverse.
        key: Path such as ``"a.b.c"``; each segment is looked up with
            ``.get`` on the current level.

    Returns:
        The value at the end of the path, or ``None`` when ``key`` is empty,
        a segment is missing (or maps to ``None``), or an intermediate value
        is not a dict/Box. List indexing is not supported.
    """
    if not key:
        return None

    segments = key.split(".")
    current = data
    try:
        for segment in segments:
            if not isinstance(current, (dict, Box)):
                return None  # Cannot traverse further
            current = current.get(segment)
            if current is None:
                return None  # Key not found at this level
    except (KeyError, IndexError, AttributeError):
        return None
    return current
173
-
174
-
175
def calculate_evaluation_metrics(
    metrics: list[Union[str, Callable, "FlockAgent", "FlockEvaluator"]],
    metric_configs: dict[str, dict[str, Any]],
    predicted_answers: dict[str, Any],
    expected_answers: dict[str, Any],
    agent_inputs: dict[str, Any],  # For context
    agent_output: Any,  # For context
) -> dict[str, Any]:
    """Compute every requested metric for one evaluation item.

    Metric kinds, checked in this order:

    * ``str`` - a built-in metric name; it is applied to the predicted and
      expected values stored under that same name, if both exist.
    * callable - called with the full predicted/expected dicts plus its
      config; ``agent_inputs`` / ``agent_output`` are passed only when the
      callable's signature names them.
    * objects whose type name contains ``FlockAgent`` or ``FlockEvaluator``
      - not implemented; a placeholder string is recorded.

    Args:
        metrics: Metrics to evaluate.
        metric_configs: Per-metric keyword arguments, keyed by metric name.
        predicted_answers: Predicted values keyed by answer name.
        expected_answers: Reference values keyed by answer name.
        agent_inputs: Inputs given to the agent (optional context).
        agent_output: Raw agent output (optional context).

    Returns:
        Mapping of metric name to result. A dict result is flattened into
        ``"<metric>_<sub_key>"`` entries; a failing metric is recorded as
        ``"[Error: ...]"`` instead of raising.
    """
    scores = {}
    # Context a custom callable may opt into by declaring the parameter.
    optional_context = {
        "agent_inputs": agent_inputs,
        "agent_output": agent_output,
    }

    for metric in metrics:
        metric_name = ""
        outcome = None
        try:
            if isinstance(metric, str):
                metric_name = metric
                has_both = (
                    metric_name in predicted_answers
                    and metric_name in expected_answers
                )
                if not has_both:
                    logger_helpers.warning(
                        f"Could not find matching predicted/expected values for metric '{metric_name}' based on answer_mapping keys."
                    )
                    outcome = None  # Or some error indicator
                else:
                    predicted = predicted_answers[metric_name]
                    expected = expected_answers[metric_name]
                    metric_func = _get_metric_function(metric_name)
                    config = metric_configs.get(metric_name, {})
                    outcome = metric_func(predicted, expected, **config)

            elif isinstance(metric, Callable):
                metric_name = getattr(metric, "__name__", "custom_function")
                config = metric_configs.get(metric_name, {})
                accepted = inspect.signature(metric).parameters
                call_kwargs = config.copy()
                for ctx_name, ctx_value in optional_context.items():
                    if ctx_name in accepted:
                        call_kwargs[ctx_name] = ctx_value
                # The callable receives all answers and picks what it needs.
                outcome = metric(
                    predicted_answers, expected_answers, **call_kwargs
                )

            # Agent/evaluator judges are matched by type name to avoid a
            # hard import; running them would need an async context.
            elif "FlockAgent" in str(type(metric)):
                metric_name = getattr(metric, "name", "judge_agent")
                config = metric_configs.get(metric_name, {})
                logger_helpers.warning(
                    f"Agent-based metric '{metric_name}' execution not implemented in this sketch."
                )
                outcome = "[Agent Judge Not Implemented]"

            elif "FlockEvaluator" in str(type(metric)):
                metric_name = getattr(metric, "name", "judge_evaluator")
                config = metric_configs.get(metric_name, {})
                logger_helpers.warning(
                    f"Evaluator-based metric '{metric_name}' execution not implemented in this sketch."
                )
                outcome = "[Evaluator Judge Not Implemented]"

            else:
                logger_helpers.warning(
                    f"Unsupported metric type: {type(metric)}"
                )
                continue

            # Dict results are flattened into one entry per sub-score.
            if not isinstance(outcome, dict):
                scores[metric_name] = outcome
            else:
                for sub_key, sub_value in outcome.items():
                    scores[f"{metric_name}_{sub_key}"] = sub_value

        except Exception as e:
            logger_helpers.error(
                f"Error calculating metric '{metric_name}': {e}"
            )
            scores[metric_name] = f"[Error: {e}]"

    return scores
271
-
272
-
273
def _get_metric_function(metric_name: str) -> Callable:
    """Map a built-in metric name to a ``(pred, act, **kw)`` callable.

    Supported names:

    * ``exact_match`` - stripped string equality.
    * ``fuzzy_match`` - ``thefuzz`` ratio >= ``threshold`` (default 85).
    * ``rouge*`` (``rouge_1``, ``rouge_2``, ``rouge_l``, ``rouge_lsum``) -
      one field of the ROUGE score (``score_type``, default ``fmeasure``).
    * ``semantic_similarity`` - cosine similarity of sentence embeddings.
    * ``llm_judge`` - placeholder returning a fixed string.

    Optional libraries are imported lazily; when one is missing a warning
    is logged and a callable that always returns ``None`` is handed back.

    Raises:
        ValueError: For any other metric name.
    """
    if metric_name == "exact_match":
        return lambda pred, act, **kw: str(pred).strip() == str(act).strip()
    elif metric_name == "fuzzy_match":
        try:
            from thefuzz import fuzz

            return (
                lambda pred, act, threshold=85, **kw: fuzz.ratio(
                    str(pred), str(act)
                )
                >= threshold
            )
        except ImportError:
            logger_helpers.warning(
                "fuzzy_match requires 'thefuzz': pip install thefuzz[speedup]"
            )
            return lambda p, a, **kw: None
    elif metric_name.startswith("rouge"):  # rouge_1, rouge_2, rouge_l
        try:
            from rouge_score import rouge_scorer

            # rouge-score type names are case-sensitive: the longest-common-
            # subsequence variants are "rougeL" / "rougeLsum". Stripping the
            # underscores alone gave "rougel", which the scorer rejects.
            suffix = metric_name.replace("_", "")[len("rouge"):]
            rouge_type = "rouge" + {"l": "L", "lsum": "Lsum"}.get(
                suffix.lower(), suffix
            )
            scorer = rouge_scorer.RougeScorer([rouge_type], use_stemmer=True)

            def calculate_rouge(pred, act, score_type="fmeasure", **kw):
                # score() takes (target, prediction), in that order.
                scores = scorer.score(str(act), str(pred))
                return scores[rouge_type]._asdict().get(score_type, 0.0)

            return calculate_rouge
        except ImportError:
            logger_helpers.warning(
                "rouge requires 'rouge-score': pip install rouge-score"
            )
            return lambda p, a, **kw: None
    elif metric_name == "semantic_similarity":
        try:
            from sentence_transformers import SentenceTransformer, util

            # NOTE(review): the model is re-instantiated on every call to
            # this function; consider caching or passing it in via config.
            model = SentenceTransformer("all-MiniLM-L6-v2")

            def calculate_similarity(pred, act, **kw):
                emb1 = model.encode(str(pred), convert_to_tensor=True)
                emb2 = model.encode(str(act), convert_to_tensor=True)
                return util.pytorch_cos_sim(emb1, emb2).item()

            return calculate_similarity
        except ImportError:
            logger_helpers.warning(
                "semantic_similarity requires 'sentence-transformers': pip install sentence-transformers"
            )
            return lambda p, a, **kw: None
    # Add bleu, f1 etc.
    elif metric_name == "llm_judge":
        # Judges are normally dispatched by type in
        # calculate_evaluation_metrics; this keeps a string lookup working.
        return lambda p, a, **kw: "[LLM Judge Not Implemented Directly]"
    else:
        raise ValueError(f"Unknown built-in metric: {metric_name}")
340
-
341
-
342
def aggregate_results(results_list: list[dict[str, Any]]) -> dict[str, Any]:
    """Summarize per-item evaluation results.

    Args:
        results_list: One dict per evaluated item. A truthy ``"error"``
            entry counts the item as failed; ``"metrics"`` maps metric name
            to value.

    Returns:
        A dict with ``total_items``, ``errors`` and ``metrics_summary``.
        Only int/float/bool metric values are aggregated: a metric whose
        values are all booleans gets ``{"accuracy": ...}``, any other gets
        ``{"mean": ..., "count": ...}``.
    """
    failed_items = 0
    samples: dict[str, list[float | bool]] = {}

    for entry in results_list:
        if entry.get("error"):
            failed_items += 1
        for metric_name, score in entry.get("metrics", {}).items():
            # Strings and other non-numeric results are skipped.
            if isinstance(score, (float, int, bool)):
                samples.setdefault(metric_name, []).append(score)

    per_metric = {}
    for metric_name, scores in samples.items():
        total, count = sum(scores), len(scores)
        if all(isinstance(score, bool) for score in scores):
            per_metric[metric_name] = {"accuracy": total / count}
        else:
            # Everything collected above is numeric, so the rest is averaged.
            per_metric[metric_name] = {"mean": total / count, "count": count}

    return {
        "total_items": len(results_list),
        "errors": failed_items,
        "metrics_summary": per_metric,
    }
378
-
379
-
380
- # --- Placeholder for async judge execution ---
381
- # Need to run these within the main async context or manage loops carefully
382
async def _run_judge_agent(judge_agent, predicted, expected, config):
    """Placeholder for scoring a prediction with a judge agent.

    Not implemented: none of the arguments are used and a fixed marker
    string is returned. The intended shape is to build the judge's input
    from the prediction/reference (plus criteria), await the agent's async
    run, and return the result or a score extracted from it.
    """
    not_implemented = "[Agent Judge Not Implemented]"
    return not_implemented
388
-
389
-
390
async def _run_judge_evaluator(judge_evaluator, predicted, expected, config):
    """Placeholder for scoring a prediction with a judge evaluator.

    Not implemented: none of the arguments are used and a fixed marker
    string is returned. The intended shape is to build the evaluator input
    from the prediction/reference and ``config``, await its ``evaluate``
    call, and return the result or a score extracted from it.
    """
    not_implemented = "[Evaluator Judge Not Implemented]"
    return not_implemented