PyPI - flock-core - Versions diffs - 0.5.0b28__py3-none-any.whl → 0.5.56b0__py3-none-any.whl - Mend

flock-core 0.5.0b28__py3-none-any.whl → 0.5.56b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of flock-core might be problematic. Click here for more details.

Files changed (359)

flock/__init__.py +12 -217
flock/agent.py +678 -0
flock/api/themes.py +71 -0
flock/artifacts.py +79 -0
flock/cli.py +75 -0
flock/components.py +173 -0
flock/dashboard/__init__.py +28 -0
flock/dashboard/collector.py +283 -0
flock/dashboard/events.py +182 -0
flock/dashboard/launcher.py +230 -0
flock/dashboard/service.py +537 -0
flock/dashboard/websocket.py +235 -0
flock/engines/__init__.py +6 -0
flock/engines/dspy_engine.py +856 -0
flock/examples.py +128 -0
flock/{core/util → helper}/cli_helper.py +4 -3
flock/{core/logging → logging}/__init__.py +2 -3
flock/{core/logging → logging}/formatters/enum_builder.py +3 -4
flock/{core/logging → logging}/formatters/theme_builder.py +19 -44
flock/{core/logging → logging}/formatters/themed_formatter.py +69 -115
flock/{core/logging → logging}/logging.py +77 -61
flock/{core/logging → logging}/telemetry.py +20 -26
flock/{core/logging → logging}/telemetry_exporter/base_exporter.py +2 -2
flock/{core/logging → logging}/telemetry_exporter/file_exporter.py +6 -9
flock/{core/logging → logging}/telemetry_exporter/sqlite_exporter.py +2 -3
flock/{core/logging → logging}/trace_and_logged.py +20 -24
flock/mcp/__init__.py +91 -0
flock/{core/mcp/mcp_client.py → mcp/client.py} +103 -154
flock/{core/mcp/mcp_config.py → mcp/config.py} +62 -117
flock/mcp/manager.py +255 -0
flock/mcp/servers/sse/__init__.py +1 -1
flock/mcp/servers/sse/flock_sse_server.py +11 -53
flock/mcp/servers/stdio/__init__.py +1 -1
flock/mcp/servers/stdio/flock_stdio_server.py +8 -48
flock/mcp/servers/streamable_http/flock_streamable_http_server.py +17 -62
flock/mcp/servers/websockets/flock_websocket_server.py +7 -40
flock/{core/mcp/flock_mcp_tool.py → mcp/tool.py} +16 -26
flock/mcp/types/__init__.py +42 -0
flock/{core/mcp → mcp}/types/callbacks.py +9 -15
flock/{core/mcp → mcp}/types/factories.py +7 -6
flock/{core/mcp → mcp}/types/handlers.py +13 -18
flock/{core/mcp → mcp}/types/types.py +70 -74
flock/{core/mcp → mcp}/util/helpers.py +1 -1
flock/orchestrator.py +645 -0
flock/registry.py +148 -0
flock/runtime.py +262 -0
flock/service.py +140 -0
flock/store.py +69 -0
flock/subscription.py +111 -0
flock/themes/andromeda.toml +1 -1
flock/themes/apple-system-colors.toml +1 -1
flock/themes/arcoiris.toml +1 -1
flock/themes/atomonelight.toml +1 -1
flock/themes/ayu copy.toml +1 -1
flock/themes/ayu-light.toml +1 -1
flock/themes/belafonte-day.toml +1 -1
flock/themes/belafonte-night.toml +1 -1
flock/themes/blulocodark.toml +1 -1
flock/themes/breeze.toml +1 -1
flock/themes/broadcast.toml +1 -1
flock/themes/brogrammer.toml +1 -1
flock/themes/builtin-dark.toml +1 -1
flock/themes/builtin-pastel-dark.toml +1 -1
flock/themes/catppuccin-latte.toml +1 -1
flock/themes/catppuccin-macchiato.toml +1 -1
flock/themes/catppuccin-mocha.toml +1 -1
flock/themes/cga.toml +1 -1
flock/themes/chalk.toml +1 -1
flock/themes/ciapre.toml +1 -1
flock/themes/coffee-theme.toml +1 -1
flock/themes/cyberpunkscarletprotocol.toml +1 -1
flock/themes/dark+.toml +1 -1
flock/themes/darkermatrix.toml +1 -1
flock/themes/darkside.toml +1 -1
flock/themes/desert.toml +1 -1
flock/themes/django.toml +1 -1
flock/themes/djangosmooth.toml +1 -1
flock/themes/doomone.toml +1 -1
flock/themes/dotgov.toml +1 -1
flock/themes/dracula+.toml +1 -1
flock/themes/duckbones.toml +1 -1
flock/themes/encom.toml +1 -1
flock/themes/espresso.toml +1 -1
flock/themes/everblush.toml +1 -1
flock/themes/fairyfloss.toml +1 -1
flock/themes/fideloper.toml +1 -1
flock/themes/fishtank.toml +1 -1
flock/themes/flexoki-light.toml +1 -1
flock/themes/floraverse.toml +1 -1
flock/themes/framer.toml +1 -1
flock/themes/galizur.toml +1 -1
flock/themes/github.toml +1 -1
flock/themes/grass.toml +1 -1
flock/themes/grey-green.toml +1 -1
flock/themes/gruvboxlight.toml +1 -1
flock/themes/guezwhoz.toml +1 -1
flock/themes/harper.toml +1 -1
flock/themes/hax0r-blue.toml +1 -1
flock/themes/hopscotch.256.toml +1 -1
flock/themes/ic-green-ppl.toml +1 -1
flock/themes/iceberg-dark.toml +1 -1
flock/themes/japanesque.toml +1 -1
flock/themes/jubi.toml +1 -1
flock/themes/kibble.toml +1 -1
flock/themes/kolorit.toml +1 -1
flock/themes/kurokula.toml +1 -1
flock/themes/materialdesigncolors.toml +1 -1
flock/themes/matrix.toml +1 -1
flock/themes/mellifluous.toml +1 -1
flock/themes/midnight-in-mojave.toml +1 -1
flock/themes/monokai-remastered.toml +1 -1
flock/themes/monokai-soda.toml +1 -1
flock/themes/neon.toml +1 -1
flock/themes/neopolitan.toml +1 -1
flock/themes/nord-light.toml +1 -1
flock/themes/ocean.toml +1 -1
flock/themes/onehalfdark.toml +1 -1
flock/themes/onehalflight.toml +1 -1
flock/themes/palenighthc.toml +1 -1
flock/themes/paulmillr.toml +1 -1
flock/themes/pencildark.toml +1 -1
flock/themes/pnevma.toml +1 -1
flock/themes/purple-rain.toml +1 -1
flock/themes/purplepeter.toml +1 -1
flock/themes/raycast-dark.toml +1 -1
flock/themes/red-sands.toml +1 -1
flock/themes/relaxed.toml +1 -1
flock/themes/retro.toml +1 -1
flock/themes/rose-pine.toml +1 -1
flock/themes/royal.toml +1 -1
flock/themes/ryuuko.toml +1 -1
flock/themes/sakura.toml +1 -1
flock/themes/scarlet-protocol.toml +1 -1
flock/themes/seoulbones-dark.toml +1 -1
flock/themes/shades-of-purple.toml +1 -1
flock/themes/smyck.toml +1 -1
flock/themes/softserver.toml +1 -1
flock/themes/solarized-darcula.toml +1 -1
flock/themes/square.toml +1 -1
flock/themes/sugarplum.toml +1 -1
flock/themes/thayer-bright.toml +1 -1
flock/themes/tokyonight.toml +1 -1
flock/themes/tomorrow.toml +1 -1
flock/themes/ubuntu.toml +1 -1
flock/themes/ultradark.toml +1 -1
flock/themes/ultraviolent.toml +1 -1
flock/themes/unikitty.toml +1 -1
flock/themes/urple.toml +1 -1
flock/themes/vesper.toml +1 -1
flock/themes/vimbones.toml +1 -1
flock/themes/wildcherry.toml +1 -1
flock/themes/wilmersdorf.toml +1 -1
flock/themes/wryan.toml +1 -1
flock/themes/xcodedarkhc.toml +1 -1
flock/themes/xcodelight.toml +1 -1
flock/themes/zenbones-light.toml +1 -1
flock/themes/zenwritten-dark.toml +1 -1
flock/utilities.py +301 -0
flock/{components/utility → utility}/output_utility_component.py +68 -53
flock/visibility.py +107 -0
flock_core-0.5.56b0.dist-info/METADATA +747 -0
flock_core-0.5.56b0.dist-info/RECORD +398 -0
flock_core-0.5.56b0.dist-info/entry_points.txt +2 -0
{flock_core-0.5.0b28.dist-info → flock_core-0.5.56b0.dist-info}/licenses/LICENSE +1 -1
flock/adapter/__init__.py +0 -14
flock/adapter/azure_adapter.py +0 -68
flock/adapter/chroma_adapter.py +0 -73
flock/adapter/faiss_adapter.py +0 -97
flock/adapter/pinecone_adapter.py +0 -51
flock/adapter/vector_base.py +0 -47
flock/cli/assets/release_notes.md +0 -140
flock/cli/config.py +0 -8
flock/cli/constants.py +0 -36
flock/cli/create_agent.py +0 -1
flock/cli/create_flock.py +0 -280
flock/cli/execute_flock.py +0 -620
flock/cli/load_agent.py +0 -1
flock/cli/load_examples.py +0 -1
flock/cli/load_flock.py +0 -192
flock/cli/load_release_notes.py +0 -20
flock/cli/loaded_flock_cli.py +0 -254
flock/cli/manage_agents.py +0 -459
flock/cli/registry_management.py +0 -889
flock/cli/runner.py +0 -41
flock/cli/settings.py +0 -857
flock/cli/utils.py +0 -135
flock/cli/view_results.py +0 -29
flock/cli/yaml_editor.py +0 -396
flock/components/__init__.py +0 -30
flock/components/evaluation/__init__.py +0 -9
flock/components/evaluation/declarative_evaluation_component.py +0 -606
flock/components/routing/__init__.py +0 -15
flock/components/routing/conditional_routing_component.py +0 -494
flock/components/routing/default_routing_component.py +0 -103
flock/components/routing/llm_routing_component.py +0 -206
flock/components/utility/__init__.py +0 -22
flock/components/utility/example_utility_component.py +0 -250
flock/components/utility/feedback_utility_component.py +0 -206
flock/components/utility/memory_utility_component.py +0 -550
flock/components/utility/metrics_utility_component.py +0 -700
flock/config.py +0 -61
flock/core/__init__.py +0 -110
flock/core/agent/__init__.py +0 -16
flock/core/agent/default_agent.py +0 -216
flock/core/agent/flock_agent_components.py +0 -104
flock/core/agent/flock_agent_execution.py +0 -101
flock/core/agent/flock_agent_integration.py +0 -260
flock/core/agent/flock_agent_lifecycle.py +0 -186
flock/core/agent/flock_agent_serialization.py +0 -381
flock/core/api/__init__.py +0 -10
flock/core/api/custom_endpoint.py +0 -45
flock/core/api/endpoints.py +0 -254
flock/core/api/main.py +0 -162
flock/core/api/models.py +0 -97
flock/core/api/run_store.py +0 -224
flock/core/api/runner.py +0 -44
flock/core/api/service.py +0 -214
flock/core/component/__init__.py +0 -15
flock/core/component/agent_component_base.py +0 -309
flock/core/component/evaluation_component.py +0 -62
flock/core/component/routing_component.py +0 -74
flock/core/component/utility_component.py +0 -69
flock/core/config/flock_agent_config.py +0 -58
flock/core/config/scheduled_agent_config.py +0 -40
flock/core/context/context.py +0 -213
flock/core/context/context_manager.py +0 -37
flock/core/context/context_vars.py +0 -10
flock/core/evaluation/utils.py +0 -396
flock/core/execution/batch_executor.py +0 -369
flock/core/execution/evaluation_executor.py +0 -438
flock/core/execution/local_executor.py +0 -31
flock/core/execution/opik_executor.py +0 -103
flock/core/execution/temporal_executor.py +0 -164
flock/core/flock.py +0 -634
flock/core/flock_agent.py +0 -336
flock/core/flock_factory.py +0 -613
flock/core/flock_scheduler.py +0 -166
flock/core/flock_server_manager.py +0 -136
flock/core/interpreter/python_interpreter.py +0 -689
flock/core/mcp/__init__.py +0 -1
flock/core/mcp/flock_mcp_server.py +0 -680
flock/core/mcp/mcp_client_manager.py +0 -201
flock/core/mcp/types/__init__.py +0 -1
flock/core/mixin/dspy_integration.py +0 -403
flock/core/mixin/prompt_parser.py +0 -125
flock/core/orchestration/__init__.py +0 -15
flock/core/orchestration/flock_batch_processor.py +0 -94
flock/core/orchestration/flock_evaluator.py +0 -113
flock/core/orchestration/flock_execution.py +0 -295
flock/core/orchestration/flock_initialization.py +0 -149
flock/core/orchestration/flock_server_manager.py +0 -67
flock/core/orchestration/flock_web_server.py +0 -117
flock/core/registry/__init__.py +0 -45
flock/core/registry/agent_registry.py +0 -69
flock/core/registry/callable_registry.py +0 -139
flock/core/registry/component_discovery.py +0 -142
flock/core/registry/component_registry.py +0 -64
flock/core/registry/config_mapping.py +0 -64
flock/core/registry/decorators.py +0 -137
flock/core/registry/registry_hub.py +0 -205
flock/core/registry/server_registry.py +0 -57
flock/core/registry/type_registry.py +0 -86
flock/core/serialization/__init__.py +0 -13
flock/core/serialization/callable_registry.py +0 -52
flock/core/serialization/flock_serializer.py +0 -832
flock/core/serialization/json_encoder.py +0 -41
flock/core/serialization/secure_serializer.py +0 -175
flock/core/serialization/serializable.py +0 -342
flock/core/serialization/serialization_utils.py +0 -412
flock/core/util/file_path_utils.py +0 -223
flock/core/util/hydrator.py +0 -309
flock/core/util/input_resolver.py +0 -164
flock/core/util/loader.py +0 -59
flock/core/util/splitter.py +0 -219
flock/di.py +0 -27
flock/platform/docker_tools.py +0 -49
flock/platform/jaeger_install.py +0 -86
flock/webapp/__init__.py +0 -1
flock/webapp/app/__init__.py +0 -0
flock/webapp/app/api/__init__.py +0 -0
flock/webapp/app/api/agent_management.py +0 -241
flock/webapp/app/api/execution.py +0 -709
flock/webapp/app/api/flock_management.py +0 -129
flock/webapp/app/api/registry_viewer.py +0 -30
flock/webapp/app/chat.py +0 -665
flock/webapp/app/config.py +0 -104
flock/webapp/app/dependencies.py +0 -117
flock/webapp/app/main.py +0 -1070
flock/webapp/app/middleware.py +0 -113
flock/webapp/app/models_ui.py +0 -7
flock/webapp/app/services/__init__.py +0 -0
flock/webapp/app/services/feedback_file_service.py +0 -363
flock/webapp/app/services/flock_service.py +0 -337
flock/webapp/app/services/sharing_models.py +0 -81
flock/webapp/app/services/sharing_store.py +0 -762
flock/webapp/app/templates/theme_mapper.html +0 -326
flock/webapp/app/theme_mapper.py +0 -812
flock/webapp/app/utils.py +0 -85
flock/webapp/run.py +0 -215
flock/webapp/static/css/chat.css +0 -301
flock/webapp/static/css/components.css +0 -167
flock/webapp/static/css/header.css +0 -39
flock/webapp/static/css/layout.css +0 -46
flock/webapp/static/css/sidebar.css +0 -127
flock/webapp/static/css/two-pane.css +0 -48
flock/webapp/templates/base.html +0 -200
flock/webapp/templates/chat.html +0 -152
flock/webapp/templates/chat_settings.html +0 -19
flock/webapp/templates/flock_editor.html +0 -16
flock/webapp/templates/index.html +0 -12
flock/webapp/templates/partials/_agent_detail_form.html +0 -93
flock/webapp/templates/partials/_agent_list.html +0 -18
flock/webapp/templates/partials/_agent_manager_view.html +0 -51
flock/webapp/templates/partials/_agent_tools_checklist.html +0 -14
flock/webapp/templates/partials/_chat_container.html +0 -15
flock/webapp/templates/partials/_chat_messages.html +0 -57
flock/webapp/templates/partials/_chat_settings_form.html +0 -85
flock/webapp/templates/partials/_create_flock_form.html +0 -50
flock/webapp/templates/partials/_dashboard_flock_detail.html +0 -17
flock/webapp/templates/partials/_dashboard_flock_file_list.html +0 -16
flock/webapp/templates/partials/_dashboard_flock_properties_preview.html +0 -28
flock/webapp/templates/partials/_dashboard_upload_flock_form.html +0 -16
flock/webapp/templates/partials/_dynamic_input_form_content.html +0 -22
flock/webapp/templates/partials/_env_vars_table.html +0 -23
flock/webapp/templates/partials/_execution_form.html +0 -118
flock/webapp/templates/partials/_execution_view_container.html +0 -28
flock/webapp/templates/partials/_flock_file_list.html +0 -23
flock/webapp/templates/partials/_flock_properties_form.html +0 -52
flock/webapp/templates/partials/_flock_upload_form.html +0 -16
flock/webapp/templates/partials/_header_flock_status.html +0 -5
flock/webapp/templates/partials/_load_manager_view.html +0 -49
flock/webapp/templates/partials/_registry_table.html +0 -25
flock/webapp/templates/partials/_registry_viewer_content.html +0 -70
flock/webapp/templates/partials/_results_display.html +0 -78
flock/webapp/templates/partials/_settings_env_content.html +0 -9
flock/webapp/templates/partials/_settings_theme_content.html +0 -14
flock/webapp/templates/partials/_settings_view.html +0 -36
flock/webapp/templates/partials/_share_chat_link_snippet.html +0 -11
flock/webapp/templates/partials/_share_link_snippet.html +0 -35
flock/webapp/templates/partials/_sidebar.html +0 -74
flock/webapp/templates/partials/_streaming_results_container.html +0 -195
flock/webapp/templates/partials/_structured_data_view.html +0 -40
flock/webapp/templates/partials/_theme_preview.html +0 -36
flock/webapp/templates/registry_viewer.html +0 -84
flock/webapp/templates/shared_run_page.html +0 -140
flock/workflow/__init__.py +0 -0
flock/workflow/activities.py +0 -196
flock/workflow/agent_activities.py +0 -24
flock/workflow/agent_execution_activity.py +0 -202
flock/workflow/flock_workflow.py +0 -214
flock/workflow/temporal_config.py +0 -96
flock/workflow/temporal_setup.py +0 -68
flock_core-0.5.0b28.dist-info/METADATA +0 -274
flock_core-0.5.0b28.dist-info/RECORD +0 -561
flock_core-0.5.0b28.dist-info/entry_points.txt +0 -2
/flock/{core/logging → logging}/formatters/themes.py +0 -0
/flock/{core/logging → logging}/span_middleware/baggage_span_processor.py +0 -0
/flock/{core/mcp → mcp}/util/__init__.py +0 -0
{flock_core-0.5.0b28.dist-info → flock_core-0.5.56b0.dist-info}/WHEEL +0 -0

flock/core/execution/evaluation_executor.py DELETED Viewed

@@ -1,438 +0,0 @@
-# src/flock/core/execution/evaluation_processor.py
-"""Contains the EvaluationProcessor class responsible for evaluating Flock agents
-against datasets using various metrics.
-"""
-import asyncio
-import json
-from collections.abc import Callable
-from pathlib import Path
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Literal,
-    Union,
-)
-from pandas import DataFrame
-# Conditional pandas import
-try:
-    import pandas as pd
-    PANDAS_AVAILABLE = True
-except ImportError:
-    pd = None  # type: ignore
-    PANDAS_AVAILABLE = False
-# Box for results
-from box import Box
-from datasets import Dataset as HFDataset
-from flock.core.evaluation.utils import (
-    aggregate_results,
-    calculate_evaluation_metrics,
-    extract_value_by_dot_notation,
-    normalize_dataset,
-    # Import metric calculation/aggregation helpers
-)
-# Flock core imports
-from flock.core.logging.logging import get_logger
-if TYPE_CHECKING:
-    from flock.core.flock import Flock
-    from flock.core.flock_agent import FlockAgent
-    # Legacy FlockEvaluator import removed
-    # Conditional types
-logger = get_logger("execution.evaluation")
-class EvaluationExecutor:
-    """Handles the evaluation of Flock agents against datasets."""
-    def __init__(self, flock_instance: "Flock"):
-        """Initializes the EvaluationProcessor.
-        Args:
-            flock_instance: The Flock instance this processor will use.
-        """
-        self.flock = flock_instance
-    async def evaluate_async(
-        self,
-        dataset: str | Path | list[dict[str, Any]] | DataFrame | HFDataset,
-        start_agent: Union["FlockAgent", str],
-        input_mapping: dict[str, str],
-        answer_mapping: dict[str, str],
-        metrics: list[
-            Union[
-                str,
-                Callable[[Any, Any], bool | float | dict[str, Any]],
-                "FlockAgent",
-                "FlockEvaluator",
-            ]
-        ],
-        metric_configs: dict[str, dict[str, Any]] | None = None,
-        static_inputs: dict[str, Any] | None = None,
-        parallel: bool = True,
-        max_workers: int = 5,
-        use_temporal: bool | None = None,
-        error_handling: Literal["raise", "skip", "log"] = "log",
-        output_file: str | Path | None = None,
-        return_dataframe: bool = True,
-        silent_mode: bool = False,
-        metadata_columns: list[str] | None = None,  # Columns to pass through
-        # dataset_split: Optional[str] = None # TODO: Add split support in normalize_dataset
-    ) -> DataFrame | list[dict[str, Any]]:
-        """Evaluates the Flock's performance against a dataset asynchronously."""
-        effective_use_temporal = (
-            use_temporal
-            if use_temporal is not None
-            else self.flock.enable_temporal
-        )
-        exec_mode = (
-            "Temporal"
-            if effective_use_temporal
-            else ("Parallel Local" if parallel else "Sequential Local")
-        )
-        start_agent_name = (
-            start_agent.name if hasattr(start_agent, "name") else start_agent
-        )
-        logger.info(
-            f"Starting evaluation for agent '{start_agent_name}'. Execution: {exec_mode}, Silent: {silent_mode}"
-        )
-        # --- 1. Normalize Dataset ---
-        try:
-            df = normalize_dataset(dataset)  # Uses helper
-            if df is None or df.empty:
-                raise ValueError(
-                    "Provided dataset is empty or could not be normalized."
-                )
-            logger.info(f"Normalized dataset with {len(df)} items.")
-        except Exception as e:
-            logger.error(
-                f"Failed to load or normalize dataset: {e}", exc_info=True
-            )
-            raise ValueError(f"Dataset processing failed: {e}") from e
-        # --- 2. Prepare Batch Items ---
-        batch_items = []
-        required_input_cols = list(input_mapping.keys())
-        required_answer_cols = list(answer_mapping.values())
-        required_metadata_cols = metadata_columns or []
-        all_required_cols = set(
-            required_input_cols + required_answer_cols + required_metadata_cols
-        )
-        missing_cols = all_required_cols - set(df.columns)
-        if missing_cols:
-            raise ValueError(
-                f"Dataset missing required columns: {', '.join(missing_cols)}"
-            )
-        for index, row in df.iterrows():
-            agent_input = {
-                agent_key: row[df_col]
-                for df_col, agent_key in input_mapping.items()
-            }
-            expected_answers = {
-                agent_out_key: row[answer_col]
-                for agent_out_key, answer_col in answer_mapping.items()
-            }
-            metadata = {col: row[col] for col in required_metadata_cols}
-            batch_items.append(
-                {
-                    "_original_index": index,  # Store original DF index
-                    "_agent_input": agent_input,
-                    "_expected_answers": expected_answers,
-                    "_metadata": metadata,
-                }
-            )
-        if not batch_items:
-            logger.warning("No items prepared for evaluation.")
-            return pd.DataFrame() if return_dataframe else []
-        # --- 3. Execute Workers ---
-        results_dict = {}  # Store results keyed by original index
-        tasks = []
-        semaphore = asyncio.Semaphore(
-            max_workers if parallel and not effective_use_temporal else 1
-        )
-        # --- Worker Function ---
-        async def evaluate_worker(item_index: int, item_data: dict[str, Any]):
-            nonlocal results_dict
-            original_index = item_data["_original_index"]
-            item_result_details = {
-                "index": original_index,  # Use original index in result
-                "inputs": item_data["_agent_input"],
-                "expected_answers": item_data["_expected_answers"],
-                "agent_output": None,
-                "metrics": {},
-                "error": None,
-                **(item_data["_metadata"]),  # Include pass-through metadata
-            }
-            agent_inputs_with_static = {
-                **(static_inputs or {}),
-                **item_data["_agent_input"],
-            }
-            async with semaphore:  # Acquire semaphore
-                run_desc = f"Evaluation item (original index: {original_index})"
-                logger.debug(f"{run_desc} starting.")
-                try:
-                    # Run the agent/flock for this item
-                    agent_output = await self.flock.run_async(
-                        agent=start_agent,  # Name or instance
-                        input=agent_inputs_with_static,
-                        box_result=True,  # Use Box for easier access via dot notation
-                        # context=... # Assuming isolated context for now
-                    )
-                    item_result_details["agent_output"] = (
-                        agent_output  # Store Box or dict
-                    )
-                    # Extract predicted values based on answer_mapping
-                    predicted_answers = {}
-                    for agent_out_key in answer_mapping:
-                        # Use helper to handle dot notation
-                        predicted_answers[agent_out_key] = (
-                            extract_value_by_dot_notation(
-                                agent_output, agent_out_key
-                            )
-                        )
-                    # Calculate metrics using helper
-                    item_result_details["metrics"] = (
-                        calculate_evaluation_metrics(
-                            metrics=metrics,
-                            metric_configs=metric_configs or {},
-                            predicted_answers=predicted_answers,
-                            expected_answers=item_data["_expected_answers"],
-                            agent_inputs=agent_inputs_with_static,  # Pass context if needed
-                            agent_output=agent_output,  # Pass context if needed
-                        )
-                    )
-                    logger.debug(f"{run_desc} finished successfully.")
-                except Exception as e:
-                    logger.warning(
-                        f"Error processing item {original_index}: {e}"
-                    )
-                    item_result_details["error"] = str(e)
-                    if error_handling == "raise":
-                        raise  # Re-raise to stop processing (if parallel, stops gather)
-                    elif error_handling == "skip":
-                        item_result_details["_skip"] = (
-                            True  # Mark for filtering
-                        )
-                # Store result associated with original index
-                results_dict[original_index] = item_result_details
-                # Update progress bar if applicable (inside the worker is okay)
-                if progress_context:
-                    progress.update(progress_task_id, advance=1)
-        # --- Setup Progress Bar if Silent ---
-        progress_context = None
-        progress_task_id = None
-        if silent_mode:
-            from rich.progress import (
-                BarColumn,
-                Progress,
-                SpinnerColumn,
-                TextColumn,
-                TimeElapsedColumn,
-            )
-            progress = Progress(
-                SpinnerColumn(),
-                TextColumn("[progress.description]{task.description}"),
-                BarColumn(),
-                TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
-                TextColumn("({task.completed}/{task.total})"),
-                TimeElapsedColumn(),
-            )
-            progress_context = progress
-            progress_task_id = progress.add_task(
-                f"Evaluating {len(batch_items)} items...",
-                total=len(batch_items),
-            )
-            progress.start()
-        # --- Execute Tasks ---
-        try:
-            if effective_use_temporal:
-                # TODO: Implement parallel Temporal evaluation
-                logger.info(
-                    "Running evaluation using Temporal (executing sequentially for now)..."
-                )
-                for i, item_data in enumerate(batch_items):
-                    await evaluate_worker(i, item_data)  # Pass sequential index
-            elif parallel:
-                logger.info(
-                    f"Running evaluation in parallel with max_workers={max_workers}..."
-                )
-                for i, item_data in enumerate(batch_items):
-                    # Pass sequential index i, and the item_data which contains original_index
-                    tasks.append(
-                        asyncio.create_task(evaluate_worker(i, item_data))
-                    )
-                await asyncio.gather(*tasks)
-            else:  # Sequential Local
-                logger.info("Running evaluation sequentially...")
-                for i, item_data in enumerate(batch_items):
-                    await evaluate_worker(i, item_data)
-            logger.info("Evaluation execution finished.")
-        except Exception as batch_error:
-            logger.error(
-                f"Evaluation stopped due to an error in one of the items: {batch_error}"
-            )
-            if (
-                not error_handling == "skip"
-            ):  # If skipping, we continue; otherwise, re-raise if required
-                if error_handling == "raise":
-                    raise
-        finally:
-            if progress_context:
-                progress.stop()
-        # --- 4. Process Results ---
-        # Reconstruct results list based on original order and filtering
-        final_results_list = []
-        for idx in df.index:  # Iterate through original DataFrame index
-            res = results_dict.get(idx)
-            if res:
-                if error_handling == "skip" and res.get("_skip"):
-                    continue  # Skip items marked for skipping
-                # Remove internal skip flag if present
-                res.pop("_skip", None)
-                final_results_list.append(res)
-        # Calculate aggregate summary using helper
-        summary = aggregate_results(final_results_list)
-        logger.info(
-            "Evaluation Summary:", extra=summary
-        )  # Log summary automatically
-        # --- 5. Save and Return ---
-        if output_file:
-            output_path = Path(output_file)
-            output_path.parent.mkdir(parents=True, exist_ok=True)
-            try:
-                results_df = pd.DataFrame(final_results_list)
-                # Handle complex objects before saving
-                if "agent_output" in results_df.columns:
-                    results_df["agent_output"] = results_df[
-                        "agent_output"
-                    ].apply(lambda x: x.to_dict() if isinstance(x, Box) else x)
-                if (
-                    "expected_answers" in results_df.columns
-                ):  # Flatten dicts for CSV
-                    results_df = pd.concat(
-                        [
-                            results_df.drop(["expected_answers"], axis=1),
-                            pd.json_normalize(
-                                results_df["expected_answers"]
-                            ).add_prefix("expected_"),
-                        ],
-                        axis=1,
-                    )
-                if "metrics" in results_df.columns:  # Flatten dicts for CSV
-                    results_df = pd.concat(
-                        [
-                            results_df.drop(["metrics"], axis=1),
-                            pd.json_normalize(results_df["metrics"]).add_prefix(
-                                "metric_"
-                            ),
-                        ],
-                        axis=1,
-                    )
-                if "inputs" in results_df.columns:  # Flatten dicts for CSV
-                    results_df = pd.concat(
-                        [
-                            results_df.drop(["inputs"], axis=1),
-                            pd.json_normalize(results_df["inputs"]).add_prefix(
-                                "input_"
-                            ),
-                        ],
-                        axis=1,
-                    )
-                # Convert lists/dicts in metadata columns for CSV saving
-                for col in metadata_columns or []:
-                    if col in results_df.columns:
-                        # Check if column contains lists/dicts before converting
-                        if (
-                            results_df[col]
-                            .apply(lambda x: isinstance(x, (list, dict)))
-                            .any()
-                        ):
-                            results_df[col] = results_df[col].apply(json.dumps)
-                if output_path.suffix.lower() == ".csv":
-                    results_df.to_csv(output_path, index=False)
-                elif output_path.suffix.lower() == ".json":
-                    # Save list of dicts directly (before potential DataFrame manipulation)
-                    # Need to handle non-serializable types like Box
-                    serializable_results = []
-                    for res_dict in final_results_list:
-                        if "agent_output" in res_dict and isinstance(
-                            res_dict["agent_output"], Box
-                        ):
-                            res_dict["agent_output"] = res_dict[
-                                "agent_output"
-                            ].to_dict()
-                        serializable_results.append(res_dict)
-                    with open(output_path, "w", encoding="utf-8") as f:
-                        json.dump(
-                            serializable_results, f, indent=2, default=str
-                        )  # Use default=str for safety
-                else:
-                    logger.warning(
-                        f"Unsupported output file format: {output_path.suffix}. Use .csv or .json."
-                    )
-                logger.info(
-                    f"Detailed evaluation results saved to {output_path}"
-                )
-            except Exception as e:
-                logger.error(
-                    f"Failed to save evaluation results to {output_file}: {e}",
-                    exc_info=True,
-                )
-        if return_dataframe:
-            if not PANDAS_AVAILABLE:
-                logger.error(
-                    "Cannot return DataFrame: pandas library not installed."
-                )
-                return final_results_list  # Fallback to list
-            # Ensure DataFrame is created if not done for saving
-            if "results_df" not in locals():
-                results_df = pd.DataFrame(final_results_list)
-                # Convert Box if needed
-                if "agent_output" in results_df.columns:
-                    results_df["agent_output"] = results_df[
-                        "agent_output"
-                    ].apply(lambda x: x.to_dict() if isinstance(x, Box) else x)
-            return results_df
-        else:
-            # Ensure Box objects are converted if returning list
-            final_list = []
-            for res_dict in final_results_list:
-                if "agent_output" in res_dict and isinstance(
-                    res_dict["agent_output"], Box
-                ):
-                    res_dict["agent_output"] = res_dict[
-                        "agent_output"
-                    ].to_dict()
-                final_list.append(res_dict)
-            return final_list

flock/core/execution/local_executor.py DELETED Viewed

@@ -1,31 +0,0 @@
-# src/flock/core/execution/local_executor.py
-from flock.core.context.context import FlockContext
-from flock.core.logging.logging import get_logger
-from flock.workflow.activities import (
-    run_agent,  # This should be the local activity function
-)
-logger = get_logger("flock")
-async def run_local_workflow(
-    context: FlockContext, box_result: bool = True
-) -> dict:
-    """Execute the agent workflow locally (for debugging).
-    Args:
-        context: The FlockContext instance with state and history.
-        output_formatter: Formatter options for displaying results.
-        box_result: If True, wraps the result in a Box for nicer display.
-    Returns:
-        A dictionary containing the workflow result.
-    """
-    logger.info("Running local debug workflow")
-    result = await run_agent(context)
-    if box_result:
-        from box import Box
-        logger.debug("Boxing result")
-        return Box(result)
-    return result

flock/core/execution/opik_executor.py DELETED Viewed

@@ -1,103 +0,0 @@
-# src/flock/core/execution/evaluation_processor.py
-"""Contains the EvaluationProcessor class responsible for evaluating Flock agents
-against datasets using various metrics.
-"""
-from pathlib import Path
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Union,
-)
-from opik import Opik
-from pandas import DataFrame
-# Conditional pandas import
-try:
-    import pandas as pd
-    PANDAS_AVAILABLE = True
-except ImportError:
-    pd = None  # type: ignore
-    PANDAS_AVAILABLE = False
-# Box for results
-from datasets import Dataset as HFDataset
-from flock.core.evaluation.utils import (
-    normalize_dataset,
-    # Import metric calculation/aggregation helpers
-)
-# Flock core imports
-from flock.core.logging.logging import get_logger
-if TYPE_CHECKING:
-    from flock.core.flock import Flock
-    from flock.core.flock_agent import FlockAgent
-    # Conditional types
-logger = get_logger("execution.opik")
-class OpikExecutor:
-    """Handles the evaluation of Flock agents against datasets."""
-    def __init__(self, flock_instance: "Flock"):
-        """Initializes the EvaluationProcessor.
-        Args:
-            flock_instance: The Flock instance this processor will use.
-        """
-        self.flock = flock_instance
-    async def evaluate_with_opik(
-        self,
-        dataset: str | Path | list[dict[str, Any]] | DataFrame | HFDataset,
-        start_agent: Union["FlockAgent", str],
-        input_mapping: dict[str, str],
-        answer_mapping: dict[str, str],) -> DataFrame | list[dict[str, Any]]:
-        """Evaluates the Flock's performance against a dataset asynchronously."""
-        logger.info(f"Evaluating Flock's performance against dataset: {dataset}")
-        # Evaluation task
-        def evaluation_task(dataset_item):
-          flock_result = self.flock.run(agent=start_agent, input=dataset_item, box_result=False)
-          result = {
-              "input": dataset_item.get("test"),
-              "output": flock_result.get("answer"),
-              "context": ["placeholder string"]
-          }
-          return result
-        start_agent_name = (
-            start_agent.name if hasattr(start_agent, "name") else start_agent
-        )
-        dataset_name = str(dataset)
-        # --- 1. Normalize Dataset ---
-        try:
-            df = normalize_dataset(dataset)  # Uses helper
-            if df is None or df.empty:
-                raise ValueError(
-                    "Provided dataset is empty or could not be normalized."
-                )
-            logger.info(f"Normalized dataset with {len(df)} items.")
-        except Exception as e:
-            logger.error(
-                f"Failed to load or normalize dataset: {e}", exc_info=True
-            )
-            raise ValueError(f"Dataset processing failed: {e}") from e
-        logger.info(f"type(df): {type(df)}")        # ➜ <class 'pandas.core.frame.DataFrame'>
-        logger.info(f"df.shape: {df.shape}")        # e.g. (123456, N_COLUMNS+2)
-        logger.info(f"df['split'].value_counts(): {df['split'].value_counts()}")
-        logger.info(f"df['config'].unique(): {df['config'].unique()}")
-        client = Opik()
-        dataset = client.get_or_create_dataset(name=dataset_name)
-        dataset.insert_from_pandas(dataframe=df, ignore_keys=["source"])
-        logger.info(f"Imported dataset to Opik")

flock-core 0.5.0b28__py3-none-any.whl → 0.5.56b0__py3-none-any.whl

Potentially problematic release.

flock-core 0.5.0b28__py3-none-any.whl → 0.5.56b0__py3-none-any.whl