flock-core 0.4.0b4__py3-none-any.whl → 0.4.0b5__py3-none-any.whl
This diff compares publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of flock-core might be problematic.
- flock/__init__.py +12 -0
- flock/cli/config.py +8 -0
- flock/cli/constants.py +11 -0
- flock/cli/create_flock.py +18 -6
- flock/cli/execute_flock.py +397 -1
- flock/cli/loaded_flock_cli.py +19 -4
- flock/cli/runner.py +41 -0
- flock/config.py +5 -0
- flock/core/api/endpoints.py +102 -2
- flock/core/api/main.py +214 -0
- flock/core/api/models.py +63 -0
- flock/core/api/run_store.py +153 -1
- flock/core/api/runner.py +38 -0
- flock/core/context/context_vars.py +1 -0
- flock/core/evaluation/utils.py +312 -0
- flock/core/execution/batch_executor.py +325 -0
- flock/core/execution/evaluation_executor.py +438 -0
- flock/core/flock.py +325 -1152
- flock/core/serialization/flock_serializer.py +717 -0
- flock/core/tools/azure_tools.py +2 -1
- flock/core/tools/basic_tools.py +1 -1
- flock/core/util/loader.py +59 -0
- flock/modules/output/output_module.py +43 -8
- {flock_core-0.4.0b4.dist-info → flock_core-0.4.0b5.dist-info}/METADATA +4 -1
- {flock_core-0.4.0b4.dist-info → flock_core-0.4.0b5.dist-info}/RECORD +28 -20
- {flock_core-0.4.0b4.dist-info → flock_core-0.4.0b5.dist-info}/WHEEL +0 -0
- {flock_core-0.4.0b4.dist-info → flock_core-0.4.0b5.dist-info}/entry_points.txt +0 -0
- {flock_core-0.4.0b4.dist-info → flock_core-0.4.0b5.dist-info}/licenses/LICENSE +0 -0
flock/core/api/endpoints.py
CHANGED
@@ -18,7 +18,12 @@ from fastapi.responses import HTMLResponse
 from flock.core.logging.logging import get_logger
 
 # Import models and UI utils
-from .models import
+from .models import (
+    FlockAPIRequest,
+    FlockAPIResponse,
+    FlockBatchRequest,
+    FlockBatchResponse,
+)
 
 # Import UI utils - assuming they are now in ui/utils.py
 
@@ -98,6 +103,68 @@ def create_api_router(flock_api: "FlockAPI") -> APIRouter:
             run_store.update_run_status(run_id, "failed", error_msg)
             raise HTTPException(status_code=500, detail=error_msg)
 
+    @router.post("/run/batch", response_model=FlockBatchResponse, tags=["API"])
+    async def run_batch_json(
+        request: FlockBatchRequest, background_tasks: BackgroundTasks
+    ):
+        """Run a batch of inputs through the flock workflow (expects JSON)."""
+        batch_id = None
+        try:
+            # Validate agent exists
+            if request.agent_name not in flock_instance.agents:
+                raise ValueError(f"Agent '{request.agent_name}' not found")
+
+            # Validate batch inputs
+            if (
+                isinstance(request.batch_inputs, list)
+                and not request.batch_inputs
+            ):
+                raise ValueError("Batch inputs list cannot be empty")
+
+            batch_id = str(uuid.uuid4())
+            run_store.create_batch(batch_id)  # Use RunStore
+            response = run_store.get_batch(
+                batch_id
+            )  # Get initial response from store
+
+            # Log batch size for monitoring
+            batch_size = (
+                len(request.batch_inputs)
+                if isinstance(request.batch_inputs, list)
+                else "CSV/DataFrame"
+            )
+            logger.info(
+                f"API request: run batch with '{request.agent_name}' (batch_id: {batch_id})",
+                batch_size=batch_size,
+            )
+
+            # Always run batch processing asynchronously
+            logger.debug(
+                f"Running batch with '{request.agent_name}' asynchronously (batch_id: {batch_id})"
+            )
+            # Call the helper method on the passed FlockAPI instance
+            background_tasks.add_task(
+                flock_api._run_batch,
+                batch_id,
+                request,
+            )
+            run_store.update_batch_status(batch_id, "running")
+            response.status = "running"  # Update local response copy too
+
+            return response
+        except ValueError as ve:
+            error_msg = f"Value error starting batch: {ve}"
+            logger.error(error_msg)
+            if batch_id:
+                run_store.update_batch_status(batch_id, "failed", str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            error_msg = f"Internal server error: {type(e).__name__}: {e!s}"
+            logger.error(error_msg, exc_info=True)
+            if batch_id:
+                run_store.update_batch_status(batch_id, "failed", error_msg)
+            raise HTTPException(status_code=500, detail=error_msg)
+
     @router.get("/run/{run_id}", response_model=FlockAPIResponse, tags=["API"])
     async def get_run_status(run_id: str):
         """Get the status of a specific run."""
@@ -108,6 +175,39 @@ def create_api_router(flock_api: "FlockAPI") -> APIRouter:
             raise HTTPException(status_code=404, detail="Run not found")
         return run_data
 
+    @router.get(
+        "/batch/{batch_id}", response_model=FlockBatchResponse, tags=["API"]
+    )
+    async def get_batch_status(batch_id: str):
+        """Get the status of a specific batch run.
+
+        Returns details including:
+        - Total number of items in the batch
+        - Number of completed items
+        - Percentage of completion
+        - Any partial results available (for running batches)
+        - Complete results (for completed batches)
+        """
+        logger.debug(f"API request: get status for batch_id: {batch_id}")
+        batch_data = run_store.get_batch(batch_id)
+        if not batch_data:
+            logger.warning(f"Batch ID not found: {batch_id}")
+            raise HTTPException(status_code=404, detail="Batch not found")
+
+        # Add useful info for client display
+        extra_info = {
+            "status": batch_data.status,
+            "completed_items": batch_data.completed_items,
+            "total_items": batch_data.total_items,
+            "progress_percentage": round(batch_data.progress_percentage, 1),
+            "has_partial_results": len(batch_data.results) > 0
+            and batch_data.status == "running",
+            "has_error": batch_data.error is not None,
+        }
+        logger.debug(f"Returning batch status: {extra_info}")
+
+        return batch_data
+
     @router.get("/agents", tags=["API"])
     async def list_agents():
         """List all available agents."""
@@ -162,7 +262,7 @@ def create_api_router(flock_api: "FlockAPI") -> APIRouter:
 
         logger.debug(f"Parsed form inputs for UI run: {form_inputs}")
         run_id = str(uuid.uuid4())
-        run_store.create_run(run_id)
+        run_store.create_run(run_id)
         logger.debug(
            f"Running flock '{agent_name}' synchronously from UI (run_id: {run_id})"
        )
flock/core/api/main.py
CHANGED
@@ -8,6 +8,7 @@ from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
 
 # Flock core imports
+from flock.core.api.models import FlockBatchRequest
 from flock.core.flock import Flock
 from flock.core.logging.logging import get_logger
 
@@ -114,6 +115,219 @@ class FlockAPI:
             self.run_store.update_run_status(run_id, "failed", str(e))
             raise  # Re-raise for the endpoint handler
 
+    async def _run_batch(self, batch_id: str, request: "FlockBatchRequest"):
+        """Executes a batch of runs (internal helper)."""
+        try:
+            if request.agent_name not in self.flock.agents:
+                raise ValueError(f"Agent '{request.agent_name}' not found")
+
+            logger.debug(
+                f"Executing batch run starting with '{request.agent_name}' (batch_id: {batch_id})",
+                batch_size=len(request.batch_inputs)
+                if isinstance(request.batch_inputs, list)
+                else "CSV",
+            )
+
+            # Import the thread pool executor here to avoid circular imports
+            import asyncio
+            import threading
+            from concurrent.futures import ThreadPoolExecutor
+
+            # Define a synchronous function to run the batch processing
+            def run_batch_sync():
+                # Use a new event loop for the batch processing
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+                try:
+                    # Set the total number of batch items if possible
+                    batch_size = (
+                        len(request.batch_inputs)
+                        if isinstance(request.batch_inputs, list)
+                        else 0
+                    )
+                    if batch_size > 0:
+                        # Directly call the store method - no need for asyncio here
+                        # since we're already in a separate thread
+                        self.run_store.set_batch_total_items(
+                            batch_id, batch_size
+                        )
+
+                    # Custom progress tracking wrapper
+                    class ProgressTracker:
+                        def __init__(self, store, batch_id, total_size):
+                            self.store = store
+                            self.batch_id = batch_id
+                            self.current_count = 0
+                            self.total_size = total_size
+                            self._lock = threading.Lock()
+                            self.partial_results = []
+
+                        def increment(self, result=None):
+                            with self._lock:
+                                self.current_count += 1
+                                if result is not None:
+                                    # Store partial result
+                                    self.partial_results.append(result)
+
+                                # Directly call the store method - no need for asyncio here
+                                # since we're already in a separate thread
+                                try:
+                                    self.store.update_batch_progress(
+                                        self.batch_id,
+                                        self.current_count,
+                                        self.partial_results,
+                                    )
+                                except Exception as e:
+                                    logger.error(
+                                        f"Error updating progress: {e}"
+                                    )
+                                return self.current_count
+
+                    # Create a progress tracker
+                    progress_tracker = ProgressTracker(
+                        self.run_store, batch_id, batch_size
+                    )
+
+                    # Define a custom worker that reports progress
+                    async def progress_aware_worker(index, item_inputs):
+                        try:
+                            result = await self.flock.run_async(
+                                start_agent=request.agent_name,
+                                input=item_inputs,
+                                box_result=request.box_results,
+                            )
+                            # Report progress after each item
+                            progress_tracker.increment(result)
+                            return result
+                        except Exception as e:
+                            logger.error(
+                                f"Error processing batch item {index}: {e}"
+                            )
+                            progress_tracker.increment(
+                                e if request.return_errors else None
+                            )
+                            if request.return_errors:
+                                return e
+                            return None
+
+                    # Process the batch items with progress tracking
+                    batch_inputs = request.batch_inputs
+                    if isinstance(batch_inputs, list):
+                        # Process list of inputs with progress tracking
+                        tasks = []
+                        for i, item_inputs in enumerate(batch_inputs):
+                            # Combine with static inputs if provided
+                            full_inputs = {
+                                **(request.static_inputs or {}),
+                                **item_inputs,
+                            }
+                            tasks.append(progress_aware_worker(i, full_inputs))
+
+                        # Run all tasks
+                        if request.parallel and request.max_workers > 1:
+                            # Run in parallel with semaphore for max_workers
+                            semaphore = asyncio.Semaphore(request.max_workers)
+
+                            async def bounded_worker(i, inputs):
+                                async with semaphore:
+                                    return await progress_aware_worker(
+                                        i, inputs
+                                    )
+
+                            bounded_tasks = []
+                            for i, item_inputs in enumerate(batch_inputs):
+                                full_inputs = {
+                                    **(request.static_inputs or {}),
+                                    **item_inputs,
+                                }
+                                bounded_tasks.append(
+                                    bounded_worker(i, full_inputs)
+                                )
+
+                            results = loop.run_until_complete(
+                                asyncio.gather(*bounded_tasks)
+                            )
+                        else:
+                            # Run sequentially
+                            results = []
+                            for i, item_inputs in enumerate(batch_inputs):
+                                full_inputs = {
+                                    **(request.static_inputs or {}),
+                                    **item_inputs,
+                                }
+                                result = loop.run_until_complete(
+                                    progress_aware_worker(i, full_inputs)
+                                )
+                                results.append(result)
+                    else:
+                        # Let the original run_batch_async handle DataFrame or CSV
+                        results = loop.run_until_complete(
+                            self.flock.run_batch_async(
+                                start_agent=request.agent_name,
+                                batch_inputs=request.batch_inputs,
+                                input_mapping=request.input_mapping,
+                                static_inputs=request.static_inputs,
+                                parallel=request.parallel,
+                                max_workers=request.max_workers,
+                                use_temporal=request.use_temporal,
+                                box_results=request.box_results,
+                                return_errors=request.return_errors,
+                                silent_mode=request.silent_mode,
+                                write_to_csv=request.write_to_csv,
+                            )
+                        )
+
+                    # Update progress one last time with final count
+                    if results:
+                        progress_tracker.current_count = len(results)
+                        self.run_store.update_batch_progress(
+                            batch_id,
+                            len(results),
+                            results,  # Include all results as partial results
+                        )
+
+                    # Update store with results from this thread
+                    self.run_store.update_batch_result(batch_id, results)
+
+                    logger.info(
+                        f"Batch run completed (batch_id: {batch_id})",
+                        num_results=len(results),
+                    )
+                    return results
+                except Exception as e:
+                    logger.error(
+                        f"Error in batch run {batch_id} (started with '{request.agent_name}'): {e!s}",
+                        exc_info=True,
+                    )
+                    # Update store status
+                    self.run_store.update_batch_status(
+                        batch_id, "failed", str(e)
+                    )
+                    return None
+                finally:
+                    loop.close()
+
+            # Run the batch processing in a thread pool
+            try:
+                loop = asyncio.get_running_loop()
+                with ThreadPoolExecutor() as pool:
+                    await loop.run_in_executor(pool, run_batch_sync)
+            except Exception as e:
+                error_msg = f"Error running batch in thread pool: {e!s}"
+                logger.error(error_msg, exc_info=True)
+                self.run_store.update_batch_status(
+                    batch_id, "failed", error_msg
+                )
+
+        except Exception as e:
+            logger.error(
+                f"Error setting up batch run {batch_id} (started with '{request.agent_name}'): {e!s}",
+                exc_info=True,
+            )
+            # Update store status
+            self.run_store.update_batch_status(batch_id, "failed", str(e))
+            raise  # Re-raise for the endpoint handler
+
     # --- UI Helper Methods (kept here as they are called by endpoints via self) ---
 
     def _parse_input_spec(self, input_spec: str) -> list[dict[str, str]]:
flock/core/api/models.py
CHANGED
@@ -32,3 +32,66 @@ class FlockAPIResponse(BaseModel):
         None, description="When the run completed"
     )
     error: str | None = Field(None, description="Error message if failed")
+
+
+class FlockBatchRequest(BaseModel):
+    """Request model for batch processing via JSON API."""
+
+    agent_name: str = Field(..., description="Name of the agent to run")
+    batch_inputs: list[dict[str, Any]] | str = Field(
+        ..., description="List of input dictionaries or path to CSV file"
+    )
+    input_mapping: dict[str, str] | None = Field(
+        None, description="Maps DataFrame/CSV column names to agent input keys"
+    )
+    static_inputs: dict[str, Any] | None = Field(
+        None, description="Inputs constant across all batch runs"
+    )
+    parallel: bool = Field(
+        default=True, description="Whether to run jobs in parallel"
+    )
+    max_workers: int = Field(
+        default=5, description="Max concurrent workers for parallel runs"
+    )
+    use_temporal: bool | None = Field(
+        None, description="Override Flock's enable_temporal setting"
+    )
+    box_results: bool = Field(
+        default=True, description="Wrap results in Box objects"
+    )
+    return_errors: bool = Field(
+        default=False, description="Return Exception objects for failed runs"
+    )
+    silent_mode: bool = Field(
+        default=True, description="Suppress output and show progress bar"
+    )
+    write_to_csv: str | None = Field(
+        None, description="Path to save results as CSV file"
+    )
+
+
+class FlockBatchResponse(BaseModel):
+    """Response model for batch processing requests."""
+
+    batch_id: str = Field(..., description="Unique ID for this batch run")
+    status: str = Field(..., description="Status of the batch run")
+    results: list[Any] = Field(
+        default_factory=list,
+        description="List of results from batch processing",
+    )
+    started_at: datetime = Field(..., description="When the batch run started")
+    completed_at: datetime | None = Field(
+        None, description="When the batch run completed"
+    )
+    error: str | None = Field(None, description="Error message if failed")
+
+    # Additional fields for batch progress tracking
+    total_items: int = Field(
+        0, description="Total number of items in the batch"
+    )
+    completed_items: int = Field(
+        0, description="Number of completed items in the batch"
+    )
+    progress_percentage: float = Field(
+        0.0, description="Percentage of completion (0-100)"
+    )
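
To make these shapes concrete, a small sketch that instantiates both models; the agent name, input keys, and field values are invented for illustration.

from datetime import datetime

from flock.core.api.models import FlockBatchRequest, FlockBatchResponse

# Hypothetical payload; "summarizer" and the input keys are made up.
payload = {
    "agent_name": "summarizer",
    "batch_inputs": [{"text": "first doc"}, {"text": "second doc"}],
    "static_inputs": {"language": "en"},  # merged into every item's inputs
    "max_workers": 3,
}
req = FlockBatchRequest(**payload)
assert req.parallel and req.box_results  # defaults from the model

# A response mid-flight: 1 of 2 items done.
resp = FlockBatchResponse(
    batch_id="0f1e...",  # placeholder ID
    status="running",
    started_at=datetime.now(),
    total_items=2,
    completed_items=1,
    progress_percentage=50.0,
)
print(f"{resp.completed_items}/{resp.total_items} ({resp.progress_percentage:.0f}%)")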
flock/core/api/run_store.py
CHANGED
@@ -3,10 +3,14 @@
 
 import threading
 from datetime import datetime
+from typing import Any
 
 from flock.core.logging.logging import get_logger
 
-from .models import
+from .models import (  # Import from the models file
+    FlockAPIResponse,
+    FlockBatchResponse,
+)
 
 logger = get_logger("api.run_store")
 
@@ -16,6 +20,7 @@ class RunStore:
 
     def __init__(self):
         self._runs: dict[str, FlockAPIResponse] = {}
+        self._batches: dict[str, FlockBatchResponse] = {}
         self._lock = threading.Lock()  # Basic lock for thread safety
 
     def create_run(self, run_id: str) -> FlockAPIResponse:
@@ -69,4 +74,151 @@ class RunStore:
                 f"Attempted to update result for non-existent run_id: {run_id}"
             )
 
+    def create_batch(self, batch_id: str) -> FlockBatchResponse:
+        """Creates a new batch record with 'starting' status."""
+        with self._lock:
+            if batch_id in self._batches:
+                logger.warning(
+                    f"Batch ID {batch_id} already exists. Overwriting."
+                )
+            response = FlockBatchResponse(
+                batch_id=batch_id,
+                status="starting",
+                results=[],
+                started_at=datetime.now(),
+                total_items=0,
+                completed_items=0,
+                progress_percentage=0.0,
+            )
+            self._batches[batch_id] = response
+            logger.debug(f"Created batch record for batch_id: {batch_id}")
+            return response
+
+    def get_batch(self, batch_id: str) -> FlockBatchResponse | None:
+        """Gets the status of a batch run."""
+        with self._lock:
+            return self._batches.get(batch_id)
+
+    def update_batch_status(
+        self, batch_id: str, status: str, error: str | None = None
+    ):
+        """Updates the status and potentially error of a batch run."""
+        with self._lock:
+            if batch_id in self._batches:
+                self._batches[batch_id].status = status
+                if error:
+                    self._batches[batch_id].error = error
+                if status in ["completed", "failed"]:
+                    self._batches[batch_id].completed_at = datetime.now()
+                    # When completed, ensure progress is 100%
+                    if (
+                        status == "completed"
+                        and self._batches[batch_id].total_items > 0
+                    ):
+                        self._batches[batch_id].completed_items = self._batches[
+                            batch_id
+                        ].total_items
+                        self._batches[batch_id].progress_percentage = 100.0
+                logger.debug(
+                    f"Updated status for batch_id {batch_id} to {status}"
+                )
+            else:
+                logger.warning(
+                    f"Attempted to update status for non-existent batch_id: {batch_id}"
+                )
+
+    def update_batch_result(self, batch_id: str, results: list[Any]):
+        """Updates the results of a completed batch run."""
+        with self._lock:
+            if batch_id in self._batches:
+                # Ensure results are serializable
+                final_results = [
+                    dict(r) if hasattr(r, "to_dict") else r for r in results
+                ]
+                self._batches[batch_id].results = final_results
+                self._batches[batch_id].status = "completed"
+                self._batches[batch_id].completed_at = datetime.now()
+
+                # Update progress tracking
+                self._batches[batch_id].completed_items = len(final_results)
+                self._batches[batch_id].total_items = len(final_results)
+                self._batches[batch_id].progress_percentage = 100.0
+
+                logger.debug(
+                    f"Updated results for completed batch_id: {batch_id}"
+                )
+            else:
+                logger.warning(
+                    f"Attempted to update results for non-existent batch_id: {batch_id}"
+                )
+
+    def set_batch_total_items(self, batch_id: str, total_items: int):
+        """Sets the total number of items in a batch."""
+        try:
+            with self._lock:
+                if batch_id in self._batches:
+                    self._batches[batch_id].total_items = total_items
+                    # Recalculate percentage
+                    if total_items > 0:
+                        self._batches[batch_id].progress_percentage = (
+                            self._batches[batch_id].completed_items
+                            / total_items
+                            * 100.0
+                        )
+                    logger.debug(
+                        f"Set total_items for batch_id {batch_id} to {total_items}"
+                    )
+                else:
+                    logger.warning(
+                        f"Attempted to set total_items for non-existent batch_id: {batch_id}"
+                    )
+        except Exception as e:
+            logger.error(f"Error setting batch total items: {e}", exc_info=True)
+
+    def update_batch_progress(
+        self,
+        batch_id: str,
+        completed_items: int,
+        partial_results: list[Any] = None,
+    ):
+        """Updates the progress of a batch run and optionally adds partial results.
+
+        Args:
+            batch_id: The ID of the batch to update
+            completed_items: The number of items that have been completed
+            partial_results: Optional list of results for completed items to add to the batch
+        """
+        try:
+            with self._lock:
+                if batch_id in self._batches:
+                    self._batches[batch_id].completed_items = completed_items
+
+                    # Calculate percentage if we have a total
+                    if self._batches[batch_id].total_items > 0:
+                        self._batches[batch_id].progress_percentage = (
+                            completed_items
+                            / self._batches[batch_id].total_items
+                            * 100.0
+                        )
+
+                    # Add partial results if provided
+                    if partial_results:
+                        # Ensure results are serializable
+                        final_results = [
+                            dict(r) if hasattr(r, "to_dict") else r
+                            for r in partial_results
+                        ]
+                        self._batches[batch_id].results = final_results
+
+                    logger.debug(
+                        f"Updated progress for batch_id {batch_id}: {completed_items}/{self._batches[batch_id].total_items} "
+                        f"({self._batches[batch_id].progress_percentage:.1f}%)"
+                    )
+                else:
+                    logger.warning(
+                        f"Attempted to update progress for non-existent batch_id: {batch_id}"
+                    )
+        except Exception as e:
+            logger.error(f"Error updating batch progress: {e}", exc_info=True)
+
 # Add methods for cleanup, persistence, etc. later
flock/core/api/runner.py
ADDED
@@ -0,0 +1,38 @@
+# src/flock/api/runner.py
+"""Provides functionality to start the Flock API server."""
+
+from typing import TYPE_CHECKING
+
+from flock.core.logging.logging import get_logger
+
+if TYPE_CHECKING:
+    from flock.core.flock import Flock
+
+logger = get_logger("api.runner")
+
+
+def start_flock_api(
+    flock: "Flock",
+    host: str = "127.0.0.1",
+    port: int = 8344,
+    server_name: str = "Flock API",
+    create_ui: bool = False,
+) -> None:
+    """Start a REST API server for the given Flock instance."""
+    try:
+        # Import API class locally to avoid making it a hard dependency for core flock
+        from flock.core.api import FlockAPI
+    except ImportError:
+        logger.error(
+            "API components not found. Cannot start API. "
+            "Ensure 'fastapi' and 'uvicorn' are installed."
+        )
+        return
+
+    logger.info(
+        f"Preparing to start API server for Flock '{flock.name}' on {host}:{port} {'with UI' if create_ui else 'without UI'}"
+    )
+    api_instance = FlockAPI(flock)  # Pass the Flock instance to the API
+    api_instance.start(
+        host=host, port=port, server_name=server_name, create_ui=create_ui
+    )