orchestrator-core 4.6.5__py3-none-any.whl → 4.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +1 -1
- orchestrator/api/api_v1/api.py +4 -0
- orchestrator/api/api_v1/endpoints/processes.py +25 -9
- orchestrator/api/api_v1/endpoints/schedules.py +44 -0
- orchestrator/app.py +34 -1
- orchestrator/cli/scheduler.py +126 -11
- orchestrator/cli/search/resize_embedding.py +3 -0
- orchestrator/db/models.py +26 -0
- orchestrator/graphql/schemas/process.py +2 -2
- orchestrator/graphql/schemas/workflow.py +1 -1
- orchestrator/llm_settings.py +0 -1
- orchestrator/migrations/versions/schema/2020-10-19_a76b9185b334_add_generic_workflows_to_core.py +1 -0
- orchestrator/migrations/versions/schema/2021-04-06_3c8b9185c221_add_validate_products_task.py +1 -0
- orchestrator/migrations/versions/schema/2025-11-18_961eddbd4c13_create_linker_table_workflow_apscheduler.py +106 -0
- orchestrator/migrations/versions/schema/2025-12-10_9736496e3eba_set_is_task_true_on_certain_tasks.py +40 -0
- orchestrator/schedules/__init__.py +8 -7
- orchestrator/schedules/scheduler.py +27 -1
- orchestrator/schedules/scheduling.py +5 -1
- orchestrator/schedules/service.py +253 -0
- orchestrator/schemas/schedules.py +71 -0
- orchestrator/search/agent/prompts.py +10 -6
- orchestrator/search/agent/tools.py +55 -15
- orchestrator/search/aggregations/base.py +6 -2
- orchestrator/search/query/builder.py +75 -3
- orchestrator/search/query/mixins.py +57 -2
- orchestrator/search/query/queries.py +15 -1
- orchestrator/search/query/validation.py +43 -0
- orchestrator/services/processes.py +0 -7
- orchestrator/services/workflows.py +4 -0
- orchestrator/settings.py +48 -0
- orchestrator/utils/auth.py +2 -2
- orchestrator/websocket/__init__.py +14 -0
- orchestrator/workflow.py +1 -1
- orchestrator/workflows/__init__.py +1 -0
- orchestrator/workflows/modify_note.py +10 -1
- orchestrator/workflows/removed_workflow.py +8 -1
- orchestrator/workflows/tasks/cleanup_tasks_log.py +9 -2
- orchestrator/workflows/tasks/resume_workflows.py +4 -0
- orchestrator/workflows/tasks/validate_product_type.py +7 -1
- orchestrator/workflows/tasks/validate_products.py +9 -1
- orchestrator/{schedules → workflows/tasks}/validate_subscriptions.py +16 -3
- orchestrator/workflows/translations/en-GB.json +2 -1
- {orchestrator_core-4.6.5.dist-info → orchestrator_core-4.7.0.dist-info}/METADATA +11 -11
- {orchestrator_core-4.6.5.dist-info → orchestrator_core-4.7.0.dist-info}/RECORD +46 -43
- orchestrator/schedules/resume_workflows.py +0 -21
- orchestrator/schedules/task_vacuum.py +0 -21
- {orchestrator_core-4.6.5.dist-info → orchestrator_core-4.7.0.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.6.5.dist-info → orchestrator_core-4.7.0.dist-info}/licenses/LICENSE +0 -0
@@ -11,7 +11,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-
 from contextlib import contextmanager
 from datetime import datetime
 from typing import Any, Generator

@@ -27,6 +26,7 @@ from orchestrator.db.filters import Filter
 from orchestrator.db.filters.filters import CallableErrorHandler
 from orchestrator.db.sorting import Sort
 from orchestrator.db.sorting.sorting import SortOrder
+from orchestrator.schedules.service import get_linker_entries_by_schedule_ids
 from orchestrator.utils.helpers import camel_to_snake, to_camel

 executors = {

@@ -75,6 +75,7 @@ def get_scheduler(paused: bool = False) -> Generator[BackgroundScheduler, Any, N

 class ScheduledTask(BaseModel):
     id: str
+    workflow_id: str | None = None
     name: str | None = None
     next_run_time: datetime | None = None
     trigger: str

@@ -161,6 +162,29 @@ def default_error_handler(message: str, **context) -> None: # type: ignore
     raise ValueError(f"{message} {_format_context(context)}")


+def enrich_with_workflow_id(scheduled_tasks: list[ScheduledTask]) -> list[ScheduledTask]:
+    """Does a get call to the linker table to get the workflow_id for each scheduled task.
+
+    Returns all the scheduled tasks with the workflow_id added.
+    """
+    schedule_ids = [task.id for task in scheduled_tasks]
+
+    entries = {
+        str(entry.schedule_id): str(entry.workflow_id) for entry in get_linker_entries_by_schedule_ids(schedule_ids)
+    }
+
+    return [
+        ScheduledTask(
+            id=task.id,
+            workflow_id=entries.get(task.id, None),
+            name=task.name,
+            next_run_time=task.next_run_time,
+            trigger=str(task.trigger),
+        )
+        for task in scheduled_tasks
+    ]
+
+
 def get_scheduler_tasks(
     first: int = 10,
     after: int = 0,

@@ -171,6 +195,7 @@ def get_scheduler_tasks(
     scheduled_tasks = get_all_scheduler_tasks()
     scheduled_tasks = filter_scheduled_tasks(scheduled_tasks, error_handler, filter_by)
     scheduled_tasks = sort_scheduled_tasks(scheduled_tasks, error_handler, sort_by)
+    scheduled_tasks = enrich_with_workflow_id(scheduled_tasks)

     total = len(scheduled_tasks)
     paginated_tasks = scheduled_tasks[after : after + first + 1]

@@ -178,6 +203,7 @@
     return [
         ScheduledTask(
             id=task.id,
+            workflow_id=task.workflow_id,
             name=task.name,
             next_run_time=task.next_run_time,
             trigger=str(task.trigger),
@@ -23,7 +23,11 @@ F = TypeVar("F", bound=Callable[..., object])


 @deprecated(
-    reason=
+    reason=(
+        "Scheduling tasks with a decorator is deprecated in favor of using the API. "
+        "This decorator will be removed in 5.0.0. "
+        "For more details, please consult https://workfloworchestrator.org/orchestrator-core/guides/upgrading/4.7/"
+    )
 )
 def scheduler(
     name: str,
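The deprecation above steers users away from registering schedules with the `@scheduler(...)` decorator and toward the API-driven scheduling added in this release. A minimal sketch of the replacement flow, built only from the queue service and payload model introduced later in this diff; the workflow name and UUID are placeholders, and the new HTTP endpoints in `orchestrator/api/api_v1/endpoints/schedules.py` are not shown here:

```python
# Sketch: schedule a workflow through the new queue-based service instead of the
# deprecated @scheduler decorator. Placeholder values are marked as such.
from uuid import UUID

from orchestrator.schedules.service import add_scheduled_task_to_queue
from orchestrator.schemas.schedules import APSchedulerJobCreate

job = APSchedulerJobCreate(
    name="Nightly cleanup",                                     # optional display name
    workflow_name="task_clean_up_tasks",                        # placeholder workflow name
    workflow_id=UUID("00000000-0000-0000-0000-000000000000"),   # placeholder workflow UUID
    trigger="interval",
    trigger_kwargs={"hours": 12},                               # forwarded to APScheduler's IntervalTrigger
)

# Serializes the payload and LPUSHes it onto the Redis list "scheduler:queue:"; the
# scheduler process consumes it and registers the job plus a linker-table entry.
add_scheduled_task_to_queue(job)
```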
@@ -0,0 +1,253 @@
+# Copyright 2019-2025 SURF.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import logging
+from uuid import UUID, uuid4
+
+from apscheduler.schedulers.base import BaseScheduler
+from apscheduler.triggers.cron import CronTrigger
+from apscheduler.triggers.date import DateTrigger
+from apscheduler.triggers.interval import IntervalTrigger
+from sqlalchemy import delete
+
+from orchestrator import app_settings
+from orchestrator.db import db
+from orchestrator.db.models import WorkflowApschedulerJob
+from orchestrator.schemas.schedules import (
+    APSchedulerJobCreate,
+    APSchedulerJobDelete,
+    APSchedulerJobs,
+    APSchedulerJobUpdate,
+    APSJobAdapter,
+)
+from orchestrator.services.processes import start_process
+from orchestrator.services.workflows import get_workflow_by_workflow_id
+from orchestrator.utils.redis_client import create_redis_client
+
+redis_connection = create_redis_client(app_settings.CACHE_URI)
+
+SCHEDULER_QUEUE = "scheduler:queue:"
+
+
+logger = logging.getLogger(__name__)
+
+
+def serialize_payload(payload: APSchedulerJobs) -> bytes:
+    """Serialize the payload to bytes for Redis storage.
+
+    Args:
+        payload: APSchedulerJobs The scheduled task payload.
+    """
+    data = json.loads(payload.model_dump_json())
+    data["scheduled_type"] = payload.scheduled_type
+    return json.dumps(data).encode()
+
+
+def deserialize_payload(bytes_dump: bytes) -> APSchedulerJobs:
+    """Deserialize the payload from bytes for Redis retrieval.
+
+    Args:
+        bytes_dump: bytes The serialized payload.
+    """
+    json_dump = bytes_dump.decode()
+    return APSJobAdapter.validate_json(json_dump)
+
+
+def add_scheduled_task_to_queue(payload: APSchedulerJobs) -> None:
+    """Create a scheduled task service function.
+
+    We need to create a apscheduler job, and put the workflow and schedule_id in
+    the linker table workflows_apscheduler_jobs.
+
+    Args:
+        payload: APSchedulerJobCreate The scheduled task to create.
+    """
+    bytes_dump = serialize_payload(payload)
+    redis_connection.lpush(SCHEDULER_QUEUE, bytes_dump)
+    logger.info("Added scheduled task to queue.")
+
+
+def get_linker_entries_by_schedule_ids(schedule_ids: list[str]) -> list[WorkflowApschedulerJob]:
+    """Get linker table entries for multiple schedule IDs in a single query.
+
+    Args:
+        schedule_ids: list[str] — One or many schedule IDs.
+
+    Returns:
+        list[WorkflowApschedulerJob]: All linker table rows matching those IDs.
+    """
+    if not schedule_ids:
+        return []
+
+    return db.session.query(WorkflowApschedulerJob).filter(WorkflowApschedulerJob.schedule_id.in_(schedule_ids)).all()
+
+
+def _add_linker_entry(workflow_id: UUID, schedule_id: str) -> None:
+    """Add an entry to the linker table workflows_apscheduler_jobs.
+
+    Args:
+        workflow_id: UUID The workflow ID.
+        schedule_id: str The schedule ID.
+    """
+    workflows_apscheduler_job = WorkflowApschedulerJob(workflow_id=workflow_id, schedule_id=schedule_id)
+    db.session.add(workflows_apscheduler_job)
+    db.session.commit()
+
+
+def _delete_linker_entry(workflow_id: UUID, schedule_id: str) -> None:
+    """Delete an entry from the linker table workflows_apscheduler_jobs.
+
+    Args:
+        workflow_id: UUID The workflow ID.
+        schedule_id: str The schedule ID.
+    """
+    db.session.execute(
+        delete(WorkflowApschedulerJob).where(
+            WorkflowApschedulerJob.workflow_id == workflow_id, WorkflowApschedulerJob.schedule_id == schedule_id
+        )
+    )
+    db.session.commit()
+
+
+def run_start_workflow_scheduler_task(workflow_name: str) -> None:
+    """Function to start a workflow from the scheduler.
+
+    Args:
+        workflow_name: str The name of the workflow to start.
+    """
+    logger.info(f"Starting workflow: {workflow_name}")
+    start_process(workflow_name)
+
+
+def _add_scheduled_task(payload: APSchedulerJobCreate, scheduler_connection: BaseScheduler) -> None:
+    """Create a new scheduled task in the scheduler and also in the linker table.
+
+    Args:
+        payload: APSchedulerJobCreate The scheduled task to create.
+        scheduler_connection: BaseScheduler The scheduler connection.
+    """
+    logger.info(f"Adding scheduled task: {payload}")
+
+    workflow_description = None
+    # Check if a workflow exists - we cannot schedule a non-existing workflow
+    workflow = get_workflow_by_workflow_id(str(payload.workflow_id))
+    if not workflow:
+        raise ValueError(f"Workflow with id {payload.workflow_id} does not exist.")
+    workflow_description = workflow.description
+
+    # This function is always the same for scheduled tasks, it will run the workflow
+    func = run_start_workflow_scheduler_task
+
+    # Ensure payload has required data
+    if not payload.trigger or not payload.workflow_name or not payload.trigger_kwargs or not payload.workflow_id:
+        raise ValueError("Trigger must be specified for scheduled tasks.")
+
+    schedule_id = str(uuid4())
+    scheduler_connection.add_job(
+        func=func,
+        trigger=payload.trigger,
+        id=schedule_id,
+        name=payload.name or workflow_description,
+        kwargs={"workflow_name": payload.workflow_name},
+        **(payload.trigger_kwargs or {}),
+    )
+
+    _add_linker_entry(workflow_id=payload.workflow_id, schedule_id=schedule_id)
+
+
+def _build_trigger_on_update(
+    trigger_name: str | None, trigger_kwargs: dict
+) -> IntervalTrigger | CronTrigger | DateTrigger | None:
+    if not trigger_name or not trigger_kwargs:
+        logger.info("Skipping building trigger as no trigger information is provided.")
+        return None
+
+    match trigger_name:
+        case "interval":
+            return IntervalTrigger(**trigger_kwargs)
+        case "cron":
+            return CronTrigger(**trigger_kwargs)
+        case "date":
+            return DateTrigger(**trigger_kwargs)
+        case _:
+            raise ValueError(f"Invalid trigger type: {trigger_name}")
+
+
+def _update_scheduled_task(payload: APSchedulerJobUpdate, scheduler_connection: BaseScheduler) -> None:
+    """Update an existing scheduled task in the scheduler.
+
+    Only allow update of name and trigger
+    Job id must be that of an existing job
+    Do not insert in linker table - it should already exist.
+
+    Args:
+        payload: APSchedulerJobUpdate The scheduled task to update.
+        scheduler_connection: BaseScheduler The scheduler connection.
+    """
+    logger.info(f"Updating scheduled task: {payload}")
+
+    schedule_id = str(payload.schedule_id)
+    job = scheduler_connection.get_job(job_id=schedule_id)
+    if not job:
+        raise ValueError(f"Schedule Job with id {schedule_id} does not exist.")
+
+    trigger = _build_trigger_on_update(payload.trigger, payload.trigger_kwargs or {})
+    modify_kwargs = {}
+
+    if trigger:
+        job = job.reschedule(trigger=trigger)
+
+    if payload.name:
+        modify_kwargs["name"] = payload.name
+
+    job.modify(**modify_kwargs)
+
+
+def _delete_scheduled_task(payload: APSchedulerJobDelete, scheduler_connection: BaseScheduler) -> None:
+    """Delete an existing scheduled task in the scheduler and also in the linker table.
+
+    Args:
+        payload: APSchedulerJobDelete The scheduled task to delete.
+        scheduler_connection: BaseScheduler The scheduler connection.
+    """
+    logger.info(f"Deleting scheduled task: {payload}")
+
+    schedule_id = str(payload.schedule_id)
+    scheduler_connection.remove_job(job_id=schedule_id)
+    _delete_linker_entry(workflow_id=payload.workflow_id, schedule_id=schedule_id)
+
+
+def workflow_scheduler_queue(queue_item: tuple[str, bytes], scheduler_connection: BaseScheduler) -> None:
+    """Process an item from the scheduler queue.
+
+    Args:
+        queue_item: tuple[str, bytes] The item from the scheduler queue.
+        scheduler_connection: BaseScheduler The scheduler connection.
+    """
+    try:
+        _, bytes_dump = queue_item
+        payload = deserialize_payload(bytes_dump)
+        match payload:
+            case APSchedulerJobCreate():
+                _add_scheduled_task(payload, scheduler_connection)
+
+            case APSchedulerJobUpdate():
+                _update_scheduled_task(payload, scheduler_connection)
+
+            case APSchedulerJobDelete():
+                _delete_scheduled_task(payload, scheduler_connection)
+
+            case _:
+                logger.warning(f"Unexpected schedule type: {payload}")  # type: ignore
+    except Exception:
+        logger.exception("Error processing scheduler queue item")
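The file above is evidently the new `orchestrator/schedules/service.py` (the +253 entry in the file list). It defines the queue producer (`add_scheduled_task_to_queue`) and the per-item handler (`workflow_scheduler_queue`), but not the consumer loop itself, which presumably lives in the reworked `orchestrator/cli/scheduler.py` that is only partially shown. A rough sketch of what such a consumer could look like, assuming a blocking Redis pop; this is an illustration, not the shipped implementation:

```python
# Hypothetical consumer loop: block on the Redis queue and hand each item to
# workflow_scheduler_queue, which dispatches on the payload's scheduled_type.
from apscheduler.schedulers.background import BackgroundScheduler

from orchestrator.schedules.service import SCHEDULER_QUEUE, redis_connection, workflow_scheduler_queue


def consume_scheduler_queue(scheduler: BackgroundScheduler) -> None:
    while True:
        # brpop blocks until an item arrives or the timeout expires and returns
        # a (queue_name, value) tuple, or None on timeout.
        item = redis_connection.brpop([SCHEDULER_QUEUE], timeout=5)
        if item is not None:
            workflow_scheduler_queue(item, scheduler)
```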
@@ -0,0 +1,71 @@
+# Copyright 2019-2025 SURF.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Annotated, Any, Literal, Union
+from uuid import UUID
+
+from pydantic import BaseModel, Field, TypeAdapter
+
+SCHEDULER_Q_CREATE = "create"
+SCHEDULER_Q_UPDATE = "update"
+SCHEDULER_Q_DELETE = "delete"
+
+
+class APSchedulerJob(BaseModel):
+    scheduled_type: Literal["create", "update", "delete"] = Field(..., description="Discriminator for job type")
+
+
+class APSchedulerJobCreate(APSchedulerJob):
+    name: str | None = Field(None, description="Human readable name e.g. 'My Process'")
+    workflow_name: str = Field(..., description="Name of the workflow to run e.g. 'my_workflow_name'")
+    workflow_id: UUID = Field(..., description="UUID of the workflow associated with this scheduled task")
+
+    trigger: Literal["interval", "cron", "date"] = Field(..., description="APScheduler trigger type")
+    trigger_kwargs: dict[str, Any] = Field(
+        default_factory=lambda: {},
+        description="Arguments passed to the trigger on job creation",
+        examples=[{"hours": 12}, {"minutes": 30}, {"days": 1, "hours": 2}],
+    )
+
+    scheduled_type: Literal["create"] = Field("create", frozen=True)
+
+
+class APSchedulerJobUpdate(APSchedulerJob):
+    name: str | None = Field(None, description="Human readable name e.g. 'My Process'")
+    schedule_id: UUID = Field(..., description="UUID of the scheduled task")
+
+    trigger: Literal["interval", "cron", "date"] | None = Field(None, description="APScheduler trigger type")
+    trigger_kwargs: dict[str, Any] | None = Field(
+        default=None,
+        description="Arguments passed to the job function",
+        examples=[{"hours": 12}, {"minutes": 30}, {"days": 1, "hours": 2}],
+    )
+
+    scheduled_type: Literal["update"] = Field("update", frozen=True)
+
+
+class APSchedulerJobDelete(APSchedulerJob):
+    workflow_id: UUID = Field(..., description="UUID of the workflow associated with this scheduled task")
+    schedule_id: UUID | None = Field(None, description="UUID of the scheduled task")
+
+    scheduled_type: Literal["delete"] = Field("delete", frozen=True)
+
+
+APSchedulerJobs = Annotated[
+    Union[
+        APSchedulerJobCreate,
+        APSchedulerJobUpdate,
+        APSchedulerJobDelete,
+    ],
+    Field(discriminator="scheduled_type"),
+]
+APSJobAdapter = TypeAdapter(APSchedulerJobs)  # type: ignore
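These models are evidently the new `orchestrator/schemas/schedules.py` (+71 in the file list). They form a discriminated union on `scheduled_type`, which is what lets `deserialize_payload` in the service module recover the concrete class from raw queue bytes. A small round-trip sketch using only code shown in this diff:

```python
from uuid import uuid4

from orchestrator.schedules.service import deserialize_payload, serialize_payload
from orchestrator.schemas.schedules import APSchedulerJobDelete

payload = APSchedulerJobDelete(workflow_id=uuid4(), schedule_id=uuid4())

raw = serialize_payload(payload)     # JSON bytes carrying "scheduled_type": "delete"
restored = deserialize_payload(raw)  # APSJobAdapter picks the model via the discriminator

assert isinstance(restored, APSchedulerJobDelete)
```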
@@ -26,7 +26,7 @@ logger = structlog.get_logger(__name__)

 async def get_base_instructions() -> str:
     return dedent(
-        """
+        f"""
         You are an expert assistant designed to find relevant information by building and running database queries.

         ---

@@ -50,17 +50,21 @@ async def get_base_instructions() -> str:

         Follow these steps:

-        1. **Set Context**: Call `start_new_search` with appropriate entity_type and action
+        1. **Set Context**: Call `start_new_search` with appropriate entity_type and action:
+           - `action={ActionType.SELECT.value}` for finding/searching entities
+           - `action={ActionType.COUNT.value}` for counting (e.g., "how many", "count by status", "monthly growth")
+           - `action={ActionType.AGGREGATE.value}` for numeric operations (SUM, AVG, MIN, MAX of specific fields)
         2. **Set Filters** (if needed): Discover paths, build FilterTree, call `set_filter_tree`
            - IMPORTANT: Temporal constraints like "in 2025", "in January", "between X and Y" require filters on datetime fields
            - Filters restrict WHICH records to include; grouping controls HOW to aggregate them
-        3. **Set Grouping/Aggregations** (for COUNT/AGGREGATE):
+        3. **Set Grouping/Aggregations** (for {ActionType.COUNT.value}/{ActionType.AGGREGATE.value}):
            - For temporal grouping (per month, per year, per day, etc.): Use `set_temporal_grouping`
            - For regular grouping (by status, by name, etc.): Use `set_grouping`
-           - For
+           - For {ActionType.AGGREGATE.value} action ONLY: Use `set_aggregations` to specify what to compute (SUM, AVG, etc.)
+           - For {ActionType.COUNT.value} action: Do NOT call `set_aggregations` (counting is automatic)
         4. **Execute**:
-           - For SELECT action: Call `run_search()`
-           - For COUNT/AGGREGATE actions: Call `run_aggregation()`
+           - For {ActionType.SELECT.value} action: Call `run_search()`
+           - For {ActionType.COUNT.value}/{ActionType.AGGREGATE.value} actions: Call `run_aggregation()`

         After search execution, follow the dynamic instructions based on the current state.

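The agent instructions are now an f-string, so the action names shown to the model stay in sync with the `ActionType` enum instead of being hard-coded. A toy illustration of the pattern; the enum members below are assumed for the example and are not copied from the source:

```python
from enum import Enum
from textwrap import dedent


class ActionType(str, Enum):  # assumed members, for illustration only
    SELECT = "select"
    COUNT = "count"
    AGGREGATE = "aggregate"


instructions = dedent(
    f"""
    1. **Set Context**: Call `start_new_search` with appropriate entity_type and action:
       - `action={ActionType.SELECT.value}` for finding/searching entities
       - `action={ActionType.COUNT.value}` for counting
    """
)
```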
@@ -16,6 +16,7 @@ from typing import Any, cast

 import structlog
 from ag_ui.core import EventType, StateSnapshotEvent
+from pydantic import ValidationError
 from pydantic_ai import RunContext
 from pydantic_ai.ag_ui import StateDeps
 from pydantic_ai.exceptions import ModelRetry

@@ -39,13 +40,15 @@ from orchestrator.search.filters import FilterTree
 from orchestrator.search.query import engine
 from orchestrator.search.query.exceptions import PathNotFoundError, QueryValidationError
 from orchestrator.search.query.export import fetch_export_data
+from orchestrator.search.query.mixins import OrderBy
 from orchestrator.search.query.queries import AggregateQuery, CountQuery, Query, SelectQuery
 from orchestrator.search.query.results import AggregationResponse, AggregationResult, ExportData, VisualizationType
 from orchestrator.search.query.state import QueryState
 from orchestrator.search.query.validation import (
     validate_aggregation_field,
-    validate_filter_path,
     validate_filter_tree,
+    validate_grouping_fields,
+    validate_order_by_fields,
     validate_temporal_grouping_field,
 )
 from orchestrator.settings import app_settings

@@ -404,20 +407,30 @@ async def prepare_export(
 async def set_grouping(
     ctx: RunContext[StateDeps[SearchState]],
     group_by_paths: list[str],
+    order_by: list[OrderBy] | None = None,
 ) -> StateSnapshotEvent:
     """Set which field paths to group results by for aggregation.

     Only used with COUNT or AGGREGATE actions. Paths must exist in the schema; use discover_filter_paths to verify.
+    Optionally specify ordering for the grouped results.
+
+    For order_by: You can order by grouping field paths OR aggregation aliases (e.g., 'count').
+    Grouping field paths will be validated; aggregation aliases cannot be validated until execution.
     """
-
-
-
-
-
-            f"Use discover_filter_paths(['{path.split('.')[-1]}']) to find valid paths."
-        )
+    try:
+        validate_grouping_fields(group_by_paths)
+        validate_order_by_fields(order_by)
+    except PathNotFoundError as e:
+        raise ModelRetry(f"{str(e)} Use discover_filter_paths to find valid paths.")

-
+    update_dict: dict[str, Any] = {"group_by": group_by_paths}
+    if order_by is not None:
+        update_dict["order_by"] = order_by
+
+    try:
+        ctx.deps.state.query = cast(Query, ctx.deps.state.query).model_copy(update=update_dict)
+    except ValidationError as e:
+        raise ModelRetry(str(e))

     return StateSnapshotEvent(
         type=EventType.STATE_SNAPSHOT,

@@ -434,16 +447,26 @@ async def set_aggregations(
     """Define what aggregations to compute over the matching records.

     Only used with AGGREGATE action. See Aggregation model (CountAggregation, FieldAggregation) for structure and field requirements.
+
     """
     # Validate field paths for FieldAggregations
     try:
         for agg in aggregations:
             if isinstance(agg, FieldAggregation):
                 validate_aggregation_field(agg.type, agg.field)
-    except
-        raise ModelRetry(
+    except PathNotFoundError as e:
+        raise ModelRetry(
+            f"{str(e)} "
+            f"You MUST call discover_filter_paths first to find valid fields. "
+            f"If the field truly doesn't exist, inform the user that this data is not available."
+        )
+    except QueryValidationError as e:
+        raise ModelRetry(f"{str(e)}")

-
+    try:
+        ctx.deps.state.query = cast(Query, ctx.deps.state.query).model_copy(update={"aggregations": aggregations})
+    except ValidationError as e:
+        raise ModelRetry(str(e))

     return StateSnapshotEvent(
         type=EventType.STATE_SNAPSHOT,

@@ -456,19 +479,36 @@
 async def set_temporal_grouping(
     ctx: RunContext[StateDeps[SearchState]],
     temporal_groups: list[TemporalGrouping],
+    cumulative: bool = False,
+    order_by: list[OrderBy] | None = None,
 ) -> StateSnapshotEvent:
     """Set temporal grouping to group datetime fields by time periods.

     Only used with COUNT or AGGREGATE actions. See TemporalGrouping model for structure, periods, and examples.
+    Optionally enable cumulative aggregations (running totals) and specify ordering.
+
+    For order_by: You can order by temporal field paths OR aggregation aliases (e.g., 'count').
+    Temporal field paths will be validated; aggregation aliases cannot be validated until execution.
     """
-    # Validate that fields exist and are datetime types
     try:
         for tg in temporal_groups:
             validate_temporal_grouping_field(tg.field)
-
+        validate_order_by_fields(order_by)
+    except PathNotFoundError as e:
+        raise ModelRetry(f"{str(e)} Use discover_filter_paths to find valid paths.")
+    except QueryValidationError as e:
         raise ModelRetry(f"{str(e)} Use discover_filter_paths to find datetime fields.")

-
+    update_dict: dict[str, Any] = {"temporal_group_by": temporal_groups}
+    if cumulative:
+        update_dict["cumulative"] = cumulative
+    if order_by is not None:
+        update_dict["order_by"] = order_by
+
+    try:
+        ctx.deps.state.query = cast(Query, ctx.deps.state.query).model_copy(update=update_dict)
+    except ValidationError as e:
+        raise ModelRetry(str(e))

     return StateSnapshotEvent(
         type=EventType.STATE_SNAPSHOT,
@@ -61,6 +61,11 @@ class TemporalGrouping(BaseModel):
         },
     )

+    @property
+    def alias(self) -> str:
+        """Return the SQL-friendly alias for this temporal grouping."""
+        return f"{BaseAggregation.field_to_alias(self.field)}_{self.period.value}"
+
     def get_pivot_fields(self) -> list[str]:
         """Return fields that need to be pivoted for this temporal grouping."""
         return [self.field]
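The new `alias` property centralizes the column-name convention that the following hunk reuses via `col_name = self.alias`. Assuming `BaseAggregation.field_to_alias` turns a dotted path into an identifier-safe name (that helper is not shown in this diff), a monthly grouping on a start-date field would produce something like:

```python
# Illustrative only; the exact output of field_to_alias is an assumption.
field_alias = "subscription_start_date"     # assumed result of field_to_alias("subscription.start_date")
period_value = "month"
col_name = f"{field_alias}_{period_value}"  # -> "subscription_start_date_month"
```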
@@ -83,8 +88,7 @@ class TemporalGrouping(BaseModel):
         col = getattr(pivot_cte_columns, field_alias)
         truncated_col = func.date_trunc(self.period.value, cast(col, TIMESTAMP(timezone=True)))

-
-        col_name = f"{field_alias}_{self.period.value}"
+        col_name = self.alias
         select_col = truncated_col.label(col_name)
         return select_col, truncated_col, col_name
