orchestrator-core 4.6.1__py3-none-any.whl → 4.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. orchestrator/__init__.py +1 -1
  2. orchestrator/api/api_v1/endpoints/processes.py +4 -1
  3. orchestrator/api/api_v1/endpoints/search.py +44 -34
  4. orchestrator/{search/retrieval/utils.py → cli/search/display.py} +4 -29
  5. orchestrator/cli/search/search_explore.py +22 -24
  6. orchestrator/cli/search/speedtest.py +11 -9
  7. orchestrator/db/models.py +6 -6
  8. orchestrator/graphql/resolvers/helpers.py +15 -0
  9. orchestrator/graphql/resolvers/process.py +5 -3
  10. orchestrator/graphql/resolvers/product.py +3 -2
  11. orchestrator/graphql/resolvers/product_block.py +3 -2
  12. orchestrator/graphql/resolvers/resource_type.py +3 -2
  13. orchestrator/graphql/resolvers/scheduled_tasks.py +3 -1
  14. orchestrator/graphql/resolvers/settings.py +2 -0
  15. orchestrator/graphql/resolvers/subscription.py +5 -3
  16. orchestrator/graphql/resolvers/version.py +2 -0
  17. orchestrator/graphql/resolvers/workflow.py +3 -2
  18. orchestrator/graphql/schemas/process.py +3 -3
  19. orchestrator/log_config.py +2 -0
  20. orchestrator/schemas/search.py +1 -1
  21. orchestrator/schemas/search_requests.py +59 -0
  22. orchestrator/search/agent/handlers.py +129 -0
  23. orchestrator/search/agent/prompts.py +54 -33
  24. orchestrator/search/agent/state.py +9 -24
  25. orchestrator/search/agent/tools.py +223 -144
  26. orchestrator/search/agent/validation.py +80 -0
  27. orchestrator/search/{schemas → aggregations}/__init__.py +20 -0
  28. orchestrator/search/aggregations/base.py +201 -0
  29. orchestrator/search/core/types.py +3 -2
  30. orchestrator/search/filters/__init__.py +4 -0
  31. orchestrator/search/filters/definitions.py +22 -1
  32. orchestrator/search/filters/numeric_filter.py +3 -3
  33. orchestrator/search/llm_migration.py +2 -1
  34. orchestrator/search/query/__init__.py +90 -0
  35. orchestrator/search/query/builder.py +285 -0
  36. orchestrator/search/query/engine.py +162 -0
  37. orchestrator/search/{retrieval → query}/exceptions.py +38 -7
  38. orchestrator/search/query/mixins.py +95 -0
  39. orchestrator/search/query/queries.py +129 -0
  40. orchestrator/search/query/results.py +252 -0
  41. orchestrator/search/{retrieval/query_state.py → query/state.py} +31 -11
  42. orchestrator/search/{retrieval → query}/validation.py +58 -1
  43. orchestrator/search/retrieval/__init__.py +0 -5
  44. orchestrator/search/retrieval/pagination.py +7 -8
  45. orchestrator/search/retrieval/retrievers/base.py +9 -9
  46. orchestrator/workflows/translations/en-GB.json +1 -0
  47. {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/METADATA +16 -15
  48. {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/RECORD +51 -45
  49. orchestrator/search/retrieval/builder.py +0 -127
  50. orchestrator/search/retrieval/engine.py +0 -197
  51. orchestrator/search/schemas/parameters.py +0 -133
  52. orchestrator/search/schemas/results.py +0 -80
  53. /orchestrator/search/{export.py → query/export.py} +0 -0
  54. {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/WHEEL +0 -0
  55. {orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/licenses/LICENSE +0 -0
@@ -9,7 +9,7 @@ from orchestrator.db.range.range import apply_range_to_statement
9
9
  from orchestrator.db.sorting import Sort
10
10
  from orchestrator.db.sorting.workflow import sort_workflows, workflow_sort_fields
11
11
  from orchestrator.graphql.pagination import Connection
12
- from orchestrator.graphql.resolvers.helpers import rows_from_statement
12
+ from orchestrator.graphql.resolvers.helpers import make_async, rows_from_statement
13
13
  from orchestrator.graphql.schemas.workflow import Workflow
14
14
  from orchestrator.graphql.types import GraphqlFilter, GraphqlSort, OrchestratorInfo
15
15
  from orchestrator.graphql.utils import create_resolver_error_handler, is_querying_page_data, to_graphql_result_page
@@ -19,7 +19,8 @@ from orchestrator.utils.search_query import create_sqlalchemy_select
19
19
  logger = structlog.get_logger(__name__)
20
20
 
21
21
 
22
- async def resolve_workflows(
22
+ @make_async
23
+ def resolve_workflows(
23
24
  info: OrchestratorInfo,
24
25
  filter_by: list[GraphqlFilter] | None = None,
25
26
  sort_by: list[GraphqlSort] | None = None,
@@ -86,11 +86,11 @@ class ProcessType:
86
86
  oidc_user = await info.context.get_current_user
87
87
  workflow = get_workflow(self.workflow_name)
88
88
  process = load_process(db.session.get(ProcessTable, self.process_id)) # type: ignore[arg-type]
89
- auth_resume, auth_retry = get_auth_callbacks(get_steps_to_evaluate_for_rbac(process), workflow) # type: ignore[arg-type]
89
+ auth_resume, auth_retry = get_auth_callbacks(get_steps_to_evaluate_for_rbac(process), workflow)
90
90
 
91
91
  return FormUserPermissionsType(
92
- retryAllowed=auth_retry and auth_retry(oidc_user), # type: ignore[arg-type]
93
- resumeAllowed=auth_resume and auth_resume(oidc_user), # type: ignore[arg-type]
92
+ retryAllowed=bool(auth_retry and auth_retry(oidc_user)),
93
+ resumeAllowed=bool(auth_resume and auth_resume(oidc_user)),
94
94
  )
95
95
 
96
96
  @authenticated_field(description="Returns list of subscriptions of the process") # type: ignore
@@ -41,9 +41,11 @@ LOGGER_OVERRIDES = dict(
41
41
  [
42
42
  logger_config("asyncio"),
43
43
  logger_config("httpcore"),
44
+ logger_config("openai", default_level="WARNING"),
44
45
  logger_config("orchestrator.graphql.autoregistration"),
45
46
  logger_config("sqlalchemy.engine", default_level="WARNING"),
46
47
  logger_config("uvicorn"),
47
48
  logger_config("LiteLLM", default_level="WARNING"),
49
+ logger_config("pydantic_ai", default_level="DEBUG"),
48
50
  ]
49
51
  )
@@ -16,7 +16,7 @@ from typing import Generic, TypeVar
16
16
  from pydantic import BaseModel, ConfigDict, Field
17
17
 
18
18
  from orchestrator.search.core.types import SearchMetadata
19
- from orchestrator.search.schemas.results import ComponentInfo, LeafInfo
19
+ from orchestrator.search.query.builder import ComponentInfo, LeafInfo
20
20
 
21
21
  T = TypeVar("T")
22
22
 
@@ -0,0 +1,59 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+
15
+ from pydantic import BaseModel, ConfigDict, Field
16
+
17
+ from orchestrator.search.core.types import EntityType
18
+ from orchestrator.search.filters import FilterTree
19
+ from orchestrator.search.query.queries import SelectQuery
20
+
21
+
22
+ class SearchRequest(BaseModel):
23
+ """API request model for search operations.
24
+
25
+ Only supports SELECT action, used by search endpoints.
26
+ """
27
+
28
+ filters: FilterTree | None = Field(
29
+ default=None,
30
+ description="Structured filters to apply to the search.",
31
+ )
32
+ query: str | None = Field(
33
+ default=None,
34
+ description="Text search query for semantic/fuzzy search.",
35
+ )
36
+ limit: int = Field(
37
+ default=SelectQuery.DEFAULT_LIMIT,
38
+ ge=SelectQuery.MIN_LIMIT,
39
+ le=SelectQuery.MAX_LIMIT,
40
+ description="Maximum number of search results to return.",
41
+ )
42
+
43
+ model_config = ConfigDict(extra="forbid")
44
+
45
+ def to_query(self, entity_type: EntityType) -> SelectQuery:
46
+ """Convert API request to SelectQuery domain model.
47
+
48
+ Args:
49
+ entity_type: The entity type to search (provided by endpoint)
50
+
51
+ Returns:
52
+ SelectQuery for search operation
53
+ """
54
+ return SelectQuery(
55
+ entity_type=entity_type,
56
+ filters=self.filters,
57
+ query_text=self.query,
58
+ limit=self.limit,
59
+ )
@@ -0,0 +1,129 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ """Handlers for search and aggregation execution with persistence."""
15
+
16
+ from uuid import UUID
17
+
18
+ import structlog
19
+
20
+ from orchestrator.db import AgentRunTable, SearchQueryTable
21
+ from orchestrator.db.database import WrappedSession
22
+ from orchestrator.search.query import engine
23
+ from orchestrator.search.query.queries import AggregateQuery, CountQuery, SelectQuery
24
+ from orchestrator.search.query.results import AggregationResponse, SearchResponse
25
+ from orchestrator.search.query.state import QueryState
26
+
27
+ logger = structlog.get_logger(__name__)
28
+
29
+
30
+ async def execute_search_with_persistence(
31
+ query: SelectQuery,
32
+ db_session: WrappedSession,
33
+ run_id: UUID | None,
34
+ ) -> tuple[SearchResponse, UUID, UUID]:
35
+ """Execute search, persist to DB, return response and IDs.
36
+
37
+ Args:
38
+ query: SelectQuery for search operation
39
+ db_session: Database session
40
+ run_id: Existing run ID or None to create new one
41
+
42
+ Returns:
43
+ Tuple of (search_response, run_id, query_id)
44
+ """
45
+ # Create agent run
46
+ if not run_id:
47
+ agent_run = AgentRunTable(agent_type="search")
48
+ db_session.add(agent_run)
49
+ db_session.commit()
50
+ db_session.expire_all()
51
+ run_id = agent_run.run_id
52
+ logger.debug("Created new agent run", run_id=str(run_id))
53
+
54
+ if run_id is None:
55
+ raise ValueError("run_id should not be None here")
56
+
57
+ # Execute search
58
+ search_response = await engine.execute_search(query, db_session)
59
+
60
+ # Save to database
61
+ query_embedding = search_response.query_embedding
62
+ query_state = QueryState(query=query, query_embedding=query_embedding)
63
+ query_number = db_session.query(SearchQueryTable).filter_by(run_id=run_id).count() + 1
64
+ search_query = SearchQueryTable.from_state(
65
+ state=query_state,
66
+ run_id=run_id,
67
+ query_number=query_number,
68
+ )
69
+ db_session.add(search_query)
70
+ db_session.commit()
71
+ db_session.expire_all()
72
+
73
+ logger.debug("Saved search query", query_id=str(search_query.query_id), query_number=query_number)
74
+
75
+ logger.debug(
76
+ "Search results",
77
+ results=[r.model_dump() for r in search_response.results],
78
+ total_count=len(search_response.results),
79
+ search_type=search_response.metadata.search_type,
80
+ )
81
+
82
+ return search_response, run_id, search_query.query_id
83
+
84
+
85
+ async def execute_aggregation_with_persistence(
86
+ query: CountQuery | AggregateQuery,
87
+ db_session: WrappedSession,
88
+ run_id: UUID | None,
89
+ ) -> tuple[AggregationResponse, UUID, UUID]:
90
+ """Execute aggregation, persist to DB, return response and IDs.
91
+
92
+ Args:
93
+ query: CountQuery or AggregateQuery for aggregation operations
94
+ db_session: Database session
95
+ run_id: Existing run ID or None to create new one
96
+
97
+ Returns:
98
+ Tuple of (aggregation_response, run_id, query_id)
99
+ """
100
+ # Create agent run if needed
101
+ if not run_id:
102
+ agent_run = AgentRunTable(agent_type="search")
103
+ db_session.add(agent_run)
104
+ db_session.commit()
105
+ db_session.expire_all()
106
+ run_id = agent_run.run_id
107
+ logger.debug("Created new agent run", run_id=str(run_id))
108
+
109
+ if run_id is None:
110
+ raise ValueError("run_id should not be None here")
111
+
112
+ # Execute aggregation
113
+ aggregation_response = await engine.execute_aggregation(query, db_session)
114
+
115
+ # Save to database
116
+ query_state = QueryState(query=query, query_embedding=None)
117
+ query_number = db_session.query(SearchQueryTable).filter_by(run_id=run_id).count() + 1
118
+ search_query = SearchQueryTable.from_state(
119
+ state=query_state,
120
+ run_id=run_id,
121
+ query_number=query_number,
122
+ )
123
+ db_session.add(search_query)
124
+ db_session.commit()
125
+ db_session.expire_all()
126
+
127
+ logger.debug("Saved aggregation query", query_id=str(search_query.query_id), query_number=query_number)
128
+
129
+ return aggregation_response, run_id, search_query.query_id
@@ -19,6 +19,7 @@ from pydantic_ai import RunContext
19
19
  from pydantic_ai.ag_ui import StateDeps
20
20
 
21
21
  from orchestrator.search.agent.state import SearchState
22
+ from orchestrator.search.core.types import ActionType
22
23
 
23
24
  logger = structlog.get_logger(__name__)
24
25
 
@@ -33,7 +34,6 @@ async def get_base_instructions() -> str:
33
34
 
34
35
  Your ultimate goal is to **find information** that answers the user's request.
35
36
 
36
- To do this, you will perform either a broad search or a filtered search.
37
37
  For **filtered searches**, your primary method is to **construct a valid `FilterTree` object**.
38
38
  To do this correctly, you must infer the exact structure, operators, and nesting rules from the Pydantic schema of the `set_filter_tree` tool itself.
39
39
 
@@ -48,15 +48,19 @@ async def get_base_instructions() -> str:
48
48
  ---
49
49
  ### 3. Execution Workflow
50
50
 
51
- Follow these steps in strict order:
51
+ Follow these steps:
52
52
 
53
- 1. **Set Context**: If the user is asking for a NEW search, call `start_new_search`.
54
- 2. **Analyze for Filters**: Based on the user's request, decide if specific filters are necessary.
55
- - **If filters ARE required**, follow these sub-steps:
56
- a. **Gather Intel**: Identify all needed field names, then call `discover_filter_paths` and `get_valid_operators` **once each** to get all required information.
57
- b. **Construct FilterTree**: Build the `FilterTree` object.
58
- c. **Set Filters**: Call `set_filter_tree`.
59
- 3. **Execute**: Call `run_search`. This is done for both filtered and non-filtered searches.
53
+ 1. **Set Context**: Call `start_new_search` with appropriate entity_type and action
54
+ 2. **Set Filters** (if needed): Discover paths, build FilterTree, call `set_filter_tree`
55
+ - IMPORTANT: Temporal constraints like "in 2025", "in January", "between X and Y" require filters on datetime fields
56
+ - Filters restrict WHICH records to include; grouping controls HOW to aggregate them
57
+ 3. **Set Grouping/Aggregations** (for COUNT/AGGREGATE):
58
+ - For temporal grouping (per month, per year, per day, etc.): Use `set_temporal_grouping`
59
+ - For regular grouping (by status, by name, etc.): Use `set_grouping`
60
+ - For aggregations: Use `set_aggregations`
61
+ 4. **Execute**:
62
+ - For SELECT action: Call `run_search()`
63
+ - For COUNT/AGGREGATE actions: Call `run_aggregation()`
60
64
 
61
65
  After search execution, follow the dynamic instructions based on the current state.
62
66
 
@@ -73,31 +77,46 @@ async def get_base_instructions() -> str:
73
77
  async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> str:
74
78
  """Dynamically provides 'next step' coaching based on the current state."""
75
79
  state = ctx.deps.state
76
- param_state_str = json.dumps(state.parameters, indent=2, default=str) if state.parameters else "Not set."
77
- results_count = state.results_data.total_count if state.results_data else 0
80
+ query_state_str = json.dumps(state.query.model_dump(), indent=2, default=str) if state.query else "Not set."
81
+ results_count = state.results_count or 0
82
+ action = state.action or ActionType.SELECT
78
83
 
79
- if state.export_data:
84
+ if not state.query:
80
85
  next_step_guidance = (
81
- "INSTRUCTION: Export has been prepared successfully. "
82
- "Simply confirm to the user that the export is ready for download. "
83
- "DO NOT include or mention the download URL - the UI will display it automatically."
84
- )
85
- elif not state.parameters or not state.parameters.get("entity_type"):
86
- next_step_guidance = (
87
- "INSTRUCTION: The search context is not set. Your next action is to call `start_new_search`."
86
+ f"INSTRUCTION: The search context is not set. Your next action is to call `start_new_search`. "
87
+ f"For counting or aggregation queries, set action='{ActionType.COUNT.value}' or action='{ActionType.AGGREGATE.value}'."
88
88
  )
89
89
  elif results_count > 0:
90
- next_step_guidance = dedent(
91
- f"""
92
- INSTRUCTION: Search completed successfully.
93
- Found {results_count} results containing only: entity_id, title, score.
94
-
95
- Choose your next action based on what the user requested:
96
- 1. **Broad/generic search** (e.g., 'show me subscriptions'): Confirm search completed and report count. Do nothing else.
97
- 2. **Question answerable with entity_id/title/score**: Answer directly using the current results.
98
- 3. **Question requiring other details**: Call `fetch_entity_details` first, then answer with the detailed data.
99
- 4. **Export request** (phrases like 'export', 'download', 'save as CSV'): Call `prepare_export` directly.
100
- """
90
+ if action in (ActionType.COUNT, ActionType.AGGREGATE):
91
+ # Aggregation completed
92
+ next_step_guidance = (
93
+ "INSTRUCTION: Aggregation completed successfully. "
94
+ "The results are already displayed in the UI. "
95
+ "Simply confirm completion to the user in a brief sentence. "
96
+ "DO NOT repeat, summarize, or restate the aggregation data."
97
+ )
98
+ else:
99
+ # Search completed
100
+ next_step_guidance = dedent(
101
+ f"""
102
+ INSTRUCTION: Search completed successfully.
103
+ Found {results_count} results containing only: entity_id, title, score.
104
+
105
+ Choose your next action based on what the user requested:
106
+ 1. **Broad/generic search** (e.g., 'show me subscriptions'): Confirm search completed and report count. Do not repeat the results.
107
+ 2. **Question answerable with entity_id/title/score**: Answer directly using the current results.
108
+ 3. **Question requiring other details**: Call `fetch_entity_details` first, then answer with the detailed data.
109
+ 4. **Export request** (phrases like 'export', 'download', 'save as CSV'): Call `prepare_export` directly. Simply confirm the export is ready. Do not repeat the results.
110
+ """
111
+ )
112
+ elif action in (ActionType.COUNT, ActionType.AGGREGATE):
113
+ # COUNT or AGGREGATE action but no results yet
114
+ next_step_guidance = (
115
+ "INSTRUCTION: Aggregation context is set. "
116
+ "For temporal queries (per month, per year, over time): call `set_temporal_grouping` with datetime field and period. "
117
+ "For regular grouping: call `set_grouping` with paths to group by. "
118
+ f"For {ActionType.AGGREGATE.value.upper()}: call `set_aggregations` with aggregation specs. "
119
+ "Then call `run_aggregation`."
101
120
  )
102
121
  else:
103
122
  next_step_guidance = (
@@ -106,17 +125,19 @@ async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> s
106
125
  "If no specific filters are needed, you can proceed directly to `run_search`."
107
126
  )
108
127
 
128
+ status_summary = f"Results: {results_count}" if results_count > 0 else "No results yet"
129
+
109
130
  return dedent(
110
131
  f"""
111
132
  ---
112
133
  ## CURRENT STATE
113
134
 
114
- **Current Search Parameters:**
135
+ **Current Query:**
115
136
  ```json
116
- {param_state_str}
137
+ {query_state_str}
117
138
  ```
118
139
 
119
- **Current Results Count:** {results_count}
140
+ **Status:** {status_summary}
120
141
 
121
142
  ---
122
143
  ## NEXT ACTION REQUIRED
@@ -11,37 +11,22 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
 
14
- from typing import Any
15
14
  from uuid import UUID
16
15
 
17
16
  from pydantic import BaseModel
18
17
 
19
- from orchestrator.search.schemas.results import SearchResult
18
+ from orchestrator.search.core.types import ActionType
19
+ from orchestrator.search.query.queries import Query
20
20
 
21
21
 
22
- class ExportData(BaseModel):
23
- """Export metadata for download."""
24
-
25
- action: str = "export"
26
- query_id: str
27
- download_url: str
28
- message: str
29
-
30
-
31
- class SearchResultsData(BaseModel):
32
- """Search results data for frontend display and agent context."""
33
-
34
- action: str = "view_results"
35
- query_id: str
36
- results_url: str
37
- total_count: int
38
- message: str
39
- results: list[SearchResult] = []
22
+ class SearchState(BaseModel):
23
+ """Agent state for search operations.
40
24
 
25
+ Tracks the current search context and execution status.
26
+ """
41
27
 
42
- class SearchState(BaseModel):
43
28
  run_id: UUID | None = None
44
29
  query_id: UUID | None = None
45
- parameters: dict[str, Any] | None = None
46
- results_data: SearchResultsData | None = None
47
- export_data: ExportData | None = None
30
+ action: ActionType | None = None
31
+ query: Query | None = None
32
+ results_count: int | None = None # Number of results from last executed search/aggregation