orchestrator-core 4.5.3__py3-none-any.whl → 4.6.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. orchestrator/__init__.py +1 -1
  2. orchestrator/agentic_app.py +1 -21
  3. orchestrator/api/api_v1/api.py +5 -0
  4. orchestrator/api/api_v1/endpoints/agent.py +50 -0
  5. orchestrator/api/api_v1/endpoints/search.py +120 -201
  6. orchestrator/cli/database.py +3 -0
  7. orchestrator/cli/generate.py +11 -4
  8. orchestrator/cli/generator/generator/migration.py +7 -3
  9. orchestrator/cli/scheduler.py +15 -22
  10. orchestrator/cli/search/resize_embedding.py +28 -22
  11. orchestrator/cli/search/speedtest.py +4 -6
  12. orchestrator/db/__init__.py +6 -0
  13. orchestrator/db/models.py +75 -0
  14. orchestrator/migrations/helpers.py +46 -38
  15. orchestrator/schedules/scheduler.py +32 -15
  16. orchestrator/schedules/validate_products.py +1 -1
  17. orchestrator/schemas/search.py +8 -85
  18. orchestrator/search/agent/__init__.py +2 -2
  19. orchestrator/search/agent/agent.py +25 -29
  20. orchestrator/search/agent/json_patch.py +51 -0
  21. orchestrator/search/agent/prompts.py +35 -9
  22. orchestrator/search/agent/state.py +28 -2
  23. orchestrator/search/agent/tools.py +192 -53
  24. orchestrator/search/core/exceptions.py +6 -0
  25. orchestrator/search/core/types.py +1 -0
  26. orchestrator/search/export.py +199 -0
  27. orchestrator/search/indexing/indexer.py +13 -4
  28. orchestrator/search/indexing/registry.py +14 -1
  29. orchestrator/search/llm_migration.py +55 -0
  30. orchestrator/search/retrieval/__init__.py +3 -2
  31. orchestrator/search/retrieval/builder.py +5 -1
  32. orchestrator/search/retrieval/engine.py +66 -23
  33. orchestrator/search/retrieval/pagination.py +46 -56
  34. orchestrator/search/retrieval/query_state.py +61 -0
  35. orchestrator/search/retrieval/retrievers/base.py +26 -40
  36. orchestrator/search/retrieval/retrievers/fuzzy.py +10 -9
  37. orchestrator/search/retrieval/retrievers/hybrid.py +11 -8
  38. orchestrator/search/retrieval/retrievers/semantic.py +9 -8
  39. orchestrator/search/retrieval/retrievers/structured.py +6 -6
  40. orchestrator/search/schemas/parameters.py +17 -13
  41. orchestrator/search/schemas/results.py +4 -1
  42. orchestrator/settings.py +1 -0
  43. orchestrator/utils/auth.py +3 -2
  44. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/METADATA +3 -3
  45. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/RECORD +47 -43
  46. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/WHEEL +0 -0
  47. {orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0rc2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,51 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from typing import Any, Literal
15
+
16
+ from pydantic import BaseModel, Field
17
+
18
+
19
+ class JSONPatchOp(BaseModel):
20
+ """A JSON Patch operation (RFC 6902).
21
+
22
+ Docs reference: https://docs.ag-ui.com/concepts/state
23
+ """
24
+
25
+ op: Literal["add", "remove", "replace", "move", "copy", "test"] = Field(
26
+ description="The operation to perform: add, remove, replace, move, copy, or test"
27
+ )
28
+ path: str = Field(description="JSON Pointer (RFC 6901) to the target location")
29
+ value: Any | None = Field(
30
+ default=None,
31
+ description="The value to apply (for add, replace operations)",
32
+ )
33
+ from_: str | None = Field(
34
+ default=None,
35
+ alias="from",
36
+ description="Source path (for move, copy operations)",
37
+ )
38
+
39
+ @classmethod
40
+ def upsert(cls, path: str, value: Any, existed: bool) -> "JSONPatchOp":
41
+ """Create an add or replace operation depending on whether the path existed.
42
+
43
+ Args:
44
+ path: JSON Pointer path to the target location
45
+ value: The value to set
46
+ existed: True if the path already exists (use replace), False otherwise (use add)
47
+
48
+ Returns:
49
+ JSONPatchOp with 'replace' if existed is True, 'add' otherwise
50
+ """
51
+ return cls(op="replace" if existed else "add", path=path, value=value)
@@ -50,14 +50,15 @@ async def get_base_instructions() -> str:
50
50
 
51
51
  Follow these steps in strict order:
52
52
 
53
- 1. **Set Context**: Always begin by calling `set_search_parameters`.
53
+ 1. **Set Context**: If the user is asking for a NEW search, call `start_new_search`.
54
54
  2. **Analyze for Filters**: Based on the user's request, decide if specific filters are necessary.
55
55
  - **If filters ARE required**, follow these sub-steps:
56
56
  a. **Gather Intel**: Identify all needed field names, then call `discover_filter_paths` and `get_valid_operators` **once each** to get all required information.
57
57
  b. **Construct FilterTree**: Build the `FilterTree` object.
58
58
  c. **Set Filters**: Call `set_filter_tree`.
59
- 3. **Execute**: Call `execute_search`. This is done for both filtered and non-filtered searches.
60
- 4. **Report**: Answer the user's question directly and summarize when appropriate.
59
+ 3. **Execute**: Call `run_search`. This is done for both filtered and non-filtered searches.
60
+
61
+ After search execution, follow the dynamic instructions based on the current state.
61
62
 
62
63
  ---
63
64
  ### 4. Critical Rules
@@ -73,28 +74,53 @@ async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> s
73
74
  """Dynamically provides 'next step' coaching based on the current state."""
74
75
  state = ctx.deps.state
75
76
  param_state_str = json.dumps(state.parameters, indent=2, default=str) if state.parameters else "Not set."
77
+ results_count = state.results_data.total_count if state.results_data else 0
76
78
 
77
- next_step_guidance = ""
78
- if not state.parameters or not state.parameters.get("entity_type"):
79
+ if state.export_data:
80
+ next_step_guidance = (
81
+ "INSTRUCTION: Export has been prepared successfully. "
82
+ "Simply confirm to the user that the export is ready for download. "
83
+ "DO NOT include or mention the download URL - the UI will display it automatically."
84
+ )
85
+ elif not state.parameters or not state.parameters.get("entity_type"):
79
86
  next_step_guidance = (
80
- "INSTRUCTION: The search context is not set. Your next action is to call `set_search_parameters`."
87
+ "INSTRUCTION: The search context is not set. Your next action is to call `start_new_search`."
88
+ )
89
+ elif results_count > 0:
90
+ next_step_guidance = dedent(
91
+ f"""
92
+ INSTRUCTION: Search completed successfully.
93
+ Found {results_count} results containing only: entity_id, title, score.
94
+
95
+ Choose your next action based on what the user requested:
96
+ 1. **Broad/generic search** (e.g., 'show me subscriptions'): Confirm search completed and report count. Do nothing else.
97
+ 2. **Question answerable with entity_id/title/score**: Answer directly using the current results.
98
+ 3. **Question requiring other details**: Call `fetch_entity_details` first, then answer with the detailed data.
99
+ 4. **Export request** (phrases like 'export', 'download', 'save as CSV'): Call `prepare_export` directly.
100
+ """
81
101
  )
82
102
  else:
83
103
  next_step_guidance = (
84
104
  "INSTRUCTION: Context is set. Now, analyze the user's request. "
85
105
  "If specific filters ARE required, use the information-gathering tools to build a `FilterTree` and call `set_filter_tree`. "
86
- "If no specific filters are needed, you can proceed directly to `execute_search`."
106
+ "If no specific filters are needed, you can proceed directly to `run_search`."
87
107
  )
108
+
88
109
  return dedent(
89
110
  f"""
90
111
  ---
91
- ### Current State & Next Action
112
+ ## CURRENT STATE
92
113
 
93
114
  **Current Search Parameters:**
94
115
  ```json
95
116
  {param_state_str}
96
117
  ```
97
118
 
98
- **{next_step_guidance}**
119
+ **Current Results Count:** {results_count}
120
+
121
+ ---
122
+ ## NEXT ACTION REQUIRED
123
+
124
+ {next_step_guidance}
99
125
  """
100
126
  )
@@ -12,10 +12,36 @@
12
12
  # limitations under the License.
13
13
 
14
14
  from typing import Any
15
+ from uuid import UUID
15
16
 
16
- from pydantic import BaseModel, Field
17
+ from pydantic import BaseModel
18
+
19
+ from orchestrator.search.schemas.results import SearchResult
20
+
21
+
22
+ class ExportData(BaseModel):
23
+ """Export metadata for download."""
24
+
25
+ action: str = "export"
26
+ query_id: str
27
+ download_url: str
28
+ message: str
29
+
30
+
31
+ class SearchResultsData(BaseModel):
32
+ """Search results data for frontend display and agent context."""
33
+
34
+ action: str = "view_results"
35
+ query_id: str
36
+ results_url: str
37
+ total_count: int
38
+ message: str
39
+ results: list[SearchResult] = []
17
40
 
18
41
 
19
42
  class SearchState(BaseModel):
43
+ run_id: UUID | None = None
44
+ query_id: UUID | None = None
20
45
  parameters: dict[str, Any] | None = None
21
- results: list[dict[str, Any]] = Field(default_factory=list)
46
+ results_data: SearchResultsData | None = None
47
+ export_data: ExportData | None = None
@@ -11,11 +11,11 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
 
14
- from collections.abc import Awaitable, Callable
15
- from typing import Any, TypeVar
14
+ import json
15
+ from typing import Any
16
16
 
17
17
  import structlog
18
- from ag_ui.core import EventType, StateSnapshotEvent
18
+ from ag_ui.core import EventType, StateDeltaEvent, StateSnapshotEvent
19
19
  from pydantic_ai import RunContext
20
20
  from pydantic_ai.ag_ui import StateDeps
21
21
  from pydantic_ai.exceptions import ModelRetry
@@ -25,34 +25,22 @@ from pydantic_ai.toolsets import FunctionToolset
25
25
  from orchestrator.api.api_v1.endpoints.search import (
26
26
  get_definitions,
27
27
  list_paths,
28
- search_processes,
29
- search_products,
30
- search_subscriptions,
31
- search_workflows,
32
28
  )
33
- from orchestrator.schemas.search import SearchResultsSchema
29
+ from orchestrator.db import AgentRunTable, SearchQueryTable, db
30
+ from orchestrator.search.agent.json_patch import JSONPatchOp
31
+ from orchestrator.search.agent.state import ExportData, SearchResultsData, SearchState
34
32
  from orchestrator.search.core.types import ActionType, EntityType, FilterOp
33
+ from orchestrator.search.export import fetch_export_data
35
34
  from orchestrator.search.filters import FilterTree
35
+ from orchestrator.search.retrieval.engine import execute_search
36
36
  from orchestrator.search.retrieval.exceptions import FilterValidationError, PathNotFoundError
37
+ from orchestrator.search.retrieval.query_state import SearchQueryState
37
38
  from orchestrator.search.retrieval.validation import validate_filter_tree
38
- from orchestrator.search.schemas.parameters import PARAMETER_REGISTRY, BaseSearchParameters
39
-
40
- from .state import SearchState
39
+ from orchestrator.search.schemas.parameters import BaseSearchParameters
40
+ from orchestrator.settings import app_settings
41
41
 
42
42
  logger = structlog.get_logger(__name__)
43
43
 
44
-
45
- P = TypeVar("P", bound=BaseSearchParameters)
46
-
47
- SearchFn = Callable[[P], Awaitable[SearchResultsSchema[Any]]]
48
-
49
- SEARCH_FN_MAP: dict[EntityType, SearchFn] = {
50
- EntityType.SUBSCRIPTION: search_subscriptions,
51
- EntityType.WORKFLOW: search_workflows,
52
- EntityType.PRODUCT: search_products,
53
- EntityType.PROCESS: search_processes,
54
- }
55
-
56
44
  search_toolset: FunctionToolset[StateDeps[SearchState]] = FunctionToolset(max_retries=1)
57
45
 
58
46
 
@@ -65,32 +53,50 @@ def last_user_message(ctx: RunContext[StateDeps[SearchState]]) -> str | None:
65
53
  return None
66
54
 
67
55
 
56
+ def _set_parameters(
57
+ ctx: RunContext[StateDeps[SearchState]],
58
+ entity_type: EntityType,
59
+ action: str | ActionType,
60
+ query: str,
61
+ filters: Any | None,
62
+ ) -> None:
63
+ """Internal helper to set parameters."""
64
+ ctx.deps.state.parameters = {
65
+ "action": action,
66
+ "entity_type": entity_type,
67
+ "filters": filters,
68
+ "query": query,
69
+ }
70
+
71
+
68
72
  @search_toolset.tool
69
- async def set_search_parameters(
73
+ async def start_new_search(
70
74
  ctx: RunContext[StateDeps[SearchState]],
71
75
  entity_type: EntityType,
72
76
  action: str | ActionType = ActionType.SELECT,
73
77
  ) -> StateSnapshotEvent:
74
- """Sets the initial search context, like the entity type and the user's query.
78
+ """Starts a completely new search, clearing all previous state.
75
79
 
76
- This MUST be the first tool called to start any new search.
77
- Warning: Calling this tool will erase any existing filters and search results from the state.
80
+ This MUST be the first tool called when the user asks for a NEW search.
81
+ Warning: This will erase any existing filters, results, and search state.
78
82
  """
79
- params = ctx.deps.state.parameters or {}
80
- is_new_search = params.get("entity_type") != entity_type.value
81
- final_query = (last_user_message(ctx) or "") if is_new_search else params.get("query", "")
83
+ final_query = last_user_message(ctx) or ""
82
84
 
83
85
  logger.debug(
84
- "Setting search parameters",
86
+ "Starting new search",
85
87
  entity_type=entity_type.value,
86
88
  action=action,
87
- is_new_search=is_new_search,
88
89
  query=final_query,
89
90
  )
90
91
 
91
- ctx.deps.state.parameters = {"action": action, "entity_type": entity_type, "filters": None, "query": final_query}
92
- ctx.deps.state.results = []
93
- logger.debug("Search parameters set", parameters=ctx.deps.state.parameters)
92
+ # Clear all state
93
+ ctx.deps.state.results_data = None
94
+ ctx.deps.state.export_data = None
95
+
96
+ # Set fresh parameters with no filters
97
+ _set_parameters(ctx, entity_type, action, final_query, None)
98
+
99
+ logger.debug("New search started", parameters=ctx.deps.state.parameters)
94
100
 
95
101
  return StateSnapshotEvent(
96
102
  type=EventType.STATE_SNAPSHOT,
@@ -102,7 +108,7 @@ async def set_search_parameters(
102
108
  async def set_filter_tree(
103
109
  ctx: RunContext[StateDeps[SearchState]],
104
110
  filters: FilterTree | None,
105
- ) -> StateSnapshotEvent:
111
+ ) -> StateDeltaEvent:
106
112
  """Replace current filters atomically with a full FilterTree, or clear with None.
107
113
 
108
114
  Requirements:
@@ -111,7 +117,7 @@ async def set_filter_tree(
111
117
  - See the FilterTree schema examples for the exact shape.
112
118
  """
113
119
  if ctx.deps.state.parameters is None:
114
- raise ModelRetry("Search parameters are not initialized. Call set_search_parameters first.")
120
+ raise ModelRetry("Search parameters are not initialized. Call start_new_search first.")
115
121
 
116
122
  entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
117
123
 
@@ -136,28 +142,33 @@ async def set_filter_tree(
136
142
  raise ModelRetry(f"Filter validation failed: {str(e)}. Please check your filter structure and try again.")
137
143
 
138
144
  filter_data = None if filters is None else filters.model_dump(mode="json", by_alias=True)
145
+ filters_existed = "filters" in ctx.deps.state.parameters
139
146
  ctx.deps.state.parameters["filters"] = filter_data
140
- return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=ctx.deps.state.model_dump())
147
+ return StateDeltaEvent(
148
+ type=EventType.STATE_DELTA,
149
+ delta=[
150
+ JSONPatchOp.upsert(
151
+ path="/parameters/filters",
152
+ value=filter_data,
153
+ existed=filters_existed,
154
+ )
155
+ ],
156
+ )
141
157
 
142
158
 
143
159
  @search_toolset.tool
144
- async def execute_search(
160
+ async def run_search(
145
161
  ctx: RunContext[StateDeps[SearchState]],
146
162
  limit: int = 10,
147
- ) -> StateSnapshotEvent:
148
- """Execute the search with the current parameters."""
163
+ ) -> StateDeltaEvent:
164
+ """Execute the search with the current parameters and save to database."""
149
165
  if not ctx.deps.state.parameters:
150
166
  raise ValueError("No search parameters set")
151
167
 
152
- entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
153
- param_class = PARAMETER_REGISTRY.get(entity_type)
154
- if not param_class:
155
- raise ValueError(f"Unknown entity type: {entity_type}")
156
-
157
- params = param_class(**ctx.deps.state.parameters)
168
+ params = BaseSearchParameters.create(**ctx.deps.state.parameters)
158
169
  logger.debug(
159
170
  "Executing database search",
160
- search_entity_type=entity_type.value,
171
+ search_entity_type=params.entity_type.value,
161
172
  limit=limit,
162
173
  has_filters=params.filters is not None,
163
174
  query=params.query,
@@ -169,17 +180,61 @@ async def execute_search(
169
180
 
170
181
  params.limit = limit
171
182
 
172
- fn = SEARCH_FN_MAP[entity_type]
173
- search_results = await fn(params)
183
+ changes: list[JSONPatchOp] = []
184
+
185
+ if not ctx.deps.state.run_id:
186
+ agent_run = AgentRunTable(agent_type="search")
187
+
188
+ db.session.add(agent_run)
189
+ db.session.commit()
190
+ db.session.expire_all() # Release connection to prevent stacking while agent runs
191
+
192
+ ctx.deps.state.run_id = agent_run.run_id
193
+ logger.debug("Created new agent run", run_id=str(agent_run.run_id))
194
+ changes.append(JSONPatchOp(op="add", path="/run_id", value=str(ctx.deps.state.run_id)))
195
+
196
+ # Get query with embedding and save to DB
197
+ search_response = await execute_search(params, db.session)
198
+ query_embedding = search_response.query_embedding
199
+ query_state = SearchQueryState(parameters=params, query_embedding=query_embedding)
200
+ query_number = db.session.query(SearchQueryTable).filter_by(run_id=ctx.deps.state.run_id).count() + 1
201
+ search_query = SearchQueryTable.from_state(
202
+ state=query_state,
203
+ run_id=ctx.deps.state.run_id,
204
+ query_number=query_number,
205
+ )
206
+ db.session.add(search_query)
207
+ db.session.commit()
208
+ db.session.expire_all()
209
+
210
+ query_id_existed = ctx.deps.state.query_id is not None
211
+ ctx.deps.state.query_id = search_query.query_id
212
+ logger.debug("Saved search query", query_id=str(search_query.query_id), query_number=query_number)
213
+ changes.append(JSONPatchOp.upsert(path="/query_id", value=str(ctx.deps.state.query_id), existed=query_id_existed))
174
214
 
175
215
  logger.debug(
176
216
  "Search completed",
177
- total_results=len(search_results.data) if search_results.data else 0,
217
+ total_results=len(search_response.results),
178
218
  )
179
219
 
180
- ctx.deps.state.results = search_results.data
220
+ # Store results data for both frontend display and agent context
221
+ results_url = f"{app_settings.BASE_URL}/api/search/queries/{ctx.deps.state.query_id}"
222
+
223
+ results_data_existed = ctx.deps.state.results_data is not None
224
+ ctx.deps.state.results_data = SearchResultsData(
225
+ query_id=str(ctx.deps.state.query_id),
226
+ results_url=results_url,
227
+ total_count=len(search_response.results),
228
+ message=f"Found {len(search_response.results)} results.",
229
+ results=search_response.results, # Include actual results in state
230
+ )
231
+ changes.append(
232
+ JSONPatchOp.upsert(
233
+ path="/results_data", value=ctx.deps.state.results_data.model_dump(), existed=results_data_existed
234
+ )
235
+ )
181
236
 
182
- return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=ctx.deps.state.model_dump())
237
+ return StateDeltaEvent(type=EventType.STATE_DELTA, delta=changes)
183
238
 
184
239
 
185
240
  @search_toolset.tool
@@ -256,3 +311,87 @@ async def get_valid_operators() -> dict[str, list[FilterOp]]:
256
311
  if hasattr(type_def, "operators"):
257
312
  operator_map[key] = type_def.operators
258
313
  return operator_map
314
+
315
+
316
+ @search_toolset.tool
317
+ async def fetch_entity_details(
318
+ ctx: RunContext[StateDeps[SearchState]],
319
+ limit: int = 10,
320
+ ) -> str:
321
+ """Fetch detailed entity information to answer user questions.
322
+
323
+ Use this tool when you need detailed information about entities from the search results
324
+ to answer the user's question. This provides the same detailed data that would be
325
+ included in an export (e.g., subscription status, product details, workflow info, etc.).
326
+
327
+ Args:
328
+ ctx: Runtime context for agent (injected).
329
+ limit: Maximum number of entities to fetch details for (default 10).
330
+
331
+ Returns:
332
+ JSON string containing detailed entity information.
333
+
334
+ Raises:
335
+ ValueError: If no search results are available.
336
+ """
337
+ if not ctx.deps.state.results_data or not ctx.deps.state.results_data.results:
338
+ raise ValueError("No search results available. Run a search first before fetching entity details.")
339
+
340
+ if not ctx.deps.state.parameters:
341
+ raise ValueError("No search parameters found.")
342
+
343
+ entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
344
+
345
+ entity_ids = [r.entity_id for r in ctx.deps.state.results_data.results[:limit]]
346
+
347
+ logger.debug(
348
+ "Fetching detailed entity data",
349
+ entity_type=entity_type.value,
350
+ entity_count=len(entity_ids),
351
+ )
352
+
353
+ detailed_data = fetch_export_data(entity_type, entity_ids)
354
+
355
+ return json.dumps(detailed_data, indent=2)
356
+
357
+
358
+ @search_toolset.tool
359
+ async def prepare_export(
360
+ ctx: RunContext[StateDeps[SearchState]],
361
+ ) -> StateSnapshotEvent:
362
+ """Prepares export URL using the last executed search query."""
363
+ if not ctx.deps.state.query_id or not ctx.deps.state.run_id:
364
+ raise ValueError("No search has been executed yet. Run a search first before exporting.")
365
+
366
+ if not ctx.deps.state.parameters:
367
+ raise ValueError("No search parameters found. Run a search first before exporting.")
368
+
369
+ # Validate that export is only available for SELECT actions
370
+ action = ctx.deps.state.parameters.get("action", ActionType.SELECT)
371
+ if action != ActionType.SELECT:
372
+ raise ValueError(
373
+ f"Export is only available for SELECT actions. Current action is '{action}'. "
374
+ "Please run a SELECT search first."
375
+ )
376
+
377
+ logger.debug(
378
+ "Prepared query for export",
379
+ query_id=str(ctx.deps.state.query_id),
380
+ )
381
+
382
+ download_url = f"{app_settings.BASE_URL}/api/search/queries/{ctx.deps.state.query_id}/export"
383
+
384
+ ctx.deps.state.export_data = ExportData(
385
+ query_id=str(ctx.deps.state.query_id),
386
+ download_url=download_url,
387
+ message="Export ready for download.",
388
+ )
389
+
390
+ logger.debug("Export data set in state", export_data=ctx.deps.state.export_data.model_dump())
391
+
392
+ # Should use StateDelta here? Use snapshot to workaround state persistence issue
393
+ # TODO: Fix root cause; state is empty on frontend when it should have data from run_search
394
+ return StateSnapshotEvent(
395
+ type=EventType.STATE_SNAPSHOT,
396
+ snapshot=ctx.deps.state.model_dump(),
397
+ )
@@ -34,3 +34,9 @@ class InvalidCursorError(SearchUtilsError):
34
34
  """Raised when cursor cannot be decoded."""
35
35
 
36
36
  pass
37
+
38
+
39
+ class QueryStateNotFoundError(SearchUtilsError):
40
+ """Raised when a query state cannot be found in the database."""
41
+
42
+ pass
@@ -289,6 +289,7 @@ class ExtractedField(NamedTuple):
289
289
  class IndexableRecord(TypedDict):
290
290
  entity_id: str
291
291
  entity_type: str
292
+ entity_title: str
292
293
  path: Ltree
293
294
  value: Any
294
295
  value_type: Any