orchestrator-core 4.5.2__py3-none-any.whl → 4.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. orchestrator/__init__.py +2 -2
  2. orchestrator/agentic_app.py +3 -23
  3. orchestrator/api/api_v1/api.py +5 -0
  4. orchestrator/api/api_v1/endpoints/agent.py +49 -0
  5. orchestrator/api/api_v1/endpoints/search.py +120 -201
  6. orchestrator/app.py +1 -1
  7. orchestrator/cli/database.py +3 -0
  8. orchestrator/cli/generate.py +11 -4
  9. orchestrator/cli/generator/generator/migration.py +7 -3
  10. orchestrator/cli/main.py +1 -1
  11. orchestrator/cli/scheduler.py +15 -22
  12. orchestrator/cli/search/resize_embedding.py +28 -22
  13. orchestrator/cli/search/speedtest.py +4 -6
  14. orchestrator/db/__init__.py +6 -0
  15. orchestrator/db/models.py +75 -0
  16. orchestrator/llm_settings.py +18 -1
  17. orchestrator/migrations/helpers.py +47 -39
  18. orchestrator/schedules/scheduler.py +32 -15
  19. orchestrator/schedules/validate_products.py +1 -1
  20. orchestrator/schemas/search.py +8 -85
  21. orchestrator/search/agent/__init__.py +2 -2
  22. orchestrator/search/agent/agent.py +26 -30
  23. orchestrator/search/agent/json_patch.py +51 -0
  24. orchestrator/search/agent/prompts.py +35 -9
  25. orchestrator/search/agent/state.py +28 -2
  26. orchestrator/search/agent/tools.py +192 -53
  27. orchestrator/search/core/embedding.py +2 -2
  28. orchestrator/search/core/exceptions.py +6 -0
  29. orchestrator/search/core/types.py +1 -0
  30. orchestrator/search/export.py +199 -0
  31. orchestrator/search/indexing/indexer.py +13 -4
  32. orchestrator/search/indexing/registry.py +14 -1
  33. orchestrator/search/llm_migration.py +55 -0
  34. orchestrator/search/retrieval/__init__.py +3 -2
  35. orchestrator/search/retrieval/builder.py +5 -1
  36. orchestrator/search/retrieval/engine.py +66 -23
  37. orchestrator/search/retrieval/pagination.py +46 -56
  38. orchestrator/search/retrieval/query_state.py +61 -0
  39. orchestrator/search/retrieval/retrievers/base.py +26 -40
  40. orchestrator/search/retrieval/retrievers/fuzzy.py +10 -9
  41. orchestrator/search/retrieval/retrievers/hybrid.py +11 -8
  42. orchestrator/search/retrieval/retrievers/semantic.py +9 -8
  43. orchestrator/search/retrieval/retrievers/structured.py +6 -6
  44. orchestrator/search/schemas/parameters.py +17 -13
  45. orchestrator/search/schemas/results.py +4 -1
  46. orchestrator/settings.py +1 -0
  47. orchestrator/utils/auth.py +3 -2
  48. orchestrator/workflow.py +23 -6
  49. orchestrator/workflows/tasks/validate_product_type.py +3 -3
  50. {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/METADATA +17 -12
  51. {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/RECORD +53 -49
  52. {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/WHEEL +0 -0
  53. {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/licenses/LICENSE +0 -0

--- a/orchestrator/schemas/search.py
+++ b/orchestrator/schemas/search.py
@@ -11,14 +11,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from datetime import datetime
-from typing import Any, Generic, TypeVar
-from uuid import UUID
+from typing import Generic, TypeVar
 
 from pydantic import BaseModel, ConfigDict, Field
 
 from orchestrator.search.core.types import SearchMetadata
-from orchestrator.search.schemas.results import ComponentInfo, LeafInfo, MatchingField
+from orchestrator.search.schemas.results import ComponentInfo, LeafInfo
 
 T = TypeVar("T")
 
@@ -36,95 +34,20 @@ class ProductSchema(BaseModel):
     product_type: str
 
 
-class SubscriptionSearchResult(BaseModel):
-    score: float
-    perfect_match: int
-    matching_field: MatchingField | None = None
-    subscription: dict[str, Any]
-
-
 class SearchResultsSchema(BaseModel, Generic[T]):
     data: list[T] = Field(default_factory=list)
     page_info: PageInfoSchema = Field(default_factory=PageInfoSchema)
     search_metadata: SearchMetadata | None = None
 
 
-class WorkflowProductSchema(BaseModel):
-    """Product associated with a workflow."""
-
-    model_config = ConfigDict(from_attributes=True)
-
-    product_type: str
-    product_id: UUID
-    name: str
-
-
-class WorkflowSearchSchema(BaseModel):
-    """Schema for workflow search results."""
-
-    model_config = ConfigDict(from_attributes=True)
-
-    name: str
-    products: list[WorkflowProductSchema]
-    description: str | None = None
-    created_at: datetime | None = None
-
-
-class ProductSearchSchema(BaseModel):
-    """Schema for product search results."""
-
-    model_config = ConfigDict(from_attributes=True)
-
-    product_id: UUID
-    name: str
-    product_type: str
-    tag: str | None = None
-    description: str | None = None
-    status: str | None = None
-    created_at: datetime | None = None
-
-
-class ProcessSearchSchema(BaseModel):
-    """Schema for process search results."""
-
-    model_config = ConfigDict(from_attributes=True)
-
-    process_id: UUID
-    workflow_name: str
-    workflow_id: UUID
-    last_status: str
-    is_task: bool
-    created_by: str | None = None
-    started_at: datetime
-    last_modified_at: datetime
-    last_step: str | None = None
-    failed_reason: str | None = None
-    subscription_ids: list[UUID] | None = None
-
-
-class WorkflowSearchResult(BaseModel):
-    score: float
-    perfect_match: int
-    matching_field: MatchingField | None = None
-    workflow: WorkflowSearchSchema
-
-
-class ProductSearchResult(BaseModel):
-    score: float
-    perfect_match: int
-    matching_field: MatchingField | None = None
-    product: ProductSearchSchema
-
-
-class ProcessSearchResult(BaseModel):
-    score: float
-    perfect_match: int
-    matching_field: MatchingField | None = None
-    process: ProcessSearchSchema
-
-
 class PathsResponse(BaseModel):
     leaves: list[LeafInfo]
     components: list[ComponentInfo]
 
     model_config = ConfigDict(extra="forbid", use_enum_values=True)
+
+
+class ExportResponse(BaseModel):
+    page: list[dict]
+
+    model_config = ConfigDict(extra="forbid")
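
The per-entity result wrappers (SubscriptionSearchResult, WorkflowSearchResult, ProductSearchResult, ProcessSearchResult and their schemas) are gone; results now travel in the generic SearchResultsSchema[T], and ExportResponse is the new envelope for export pages. A minimal sketch of the surviving models in use, assuming the module path from the file list above; ResultItem and all example values are illustrative and not part of the package:

    from pydantic import BaseModel

    from orchestrator.schemas.search import ExportResponse, SearchResultsSchema


    class ResultItem(BaseModel):
        # Hypothetical item shape; callers parametrize SearchResultsSchema with
        # whatever per-entity model their endpoint actually returns.
        entity_id: str
        title: str
        score: float


    # One generic envelope now serves every entity type.
    results = SearchResultsSchema[ResultItem](
        data=[ResultItem(entity_id="abc-123", title="example subscription", score=0.92)],
    )

    # Export pages are returned as plain dicts, one list per page.
    export = ExportResponse(page=[{"entity_id": "abc-123", "title": "example subscription"}])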

--- a/orchestrator/search/agent/__init__.py
+++ b/orchestrator/search/agent/__init__.py
@@ -14,8 +14,8 @@
 # This module requires: pydantic-ai==0.7.0, ag-ui-protocol>=0.1.8
 
 
-from orchestrator.search.agent.agent import build_agent_router
+from orchestrator.search.agent.agent import build_agent_instance
 
 __all__ = [
-    "build_agent_router",
+    "build_agent_instance",
 ]

--- a/orchestrator/search/agent/agent.py
+++ b/orchestrator/search/agent/agent.py
@@ -14,13 +14,11 @@
 from typing import Any
 
 import structlog
-from fastapi import APIRouter, HTTPException, Request
-from pydantic_ai.ag_ui import StateDeps, handle_ag_ui_request
+from pydantic_ai.ag_ui import StateDeps
 from pydantic_ai.agent import Agent
-from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.models.openai import OpenAIChatModel
 from pydantic_ai.settings import ModelSettings
 from pydantic_ai.toolsets import FunctionToolset
-from starlette.responses import Response
 
 from orchestrator.search.agent.prompts import get_base_instructions, get_dynamic_instructions
 from orchestrator.search.agent.state import SearchState
@@ -29,34 +27,32 @@ from orchestrator.search.agent.tools import search_toolset
 logger = structlog.get_logger(__name__)
 
 
-def build_agent_router(model: str | OpenAIModel, toolsets: list[FunctionToolset[Any]] | None = None) -> APIRouter:
-    router = APIRouter()
+def build_agent_instance(
+    model: str | OpenAIChatModel, agent_tools: list[FunctionToolset[Any]] | None = None
+) -> Agent[StateDeps[SearchState], str]:
+    """Build and configure the search agent instance.
 
-    try:
-        toolsets = toolsets + [search_toolset] if toolsets else [search_toolset]
+    Args:
+        model: The LLM model to use (string or OpenAIChatModel instance)
+        agent_tools: Optional list of additional toolsets to include
 
-        agent = Agent(
-            model=model,
-            deps_type=StateDeps[SearchState],
-            model_settings=ModelSettings(
-                parallel_tool_calls=False,
-            ),  # https://github.com/pydantic/pydantic-ai/issues/562
-            toolsets=toolsets,
-        )
-        agent.instructions(get_base_instructions)
-        agent.instructions(get_dynamic_instructions)
+    Returns:
+        Configured Agent instance with StateDeps[SearchState] dependencies
 
-        @router.post("/")
-        async def agent_endpoint(request: Request) -> Response:
-            return await handle_ag_ui_request(agent, request, deps=StateDeps(SearchState()))
+    Raises:
+        Exception: If agent initialization fails
+    """
+    toolsets = agent_tools + [search_toolset] if agent_tools else [search_toolset]
 
-        return router
-    except Exception as e:
-        logger.error("Agent init failed; serving disabled stub.", error=str(e))
-        error_msg = f"Agent disabled: {str(e)}"
+    agent = Agent(
+        model=model,
+        deps_type=StateDeps[SearchState],
+        model_settings=ModelSettings(
+            parallel_tool_calls=False,
+        ),  # https://github.com/pydantic/pydantic-ai/issues/562
+        toolsets=toolsets,
+    )
+    agent.instructions(get_base_instructions)
+    agent.instructions(get_dynamic_instructions)
 
-    @router.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"])
-    async def _disabled(path: str) -> None:
-        raise HTTPException(status_code=503, detail=error_msg)
-
-    return router
+    return agent
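
With the FastAPI wiring removed from this module, the caller now owns the HTTP layer that build_agent_router used to create internally (the new orchestrator/api/api_v1/endpoints/agent.py presumably does this). A hedged sketch of one way to mount the agent, reusing the handle_ag_ui_request helper that the old code imported; the route path and model name are assumptions:

    from fastapi import APIRouter, Request
    from pydantic_ai.ag_ui import StateDeps, handle_ag_ui_request
    from starlette.responses import Response

    from orchestrator.search.agent import build_agent_instance
    from orchestrator.search.agent.state import SearchState

    router = APIRouter()
    # "openai:gpt-4o" is only an example; any model accepted by pydantic-ai works here.
    agent = build_agent_instance("openai:gpt-4o")


    @router.post("/agent")
    async def agent_endpoint(request: Request) -> Response:
        # The same call the 4.5.2 router made internally, now made by the caller.
        return await handle_ag_ui_request(agent, request, deps=StateDeps(SearchState()))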

--- /dev/null
+++ b/orchestrator/search/agent/json_patch.py
@@ -0,0 +1,51 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+
+class JSONPatchOp(BaseModel):
+    """A JSON Patch operation (RFC 6902).
+
+    Docs reference: https://docs.ag-ui.com/concepts/state
+    """
+
+    op: Literal["add", "remove", "replace", "move", "copy", "test"] = Field(
+        description="The operation to perform: add, remove, replace, move, copy, or test"
+    )
+    path: str = Field(description="JSON Pointer (RFC 6901) to the target location")
+    value: Any | None = Field(
+        default=None,
+        description="The value to apply (for add, replace operations)",
+    )
+    from_: str | None = Field(
+        default=None,
+        alias="from",
+        description="Source path (for move, copy operations)",
+    )
+
+    @classmethod
+    def upsert(cls, path: str, value: Any, existed: bool) -> "JSONPatchOp":
+        """Create an add or replace operation depending on whether the path existed.
+
+        Args:
+            path: JSON Pointer path to the target location
+            value: The value to set
+            existed: True if the path already exists (use replace), False otherwise (use add)
+
+        Returns:
+            JSONPatchOp with 'replace' if existed is True, 'add' otherwise
+        """
+        return cls(op="replace" if existed else "add", path=path, value=value)
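
A short usage sketch of the new helper; the pointer path and value below are made up for illustration, and applying the resulting patch is left to the AG-UI state layer:

    from orchestrator.search.agent.json_patch import JSONPatchOp

    # Emits "add" for a key that is not in the state yet, "replace" otherwise.
    op = JSONPatchOp.upsert(path="/parameters/entity_type", value="SUBSCRIPTION", existed=False)
    assert op.op == "add"

    # Dumping by alias restores the RFC 6902 field name "from" for move/copy ops.
    payload = op.model_dump(by_alias=True, exclude_none=True)
    # -> {'op': 'add', 'path': '/parameters/entity_type', 'value': 'SUBSCRIPTION'}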

--- a/orchestrator/search/agent/prompts.py
+++ b/orchestrator/search/agent/prompts.py
@@ -50,14 +50,15 @@ async def get_base_instructions() -> str:
 
     Follow these steps in strict order:
 
-    1. **Set Context**: Always begin by calling `set_search_parameters`.
+    1. **Set Context**: If the user is asking for a NEW search, call `start_new_search`.
     2. **Analyze for Filters**: Based on the user's request, decide if specific filters are necessary.
        - **If filters ARE required**, follow these sub-steps:
          a. **Gather Intel**: Identify all needed field names, then call `discover_filter_paths` and `get_valid_operators` **once each** to get all required information.
          b. **Construct FilterTree**: Build the `FilterTree` object.
          c. **Set Filters**: Call `set_filter_tree`.
-    3. **Execute**: Call `execute_search`. This is done for both filtered and non-filtered searches.
-    4. **Report**: Answer the users' question directly and summarize when appropiate.
+    3. **Execute**: Call `run_search`. This is done for both filtered and non-filtered searches.
+
+    After search execution, follow the dynamic instructions based on the current state.
 
     ---
     ### 4. Critical Rules
@@ -73,28 +74,53 @@ async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> s
     """Dynamically provides 'next step' coaching based on the current state."""
     state = ctx.deps.state
     param_state_str = json.dumps(state.parameters, indent=2, default=str) if state.parameters else "Not set."
+    results_count = state.results_data.total_count if state.results_data else 0
 
-    next_step_guidance = ""
-    if not state.parameters or not state.parameters.get("entity_type"):
+    if state.export_data:
+        next_step_guidance = (
+            "INSTRUCTION: Export has been prepared successfully. "
+            "Simply confirm to the user that the export is ready for download. "
+            "DO NOT include or mention the download URL - the UI will display it automatically."
+        )
+    elif not state.parameters or not state.parameters.get("entity_type"):
         next_step_guidance = (
-            "INSTRUCTION: The search context is not set. Your next action is to call `set_search_parameters`."
+            "INSTRUCTION: The search context is not set. Your next action is to call `start_new_search`."
+        )
+    elif results_count > 0:
+        next_step_guidance = dedent(
+            f"""
+            INSTRUCTION: Search completed successfully.
+            Found {results_count} results containing only: entity_id, title, score.
+
+            Choose your next action based on what the user requested:
+            1. **Broad/generic search** (e.g., 'show me subscriptions'): Confirm search completed and report count. Do nothing else.
+            2. **Question answerable with entity_id/title/score**: Answer directly using the current results.
+            3. **Question requiring other details**: Call `fetch_entity_details` first, then answer with the detailed data.
+            4. **Export request** (phrases like 'export', 'download', 'save as CSV'): Call `prepare_export` directly.
+            """
         )
     else:
         next_step_guidance = (
            "INSTRUCTION: Context is set. Now, analyze the user's request. "
            "If specific filters ARE required, use the information-gathering tools to build a `FilterTree` and call `set_filter_tree`. "
-            "If no specific filters are needed, you can proceed directly to `execute_search`."
+            "If no specific filters are needed, you can proceed directly to `run_search`."
        )
+
    return dedent(
        f"""
        ---
-        ### Current State & Next Action
+        ## CURRENT STATE
 
        **Current Search Parameters:**
        ```json
        {param_state_str}
        ```
 
-        **{next_step_guidance}**
+        **Current Results Count:** {results_count}
+
+        ---
+        ## NEXT ACTION REQUIRED
+
+        {next_step_guidance}
        """
    )

--- a/orchestrator/search/agent/state.py
+++ b/orchestrator/search/agent/state.py
@@ -12,10 +12,36 @@
 # limitations under the License.
 
 from typing import Any
+from uuid import UUID
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
+
+from orchestrator.search.schemas.results import SearchResult
+
+
+class ExportData(BaseModel):
+    """Export metadata for download."""
+
+    action: str = "export"
+    query_id: str
+    download_url: str
+    message: str
+
+
+class SearchResultsData(BaseModel):
+    """Search results data for frontend display and agent context."""
+
+    action: str = "view_results"
+    query_id: str
+    results_url: str
+    total_count: int
+    message: str
+    results: list[SearchResult] = []
 
 
 class SearchState(BaseModel):
+    run_id: UUID | None = None
+    query_id: UUID | None = None
     parameters: dict[str, Any] | None = None
-    results: list[dict[str, Any]] = Field(default_factory=list)
+    results_data: SearchResultsData | None = None
+    export_data: ExportData | None = None
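
SearchState now tracks run and query identifiers plus structured result and export payloads instead of a raw results list. A hedged sketch of how the richer state might be populated; every identifier, URL, and message below is a placeholder:

    from uuid import uuid4

    from orchestrator.search.agent.state import ExportData, SearchResultsData, SearchState

    state = SearchState(
        run_id=uuid4(),
        query_id=uuid4(),
        parameters={"entity_type": "SUBSCRIPTION"},  # placeholder parameters
        results_data=SearchResultsData(
            query_id="q-1",                     # placeholder query id
            results_url="/search/queries/q-1",  # placeholder URL
            total_count=1,
            message="Found 1 result",
            results=[],                         # SearchResult items omitted here
        ),
    )

    # Export metadata is attached separately once an export has been prepared.
    state.export_data = ExportData(
        query_id="q-1",
        download_url="/search/exports/q-1",     # placeholder URL
        message="Export ready",
    )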