orchestrator-core 4.4.1__py3-none-any.whl → 4.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +17 -2
- orchestrator/agentic_app.py +103 -0
- orchestrator/api/api_v1/api.py +14 -2
- orchestrator/api/api_v1/endpoints/processes.py +2 -0
- orchestrator/api/api_v1/endpoints/search.py +296 -0
- orchestrator/app.py +32 -0
- orchestrator/cli/main.py +22 -1
- orchestrator/cli/search/__init__.py +32 -0
- orchestrator/cli/search/index_llm.py +73 -0
- orchestrator/cli/search/resize_embedding.py +135 -0
- orchestrator/cli/search/search_explore.py +208 -0
- orchestrator/cli/search/speedtest.py +151 -0
- orchestrator/db/models.py +37 -1
- orchestrator/devtools/populator.py +16 -0
- orchestrator/domain/base.py +2 -7
- orchestrator/domain/lifecycle.py +24 -7
- orchestrator/llm_settings.py +57 -0
- orchestrator/log_config.py +1 -0
- orchestrator/migrations/helpers.py +7 -1
- orchestrator/schemas/search.py +130 -0
- orchestrator/schemas/workflow.py +1 -0
- orchestrator/search/__init__.py +12 -0
- orchestrator/search/agent/__init__.py +21 -0
- orchestrator/search/agent/agent.py +62 -0
- orchestrator/search/agent/prompts.py +100 -0
- orchestrator/search/agent/state.py +21 -0
- orchestrator/search/agent/tools.py +258 -0
- orchestrator/search/core/__init__.py +12 -0
- orchestrator/search/core/embedding.py +73 -0
- orchestrator/search/core/exceptions.py +36 -0
- orchestrator/search/core/types.py +296 -0
- orchestrator/search/core/validators.py +40 -0
- orchestrator/search/docs/index.md +37 -0
- orchestrator/search/docs/running_local_text_embedding_inference.md +46 -0
- orchestrator/search/filters/__init__.py +40 -0
- orchestrator/search/filters/base.py +295 -0
- orchestrator/search/filters/date_filters.py +88 -0
- orchestrator/search/filters/definitions.py +107 -0
- orchestrator/search/filters/ltree_filters.py +56 -0
- orchestrator/search/filters/numeric_filter.py +73 -0
- orchestrator/search/indexing/__init__.py +16 -0
- orchestrator/search/indexing/indexer.py +334 -0
- orchestrator/search/indexing/registry.py +101 -0
- orchestrator/search/indexing/tasks.py +69 -0
- orchestrator/search/indexing/traverse.py +334 -0
- orchestrator/search/llm_migration.py +108 -0
- orchestrator/search/retrieval/__init__.py +16 -0
- orchestrator/search/retrieval/builder.py +123 -0
- orchestrator/search/retrieval/engine.py +154 -0
- orchestrator/search/retrieval/exceptions.py +90 -0
- orchestrator/search/retrieval/pagination.py +96 -0
- orchestrator/search/retrieval/retrievers/__init__.py +26 -0
- orchestrator/search/retrieval/retrievers/base.py +123 -0
- orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
- orchestrator/search/retrieval/retrievers/hybrid.py +277 -0
- orchestrator/search/retrieval/retrievers/semantic.py +94 -0
- orchestrator/search/retrieval/retrievers/structured.py +39 -0
- orchestrator/search/retrieval/utils.py +120 -0
- orchestrator/search/retrieval/validation.py +152 -0
- orchestrator/search/schemas/__init__.py +12 -0
- orchestrator/search/schemas/parameters.py +129 -0
- orchestrator/search/schemas/results.py +77 -0
- orchestrator/services/processes.py +2 -1
- orchestrator/services/settings_env_variables.py +2 -2
- orchestrator/settings.py +8 -1
- orchestrator/utils/state.py +6 -1
- orchestrator/workflows/steps.py +15 -1
- orchestrator/workflows/tasks/validate_products.py +1 -1
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/METADATA +15 -8
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/RECORD +72 -22
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
from pydantic import Field, field_validator
|
|
14
|
+
from pydantic_settings import BaseSettings
|
|
15
|
+
from structlog import get_logger
|
|
16
|
+
|
|
17
|
+
logger = get_logger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class LLMSettings(BaseSettings):
    """Settings for the optional LLM-backed search and agent features.

    All values can be provided via environment variables (pydantic-settings).
    Both features are off by default; enabling `SEARCH_ENABLED` or
    `AGENT_ENABLED` requires valid provider credentials below.
    """

    # Feature flags for LLM functionality
    SEARCH_ENABLED: bool = False  # Enable search/indexing with embeddings
    AGENT_ENABLED: bool = False  # Enable agentic functionality

    # Pydantic-ai Agent settings
    AGENT_MODEL: str = "gpt-4o-mini"  # See pydantic-ai docs for supported models.
    AGENT_MODEL_VERSION: str = "2025-01-01-preview"
    OPENAI_API_KEY: str = ""  # Change per provider (Azure, etc).
    # Embedding settings
    EMBEDDING_DIMENSION: int = 1536
    EMBEDDING_MODEL: str = "openai/text-embedding-3-small"  # See litellm docs for supported models.
    EMBEDDING_SAFE_MARGIN_PERCENT: float = Field(
        0.1, description="Safety margin as a percentage (e.g., 0.1 for 10%) for token budgeting.", ge=0, le=1
    )

    # The following settings are only needed for local models or system constraints.
    # By default, they are set conservative assuming a small model like All-MiniLM-L6-V2.
    OPENAI_BASE_URL: str | None = None
    EMBEDDING_FALLBACK_MAX_TOKENS: int | None = 512
    EMBEDDING_MAX_BATCH_SIZE: int | None = None

    # General LiteLLM settings
    LLM_MAX_RETRIES: int = 3
    LLM_TIMEOUT: int = 30

    # Toggle creation of extensions
    # NOTE(review): "EXTENTION" is a misspelling, but it is a public env-var
    # name — renaming it would break existing deployments, so it is kept.
    LLM_FORCE_EXTENTION_MIGRATION: bool = False

    @field_validator("EMBEDDING_MODEL")
    @classmethod
    def validate_embedding_model_format(cls, v: str) -> str:
        """Validate that embedding model is in 'vendor/model' format."""
        if "/" not in v:
            raise ValueError("EMBEDDING_MODEL must be in format 'vendor/model'")
        return v


# Module-level singleton used throughout the orchestrator.
llm_settings = LLMSettings()
|
orchestrator/log_config.py
CHANGED
|
@@ -155,7 +155,7 @@ def create_workflow(conn: sa.engine.Connection, workflow: dict) -> None:
|
|
|
155
155
|
conn: DB connection as available in migration main file.
|
|
156
156
|
workflow: Dict with data for a new workflow.
|
|
157
157
|
name: Name of the workflow.
|
|
158
|
-
target: Target of the workflow ("CREATE", "MODIFY", "TERMINATE", "SYSTEM")
|
|
158
|
+
target: Target of the workflow ("CREATE", "MODIFY", "RECONCILE", "TERMINATE", "SYSTEM")
|
|
159
159
|
description: Description of the workflow.
|
|
160
160
|
product_type: Product type to add the workflow to.
|
|
161
161
|
|
|
@@ -166,12 +166,16 @@ def create_workflow(conn: sa.engine.Connection, workflow: dict) -> None:
|
|
|
166
166
|
"is_task": False,
|
|
167
167
|
"description": "workflow description",
|
|
168
168
|
"product_type": "product_type",
|
|
169
|
+
"product_tag": "product_tag",
|
|
169
170
|
}
|
|
170
171
|
>>> create_workflow(conn, workflow)
|
|
171
172
|
"""
|
|
172
173
|
if not workflow.get("is_task", False):
|
|
173
174
|
workflow["is_task"] = False
|
|
174
175
|
|
|
176
|
+
if not workflow.get("product_tag"):
|
|
177
|
+
workflow["product_tag"] = None
|
|
178
|
+
|
|
175
179
|
if has_table_column(table_name="workflows", column_name="is_task", conn=conn):
|
|
176
180
|
query = """
|
|
177
181
|
WITH new_workflow AS (
|
|
@@ -186,6 +190,7 @@ def create_workflow(conn: sa.engine.Connection, workflow: dict) -> None:
|
|
|
186
190
|
FROM products AS p
|
|
187
191
|
CROSS JOIN new_workflow AS nw
|
|
188
192
|
WHERE p.product_type = :product_type
|
|
193
|
+
AND (:product_tag IS NULL OR p.tag = :product_tag)
|
|
189
194
|
ON CONFLICT DO NOTHING
|
|
190
195
|
"""
|
|
191
196
|
else:
|
|
@@ -203,6 +208,7 @@ def create_workflow(conn: sa.engine.Connection, workflow: dict) -> None:
|
|
|
203
208
|
FROM products AS p
|
|
204
209
|
CROSS JOIN new_workflow AS nw
|
|
205
210
|
WHERE p.product_type = :product_type
|
|
211
|
+
AND (:product_tag IS NULL OR p.tag = :product_tag)
|
|
206
212
|
ON CONFLICT DO NOTHING
|
|
207
213
|
"""
|
|
208
214
|
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from typing import Any, Generic, TypeVar
|
|
16
|
+
from uuid import UUID
|
|
17
|
+
|
|
18
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
19
|
+
|
|
20
|
+
from orchestrator.search.core.types import SearchMetadata
|
|
21
|
+
from orchestrator.search.schemas.results import ComponentInfo, LeafInfo, MatchingField
|
|
22
|
+
|
|
23
|
+
T = TypeVar("T")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class PageInfoSchema(BaseModel):
    """Cursor-based pagination info returned alongside search results."""

    # True when a further page of results is available.
    has_next_page: bool = False
    # Opaque cursor to request the next page, if any.
    next_page_cursor: str | None = None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ProductSchema(BaseModel):
    """Minimal product representation (built from ORM attributes)."""

    model_config = ConfigDict(from_attributes=True)

    name: str
    tag: str
    product_type: str
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class SubscriptionSearchResult(BaseModel):
    """Single subscription hit with its relevance metadata."""

    # Relevance score assigned by the retriever.
    score: float
    # Non-zero when the query matched a field exactly — TODO confirm semantics in retrieval code.
    perfect_match: int
    matching_field: MatchingField | None = None
    # Serialized subscription payload; shape depends on the retrieval query.
    subscription: dict[str, Any]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class SearchResultsSchema(BaseModel, Generic[T]):
    """Generic paginated envelope for search results of any entity type."""

    data: list[T] = Field(default_factory=list)
    page_info: PageInfoSchema = Field(default_factory=PageInfoSchema)
    search_metadata: SearchMetadata | None = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class WorkflowProductSchema(BaseModel):
    """Product associated with a workflow."""

    model_config = ConfigDict(from_attributes=True)

    product_type: str
    product_id: UUID
    name: str
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class WorkflowSearchSchema(BaseModel):
    """Schema for workflow search results."""

    model_config = ConfigDict(from_attributes=True)

    name: str
    # All products this workflow is attached to.
    products: list[WorkflowProductSchema]
    description: str | None = None
    created_at: datetime | None = None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class ProductSearchSchema(BaseModel):
    """Schema for product search results."""

    model_config = ConfigDict(from_attributes=True)

    product_id: UUID
    name: str
    product_type: str
    tag: str | None = None
    description: str | None = None
    status: str | None = None
    created_at: datetime | None = None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class ProcessSearchSchema(BaseModel):
    """Schema for process search results."""

    model_config = ConfigDict(from_attributes=True)

    process_id: UUID
    workflow_name: str
    workflow_id: UUID
    last_status: str
    # True for scheduled tasks, False for user-initiated workflows — TODO confirm against db model.
    is_task: bool
    created_by: str | None = None
    started_at: datetime
    last_modified_at: datetime
    last_step: str | None = None
    # Populated only for failed processes.
    failed_reason: str | None = None
    subscription_ids: list[UUID] | None = None
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class WorkflowSearchResult(BaseModel):
    """Single workflow hit with its relevance metadata."""

    score: float
    perfect_match: int
    matching_field: MatchingField | None = None
    workflow: WorkflowSearchSchema
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class ProductSearchResult(BaseModel):
    """Single product hit with its relevance metadata."""

    score: float
    perfect_match: int
    matching_field: MatchingField | None = None
    product: ProductSearchSchema
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class ProcessSearchResult(BaseModel):
    """Single process hit with its relevance metadata."""

    score: float
    perfect_match: int
    matching_field: MatchingField | None = None
    process: ProcessSearchSchema
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class PathsResponse(BaseModel):
    """Discoverable filter paths: terminal leaves and intermediate components."""

    leaves: list[LeafInfo]
    components: list[ComponentInfo]

    # extra="forbid" rejects unknown keys; use_enum_values serializes enums by value.
    model_config = ConfigDict(extra="forbid", use_enum_values=True)
|
orchestrator/schemas/workflow.py
CHANGED
|
@@ -60,6 +60,7 @@ class SubscriptionWorkflowListsSchema(OrchestratorBaseModel):
|
|
|
60
60
|
modify: list[WorkflowListItemSchema]
|
|
61
61
|
terminate: list[WorkflowListItemSchema]
|
|
62
62
|
system: list[WorkflowListItemSchema]
|
|
63
|
+
reconcile: list[WorkflowListItemSchema]
|
|
63
64
|
validate_: list[WorkflowListItemSchema] = Field(default_factory=list, alias="validate")
|
|
64
65
|
|
|
65
66
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
# This module requires: pydantic-ai==0.7.0, ag-ui-protocol>=0.1.8
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
from orchestrator.search.agent.agent import build_agent_router
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"build_agent_router",
|
|
21
|
+
]
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import structlog
|
|
17
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
18
|
+
from pydantic_ai.ag_ui import StateDeps, handle_ag_ui_request
|
|
19
|
+
from pydantic_ai.agent import Agent
|
|
20
|
+
from pydantic_ai.models.openai import OpenAIModel
|
|
21
|
+
from pydantic_ai.settings import ModelSettings
|
|
22
|
+
from pydantic_ai.toolsets import FunctionToolset
|
|
23
|
+
from starlette.responses import Response
|
|
24
|
+
|
|
25
|
+
from orchestrator.search.agent.prompts import get_base_instructions, get_dynamic_instructions
|
|
26
|
+
from orchestrator.search.agent.state import SearchState
|
|
27
|
+
from orchestrator.search.agent.tools import search_toolset
|
|
28
|
+
|
|
29
|
+
logger = structlog.get_logger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def build_agent_router(model: str | OpenAIModel, toolsets: list[FunctionToolset[Any]] | None = None) -> APIRouter:
    """Build a FastAPI router that serves the search agent over the AG-UI protocol.

    Args:
        model: Model name or configured OpenAI model for the pydantic-ai Agent.
        toolsets: Optional extra toolsets; the built-in search toolset is always appended.

    Returns:
        A router with a POST "/" endpoint. If agent construction fails, a
        catch-all stub that answers 503 is served instead, so the app still boots.
    """
    router = APIRouter()

    try:
        combined_toolsets = [*(toolsets or []), search_toolset]

        agent = Agent(
            model=model,
            deps_type=StateDeps[SearchState],
            # Parallel tool calls are disabled deliberately:
            # https://github.com/pydantic/pydantic-ai/issues/562
            model_settings=ModelSettings(parallel_tool_calls=False),
            toolsets=combined_toolsets,
        )
        agent.instructions(get_base_instructions)
        agent.instructions(get_dynamic_instructions)

        @router.post("/")
        async def agent_endpoint(request: Request) -> Response:
            return await handle_ag_ui_request(agent, request, deps=StateDeps(SearchState()))

    except Exception as e:
        # Best-effort: keep the API up and report the failure on every route.
        logger.error("Agent init failed; serving disabled stub.", error=str(e))
        error_msg = f"Agent disabled: {str(e)}"

        @router.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"])
        async def _disabled(path: str) -> None:
            raise HTTPException(status_code=503, detail=error_msg)

    return router
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
from textwrap import dedent
|
|
16
|
+
|
|
17
|
+
import structlog
|
|
18
|
+
from pydantic_ai import RunContext
|
|
19
|
+
from pydantic_ai.ag_ui import StateDeps
|
|
20
|
+
|
|
21
|
+
from orchestrator.search.agent.state import SearchState
|
|
22
|
+
|
|
23
|
+
logger = structlog.get_logger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
async def get_base_instructions() -> str:
    """Return the static system prompt for the search agent.

    Fixes two defects in the prompt text: the typo "appropiate" and a
    reference to a non-existent tool name (`get_filter_operators`) — the
    tool advertised in section 2 is `get_valid_operators`.
    """
    return dedent(
        """
        You are an expert assistant designed to find relevant information by building and running database queries.

        ---
        ### 1. Your Goal and Method

        Your ultimate goal is to **find information** that answers the user's request.

        To do this, you will perform either a broad search or a filtered search.
        For **filtered searches**, your primary method is to **construct a valid `FilterTree` object**.
        To do this correctly, you must infer the exact structure, operators, and nesting rules from the Pydantic schema of the `set_filter_tree` tool itself.

        ---
        ### 2. Information-Gathering Tools

        **If you determine that a `FilterTree` is needed**, use these tools to gather information first:

        - **discover_filter_paths(field_names: list[str])**: Use this to discover all valid filter paths for a list of field names in a single call.
        - **get_valid_operators()**: Use this to get the JSON map of all valid operators for each field type.

        ---
        ### 3. Execution Workflow

        Follow these steps in strict order:

        1. **Set Context**: Always begin by calling `set_search_parameters`.
        2. **Analyze for Filters**: Based on the user's request, decide if specific filters are necessary.
           - **If filters ARE required**, follow these sub-steps:
             a. **Gather Intel**: Identify all needed field names, then call `discover_filter_paths` and `get_valid_operators` **once each** to get all required information.
             b. **Construct FilterTree**: Build the `FilterTree` object.
             c. **Set Filters**: Call `set_filter_tree`.
        3. **Execute**: Call `execute_search`. This is done for both filtered and non-filtered searches.
        4. **Report**: Answer the user's question directly and summarize when appropriate.

        ---
        ### 4. Critical Rules

        - **NEVER GUESS PATHS IN THE DATABASE**: You *must* verify every filter path by calling `discover_filter_paths` first. If a path does not exist, you may attempt to map the question onto existing paths that are valid and available from `discover_filter_paths`. If you cannot infer a match, inform the user and do not include it in the `FilterTree`.
        - **USE FULL PATHS**: Always use the full, unambiguous path returned by the discovery tool.
        - **MATCH OPERATORS**: Only use operators that are compatible with the field type as confirmed by `get_valid_operators`.
        """
    )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> str:
    """Dynamically provides 'next step' coaching based on the current state."""
    state = ctx.deps.state

    if state.parameters:
        param_state_str = json.dumps(state.parameters, indent=2, default=str)
    else:
        param_state_str = "Not set."

    # Context counts as "set" only once an entity_type has been chosen.
    if state.parameters and state.parameters.get("entity_type"):
        next_step_guidance = (
            "INSTRUCTION: Context is set. Now, analyze the user's request. "
            "If specific filters ARE required, use the information-gathering tools to build a `FilterTree` and call `set_filter_tree`. "
            "If no specific filters are needed, you can proceed directly to `execute_search`."
        )
    else:
        next_step_guidance = (
            "INSTRUCTION: The search context is not set. Your next action is to call `set_search_parameters`."
        )

    return dedent(
        f"""
        ---
        ### Current State & Next Action

        **Current Search Parameters:**
        ```json
        {param_state_str}
        ```

        **{next_step_guidance}**
        """
    )
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright 2019-2025 SURF, GÉANT.
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel, Field
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SearchState(BaseModel):
    """Mutable AG-UI session state carried across agent tool calls."""

    # Accumulated search parameters; presumably written by the search tools — confirm in tools.py.
    parameters: dict[str, Any] | None = None
    # Result rows from the most recent search execution.
    results: list[dict[str, Any]] = Field(default_factory=list)
|