orchestrator-core 4.5.1a1__py3-none-any.whl → 4.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. orchestrator/__init__.py +3 -12
  2. orchestrator/agentic_app.py +48 -29
  3. orchestrator/api/api_v1/api.py +8 -6
  4. orchestrator/api/api_v1/endpoints/processes.py +2 -0
  5. orchestrator/api/api_v1/endpoints/search.py +26 -7
  6. orchestrator/cli/main.py +2 -2
  7. orchestrator/cli/search/__init__.py +32 -0
  8. orchestrator/devtools/populator.py +16 -0
  9. orchestrator/domain/base.py +2 -7
  10. orchestrator/domain/lifecycle.py +24 -7
  11. orchestrator/llm_settings.py +9 -3
  12. orchestrator/log_config.py +1 -0
  13. orchestrator/migrations/helpers.py +7 -1
  14. orchestrator/schemas/search.py +13 -0
  15. orchestrator/schemas/workflow.py +1 -0
  16. orchestrator/search/agent/__init__.py +15 -2
  17. orchestrator/search/agent/agent.py +30 -15
  18. orchestrator/search/agent/prompts.py +75 -37
  19. orchestrator/search/agent/state.py +13 -0
  20. orchestrator/search/agent/tools.py +148 -11
  21. orchestrator/search/core/__init__.py +12 -0
  22. orchestrator/search/core/embedding.py +13 -4
  23. orchestrator/search/core/exceptions.py +14 -0
  24. orchestrator/search/core/types.py +15 -0
  25. orchestrator/search/core/validators.py +13 -0
  26. orchestrator/search/docs/running_local_text_embedding_inference.md +1 -0
  27. orchestrator/search/filters/__init__.py +13 -0
  28. orchestrator/search/filters/base.py +84 -61
  29. orchestrator/search/filters/date_filters.py +13 -0
  30. orchestrator/search/filters/definitions.py +16 -2
  31. orchestrator/search/filters/ltree_filters.py +16 -3
  32. orchestrator/search/filters/numeric_filter.py +13 -0
  33. orchestrator/search/indexing/__init__.py +13 -0
  34. orchestrator/search/indexing/indexer.py +14 -3
  35. orchestrator/search/indexing/registry.py +13 -0
  36. orchestrator/search/indexing/tasks.py +17 -1
  37. orchestrator/search/indexing/traverse.py +17 -5
  38. orchestrator/search/llm_migration.py +108 -0
  39. orchestrator/search/retrieval/__init__.py +13 -0
  40. orchestrator/search/retrieval/builder.py +23 -8
  41. orchestrator/search/retrieval/engine.py +36 -34
  42. orchestrator/search/retrieval/exceptions.py +90 -0
  43. orchestrator/search/retrieval/pagination.py +13 -0
  44. orchestrator/search/retrieval/retrievers/__init__.py +26 -0
  45. orchestrator/search/retrieval/retrievers/base.py +123 -0
  46. orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
  47. orchestrator/search/retrieval/retrievers/hybrid.py +277 -0
  48. orchestrator/search/retrieval/retrievers/semantic.py +94 -0
  49. orchestrator/search/retrieval/retrievers/structured.py +39 -0
  50. orchestrator/search/retrieval/utils.py +21 -7
  51. orchestrator/search/retrieval/validation.py +54 -76
  52. orchestrator/search/schemas/__init__.py +12 -0
  53. orchestrator/search/schemas/parameters.py +13 -0
  54. orchestrator/search/schemas/results.py +15 -1
  55. orchestrator/services/processes.py +2 -1
  56. orchestrator/settings.py +7 -0
  57. orchestrator/utils/state.py +6 -1
  58. orchestrator/workflows/steps.py +16 -1
  59. {orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/METADATA +13 -11
  60. {orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/RECORD +66 -59
  61. orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +0 -95
  62. orchestrator/search/retrieval/retriever.py +0 -447
  63. /orchestrator/cli/{index_llm.py → search/index_llm.py} +0 -0
  64. /orchestrator/cli/{resize_embedding.py → search/resize_embedding.py} +0 -0
  65. /orchestrator/cli/{search_explore.py → search/search_explore.py} +0 -0
  66. /orchestrator/cli/{speedtest.py → search/speedtest.py} +0 -0
  67. {orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/WHEEL +0 -0
  68. {orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  import json
2
15
  from textwrap import dedent
3
16
 
@@ -6,57 +19,82 @@ from pydantic_ai import RunContext
6
19
  from pydantic_ai.ag_ui import StateDeps
7
20
 
8
21
  from orchestrator.search.agent.state import SearchState
9
- from orchestrator.search.retrieval.validation import get_structured_filter_schema
10
22
 
11
23
  logger = structlog.get_logger(__name__)
12
24
 
13
25
 
14
26
  async def get_base_instructions() -> str:
27
+ return dedent(
28
+ """
29
+ You are an expert assistant designed to find relevant information by building and running database queries.
15
30
 
16
- try:
17
- schema_dict = get_structured_filter_schema()
18
- if schema_dict:
19
- schema_info = "\n".join([f" {path}: {field_type}" for path, field_type in schema_dict.items()])
20
- else:
21
- schema_info = " No filterable fields available"
22
- except Exception as e:
23
- logger.warning(f"Failed to load schema for prompt: {e}")
24
- schema_info = " Schema temporarily unavailable"
25
- logger.error(f"Generated schema for agent prompt:\n{schema_info}")
31
+ ---
32
+ ### 1. Your Goal and Method
26
33
 
27
- return dedent(
28
- f"""
29
- You are a helpful assistant for building and running database queries.
30
-
31
- **Available Data Schema:**
32
- Use the following schema to understand the available fields.
33
- When you build filters, each `path` MUST be a valid path from this schema,
34
- and the operator/value MUST match that path's type.
35
- ```
36
- {schema_info}
37
- ```
38
- **Workflow (do in order):**
39
- 1) `set_search_parameters` to define the main entity being searched.
40
- 2) Build a complete `FilterTree` (AND at root unless the user asks for OR).
41
- 3) `set_filter_tree(filters=<FilterTree or null>)`.
42
- 4) `execute_search()`.
43
- 5) Summarize the results for the user.
44
-
45
- """
34
+ Your ultimate goal is to **find information** that answers the user's request.
35
+
36
+ To do this, you will perform either a broad search or a filtered search.
37
+ For **filtered searches**, your primary method is to **construct a valid `FilterTree` object**.
38
+ To do this correctly, you must infer the exact structure, operators, and nesting rules from the Pydantic schema of the `set_filter_tree` tool itself.
39
+
40
+ ---
41
+ ### 2. Information-Gathering Tools
42
+
43
+ **If you determine that a `FilterTree` is needed**, use these tools to gather information first:
44
+
45
+ - **discover_filter_paths(field_names: list[str])**: Use this to discover all valid filter paths for a list of field names in a single call.
46
+ - **get_valid_operators()**: Use this to get the JSON map of all valid operators for each field type.
47
+
48
+ ---
49
+ ### 3. Execution Workflow
50
+
51
+ Follow these steps in strict order:
52
+
53
+ 1. **Set Context**: Always begin by calling `set_search_parameters`.
54
+ 2. **Analyze for Filters**: Based on the user's request, decide if specific filters are necessary.
55
+ - **If filters ARE required**, follow these sub-steps:
56
+ a. **Gather Intel**: Identify all needed field names, then call `discover_filter_paths` and `get_valid_operators` **once each** to get all required information.
57
+ b. **Construct FilterTree**: Build the `FilterTree` object.
58
+ c. **Set Filters**: Call `set_filter_tree`.
59
+ 3. **Execute**: Call `execute_search`. This is done for both filtered and non-filtered searches.
60
+ 4. **Report**: Answer the user's question directly and summarize when appropriate.
61
+
62
+ ---
63
+ ### 4. Critical Rules
64
+
65
+ - **NEVER GUESS PATHS IN THE DATABASE**: You *must* verify every filter path by calling `discover_filter_paths` first. If a path does not exist, you may attempt to map the question onto existing paths that are valid and available from `discover_filter_paths`. If you cannot infer a match, inform the user and do not include it in the `FilterTree`.
66
+ - **USE FULL PATHS**: Always use the full, unambiguous path returned by the discovery tool.
67
+ - **MATCH OPERATORS**: Only use operators that are compatible with the field type as confirmed by `get_valid_operators`.
68
+ """
46
69
  )
47
70
 
48
71
 
49
72
  async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> str:
50
- """Dynamically generate the system prompt for the agent."""
51
- param_state = json.dumps(ctx.deps.state.parameters, indent=2, default=str) if ctx.deps.state.parameters else "{}"
73
+ """Dynamically provides 'next step' coaching based on the current state."""
74
+ state = ctx.deps.state
75
+ param_state_str = json.dumps(state.parameters, indent=2, default=str) if state.parameters else "Not set."
52
76
 
77
+ next_step_guidance = ""
78
+ if not state.parameters or not state.parameters.get("entity_type"):
79
+ next_step_guidance = (
80
+ "INSTRUCTION: The search context is not set. Your next action is to call `set_search_parameters`."
81
+ )
82
+ else:
83
+ next_step_guidance = (
84
+ "INSTRUCTION: Context is set. Now, analyze the user's request. "
85
+ "If specific filters ARE required, use the information-gathering tools to build a `FilterTree` and call `set_filter_tree`. "
86
+ "If no specific filters are needed, you can proceed directly to `execute_search`."
87
+ )
53
88
  return dedent(
54
89
  f"""
55
- Current search parameters state:
56
- {param_state}
90
+ ---
91
+ ### Current State & Next Action
92
+
93
+ **Current Search Parameters:**
94
+ ```json
95
+ {param_state_str}
96
+ ```
57
97
 
58
- Remember:
59
- - If filters are missing or incomplete, construct a full FilterTree and call `set_filter_tree`.
60
- - Then call `execute_search`.
98
+ **{next_step_guidance}**
61
99
  """
62
100
  )
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from typing import Any
2
15
 
3
16
  from pydantic import BaseModel, Field
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from collections.abc import Awaitable, Callable
2
15
  from typing import Any, TypeVar
3
16
 
@@ -10,20 +23,25 @@ from pydantic_ai.messages import ModelRequest, UserPromptPart
10
23
  from pydantic_ai.toolsets import FunctionToolset
11
24
 
12
25
  from orchestrator.api.api_v1.endpoints.search import (
26
+ get_definitions,
27
+ list_paths,
13
28
  search_processes,
14
29
  search_products,
15
30
  search_subscriptions,
16
31
  search_workflows,
17
32
  )
18
33
  from orchestrator.schemas.search import SearchResultsSchema
19
- from orchestrator.search.core.types import ActionType, EntityType
34
+ from orchestrator.search.core.types import ActionType, EntityType, FilterOp
20
35
  from orchestrator.search.filters import FilterTree
36
+ from orchestrator.search.retrieval.exceptions import FilterValidationError, PathNotFoundError
21
37
  from orchestrator.search.retrieval.validation import validate_filter_tree
22
38
  from orchestrator.search.schemas.parameters import PARAMETER_REGISTRY, BaseSearchParameters
23
39
 
24
40
  from .state import SearchState
25
41
 
26
42
  logger = structlog.get_logger(__name__)
43
+
44
+
27
45
  P = TypeVar("P", bound=BaseSearchParameters)
28
46
 
29
47
  SearchFn = Callable[[P], Awaitable[SearchResultsSchema[Any]]]
@@ -53,13 +71,26 @@ async def set_search_parameters(
53
71
  entity_type: EntityType,
54
72
  action: str | ActionType = ActionType.SELECT,
55
73
  ) -> StateSnapshotEvent:
74
+ """Sets the initial search context, like the entity type and the user's query.
75
+
76
+ This MUST be the first tool called to start any new search.
77
+ Warning: Calling this tool will erase any existing filters and search results from the state.
78
+ """
56
79
  params = ctx.deps.state.parameters or {}
57
80
  is_new_search = params.get("entity_type") != entity_type.value
58
81
  final_query = (last_user_message(ctx) or "") if is_new_search else params.get("query", "")
59
82
 
83
+ logger.debug(
84
+ "Setting search parameters",
85
+ entity_type=entity_type.value,
86
+ action=action,
87
+ is_new_search=is_new_search,
88
+ query=final_query,
89
+ )
90
+
60
91
  ctx.deps.state.parameters = {"action": action, "entity_type": entity_type, "filters": None, "query": final_query}
61
92
  ctx.deps.state.results = []
62
- logger.info(f"Set search parameters: entity_type={entity_type}, action={action}")
93
+ logger.debug("Search parameters set", parameters=ctx.deps.state.parameters)
63
94
 
64
95
  return StateSnapshotEvent(
65
96
  type=EventType.STATE_SNAPSHOT,
@@ -84,23 +115,35 @@ async def set_filter_tree(
84
115
 
85
116
  entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
86
117
 
118
+ logger.debug(
119
+ "Setting filter tree",
120
+ entity_type=entity_type.value,
121
+ has_filters=filters is not None,
122
+ filter_summary=f"{len(filters.get_all_leaves())} filters" if filters else "no filters",
123
+ )
124
+
87
125
  try:
88
126
  await validate_filter_tree(filters, entity_type)
89
- except Exception as e:
127
+ except PathNotFoundError as e:
128
+ logger.debug(f"{PathNotFoundError.__name__}: {str(e)}")
129
+ raise ModelRetry(f"{str(e)} Use discover_filter_paths tool to find valid paths.")
130
+ except FilterValidationError as e:
131
+ # ModelRetry will trigger an agent retry, containing the specific validation error.
132
+ logger.debug(f"Filter validation failed: {str(e)}")
90
133
  raise ModelRetry(str(e))
134
+ except Exception as e:
135
+ logger.error("Unexpected Filter validation exception", error=str(e))
136
+ raise ModelRetry(f"Filter validation failed: {str(e)}. Please check your filter structure and try again.")
91
137
 
92
- ctx.deps.state.parameters["filters"] = None if filters is None else filters.model_dump(mode="json", by_alias=True)
93
- logger.info(
94
- "Set filter tree",
95
- filters=None if filters is None else filters.model_dump(mode="json", by_alias=True),
96
- )
138
+ filter_data = None if filters is None else filters.model_dump(mode="json", by_alias=True)
139
+ ctx.deps.state.parameters["filters"] = filter_data
97
140
  return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=ctx.deps.state.model_dump())
98
141
 
99
142
 
100
143
  @search_toolset.tool
101
144
  async def execute_search(
102
145
  ctx: RunContext[StateDeps[SearchState]],
103
- limit: int = 5,
146
+ limit: int = 10,
104
147
  ) -> StateSnapshotEvent:
105
148
  """Execute the search with the current parameters."""
106
149
  if not ctx.deps.state.parameters:
@@ -112,10 +155,104 @@ async def execute_search(
112
155
  raise ValueError(f"Unknown entity type: {entity_type}")
113
156
 
114
157
  params = param_class(**ctx.deps.state.parameters)
115
- logger.info("Executing database search", **params.model_dump(mode="json"))
158
+ logger.debug(
159
+ "Executing database search",
160
+ search_entity_type=entity_type.value,
161
+ limit=limit,
162
+ has_filters=params.filters is not None,
163
+ query=params.query,
164
+ action=params.action,
165
+ )
166
+
167
+ if params.filters:
168
+ logger.debug("Search filters", filters=params.filters)
169
+
170
+ params.limit = limit
116
171
 
117
172
  fn = SEARCH_FN_MAP[entity_type]
118
173
  search_results = await fn(params)
119
- ctx.deps.state.results = search_results.data[:limit]
174
+
175
+ logger.debug(
176
+ "Search completed",
177
+ total_results=len(search_results.data) if search_results.data else 0,
178
+ )
179
+
180
+ ctx.deps.state.results = search_results.data
120
181
 
121
182
  return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=ctx.deps.state.model_dump())
183
+
184
+
185
+ @search_toolset.tool
186
+ async def discover_filter_paths(
187
+ ctx: RunContext[StateDeps[SearchState]],
188
+ field_names: list[str],
189
+ entity_type: EntityType | None = None,
190
+ ) -> dict[str, dict[str, Any]]:
191
+ """Discovers available filter paths for a list of field names.
192
+
193
+ Returns a dictionary where each key is a field_name from the input list and
194
+ the value is its discovery result.
195
+ """
196
+ if not entity_type and ctx.deps.state.parameters:
197
+ entity_type = EntityType(ctx.deps.state.parameters.get("entity_type"))
198
+ if not entity_type:
199
+ entity_type = EntityType.SUBSCRIPTION
200
+
201
+ all_results = {}
202
+ for field_name in field_names:
203
+ paths_response = await list_paths(prefix="", q=field_name, entity_type=entity_type, limit=100)
204
+
205
+ matching_leaves = []
206
+ for leaf in paths_response.leaves:
207
+ if field_name.lower() in leaf.name.lower():
208
+ matching_leaves.append(
209
+ {
210
+ "name": leaf.name,
211
+ "value_kind": leaf.ui_types,
212
+ "paths": leaf.paths,
213
+ }
214
+ )
215
+
216
+ matching_components = []
217
+ for comp in paths_response.components:
218
+ if field_name.lower() in comp.name.lower():
219
+ matching_components.append(
220
+ {
221
+ "name": comp.name,
222
+ "value_kind": comp.ui_types,
223
+ }
224
+ )
225
+
226
+ result_for_field: dict[str, Any]
227
+ if not matching_leaves and not matching_components:
228
+ result_for_field = {
229
+ "status": "NOT_FOUND",
230
+ "guidance": f"No filterable paths found containing '{field_name}'. Do not create a filter for this.",
231
+ "leaves": [],
232
+ "components": [],
233
+ }
234
+ else:
235
+ result_for_field = {
236
+ "status": "OK",
237
+ "guidance": f"Found {len(matching_leaves)} field(s) and {len(matching_components)} component(s) for '{field_name}'.",
238
+ "leaves": matching_leaves,
239
+ "components": matching_components,
240
+ }
241
+
242
+ all_results[field_name] = result_for_field
243
+ logger.debug("Returning found fieldname - path mapping", all_results=all_results)
244
+ return all_results
245
+
246
+
247
+ @search_toolset.tool
248
+ async def get_valid_operators() -> dict[str, list[FilterOp]]:
249
+ """Gets the mapping of field types to their valid filter operators."""
250
+ definitions = await get_definitions()
251
+
252
+ operator_map = {}
253
+ for ui_type, type_def in definitions.items():
254
+ key = ui_type.value
255
+
256
+ if hasattr(type_def, "operators"):
257
+ operator_map[key] = type_def.operators
258
+ return operator_map
@@ -0,0 +1,12 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
@@ -1,4 +1,16 @@
1
- import logging
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
2
14
 
3
15
  import structlog
4
16
  from litellm import aembedding as llm_aembedding
@@ -9,9 +21,6 @@ from orchestrator.llm_settings import llm_settings
9
21
 
10
22
  logger = structlog.get_logger(__name__)
11
23
 
12
- # Its logging alot of noise such as embedding vectors.
13
- logging.getLogger("LiteLLM").setLevel(logging.WARNING)
14
-
15
24
 
16
25
  class EmbeddingIndexer:
17
26
 
@@ -1,3 +1,17 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+
1
15
  class SearchUtilsError(Exception):
2
16
  """Base exception for this module."""
3
17
 
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from dataclasses import dataclass
2
15
  from datetime import date, datetime
3
16
  from enum import Enum, IntEnum
@@ -14,6 +27,8 @@ from .validators import is_bool_string, is_iso_date, is_uuid
14
27
 
15
28
  SQLAColumn: TypeAlias = ColumnElement[Any] | InstrumentedAttribute[Any]
16
29
 
30
+ LTREE_SEPARATOR = "."
31
+
17
32
 
18
33
  @dataclass
19
34
  class SearchMetadata:
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  import uuid
2
15
 
3
16
  from dateutil.parser import isoparse
@@ -18,6 +18,7 @@ Point your backend to the local endpoint and declare the new vector size:
18
18
  ```env
19
19
  OPENAI_BASE_URL=http://localhost:8080/v1
20
20
  EMBEDDING_DIMENSION=384
21
+ EMBEDDING_MAX_BATCH_SIZE=32 # Not required when using OpenAI embeddings
21
22
  ```
22
23
 
23
24
  Depending on the model, you might want to change the `EMBEDDING_FALLBACK_MAX_TOKENS` and `EMBEDDING_MAX_BATCH_SIZE` settings, which are set conservatively and according to the requirements of the setup used in this example.
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from .base import (
2
15
  EqualityFilter,
3
16
  FilterCondition,