PyPI - orchestrator-core - Versions diffs - 4.6.1__py3-none-any.whl → 4.6.3__py3-none-any.whl - Mend

orchestrator-core 4.6.1__py3-none-any.whl → 4.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

orchestrator/__init__.py +1 -1
orchestrator/api/api_v1/endpoints/processes.py +4 -1
orchestrator/api/api_v1/endpoints/search.py +44 -34
orchestrator/{search/retrieval/utils.py → cli/search/display.py} +4 -29
orchestrator/cli/search/search_explore.py +22 -24
orchestrator/cli/search/speedtest.py +11 -9
orchestrator/db/models.py +6 -6
orchestrator/graphql/resolvers/helpers.py +15 -0
orchestrator/graphql/resolvers/process.py +5 -3
orchestrator/graphql/resolvers/product.py +3 -2
orchestrator/graphql/resolvers/product_block.py +3 -2
orchestrator/graphql/resolvers/resource_type.py +3 -2
orchestrator/graphql/resolvers/scheduled_tasks.py +3 -1
orchestrator/graphql/resolvers/settings.py +2 -0
orchestrator/graphql/resolvers/subscription.py +5 -3
orchestrator/graphql/resolvers/version.py +2 -0
orchestrator/graphql/resolvers/workflow.py +3 -2
orchestrator/graphql/schemas/process.py +3 -3
orchestrator/log_config.py +2 -0
orchestrator/schemas/search.py +1 -1
orchestrator/schemas/search_requests.py +59 -0
orchestrator/search/agent/handlers.py +129 -0
orchestrator/search/agent/prompts.py +54 -33
orchestrator/search/agent/state.py +9 -24
orchestrator/search/agent/tools.py +223 -144
orchestrator/search/agent/validation.py +80 -0
orchestrator/search/{schemas → aggregations}/__init__.py +20 -0
orchestrator/search/aggregations/base.py +201 -0
orchestrator/search/core/types.py +3 -2
orchestrator/search/filters/__init__.py +4 -0
orchestrator/search/filters/definitions.py +22 -1
orchestrator/search/filters/numeric_filter.py +3 -3
orchestrator/search/llm_migration.py +2 -1
orchestrator/search/query/__init__.py +90 -0
orchestrator/search/query/builder.py +285 -0
orchestrator/search/query/engine.py +162 -0
orchestrator/search/{retrieval → query}/exceptions.py +38 -7
orchestrator/search/query/mixins.py +95 -0
orchestrator/search/query/queries.py +129 -0
orchestrator/search/query/results.py +252 -0
orchestrator/search/{retrieval/query_state.py → query/state.py} +31 -11
orchestrator/search/{retrieval → query}/validation.py +58 -1
orchestrator/search/retrieval/__init__.py +0 -5
orchestrator/search/retrieval/pagination.py +7 -8
orchestrator/search/retrieval/retrievers/base.py +9 -9
orchestrator/workflows/translations/en-GB.json +1 -0
{orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/METADATA +16 -15
{orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/RECORD +51 -45
orchestrator/search/retrieval/builder.py +0 -127
orchestrator/search/retrieval/engine.py +0 -197
orchestrator/search/schemas/parameters.py +0 -133
orchestrator/search/schemas/results.py +0 -80
orchestrator/search/{export.py → query/export.py} +0 -0
{orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/WHEEL +0 -0
{orchestrator_core-4.6.1.dist-info → orchestrator_core-4.6.3.dist-info}/licenses/LICENSE +0 -0

orchestrator/search/query/engine.py ADDED Viewed

@@ -0,0 +1,162 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import structlog
+from sqlalchemy.orm import Session
+from orchestrator.search.core.embedding import QueryEmbedder
+from orchestrator.search.core.types import SearchMetadata
+from orchestrator.search.query.results import (
+    AggregationResponse,
+    SearchResponse,
+    format_aggregation_response,
+    format_search_response,
+)
+from orchestrator.search.retrieval.pagination import PageCursor
+from orchestrator.search.retrieval.retrievers import Retriever
+from .builder import build_aggregation_query, build_candidate_query, build_simple_count_query
+from .export import fetch_export_data
+from .queries import AggregateQuery, CountQuery, ExportQuery, SelectQuery
+logger = structlog.get_logger(__name__)
+async def _execute_search(
+    query: SelectQuery | ExportQuery,
+    db_session: Session,
+    limit: int,
+    cursor: PageCursor | None = None,
+    query_embedding: list[float] | None = None,
+) -> SearchResponse:
+    """Internal implementation to execute search with specified query.
+    Args:
+        query: The SELECT or EXPORT query with vector, fuzzy, or filter criteria.
+        db_session: The active SQLAlchemy session for executing the query.
+        limit: Maximum number of results to return.
+        cursor: Optional pagination cursor.
+        query_embedding: Optional pre-computed query embedding to use instead of generating a new one.
+    Returns:
+        SearchResponse with results and embedding (for internal use).
+    """
+    if not query.vector_query and not query.filters and not query.fuzzy_term:
+        logger.warning("No search criteria provided (vector_query, fuzzy_term, or filters).")
+        return SearchResponse(results=[], metadata=SearchMetadata.empty())
+    candidate_query = build_candidate_query(query)
+    if query.vector_query and not query_embedding:
+        query_embedding = await QueryEmbedder.generate_for_text_async(query.vector_query)
+    retriever = Retriever.route(query, cursor, query_embedding)
+    logger.debug("Using retriever", retriever_type=retriever.__class__.__name__)
+    final_stmt = retriever.apply(candidate_query)
+    final_stmt = final_stmt.limit(limit)
+    logger.debug(final_stmt)
+    result = db_session.execute(final_stmt).mappings().all()
+    response = format_search_response(result, query, retriever.metadata)
+    # Store embedding in response for agent to save to DB
+    response.query_embedding = query_embedding
+    return response
+async def execute_search(
+    query: SelectQuery,
+    db_session: Session,
+    cursor: PageCursor | None = None,
+    query_embedding: list[float] | None = None,
+) -> SearchResponse:
+    """Execute a SELECT search query.
+    This executes a SELECT action search using vector/fuzzy/filter search with ranking.
+    Args:
+        query: SelectQuery with search criteria
+        db_session: Database session
+        cursor: Optional pagination cursor
+        query_embedding: Optional pre-computed embedding
+    Returns:
+        SearchResponse with ranked results
+    """
+    # Fetch one extra to determine if there is a next page
+    fetch_limit = query.limit + 1 if query.limit > 0 else query.limit
+    response = await _execute_search(query, db_session, fetch_limit, cursor, query_embedding)
+    has_more = len(response.results) > query.limit and query.limit > 0
+    # Trim to requested limit
+    response.results = response.results[: query.limit]
+    response.has_more = has_more
+    return response
+async def execute_export(
+    query: ExportQuery,
+    db_session: Session,
+    query_embedding: list[float] | None = None,
+) -> list[dict]:
+    """Execute a search and export flattened entity data.
+    Args:
+        query: ExportQuery with search criteria
+        db_session: Database session
+        query_embedding: Optional pre-computed embedding
+    Returns:
+        List of flattened entity records suitable for export.
+    """
+    search_response = await _execute_search(
+        query=query,
+        db_session=db_session,
+        limit=query.limit,
+        query_embedding=query_embedding,
+    )
+    entity_ids = [res.entity_id for res in search_response.results]
+    return fetch_export_data(query.entity_type, entity_ids)
+async def execute_aggregation(
+    query: CountQuery | AggregateQuery,
+    db_session: Session,
+) -> AggregationResponse:
+    """Execute aggregation query and return formatted results.
+    Args:
+        query: CountQuery or AggregateQuery
+        db_session: Database session
+    Returns:
+        AggregationResponse with results and metadata
+    """
+    candidate_query = build_candidate_query(query)
+    if isinstance(query, CountQuery) and not query.group_by and not query.temporal_group_by:
+        # Simple count without grouping
+        agg_query = build_simple_count_query(candidate_query)
+        group_column_names: list[str] = []
+    else:
+        # Grouped aggregation - needs pivoting
+        agg_query, group_column_names = build_aggregation_query(query, candidate_query)
+    logger.debug("Executing aggregation query", sql=str(agg_query))
+    result_rows = db_session.execute(agg_query).mappings().all()
+    return format_aggregation_response(result_rows, group_column_names, query)

orchestrator/search/{retrieval → query}/exceptions.py RENAMED Viewed

@@ -14,13 +14,13 @@
 from orchestrator.search.core.types import FilterOp
-class FilterValidationError(Exception):
-    """Base exception for filter validation errors."""
+class QueryValidationError(Exception):
+    """Base exception for all query validation errors."""
     pass
-class InvalidLtreePatternError(FilterValidationError):
+class InvalidLtreePatternError(QueryValidationError):
     """Raised when an ltree pattern has invalid ltree query syntax."""
     def __init__(self, pattern: str) -> None:
@@ -28,7 +28,7 @@ class InvalidLtreePatternError(FilterValidationError):
         super().__init__(message)
-class EmptyFilterPathError(FilterValidationError):
+class EmptyFilterPathError(QueryValidationError):
     """Raised when a filter path is empty or contains only whitespace."""
     def __init__(self) -> None:
@@ -38,7 +38,7 @@ class EmptyFilterPathError(FilterValidationError):
         super().__init__(message)
-class PathNotFoundError(FilterValidationError):
+class PathNotFoundError(QueryValidationError):
     """Raised when a filter path doesn't exist in the database schema.
     Examples:
@@ -53,7 +53,7 @@ class PathNotFoundError(FilterValidationError):
         super().__init__(message)
-class IncompatibleFilterTypeError(FilterValidationError):
+class IncompatibleFilterTypeError(QueryValidationError):
     """Raised when a filter operator is incompatible with the field's data type.
     Examples:
@@ -75,7 +75,7 @@ class IncompatibleFilterTypeError(FilterValidationError):
         super().__init__(message)
-class InvalidEntityPrefixError(FilterValidationError):
+class InvalidEntityPrefixError(QueryValidationError):
     """Raised when a filter path doesn't have the correct entity type prefix.
     Examples:
@@ -88,3 +88,34 @@ class InvalidEntityPrefixError(FilterValidationError):
     def __init__(self, path: str, expected_prefix: str, entity_type: str) -> None:
         message = f"Filter path '{path}' must start with '{expected_prefix}' for {entity_type} searches, or use '*' for wildcard paths."
         super().__init__(message)
+class IncompatibleAggregationTypeError(QueryValidationError):
+    """Raised when an aggregation function is incompatible with the field's data type.
+    Examples:
+        Using SUM on a string field:
+        >>> print(IncompatibleAggregationTypeError('sum', 'string', 'subscription.name', ['integer', 'float']))
+        Aggregation 'sum' requires numeric fields (integer, float), but 'subscription.name' has type 'string'.
+    """
+    def __init__(self, agg_type: str, field_type: str, path: str, expected_types: list[str]) -> None:
+        expected_types_str = ", ".join(expected_types)
+        message = f"Aggregation '{agg_type}' requires numeric fields ({expected_types_str}), but '{path}' has type '{field_type}'."
+        super().__init__(message)
+class IncompatibleTemporalGroupingTypeError(QueryValidationError):
+    """Raised when temporal grouping is used on a non-datetime field.
+    Examples:
+        Using temporal grouping on a string field:
+        >>> print(IncompatibleTemporalGroupingTypeError('subscription.name', 'string'))
+        Temporal grouping requires a datetime field, but 'subscription.name' has type 'string'.
+    """
+    def __init__(self, path: str, field_type: str) -> None:
+        message = f"Temporal grouping requires a datetime field, but '{path}' has type '{field_type}'."
+        super().__init__(message)

orchestrator/search/query/mixins.py ADDED Viewed

@@ -0,0 +1,95 @@
+import uuid
+from pydantic import BaseModel, Field
+from orchestrator.search.aggregations import Aggregation, TemporalGrouping
+__all__ = [
+    "SearchMixin",
+    "GroupingMixin",
+    "AggregationMixin",
+]
+class SearchMixin(BaseModel):
+    """Mixin providing text search capability.
+    Provides query text processing and derived properties for vector and fuzzy search.
+    """
+    query_text: str | None = Field(default=None, description="Text query for semantic/fuzzy search")
+    @property
+    def vector_query(self) -> str | None:
+        """Extract vector query from query text.
+        Returns None if query_text is empty or is a UUID (UUIDs are not vectorized).
+        This matches the original logic from BaseQuery.
+        """
+        if not self.query_text:
+            return None
+        try:
+            uuid.UUID(self.query_text)
+            return None  # It's a UUID, disable vector search
+        except ValueError:
+            return self.query_text
+    @property
+    def fuzzy_term(self) -> str | None:
+        """Extract fuzzy term from query text.
+        Only single-word queries are used for fuzzy search to avoid
+        the trigram operator filtering out too many results.
+        This matches the original logic from BaseQuery.
+        """
+        if not self.query_text:
+            return None
+        words = self.query_text.split()
+        return self.query_text if len(words) == 1 else None
+class GroupingMixin(BaseModel):
+    """Mixin providing grouping capability.
+    Used by COUNT and AGGREGATE queries for grouping results.
+    """
+    group_by: list[str] | None = Field(default=None, description="Field paths to group by")
+    temporal_group_by: list[TemporalGrouping] | None = Field(
+        default=None,
+        description="Temporal grouping specifications (group by month, year, etc.)",
+    )
+    def get_pivot_fields(self) -> list[str]:
+        """Get all fields needed for EAV pivot from grouping.
+        Returns deduplicated list maintaining insertion order.
+        This matches the original logic from BaseQuery.get_pivot_fields().
+        """
+        fields = list(self.group_by or [])
+        # Collect from temporal groupings
+        if self.temporal_group_by:
+            for temp_group in self.temporal_group_by:
+                fields.extend(temp_group.get_pivot_fields())
+        return list(dict.fromkeys(fields))
+class AggregationMixin(BaseModel):
+    """Mixin providing aggregation computation capability.
+    Used by AGGREGATE queries to define what statistics to compute.
+    """
+    aggregations: list[Aggregation] = Field(description="Aggregations to compute (SUM, AVG, MIN, MAX, COUNT)")
+    def get_aggregation_pivot_fields(self) -> list[str]:
+        """Get fields needed for EAV pivot from aggregations.
+        Returns deduplicated list maintaining insertion order.
+        """
+        fields = []
+        for agg in self.aggregations:
+            fields.extend(agg.get_pivot_fields())
+        return list(dict.fromkeys(fields))

orchestrator/search/query/queries.py ADDED Viewed

@@ -0,0 +1,129 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Annotated, Any, ClassVar, Literal, Self, Union
+from pydantic import BaseModel, ConfigDict, Discriminator, Field
+from orchestrator.search.core.types import ActionType, EntityType
+from orchestrator.search.filters import FilterTree
+from .mixins import (
+    AggregationMixin,
+    GroupingMixin,
+    SearchMixin,
+)
+class BaseQuery(BaseModel):
+    """Base model for all query types.
+    Contains shared constants, properties, and utilities.
+    """
+    MIN_LIMIT: ClassVar[int] = 1
+    DEFAULT_LIMIT: ClassVar[int] = 10
+    MAX_LIMIT: ClassVar[int] = 30
+    DEFAULT_EXPORT_LIMIT: ClassVar[int] = 1000
+    MAX_EXPORT_LIMIT: ClassVar[int] = 10000
+    _action: ClassVar[ActionType]
+    entity_type: EntityType
+    filters: FilterTree | None = Field(default=None, description="Structured filters to apply")
+    model_config = ConfigDict(extra="forbid")
+    @property
+    def action(self) -> ActionType:
+        return self._action
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> Self:
+        """Build query from a dictionary.
+        Args:
+            data: Dictionary with query parameters
+        Returns:
+            Query instance of the appropriate type
+        """
+        return cls.model_validate(data)
+class SelectQuery(BaseQuery, SearchMixin):
+    """Query for SELECT operations.
+    Composes BaseQuery with SearchMixin for text search, with strict result limits.
+    """
+    query_type: Literal["select"] = "select"
+    _action: ClassVar[ActionType] = ActionType.SELECT
+    limit: int = Field(
+        default=BaseQuery.DEFAULT_LIMIT,
+        ge=BaseQuery.MIN_LIMIT,
+        le=BaseQuery.MAX_LIMIT,
+        description="Maximum number of search results to return",
+    )
+class ExportQuery(BaseQuery, SearchMixin):
+    """Query for EXPORT operations .
+    Similar to SelectQuery but with higher limits for bulk exports.
+    """
+    query_type: Literal["export"] = "export"
+    _action: ClassVar[ActionType] = ActionType.SELECT
+    limit: int = Field(
+        default=BaseQuery.DEFAULT_EXPORT_LIMIT,
+        ge=BaseQuery.MIN_LIMIT,
+        le=BaseQuery.MAX_EXPORT_LIMIT,
+        description="Maximum number of results to export",
+    )
+class CountQuery(BaseQuery, GroupingMixin):
+    """Query for COUNT operations with optional grouping."""
+    query_type: Literal["count"] = "count"
+    _action: ClassVar[ActionType] = ActionType.COUNT
+class AggregateQuery(BaseQuery, GroupingMixin, AggregationMixin):
+    """Query for AGGREGATE operations.
+    Composes BaseQuery with GroupingMixin and AggregationMixin
+    to provide both grouping and aggregation capabilities.
+    """
+    query_type: Literal["aggregate"] = "aggregate"
+    _action: ClassVar[ActionType] = ActionType.AGGREGATE
+    def get_pivot_fields(self) -> list[str]:
+        """Get all fields needed for EAV pivot including aggregation fields."""
+        # Get grouping fields from GroupingMixin
+        fields = super().get_pivot_fields()
+        # Add aggregation fields
+        fields.extend(self.get_aggregation_pivot_fields())
+        return list(dict.fromkeys(fields))
+Query = Annotated[
+    Union[SelectQuery, ExportQuery, CountQuery, AggregateQuery],
+    Discriminator("query_type"),
+]

orchestrator-core 4.6.1__py3-none-any.whl → 4.6.3__py3-none-any.whl

orchestrator-core 4.6.1__py3-none-any.whl → 4.6.3__py3-none-any.whl