PyPI - orchestrator-core - Versions diffs - 4.4.0rc1__py3-none-any.whl → 5.0.0a1__py3-none-any.whl - Mend

orchestrator-core 4.4.0rc1__py3-none-any.whl → 5.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

orchestrator/__init__.py +1 -1
orchestrator/api/api_v1/api.py +7 -0
orchestrator/api/api_v1/endpoints/agent.py +62 -0
orchestrator/api/api_v1/endpoints/processes.py +6 -12
orchestrator/api/api_v1/endpoints/search.py +197 -0
orchestrator/app.py +4 -0
orchestrator/cli/index_llm.py +73 -0
orchestrator/cli/main.py +8 -1
orchestrator/cli/resize_embedding.py +136 -0
orchestrator/cli/scheduler.py +29 -39
orchestrator/cli/search_explore.py +203 -0
orchestrator/db/models.py +37 -1
orchestrator/graphql/schema.py +0 -5
orchestrator/graphql/schemas/process.py +2 -2
orchestrator/graphql/utils/create_resolver_error_handler.py +1 -1
orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +95 -0
orchestrator/schedules/__init__.py +2 -1
orchestrator/schedules/resume_workflows.py +2 -2
orchestrator/schedules/scheduling.py +24 -64
orchestrator/schedules/task_vacuum.py +2 -2
orchestrator/schedules/validate_products.py +2 -8
orchestrator/schedules/validate_subscriptions.py +2 -2
orchestrator/schemas/search.py +101 -0
orchestrator/search/__init__.py +0 -0
orchestrator/search/agent/__init__.py +1 -0
orchestrator/search/agent/prompts.py +62 -0
orchestrator/search/agent/state.py +8 -0
orchestrator/search/agent/tools.py +122 -0
orchestrator/search/core/__init__.py +0 -0
orchestrator/search/core/embedding.py +64 -0
orchestrator/search/core/exceptions.py +16 -0
orchestrator/search/core/types.py +162 -0
orchestrator/search/core/validators.py +27 -0
orchestrator/search/docs/index.md +37 -0
orchestrator/search/docs/running_local_text_embedding_inference.md +45 -0
orchestrator/search/filters/__init__.py +27 -0
orchestrator/search/filters/base.py +236 -0
orchestrator/search/filters/date_filters.py +75 -0
orchestrator/search/filters/definitions.py +76 -0
orchestrator/search/filters/ltree_filters.py +31 -0
orchestrator/search/filters/numeric_filter.py +60 -0
orchestrator/search/indexing/__init__.py +3 -0
orchestrator/search/indexing/indexer.py +316 -0
orchestrator/search/indexing/registry.py +88 -0
orchestrator/search/indexing/tasks.py +53 -0
orchestrator/search/indexing/traverse.py +209 -0
orchestrator/search/retrieval/__init__.py +3 -0
orchestrator/search/retrieval/builder.py +64 -0
orchestrator/search/retrieval/engine.py +96 -0
orchestrator/search/retrieval/ranker.py +202 -0
orchestrator/search/retrieval/utils.py +88 -0
orchestrator/search/retrieval/validation.py +174 -0
orchestrator/search/schemas/__init__.py +0 -0
orchestrator/search/schemas/parameters.py +114 -0
orchestrator/search/schemas/results.py +47 -0
orchestrator/services/processes.py +11 -16
orchestrator/settings.py +29 -1
orchestrator/workflow.py +1 -8
{orchestrator_core-4.4.0rc1.dist-info → orchestrator_core-5.0.0a1.dist-info}/METADATA +6 -3
{orchestrator_core-4.4.0rc1.dist-info → orchestrator_core-5.0.0a1.dist-info}/RECORD +62 -26
orchestrator/graphql/resolvers/scheduled_tasks.py +0 -36
orchestrator/graphql/schemas/scheduled_task.py +0 -8
orchestrator/schedules/scheduler.py +0 -153
{orchestrator_core-4.4.0rc1.dist-info → orchestrator_core-5.0.0a1.dist-info}/WHEEL +0 -0
{orchestrator_core-4.4.0rc1.dist-info → orchestrator_core-5.0.0a1.dist-info}/licenses/LICENSE +0 -0

orchestrator/search/filters/base.py ADDED Viewed

@@ -0,0 +1,236 @@
+from __future__ import annotations
+from itertools import count
+from typing import Any, ClassVar, Literal
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+from sqlalchemy import and_, exists, or_, select
+from sqlalchemy.sql.elements import ColumnElement
+from sqlalchemy_utils.types.ltree import Ltree
+from orchestrator.db.models import AiSearchIndex
+from orchestrator.search.core.types import BooleanOperator, FilterOp, SQLAColumn
+from .date_filters import DateFilter
+from .ltree_filters import LtreeFilter
+from .numeric_filter import NumericFilter
+class EqualityFilter(BaseModel):
+    op: Literal[FilterOp.EQ, FilterOp.NEQ]
+    value: Any  # bool, str (UUID), str (enum values)
+    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
+        str_value = str(self.value)
+        match self.op:
+            case FilterOp.EQ:
+                return column == str_value
+            case FilterOp.NEQ:
+                return column != str_value
+class StringFilter(BaseModel):
+    op: Literal[FilterOp.LIKE]
+    value: str
+    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
+        return column.like(self.value)
+    @model_validator(mode="after")
+    def validate_like_pattern(self) -> StringFilter:
+        """If the operation is 'like', the value must contain a wildcard."""
+        if self.op == FilterOp.LIKE:
+            if "%" not in self.value and "_" not in self.value:
+                raise ValueError("The value for a 'like' operation must contain a wildcard character ('%' or '_').")
+        return self
+# Union of the condition models accepted by ``PathFilter.condition``.
+# The trailing comments name the field type each member is meant for.
+FilterCondition = (
+    DateFilter  # DATETIME
+    | NumericFilter  # INT/FLOAT
+    | EqualityFilter  # BOOLEAN/UUID/BLOCK/RESOURCE_TYPE
+    | StringFilter  # STRING TODO: convert to hybrid search
+    | LtreeFilter  # Path
+)
+class PathFilter(BaseModel):
+    path: str = Field(description="The ltree path of the field to filter on, e.g., 'subscription.customer_id'.")
+    condition: FilterCondition = Field(description="The filter condition to apply.")
+    model_config = ConfigDict(
+        json_schema_extra={
+            "examples": [
+                {
+                    "path": "subscription.status",
+                    "condition": {"op": "eq", "value": "active"},
+                },
+                {
+                    "path": "subscription.customer_id",
+                    "condition": {"op": "ne", "value": "acme"},
+                },
+                {
+                    "path": "subscription.start_date",
+                    "condition": {"op": "gt", "value": "2025-01-01"},
+                },
+                {
+                    "path": "subscription.end_date",
+                    "condition": {
+                        "op": "between",
+                        "value": {"from": "2025-06-01", "to": "2025-07-01"},
+                    },
+                },
+                {
+                    "path": "subscription.*.name",
+                    "condition": {"op": "matches_lquery", "value": "*.foo_*"},
+                },
+            ]
+        }
+    )
+    def to_expression(self, value_column: SQLAColumn) -> ColumnElement[bool]:
+        """Convert the path filter into a SQLAlchemy expression.
+        This method delegates to the specific filter condition's ``to_expression``
+        implementation, passing along the column and path for context.
+        Parameters
+        ----------
+        value_column : ColumnElement
+            The SQLAlchemy column element representing the value to be filtered.
+        Returns:
+        -------
+        ColumnElement[bool]
+            A SQLAlchemy boolean expression that can be used in a ``WHERE`` clause.
+        """
+        return self.condition.to_expression(value_column, self.path)
+class FilterTree(BaseModel):
+    model_config = ConfigDict(
+        json_schema_extra={
+            "description": (
+                "Boolean filter tree. Operators must be UPPERCASE: AND / OR.\n"
+                "Node shapes:\n"
+                "  • Group: {'op':'AND'|'OR', 'children': [<PathFilter|FilterTree>, ...]}\n"
+                "  • Leaf (PathFilter): {'path':'<ltree>', 'condition': {...}}\n"
+                "Rules:\n"
+                "  • Do NOT put 'op' or 'children' inside a leaf 'condition'.\n"
+                "  • Max depth = 5.\n"
+                "  • Use from_flat_and() for a flat list of leaves."
+            ),
+            "examples": [
+                {
+                    "op": "AND",
+                    "children": [
+                        {"path": "subscription.status", "condition": {"op": "eq", "value": "active"}},
+                        {"path": "subscription.start_date", "condition": {"op": "gt", "value": "2021-01-01"}},
+                    ],
+                },
+                {
+                    "op": "AND",
+                    "children": [
+                        {"path": "subscription.start_date", "condition": {"op": "gte", "value": "2024-01-01"}},
+                        {
+                            "op": "OR",
+                            "children": [
+                                {"path": "subscription.product_name", "condition": {"op": "like", "value": "%fiber%"}},
+                                {"path": "subscription.customer_id", "condition": {"op": "eq", "value": "Surf"}},
+                            ],
+                        },
+                    ],
+                },
+            ],
+        }
+    )
+    op: BooleanOperator = Field(
+        description="Operator for grouping conditions in uppercase.", default=BooleanOperator.AND
+    )
+    children: list[FilterTree | PathFilter] = Field(min_length=1, description="Path filters or nested groups.")
+    MAX_DEPTH: ClassVar[int] = 5
+    @model_validator(mode="after")
+    def _validate_depth(self) -> FilterTree:
+        def depth(node: "FilterTree | PathFilter") -> int:
+            return 1 + max(depth(c) for c in node.children) if isinstance(node, FilterTree) else 1
+        if depth(self) > self.MAX_DEPTH:
+            raise ValueError(f"FilterTree nesting exceeds MAX_DEPTH={self.MAX_DEPTH}")
+        return self
+    @classmethod
+    def from_flat_and(cls, filters: list[PathFilter]) -> FilterTree | None:
+        """Wrap a flat list of PathFilter into an AND group (or None)."""
+        return None if not filters else cls(op=BooleanOperator.AND, children=list(filters))
+    def get_all_paths(self) -> set[str]:
+        """Collects all unique paths from the PathFilter leaves in the tree."""
+        return {leaf.path for leaf in self.get_all_leaves()}
+    def get_all_leaves(self) -> list[PathFilter]:
+        """Collect all PathFilter leaves in the tree."""
+        leaves: list[PathFilter] = []
+        for child in self.children:
+            if isinstance(child, PathFilter):
+                leaves.append(child)
+            else:
+                leaves.extend(child.get_all_leaves())
+        return leaves
+    def to_expression(
+        self,
+        entity_id_col: SQLAColumn,
+        *,
+        entity_type_value: str | None = None,
+    ) -> ColumnElement[bool]:
+        """Compile this tree into a SQLAlchemy boolean expression.
+        Parameters
+        ----------
+        entity_id_col : SQLAColumn
+            Column in the outer query representing the entity ID.
+        entity_type_value : str, optional
+            If provided, each subquery is additionally constrained to this entity type.
+        Returns:
+        -------
+        ColumnElement[bool]
+            A SQLAlchemy expression suitable for use in a WHERE clause.
+        """
+        alias_idx = count(1)
+        def leaf_exists(pf: PathFilter) -> ColumnElement[bool]:
+            from sqlalchemy.orm import aliased
+            alias = aliased(AiSearchIndex, name=f"flt_{next(alias_idx)}")
+            correlates = [alias.entity_id == entity_id_col]
+            if entity_type_value is not None:
+                correlates.append(alias.entity_type == entity_type_value)
+            if isinstance(pf.condition, LtreeFilter):
+                # Path-only condition acts on path column
+                pred = pf.condition.to_expression(alias.path, pf.path)
+                where_clause = and_(*correlates, pred)
+            else:
+                where_clause = and_(
+                    *correlates,
+                    alias.path == Ltree(pf.path),
+                    pf.condition.to_expression(alias.value, pf.path),
+                )
+            subq = select(1).select_from(alias).where(where_clause)
+            return exists(subq)
+        def compile_node(node: FilterTree | PathFilter) -> ColumnElement[bool]:
+            if isinstance(node, FilterTree):
+                compiled = [compile_node(ch) for ch in node.children]
+                return and_(*compiled) if node.op == BooleanOperator.AND else or_(*compiled)
+            return leaf_exists(node)
+        return compile_node(self)

orchestrator/search/filters/date_filters.py ADDED Viewed

@@ -0,0 +1,75 @@
+from datetime import date, datetime
+from typing import Annotated, Any, Literal
+from dateutil.parser import parse as dt_parse
+from pydantic import BaseModel, BeforeValidator, Field, model_validator
+from sqlalchemy import TIMESTAMP, and_
+from sqlalchemy import cast as sa_cast
+from sqlalchemy.sql.elements import ColumnElement
+from orchestrator.search.core.types import FilterOp, SQLAColumn
+def _validate_date_string(v: Any) -> Any:
+    if not isinstance(v, str):
+        return v
+    try:
+        dt_parse(v)
+        return v
+    except Exception as exc:
+        raise ValueError("is not a valid date or datetime string") from exc
+# Types accepted for a date operand: a datetime, a date, or a string.
+DateValue = datetime | date | str
+# DateValue with _validate_date_string attached as a pydantic "before" validator.
+ValidatedDateValue = Annotated[DateValue, BeforeValidator(_validate_date_string)]
+class DateRange(BaseModel):
+    start: ValidatedDateValue
+    end: ValidatedDateValue
+    @model_validator(mode="after")
+    def _order(self) -> "DateRange":
+        to_datetime = dt_parse(str(self.end))
+        from_datetime = dt_parse(str(self.start))
+        if to_datetime <= from_datetime:
+            raise ValueError("'to' must be after 'from'")
+        return self
+class DateValueFilter(BaseModel):
+    """A filter that operates on a single date value."""
+    op: Literal[FilterOp.EQ, FilterOp.NEQ, FilterOp.LT, FilterOp.LTE, FilterOp.GT, FilterOp.GTE]
+    value: ValidatedDateValue
+    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
+        date_column = sa_cast(column, TIMESTAMP(timezone=True))
+        match self.op:
+            case FilterOp.EQ:
+                return date_column == self.value
+            case FilterOp.NEQ:
+                return date_column != self.value
+            case FilterOp.LT:
+                return date_column < self.value
+            case FilterOp.LTE:
+                return date_column <= self.value
+            case FilterOp.GT:
+                return date_column > self.value
+            case FilterOp.GTE:
+                return date_column >= self.value
+class DateRangeFilter(BaseModel):
+    """A filter that operates on a range of dates."""
+    op: Literal[FilterOp.BETWEEN]
+    value: DateRange
+    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
+        date_column = sa_cast(column, TIMESTAMP(timezone=True))
+        return and_(date_column >= self.value.start, date_column < self.value.end)
+# Union of the single-value and range date filters, discriminated on the "op" field.
+DateFilter = Annotated[DateValueFilter | DateRangeFilter, Field(discriminator="op")]

orchestrator/search/filters/definitions.py ADDED Viewed

@@ -0,0 +1,76 @@
+from orchestrator.search.core.types import FieldType, FilterOp, UIType
+from orchestrator.search.schemas.results import TypeDefinition, ValueSchema
+def operators_for(ft: FieldType) -> list[FilterOp]:
+    """Return the list of valid operators for a given FieldType."""
+    return list(value_schema_for(ft).keys())
+def value_schema_for(ft: FieldType) -> dict[FilterOp, ValueSchema]:
+    """Return the value schema map for a given FieldType."""
+    if ft in (FieldType.INTEGER, FieldType.FLOAT):
+        return {
+            FilterOp.EQ: ValueSchema(kind=UIType.NUMBER),
+            FilterOp.NEQ: ValueSchema(kind=UIType.NUMBER),
+            FilterOp.LT: ValueSchema(kind=UIType.NUMBER),
+            FilterOp.LTE: ValueSchema(kind=UIType.NUMBER),
+            FilterOp.GT: ValueSchema(kind=UIType.NUMBER),
+            FilterOp.GTE: ValueSchema(kind=UIType.NUMBER),
+            FilterOp.BETWEEN: ValueSchema(
+                kind="object",
+                fields={
+                    "start": ValueSchema(kind=UIType.NUMBER),
+                    "end": ValueSchema(kind=UIType.NUMBER),
+                },
+            ),
+        }
+    if ft == FieldType.BOOLEAN:
+        return {
+            FilterOp.EQ: ValueSchema(kind=UIType.BOOLEAN),
+            FilterOp.NEQ: ValueSchema(kind=UIType.BOOLEAN),
+        }
+    if ft == FieldType.DATETIME:
+        return {
+            FilterOp.EQ: ValueSchema(kind=UIType.DATETIME),
+            FilterOp.NEQ: ValueSchema(kind=UIType.DATETIME),
+            FilterOp.LT: ValueSchema(kind=UIType.DATETIME),
+            FilterOp.LTE: ValueSchema(kind=UIType.DATETIME),
+            FilterOp.GT: ValueSchema(kind=UIType.DATETIME),
+            FilterOp.GTE: ValueSchema(kind=UIType.DATETIME),
+            FilterOp.BETWEEN: ValueSchema(
+                kind="object",
+                fields={
+                    "start": ValueSchema(kind=UIType.DATETIME),
+                    "end": ValueSchema(kind=UIType.DATETIME),
+                },
+            ),
+        }
+    return {
+        FilterOp.EQ: ValueSchema(kind=UIType.STRING),
+        FilterOp.NEQ: ValueSchema(kind=UIType.STRING),
+    }
+def generate_definitions() -> dict[UIType, TypeDefinition]:
+    """Generate the full definitions dictionary for all UI types."""
+    definitions = {}
+    for ui_type in UIType:
+        if ui_type == UIType.NUMBER:
+            rep_ft = FieldType.INTEGER
+        elif ui_type == UIType.DATETIME:
+            rep_ft = FieldType.DATETIME
+        elif ui_type == UIType.BOOLEAN:
+            rep_ft = FieldType.BOOLEAN
+        else:
+            rep_ft = FieldType.STRING
+        definitions[ui_type] = TypeDefinition(
+            operators=operators_for(rep_ft),
+            valueSchema=value_schema_for(rep_ft),
+        )
+    return definitions

orchestrator/search/filters/ltree_filters.py ADDED Viewed

@@ -0,0 +1,31 @@
+from typing import Literal
+from pydantic import BaseModel, Field
+from sqlalchemy import TEXT, bindparam
+from sqlalchemy.sql.elements import ColumnElement
+from sqlalchemy_utils.types.ltree import Ltree
+from orchestrator.search.core.types import FilterOp, SQLAColumn
+class LtreeFilter(BaseModel):
+    """Filter for ltree path operations."""
+    op: Literal[FilterOp.MATCHES_LQUERY, FilterOp.IS_ANCESTOR, FilterOp.IS_DESCENDANT, FilterOp.PATH_MATCH]
+    value: str = Field(description="The ltree path or lquery pattern to compare against.")
+    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
+        """Converts the filter condition into a SQLAlchemy expression."""
+        match self.op:
+            case FilterOp.IS_DESCENDANT:
+                ltree_value = Ltree(self.value)
+                return column.op("<@")(ltree_value)
+            case FilterOp.IS_ANCESTOR:
+                ltree_value = Ltree(self.value)
+                return column.op("@>")(ltree_value)
+            case FilterOp.MATCHES_LQUERY:
+                param = bindparam("lquery_pattern", self.value, type_=TEXT)
+                return column.op("~")(param)
+            case FilterOp.PATH_MATCH:
+                ltree_value = Ltree(path)
+                return column == ltree_value

orchestrator/search/filters/numeric_filter.py ADDED Viewed

@@ -0,0 +1,60 @@
+from typing import Annotated, Any, Literal
+from pydantic import BaseModel, Field, model_validator
+from sqlalchemy import DOUBLE_PRECISION, INTEGER, and_
+from sqlalchemy import cast as sa_cast
+from sqlalchemy.sql.elements import ColumnElement
+from typing_extensions import Self
+from orchestrator.search.core.types import FilterOp, SQLAColumn
+class NumericRange(BaseModel):
+    start: int | float
+    end: int | float
+    @model_validator(mode="after")
+    def validate_order(self) -> Self:
+        if self.end <= self.start:
+            raise ValueError("'end' must be greater than 'start'")
+        return self
+class NumericValueFilter(BaseModel):
+    """A filter for single numeric value comparisons (int or float)."""
+    op: Literal[FilterOp.EQ, FilterOp.NEQ, FilterOp.LT, FilterOp.LTE, FilterOp.GT, FilterOp.GTE]
+    value: int | float
+    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
+        cast_type = INTEGER if isinstance(self.value, int) else DOUBLE_PRECISION
+        numeric_column: ColumnElement[Any] = sa_cast(column, cast_type)
+        match self.op:
+            case FilterOp.EQ:
+                return numeric_column == self.value
+            case FilterOp.NEQ:
+                return numeric_column != self.value
+            case FilterOp.LT:
+                return numeric_column < self.value
+            case FilterOp.LTE:
+                return numeric_column <= self.value
+            case FilterOp.GT:
+                return numeric_column > self.value
+            case FilterOp.GTE:
+                return numeric_column >= self.value
+class NumericRangeFilter(BaseModel):
+    """A filter for a range of numeric values (int or float)."""
+    op: Literal[FilterOp.BETWEEN]
+    value: NumericRange
+    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
+        cast_type = INTEGER if isinstance(self.value.start, int) else DOUBLE_PRECISION
+        numeric_column: ColumnElement[Any] = sa_cast(column, cast_type)
+        return and_(numeric_column >= self.value.start, numeric_column <= self.value.end)
+# Union of the single-value and range numeric filters, discriminated on the "op" field.
+NumericFilter = Annotated[NumericValueFilter | NumericRangeFilter, Field(discriminator="op")]

orchestrator/search/indexing/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .tasks import run_indexing_for_entity
+# Names exported by ``from <package> import *``: only the re-exported indexing entry point.
+__all__ = ["run_indexing_for_entity"]

orchestrator-core 4.4.0rc1__py3-none-any.whl → 5.0.0a1__py3-none-any.whl

orchestrator-core 4.4.0rc1__py3-none-any.whl → 5.0.0a1__py3-none-any.whl