orchestrator-core 4.4.1__py3-none-any.whl → 4.5.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. orchestrator/__init__.py +26 -2
  2. orchestrator/agentic_app.py +84 -0
  3. orchestrator/api/api_v1/api.py +10 -0
  4. orchestrator/api/api_v1/endpoints/search.py +277 -0
  5. orchestrator/app.py +32 -0
  6. orchestrator/cli/index_llm.py +73 -0
  7. orchestrator/cli/main.py +22 -1
  8. orchestrator/cli/resize_embedding.py +135 -0
  9. orchestrator/cli/search_explore.py +208 -0
  10. orchestrator/cli/speedtest.py +151 -0
  11. orchestrator/db/models.py +37 -1
  12. orchestrator/llm_settings.py +51 -0
  13. orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +95 -0
  14. orchestrator/schemas/search.py +117 -0
  15. orchestrator/search/__init__.py +12 -0
  16. orchestrator/search/agent/__init__.py +8 -0
  17. orchestrator/search/agent/agent.py +47 -0
  18. orchestrator/search/agent/prompts.py +87 -0
  19. orchestrator/search/agent/state.py +8 -0
  20. orchestrator/search/agent/tools.py +236 -0
  21. orchestrator/search/core/__init__.py +0 -0
  22. orchestrator/search/core/embedding.py +64 -0
  23. orchestrator/search/core/exceptions.py +22 -0
  24. orchestrator/search/core/types.py +281 -0
  25. orchestrator/search/core/validators.py +27 -0
  26. orchestrator/search/docs/index.md +37 -0
  27. orchestrator/search/docs/running_local_text_embedding_inference.md +45 -0
  28. orchestrator/search/filters/__init__.py +27 -0
  29. orchestrator/search/filters/base.py +275 -0
  30. orchestrator/search/filters/date_filters.py +75 -0
  31. orchestrator/search/filters/definitions.py +93 -0
  32. orchestrator/search/filters/ltree_filters.py +43 -0
  33. orchestrator/search/filters/numeric_filter.py +60 -0
  34. orchestrator/search/indexing/__init__.py +3 -0
  35. orchestrator/search/indexing/indexer.py +323 -0
  36. orchestrator/search/indexing/registry.py +88 -0
  37. orchestrator/search/indexing/tasks.py +53 -0
  38. orchestrator/search/indexing/traverse.py +322 -0
  39. orchestrator/search/retrieval/__init__.py +3 -0
  40. orchestrator/search/retrieval/builder.py +113 -0
  41. orchestrator/search/retrieval/engine.py +152 -0
  42. orchestrator/search/retrieval/pagination.py +83 -0
  43. orchestrator/search/retrieval/retriever.py +447 -0
  44. orchestrator/search/retrieval/utils.py +106 -0
  45. orchestrator/search/retrieval/validation.py +174 -0
  46. orchestrator/search/schemas/__init__.py +0 -0
  47. orchestrator/search/schemas/parameters.py +116 -0
  48. orchestrator/search/schemas/results.py +64 -0
  49. orchestrator/services/settings_env_variables.py +2 -2
  50. orchestrator/settings.py +1 -1
  51. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a2.dist-info}/METADATA +8 -3
  52. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a2.dist-info}/RECORD +54 -11
  53. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a2.dist-info}/WHEEL +0 -0
  54. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,22 @@
1
class SearchUtilsError(Exception):
    """Base exception for this module."""


class ProductNotInRegistryError(SearchUtilsError):
    """Raised when a product is not found in the model registry."""


class ModelLoadError(SearchUtilsError):
    """Raised when a Pydantic model fails to load from a subscription."""


class InvalidCursorError(SearchUtilsError):
    """Raised when cursor cannot be decoded."""
@@ -0,0 +1,281 @@
1
+ from dataclasses import dataclass
2
+ from datetime import date, datetime
3
+ from enum import Enum, IntEnum
4
+ from typing import Annotated, Any, Literal, NamedTuple, TypeAlias, TypedDict, get_args, get_origin
5
+ from uuid import UUID
6
+
7
+ from sqlalchemy.orm.attributes import InstrumentedAttribute
8
+ from sqlalchemy.sql.elements import ColumnElement
9
+ from sqlalchemy_utils.types.ltree import Ltree
10
+
11
+ from orchestrator.types import filter_nonetype, get_origin_and_args, is_optional_type, is_union_type
12
+
13
+ from .validators import is_bool_string, is_iso_date, is_uuid
14
+
15
+ SQLAColumn: TypeAlias = ColumnElement[Any] | InstrumentedAttribute[Any]
16
+
17
+
18
@dataclass
class SearchMetadata:
    """Metadata about the search operation performed."""

    search_type: str
    description: str

    @classmethod
    def _make(cls, search_type: str, description: str) -> "SearchMetadata":
        """Shared internal factory used by the named constructors below."""
        return cls(search_type=search_type, description=description)

    @classmethod
    def structured(cls) -> "SearchMetadata":
        """Metadata describing a filter-based structured search."""
        return cls._make(
            "structured",
            "This search performs a filter-based search using structured queries.",
        )

    @classmethod
    def fuzzy(cls) -> "SearchMetadata":
        """Metadata describing a trigram-similarity search."""
        return cls._make(
            "fuzzy",
            "This search performs a trigram similarity search.",
        )

    @classmethod
    def semantic(cls) -> "SearchMetadata":
        """Metadata describing an embedding-based vector search."""
        return cls._make(
            "semantic",
            "This search performs a vector similarity search, using L2 distance on embeddings with minimum distance scoring (normalized).",
        )

    @classmethod
    def hybrid(cls) -> "SearchMetadata":
        """Metadata describing a rank-fusion search combining fuzzy and semantic signals."""
        return cls._make(
            "hybrid",
            "This search performs reciprocal rank fusion combining trigram similarity, word_similarity, and L2 vector distance.",
        )

    @classmethod
    def empty(cls) -> "SearchMetadata":
        """Metadata for a search that was given no criteria."""
        return cls._make("empty", "Empty search - no criteria provided")
55
+
56
+
57
class BooleanOperator(str, Enum):
    """Logical connective used to combine conditions in filter expressions."""

    AND = "AND"
    OR = "OR"
60
+
61
+
62
class FilterOp(str, Enum):
    """Comparison and path operators usable in filter conditions."""

    # Scalar comparison operators
    EQ = "eq"
    NEQ = "neq"
    LT = "lt"
    LIKE = "like"
    LTE = "lte"
    GT = "gt"
    GTE = "gte"
    BETWEEN = "between"

    # ltree-style path operators (see the <@ / @> / ~ comments below)
    MATCHES_LQUERY = "matches_lquery"  # The ~ operator for wildcard matching
    IS_ANCESTOR = "is_ancestor"  # The @> operator
    IS_DESCENDANT = "is_descendant"  # The <@ operator
    PATH_MATCH = "path_match"

    # Path-component helpers
    HAS_COMPONENT = "has_component"  # Path contains this segment
    NOT_HAS_COMPONENT = "not_has_component"  # Path doesn't contain segment
    ENDS_WITH = "ends_with"
80
+
81
+
82
class EntityType(str, Enum):
    """Kinds of entities addressable by search (matches the per-entity index commands)."""

    SUBSCRIPTION = "SUBSCRIPTION"
    PRODUCT = "PRODUCT"
    WORKFLOW = "WORKFLOW"
    PROCESS = "PROCESS"
87
+
88
+
89
class ActionType(str, Enum):
    """Defines the explicit, safe actions the agent can request."""

    # str mixin keeps member values serializable as plain strings.
    SELECT = "select"  # Retrieve a list of matching records.
    # COUNT = "count" # For phase1; the agent will not support this yet.
94
+
95
+
96
class UIType(str, Enum):
    """Frontend-facing value type telling a UI how a value must be rendered."""

    STRING = "string"
    NUMBER = "number"
    BOOLEAN = "boolean"
    DATETIME = "datetime"
    COMPONENT = "component"

    @classmethod
    def from_field_type(cls, ft: "FieldType") -> "UIType":
        """Create a UIType from a backend FieldType to indicate how a value must be rendered."""
        # Branches are mutually exclusive; anything unmapped renders as a string.
        if ft == FieldType.BOOLEAN:
            return cls.BOOLEAN
        if ft == FieldType.DATETIME:
            return cls.DATETIME
        if ft in (FieldType.INTEGER, FieldType.FLOAT):
            return cls.NUMBER
        return cls.STRING
113
+
114
+
115
class FieldType(str, Enum):
    """Backend classification of an indexed value's type.

    Members are inferred from runtime values (`infer`) or from static type
    hints (`from_type_hint`), and drive how values are filtered, rendered
    (via UIType) and embedded.
    """

    STRING = "string"
    INTEGER = "integer"
    FLOAT = "float"
    BOOLEAN = "boolean"
    DATETIME = "datetime"
    UUID = "uuid"
    BLOCK = "block"
    RESOURCE_TYPE = "resource_type"

    @classmethod
    def infer(cls, val: Any) -> "FieldType":
        """Infer the FieldType of a runtime value; falls back to STRING."""
        # TypedValue carries an explicit type and short-circuits inference.
        if isinstance(val, TypedValue):
            return cls._infer_typed_value(val)

        # bool must be tested before int (bool is a subclass of int).
        if isinstance(val, bool):
            return cls.BOOLEAN
        if isinstance(val, int):
            return cls.INTEGER
        if isinstance(val, float):
            return cls.FLOAT
        if isinstance(val, UUID):
            return cls.UUID
        if isinstance(val, (datetime, date)):
            return cls.DATETIME
        if isinstance(val, str):
            return cls._infer_from_str(val)

        return cls.STRING

    @classmethod
    def _infer_typed_value(cls, val: "TypedValue") -> "FieldType":
        """Map an explicitly typed value; only BLOCK/RESOURCE_TYPE pass through."""
        if val.type == cls.BLOCK:
            return cls.BLOCK
        if val.type == cls.RESOURCE_TYPE:
            return cls.RESOURCE_TYPE
        return cls.STRING

    @classmethod
    def _infer_from_str(cls, val: str) -> "FieldType":
        """Classify a string by what it parses as (UUID > date > bool > int > float)."""
        if is_uuid(val):
            return cls.UUID
        if is_iso_date(val):
            return cls.DATETIME
        if is_bool_string(val):
            return cls.BOOLEAN
        # NOTE(review): isdigit() only matches non-negative digit strings, so
        # e.g. "-5" falls through to the float branch and yields FLOAT — confirm intended.
        if val.isdigit():
            return cls.INTEGER
        try:
            float(val)
            return cls.FLOAT
        except ValueError:
            return cls.STRING

    @classmethod
    def from_type_hint(cls, type_hint: object) -> "FieldType":
        """Convert type hint to FieldType."""
        _type_mapping = {
            int: cls.INTEGER,
            float: cls.FLOAT,
            bool: cls.BOOLEAN,
            str: cls.STRING,
            datetime: cls.DATETIME,
            UUID: cls.UUID,
        }

        # Exact match on a plain type first.
        if type_hint in _type_mapping:
            return _type_mapping[type_hint]  # type: ignore[index]

        # Annotated[X, ...] -> recurse on the underlying type X.
        if get_origin(type_hint) is Annotated:
            inner_type = get_args(type_hint)[0]
            return cls.from_type_hint(inner_type)

        origin, args = get_origin_and_args(type_hint)

        if origin is list:
            return cls._handle_list_type(args)

        if origin is Literal:
            return cls._handle_literal_type(args)

        if is_optional_type(type_hint) or is_union_type(type_hint):
            return cls._handle_union_type(args)

        if isinstance(type_hint, type):
            return cls._handle_class_type(type_hint)

        return cls.STRING

    @classmethod
    def _handle_list_type(cls, args: tuple) -> "FieldType":
        """For list[X], classify by the element type X; bare list -> STRING."""
        if args:
            element_type = args[0]
            return cls.from_type_hint(element_type)
        return cls.STRING

    @classmethod
    def _handle_literal_type(cls, args: tuple) -> "FieldType":
        """Classify Literal[...] by the runtime type of its first value."""
        if not args:
            return cls.STRING
        first_value = args[0]
        # bool checked before int (bool is a subclass of int).
        if isinstance(first_value, bool):
            return cls.BOOLEAN
        if isinstance(first_value, int):
            return cls.INTEGER
        if isinstance(first_value, str):
            return cls.STRING
        if isinstance(first_value, float):
            return cls.FLOAT
        return cls.STRING

    @classmethod
    def _handle_union_type(cls, args: tuple) -> "FieldType":
        """Classify X | Y | None by the first non-None member."""
        non_none_types = list(filter_nonetype(args))
        if non_none_types:
            return cls.from_type_hint(non_none_types[0])
        return cls.STRING

    @classmethod
    def _handle_class_type(cls, type_hint: type) -> "FieldType":
        """Classify plain classes: IntEnum -> INTEGER, Enum -> STRING, product blocks -> BLOCK."""
        # IntEnum must be tested before Enum (IntEnum subclasses Enum).
        if issubclass(type_hint, IntEnum):
            return cls.INTEGER
        if issubclass(type_hint, Enum):
            return cls.STRING

        # Deferred import — presumably avoids a circular dependency with the domain package.
        from orchestrator.domain.base import ProductBlockModel

        if issubclass(type_hint, ProductBlockModel):
            return cls.BLOCK

        return cls.STRING

    def is_embeddable(self, value: str | None) -> bool:
        """Check if a field should be embedded."""
        if value is None:
            return False

        # If inference suggests it's not actually a string, don't embed it
        return FieldType._infer_from_str(value) == FieldType.STRING
254
+
255
+
256
@dataclass(frozen=True)
class TypedValue:
    """Immutable pairing of a raw value with an explicit FieldType.

    Used to bypass inference: FieldType.infer() short-circuits on TypedValue
    instances instead of inspecting the wrapped value.
    """

    # value: the raw payload; type: its explicit classification.
    value: Any
    type: FieldType
260
+
261
+
262
class ExtractedField(NamedTuple):
    """A (path, value) pair extracted for indexing, tagged with its inferred type."""

    path: str
    value: str
    value_type: FieldType

    @classmethod
    def from_raw(cls, path: str, raw_value: Any) -> "ExtractedField":
        """Build an ExtractedField from an arbitrary raw value found at *path*.

        TypedValue wrappers are unwrapped for the stored string, while type
        inference still sees the wrapper (so explicit types win).
        """
        if isinstance(raw_value, TypedValue):
            text = str(raw_value.value)
        else:
            text = str(raw_value)
        return cls(path=path, value=text, value_type=FieldType.infer(raw_value))
272
+
273
+
274
class IndexableRecord(TypedDict):
    """Shape of one row written to a search index table."""

    entity_id: str  # identifier of the indexed entity
    entity_type: str  # presumably an EntityType value — confirm against the indexer
    path: Ltree  # ltree path locating the value within the entity
    value: Any
    value_type: Any
    content_hash: str  # NOTE(review): looks like a change-detection hash — confirm
    embedding: list[float] | None  # embedding vector, or None when the value is not embeddable
@@ -0,0 +1,27 @@
1
+ import uuid
2
+
3
+ from dateutil.parser import isoparse
4
+
5
+
6
def is_uuid(value: str) -> bool:
    """Check if a string is a valid UUID."""
    try:
        uuid.UUID(value)
    except (ValueError, TypeError):
        return False
    return True
13
+
14
+
15
def is_iso_date(value: str) -> bool:
    """Check if a string is a valid ISO 8601 date."""
    try:
        isoparse(value)
    except (ValueError, TypeError):
        return False
    return True
22
+
23
+
24
def is_bool_string(value: str) -> bool:
    """Check if a string explicitly represents a boolean value with true/false."""
    normalized = value.strip().lower()
    return normalized == "true" or normalized == "false"
@@ -0,0 +1,37 @@
1
+ # Search Indexing CLI
2
+
3
+ Typer-based CLI for maintaining search indexes (subscriptions, products, processes, workflows).
4
+
5
+ ## Usage
6
+
7
+ Run from project root:
8
+
9
+ ```
10
+ dotenv run python main.py index [COMMAND] [OPTIONS]
11
+ ```
12
+
13
+ ### Commands
14
+
15
+ - `subscriptions` – index `subscription_search_index`
16
+ - `products` – index `product_search_index`
17
+ - `processes` – index `process_search_index`
18
+ - `workflows` – index `workflow_search_index`
19
+
20
+ ### Options
21
+
22
+ - `--<id>` – UUID of a specific entity (default: all)
23
+ - `--dry-run` – no DB writes
24
+ - `--force-index` – re-index even if unchanged
25
+
26
+ ### Examples
27
+
28
+ ```
29
+ # Index all subscriptions
30
+ dotenv run python main.py index subscriptions
31
+
32
+ # Re-index all subscriptions
33
+ dotenv run python main.py index subscriptions --force-index
34
+
35
+ # Index a single subscription
36
+ dotenv run python main.py index subscriptions --subscription-id=<UUID>
37
+ ```
@@ -0,0 +1,45 @@
1
+ # Running a local MiniLM embedding server with Hugging Face TEI
2
+
3
+ Only **OpenAI-compatible endpoints** are supported locally.
4
+
5
+ You can spin up an embedding API based on **sentence-transformers/all-MiniLM-L6-v2** using [Hugging Face TEI](https://github.com/huggingface/text-embeddings-inference):
6
+
7
+ ```bash
8
+ docker run --rm -p 8080:80 ghcr.io/huggingface/text-embeddings-inference:cpu-1.8 \
9
+ --model-id sentence-transformers/all-MiniLM-L6-v2
10
+ ```
11
+
12
+ ---
13
+
14
+ ## Environment variables
15
+
16
+ Point your backend to the local endpoint and declare the new vector size:
17
+
18
+ ```env
19
+ OPENAI_BASE_URL=http://localhost:8080/v1
20
+ EMBEDDING_DIMENSION=384
21
+ ```
22
+
23
+ Depending on the model, you might want to change the `EMBEDDING_FALLBACK_MAX_TOKENS` and `EMBEDDING_MAX_BATCH_SIZE` settings, which are set conservatively and according to the requirements of the setup used in this example.
24
+
25
+ ---
26
+
27
+ ## Apply the schema change
28
+
29
+ With these new settings run:
30
+
31
+ ```bash
32
+ dotenv run python main.py embedding resize
33
+ ```
34
+
35
+ **Note** that this will delete all records and you will have to re-index.
36
+
37
+ ---
38
+
39
+ ## Re-index embeddings
40
+
41
+ ```bash
42
+ dotenv run python main.py index subscriptions
43
+ ```
44
+
45
+ The search index now uses **384-dimension MiniLM vectors** served from your local Docker container. That’s it! 🚀
@@ -0,0 +1,27 @@
1
+ from .base import (
2
+ EqualityFilter,
3
+ FilterCondition,
4
+ FilterTree,
5
+ PathFilter,
6
+ StringFilter,
7
+ )
8
+ from .date_filters import DateFilter, DateRangeFilter, DateValueFilter
9
+ from .ltree_filters import LtreeFilter
10
+ from .numeric_filter import NumericFilter, NumericRangeFilter, NumericValueFilter
11
+
12
# Public API of the orchestrator.search.filters package.
__all__ = [
    # Base filter classes
    "PathFilter",
    "FilterTree",
    "FilterCondition",
    "StringFilter",
    "EqualityFilter",
    # Filters for specific value types
    "NumericValueFilter",
    "NumericRangeFilter",
    "DateValueFilter",
    "DateRangeFilter",
    "DateFilter",
    "LtreeFilter",
    "NumericFilter",
]