PyPI - orchestrator-core - Versions diffs - 4.5.1a1__py3-none-any.whl → 4.5.2__py3-none-any.whl - Mend

orchestrator-core 4.5.1a1__py3-none-any.whl → 4.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

orchestrator/__init__.py +3 -12
orchestrator/agentic_app.py +48 -29
orchestrator/api/api_v1/api.py +8 -6
orchestrator/api/api_v1/endpoints/processes.py +2 -0
orchestrator/api/api_v1/endpoints/search.py +26 -7
orchestrator/cli/main.py +2 -2
orchestrator/cli/search/__init__.py +32 -0
orchestrator/devtools/populator.py +16 -0
orchestrator/domain/base.py +2 -7
orchestrator/domain/lifecycle.py +24 -7
orchestrator/llm_settings.py +9 -3
orchestrator/log_config.py +1 -0
orchestrator/migrations/helpers.py +7 -1
orchestrator/schemas/search.py +13 -0
orchestrator/schemas/workflow.py +1 -0
orchestrator/search/agent/__init__.py +15 -2
orchestrator/search/agent/agent.py +30 -15
orchestrator/search/agent/prompts.py +75 -37
orchestrator/search/agent/state.py +13 -0
orchestrator/search/agent/tools.py +148 -11
orchestrator/search/core/__init__.py +12 -0
orchestrator/search/core/embedding.py +13 -4
orchestrator/search/core/exceptions.py +14 -0
orchestrator/search/core/types.py +15 -0
orchestrator/search/core/validators.py +13 -0
orchestrator/search/docs/running_local_text_embedding_inference.md +1 -0
orchestrator/search/filters/__init__.py +13 -0
orchestrator/search/filters/base.py +84 -61
orchestrator/search/filters/date_filters.py +13 -0
orchestrator/search/filters/definitions.py +16 -2
orchestrator/search/filters/ltree_filters.py +16 -3
orchestrator/search/filters/numeric_filter.py +13 -0
orchestrator/search/indexing/__init__.py +13 -0
orchestrator/search/indexing/indexer.py +14 -3
orchestrator/search/indexing/registry.py +13 -0
orchestrator/search/indexing/tasks.py +17 -1
orchestrator/search/indexing/traverse.py +17 -5
orchestrator/search/llm_migration.py +108 -0
orchestrator/search/retrieval/__init__.py +13 -0
orchestrator/search/retrieval/builder.py +23 -8
orchestrator/search/retrieval/engine.py +36 -34
orchestrator/search/retrieval/exceptions.py +90 -0
orchestrator/search/retrieval/pagination.py +13 -0
orchestrator/search/retrieval/retrievers/__init__.py +26 -0
orchestrator/search/retrieval/retrievers/base.py +123 -0
orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
orchestrator/search/retrieval/retrievers/hybrid.py +277 -0
orchestrator/search/retrieval/retrievers/semantic.py +94 -0
orchestrator/search/retrieval/retrievers/structured.py +39 -0
orchestrator/search/retrieval/utils.py +21 -7
orchestrator/search/retrieval/validation.py +54 -76
orchestrator/search/schemas/__init__.py +12 -0
orchestrator/search/schemas/parameters.py +13 -0
orchestrator/search/schemas/results.py +15 -1
orchestrator/services/processes.py +2 -1
orchestrator/settings.py +7 -0
orchestrator/utils/state.py +6 -1
orchestrator/workflows/steps.py +16 -1
{orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/METADATA +13 -11
{orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/RECORD +66 -59
orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +0 -95
orchestrator/search/retrieval/retriever.py +0 -447
/orchestrator/cli/{index_llm.py → search/index_llm.py} +0 -0
/orchestrator/cli/{resize_embedding.py → search/resize_embedding.py} +0 -0
/orchestrator/cli/{search_explore.py → search/search_explore.py} +0 -0
/orchestrator/cli/{speedtest.py → search/speedtest.py} +0 -0
{orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/WHEEL +0 -0
{orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/licenses/LICENSE +0 -0

orchestrator/search/filters/base.py CHANGED Viewed

@@ -1,3 +1,16 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from __future__ import annotations
 from itertools import count
@@ -45,12 +58,13 @@ class StringFilter(BaseModel):
         return self
+# Order matters! Ambiguous ops (like 'eq') are resolved by first matching filter
 FilterCondition = (
     DateFilter  # DATETIME
     | NumericFilter  # INT/FLOAT
-    | EqualityFilter  # BOOLEAN/UUID/BLOCK/RESOURCE_TYPE
-    | StringFilter  # STRING TODO: convert to hybrid search
+    | StringFilter  # STRING TODO: convert to hybrid search?
     | LtreeFilter  # Path
+    | EqualityFilter  # BOOLEAN/UUID/BLOCK/RESOURCE_TYPE - most generic, try last
 )
@@ -64,28 +78,29 @@ class PathFilter(BaseModel):
     model_config = ConfigDict(
         json_schema_extra={
             "examples": [
-                {
-                    "path": "subscription.status",
-                    "condition": {"op": "eq", "value": "active"},
-                },
+                {"path": "subscription.status", "condition": {"op": "eq", "value": "active"}, "value_kind": "string"},
                 {
                     "path": "subscription.customer_id",
-                    "condition": {"op": "ne", "value": "acme"},
+                    "condition": {"op": "neq", "value": "acme"},
+                    "value_kind": "string",
                 },
                 {
                     "path": "subscription.start_date",
                     "condition": {"op": "gt", "value": "2025-01-01"},
+                    "value_kind": "datetime",
                 },
                 {
                     "path": "subscription.end_date",
                     "condition": {
                         "op": "between",
-                        "value": {"from": "2025-06-01", "to": "2025-07-01"},
+                        "value": {"start": "2025-06-01", "end": "2025-07-01"},
                     },
+                    "value_kind": "datetime",
                 },
                 {
-                    "path": "subscription.*.name",
-                    "condition": {"op": "matches_lquery", "value": "*.foo_*"},
+                    "path": "subscription",
+                    "condition": {"op": "has_component", "value": "node"},
+                    "value_kind": "component",
                 },
             ]
         }
@@ -121,18 +136,14 @@ class PathFilter(BaseModel):
         This method creates a type guard to ensure we only match compatible field types,
         then delegates to the specific filter condition.
-        Parameters
-        ----------
-        value_column : ColumnElement
-            The SQLAlchemy column element representing the value to be filtered.
-        value_type_column : ColumnElement
-            The SQLAlchemy column element representing the field type.
+        Args:
+            value_column (ColumnElement): The SQLAlchemy column element representing the value to be filtered.
+            value_type_column (ColumnElement): The SQLAlchemy column element representing the field type.
         Returns:
-        -------
-        ColumnElement[bool]
-            A SQLAlchemy boolean expression that can be used in a ``WHERE`` clause.
+            ColumnElement[bool]: A SQLAlchemy boolean expression that can be used in a ``WHERE`` clause.
         """
         # Type guard - only match compatible field types
         allowed_field_types = [ft.value for ft in FieldType if UIType.from_field_type(ft) == self.value_kind]
         type_guard = value_type_column.in_(allowed_field_types) if allowed_field_types else literal(True)
@@ -141,6 +152,14 @@ class PathFilter(BaseModel):
 class FilterTree(BaseModel):
+    op: BooleanOperator = Field(
+        description="Operator for grouping conditions in uppercase.", default=BooleanOperator.AND
+    )
+    children: list[FilterTree | PathFilter] = Field(min_length=1, description="Path filters or nested groups.")
+    MAX_DEPTH: ClassVar[int] = 5
     model_config = ConfigDict(
         json_schema_extra={
             "description": (
@@ -150,11 +169,11 @@ class FilterTree(BaseModel):
                 "  • Leaf (PathFilter): {'path':'<ltree>', 'condition': {...}}\n"
                 "Rules:\n"
                 "  • Do NOT put 'op' or 'children' inside a leaf 'condition'.\n"
-                "  • Max depth = 5.\n"
-                "  • Use from_flat_and() for a flat list of leaves."
+                f"  • Max depth = {MAX_DEPTH}.\n"
             ),
             "examples": [
                 {
+                    "description": "Simple filters",
                     "op": "AND",
                     "children": [
                         {"path": "subscription.status", "condition": {"op": "eq", "value": "active"}},
@@ -162,13 +181,14 @@ class FilterTree(BaseModel):
                     ],
                 },
                 {
+                    "description": "Complex filters with OR group",
                     "op": "AND",
                     "children": [
                         {"path": "subscription.start_date", "condition": {"op": "gte", "value": "2024-01-01"}},
                         {
                             "op": "OR",
                             "children": [
-                                {"path": "subscription.product_name", "condition": {"op": "like", "value": "%fiber%"}},
+                                {"path": "subscription.product.name", "condition": {"op": "like", "value": "%fiber%"}},
                                 {"path": "subscription.customer_id", "condition": {"op": "eq", "value": "Surf"}},
                             ],
                         },
@@ -178,14 +198,6 @@ class FilterTree(BaseModel):
         }
     )
-    op: BooleanOperator = Field(
-        description="Operator for grouping conditions in uppercase.", default=BooleanOperator.AND
-    )
-    children: list[FilterTree | PathFilter] = Field(min_length=1, description="Path filters or nested groups.")
-    MAX_DEPTH: ClassVar[int] = 5
     @model_validator(mode="after")
     def _validate_depth(self) -> FilterTree:
         def depth(node: "FilterTree | PathFilter") -> int:
@@ -214,6 +226,38 @@ class FilterTree(BaseModel):
                 leaves.extend(child.get_all_leaves())
         return leaves
+    @staticmethod
+    def _build_correlates(
+        alias: Any, entity_id_col: SQLAColumn, entity_type_value: str | None
+    ) -> list[ColumnElement[bool]]:
+        """Build the correlation predicates that link the subquery to the outer query."""
+        correlates = [alias.entity_id == entity_id_col]
+        if entity_type_value is not None:
+            correlates.append(alias.entity_type == entity_type_value)
+        return correlates
+    @staticmethod
+    def _handle_ltree_filter(pf: PathFilter, alias: Any, correlates: list[ColumnElement[bool]]) -> ColumnElement[bool]:
+        """Handle path-only filters (has_component, not_has_component, ends_with)."""
+        # row-level predicate is always positive
+        positive = pf.condition.to_expression(alias.path, pf.path)
+        subq = select(1).select_from(alias).where(and_(*correlates, positive))
+        if pf.condition.op == FilterOp.NOT_HAS_COMPONENT:
+            return ~exists(subq)  # NOT at the entity level
+        return exists(subq)
+    @staticmethod
+    def _handle_value_filter(pf: PathFilter, alias: Any, correlates: list[ColumnElement[bool]]) -> ColumnElement[bool]:
+        """Handle value-based filters (equality, comparison, etc)."""
+        if "." not in pf.path:
+            path_pred = LtreeFilter(op=FilterOp.ENDS_WITH, value=pf.path).to_expression(alias.path, "")
+        else:
+            path_pred = alias.path == Ltree(pf.path)
+        value_pred = pf.to_expression(alias.value, alias.value_type)
+        subq = select(1).select_from(alias).where(and_(*correlates, path_pred, value_pred))
+        return exists(subq)
     def to_expression(
         self,
         entity_id_col: SQLAColumn,
@@ -222,46 +266,25 @@ class FilterTree(BaseModel):
     ) -> ColumnElement[bool]:
         """Compile this tree into a SQLAlchemy boolean expression.
-        Parameters
-        ----------
-        entity_id_col : SQLAColumn
-            Column in the outer query representing the entity ID.
-        entity_type_value : str, optional
-            If provided, each subquery is additionally constrained to this entity type.
+        Args:
+            entity_id_col (SQLAColumn): Column in the outer query representing the entity ID.
+            entity_type_value (str, optional): If provided, each subquery is additionally constrained to this entity type.
         Returns:
-        -------
-        ColumnElement[bool]
-            A SQLAlchemy expression suitable for use in a WHERE clause.
+            ColumnElement[bool]: A SQLAlchemy expression suitable for use in a WHERE clause.
         """
+        from sqlalchemy.orm import aliased
         alias_idx = count(1)
         def leaf_exists(pf: PathFilter) -> ColumnElement[bool]:
-            from sqlalchemy.orm import aliased
+            """Convert a PathFilter into an EXISTS subquery."""
             alias = aliased(AiSearchIndex, name=f"flt_{next(alias_idx)}")
-            correlates = [alias.entity_id == entity_id_col]
-            if entity_type_value is not None:
-                correlates.append(alias.entity_type == entity_type_value)
+            correlates = self._build_correlates(alias, entity_id_col, entity_type_value)
             if isinstance(pf.condition, LtreeFilter):
-                # row-level predicate is always positive
-                positive = pf.condition.to_expression(alias.path, pf.path)
-                subq = select(1).select_from(alias).where(and_(*correlates, positive))
-                if pf.condition.op == FilterOp.NOT_HAS_COMPONENT:
-                    return ~exists(subq)  # NOT at the entity level
-                return exists(subq)
-            # value leaf: path predicate + typed value compare
-            if "." not in pf.path:
-                path_pred = LtreeFilter(op=FilterOp.ENDS_WITH, value=pf.path).to_expression(alias.path, "")
-            else:
-                path_pred = alias.path == Ltree(pf.path)
-            value_pred = pf.to_expression(alias.value, alias.value_type)
-            subq = select(1).select_from(alias).where(and_(*correlates, path_pred, value_pred))
-            return exists(subq)
+                return self._handle_ltree_filter(pf, alias, correlates)
+            return self._handle_value_filter(pf, alias, correlates)
         def compile_node(node: FilterTree | PathFilter) -> ColumnElement[bool]:
             if isinstance(node, FilterTree):

orchestrator/search/filters/date_filters.py CHANGED Viewed

@@ -1,3 +1,16 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from datetime import date, datetime
 from typing import Annotated, Any, Literal

orchestrator/search/filters/definitions.py CHANGED Viewed

@@ -1,3 +1,16 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from orchestrator.search.core.types import FieldType, FilterOp, UIType
 from orchestrator.search.schemas.results import TypeDefinition, ValueSchema
@@ -60,6 +73,7 @@ def value_schema_for(ft: FieldType) -> dict[FilterOp, ValueSchema]:
     return {
         FilterOp.EQ: ValueSchema(kind=UIType.STRING),
         FilterOp.NEQ: ValueSchema(kind=UIType.STRING),
+        FilterOp.LIKE: ValueSchema(kind=UIType.STRING),
     }
@@ -73,7 +87,7 @@ def generate_definitions() -> dict[UIType, TypeDefinition]:
             comp_ops = component_operators()
             definitions[ui_type] = TypeDefinition(
                 operators=list(comp_ops.keys()),
-                valueSchema=comp_ops,
+                value_schema=comp_ops,
             )
         else:
             # Regular field types
@@ -88,6 +102,6 @@ def generate_definitions() -> dict[UIType, TypeDefinition]:
             definitions[ui_type] = TypeDefinition(
                 operators=operators_for(rep_ft),
-                valueSchema=value_schema_for(rep_ft),
+                value_schema=value_schema_for(rep_ft),
             )
     return definitions

orchestrator/search/filters/ltree_filters.py CHANGED Viewed

@@ -1,3 +1,16 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from typing import Literal
 from pydantic import BaseModel, Field
@@ -5,7 +18,7 @@ from sqlalchemy import TEXT, bindparam
 from sqlalchemy.sql.elements import ColumnElement
 from sqlalchemy_utils.types.ltree import Ltree
-from orchestrator.search.core.types import FilterOp, SQLAColumn
+from orchestrator.search.core.types import LTREE_SEPARATOR, FilterOp, SQLAColumn
 class LtreeFilter(BaseModel):
@@ -38,6 +51,6 @@ class LtreeFilter(BaseModel):
                 ltree_value = Ltree(path)
                 return column == ltree_value
             case FilterOp.HAS_COMPONENT | FilterOp.NOT_HAS_COMPONENT:
-                return column.op("~")(bindparam(None, f"*.{self.value}.*", type_=TEXT))
+                return column.op("~")(bindparam(None, f"*{LTREE_SEPARATOR}{self.value}{LTREE_SEPARATOR}*", type_=TEXT))
             case FilterOp.ENDS_WITH:
-                return column.op("~")(bindparam(None, f"*.{self.value}", type_=TEXT))
+                return column.op("~")(bindparam(None, f"*{LTREE_SEPARATOR}{self.value}", type_=TEXT))

orchestrator/search/filters/numeric_filter.py CHANGED Viewed

@@ -1,3 +1,16 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from typing import Annotated, Any, Literal
 from pydantic import BaseModel, Field, model_validator

orchestrator/search/indexing/__init__.py CHANGED Viewed

@@ -1,3 +1,16 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from .tasks import run_indexing_for_entity
 __all__ = ["run_indexing_for_entity"]

orchestrator/search/indexing/indexer.py CHANGED Viewed

@@ -1,3 +1,16 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import hashlib
 from collections.abc import Generator, Iterable, Iterator
 from contextlib import contextmanager, nullcontext
@@ -213,9 +226,7 @@ class Indexer:
         safe_margin = int(max_ctx * llm_settings.EMBEDDING_SAFE_MARGIN_PERCENT)
         token_budget = max(1, max_ctx - safe_margin)
-        max_batch_size = None
-        if llm_settings.OPENAI_BASE_URL:  # We are using a local model
-            max_batch_size = llm_settings.EMBEDDING_MAX_BATCH_SIZE
+        max_batch_size = llm_settings.EMBEDDING_MAX_BATCH_SIZE
         for entity_id, field in fields_to_upsert:
             if field.value_type.is_embeddable(field.value):

orchestrator/search/indexing/registry.py CHANGED Viewed

@@ -1,3 +1,16 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from dataclasses import dataclass
 from typing import Generic, TypeVar
 from uuid import UUID

orchestrator/search/indexing/tasks.py CHANGED Viewed

@@ -1,7 +1,21 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import structlog
 from sqlalchemy.orm import Query
 from orchestrator.db import db
+from orchestrator.domain.context_cache import cache_subscription_models
 from orchestrator.search.core.types import EntityType
 from orchestrator.search.indexing.indexer import Indexer
 from orchestrator.search.indexing.registry import ENTITY_CONFIG_REGISTRY
@@ -50,4 +64,6 @@ def run_indexing_for_entity(
     entities = db.session.execute(stmt).scalars()
     indexer = Indexer(config=config, dry_run=dry_run, force_index=force_index, chunk_size=chunk_size)
-    indexer.run(entities)
+    with cache_subscription_models():
+        indexer.run(entities)

orchestrator/search/indexing/traverse.py CHANGED Viewed

@@ -1,3 +1,16 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import re
 from abc import ABC, abstractmethod
 from collections.abc import Iterable
@@ -19,7 +32,7 @@ from orchestrator.domain.lifecycle import (
 from orchestrator.schemas.process import ProcessSchema
 from orchestrator.schemas.workflow import WorkflowSchema
 from orchestrator.search.core.exceptions import ModelLoadError, ProductNotInRegistryError
-from orchestrator.search.core.types import ExtractedField, FieldType
+from orchestrator.search.core.types import LTREE_SEPARATOR, ExtractedField, FieldType
 from orchestrator.types import SubscriptionLifecycle
 logger = structlog.get_logger(__name__)
@@ -30,7 +43,6 @@ DatabaseEntity = SubscriptionTable | ProductTable | ProcessTable | WorkflowTable
 class BaseTraverser(ABC):
     """Base class for traversing database models and extracting searchable fields."""
-    _LTREE_SEPARATOR = "."
     _MAX_DEPTH = 40
     @classmethod
@@ -62,7 +74,7 @@ class BaseTraverser(ABC):
             except Exception as e:
                 logger.error(f"Failed to access field '{name}' on {model_class.__name__}", error=str(e))
                 continue
-            new_path = f"{path}{cls._LTREE_SEPARATOR}{name}" if path else name
+            new_path = f"{path}{LTREE_SEPARATOR}{name}" if path else name
             annotation = field.annotation if hasattr(field, "annotation") else field.return_type
             yield from cls._yield_fields_for_value(value, new_path, annotation)
@@ -197,7 +209,7 @@ class ProductTraverser(BaseTraverser):
         fields = []
         # Add the block itself as a BLOCK type
-        block_name = block_path.split(cls._LTREE_SEPARATOR)[-1]
+        block_name = block_path.split(LTREE_SEPARATOR)[-1]
         fields.append(ExtractedField(path=block_path, value=block_name, value_type=FieldType.BLOCK))
         # Extract all field names from the block as RESOURCE_TYPE
@@ -223,7 +235,7 @@ class ProductTraverser(BaseTraverser):
                             ExtractedField(path=field_path, value=field_name, value_type=FieldType.RESOURCE_TYPE)
                         )
                         # And potentially traverse the first item for schema
-                        first_item_path = f"{field_path}{cls._LTREE_SEPARATOR}0"
+                        first_item_path = f"{field_path}{LTREE_SEPARATOR}0"
                         nested_fields = cls._extract_block_schema(field_value[0], first_item_path)
                         fields.extend(nested_fields)
                     else:

orchestrator/search/llm_migration.py ADDED Viewed

@@ -0,0 +1,108 @@
+# Copyright 2019-2025 SURF
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Simple search migration function that runs when SEARCH_ENABLED = True."""
+from sqlalchemy import text
+from sqlalchemy.engine import Connection
+from structlog import get_logger
+from orchestrator.llm_settings import llm_settings
+from orchestrator.search.core.types import FieldType
+logger = get_logger(__name__)
+TABLE = "ai_search_index"
+TARGET_DIM = 1536
+def run_migration(connection: Connection) -> None:
+    """Run the LLM migration idempotently; objects are only created when they do not exist yet."""
+    logger.info("Running LLM migration")
+    try:
+        # Check whether the `vector` extension already exists and, if so, skip the extension creation. Needed for
+        # situations where the db user has insufficient privileges to run the `CREATE EXTENSION ...` command.
+        res = connection.execute(text("SELECT * FROM pg_extension where extname = 'vector';"))
+        if llm_settings.LLM_FORCE_EXTENTION_MIGRATION or res.rowcount == 0:
+            # Create PostgreSQL extensions
+            logger.info("Attempting to run the extention creation;")
+            connection.execute(text("CREATE EXTENSION IF NOT EXISTS ltree;"))
+            connection.execute(text("CREATE EXTENSION IF NOT EXISTS unaccent;"))
+            connection.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;"))
+            connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
+        # Create field_type enum
+        field_type_values = "', '".join([ft.value for ft in FieldType])
+        connection.execute(
+            text(
+                f"""
+            DO $$
+            BEGIN
+                IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'field_type') THEN
+                    CREATE TYPE field_type AS ENUM ('{field_type_values}');
+                END IF;
+            END $$;
+        """
+            )
+        )
+        # Create the table only if it does not exist yet (CREATE TABLE IF NOT EXISTS)
+        connection.execute(
+            text(
+                f"""
+            CREATE TABLE IF NOT EXISTS {TABLE} (
+                entity_type TEXT NOT NULL,
+                entity_id UUID NOT NULL,
+                path LTREE NOT NULL,
+                value TEXT NOT NULL,
+                embedding VECTOR({TARGET_DIM}),
+                content_hash VARCHAR(64) NOT NULL,
+                value_type field_type NOT NULL DEFAULT '{FieldType.STRING.value}',
+                CONSTRAINT pk_ai_search_index PRIMARY KEY (entity_id, path)
+            );
+        """
+            )
+        )
+        # Drop default
+        connection.execute(text(f"ALTER TABLE {TABLE} ALTER COLUMN value_type DROP DEFAULT;"))
+        # Create indexes with IF NOT EXISTS
+        connection.execute(text(f"CREATE INDEX IF NOT EXISTS ix_ai_search_index_entity_id ON {TABLE} (entity_id);"))
+        connection.execute(
+            text(f"CREATE INDEX IF NOT EXISTS idx_ai_search_index_content_hash ON {TABLE} (content_hash);")
+        )
+        connection.execute(
+            text(f"CREATE INDEX IF NOT EXISTS ix_flat_path_gist ON {TABLE} USING GIST (path gist_ltree_ops);")
+        )
+        connection.execute(text(f"CREATE INDEX IF NOT EXISTS ix_flat_path_btree ON {TABLE} (path);"))
+        connection.execute(
+            text(f"CREATE INDEX IF NOT EXISTS ix_flat_value_trgm ON {TABLE} USING GIN (value gin_trgm_ops);")
+        )
+        connection.execute(
+            text(
+                f"CREATE INDEX IF NOT EXISTS ix_flat_embed_hnsw ON {TABLE} USING HNSW (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);"
+            )
+        )
+        connection.commit()
+        logger.info("LLM migration completed successfully")
+    except Exception as e:
+        logger.error("LLM migration failed", error=str(e))
+        raise Exception(
+            f"LLM migration failed. This likely means the pgvector extension "
+            f"is not installed. Please install pgvector and ensure your PostgreSQL "
+            f"version supports it. Error: {e}"
+        ) from e

orchestrator/search/retrieval/__init__.py CHANGED Viewed

@@ -1,3 +1,16 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from .engine import execute_search
 __all__ = ["execute_search"]

orchestrator-core 4.5.1a1__py3-none-any.whl → 4.5.2__py3-none-any.whl

orchestrator-core 4.5.1a1__py3-none-any.whl → 4.5.2__py3-none-any.whl