orchestrator-core 4.6.3-py3-none-any.whl → 4.6.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +1 -1
- orchestrator/cli/search/index_llm.py +8 -0
- orchestrator/search/filters/date_filters.py +4 -5
- orchestrator/search/indexing/indexer.py +43 -7
- orchestrator/search/indexing/registry.py +16 -1
- orchestrator/search/indexing/tasks.py +22 -1
- orchestrator/search/indexing/traverse.py +30 -8
- orchestrator/search/query/results.py +60 -2
- orchestrator/search/retrieval/retrievers/__init__.py +2 -0
- orchestrator/search/retrieval/retrievers/base.py +8 -1
- orchestrator/search/retrieval/retrievers/process.py +225 -0
- {orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/METADATA +4 -4
- {orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/RECORD +15 -14
- {orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/licenses/LICENSE +0 -0
orchestrator/__init__.py
CHANGED

-__version__ = "4.6.3"
+__version__ = "4.6.4"

orchestrator/cli/search/index_llm.py
CHANGED

@@ -14,6 +14,7 @@ def subscriptions_command(
     subscription_id: str | None = typer.Option(None, help="UUID (default = all)"),
     dry_run: bool = typer.Option(False, help="No DB writes"),
     force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+    show_progress: bool = typer.Option(False, help="Show per-entity progress"),
 ) -> None:
     """Index subscription_search_index."""
     run_indexing_for_entity(
@@ -21,6 +22,7 @@ def subscriptions_command(
         entity_id=subscription_id,
         dry_run=dry_run,
         force_index=force_index,
+        show_progress=show_progress,
     )
 
 
@@ -29,6 +31,7 @@ def products_command(
     product_id: str | None = typer.Option(None, help="UUID (default = all)"),
     dry_run: bool = typer.Option(False, help="No DB writes"),
     force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+    show_progress: bool = typer.Option(False, help="Show per-entity progress"),
 ) -> None:
     """Index product_search_index."""
     run_indexing_for_entity(
@@ -36,6 +39,7 @@ def products_command(
         entity_id=product_id,
         dry_run=dry_run,
         force_index=force_index,
+        show_progress=show_progress,
     )
 
 
@@ -44,6 +48,7 @@ def processes_command(
     process_id: str | None = typer.Option(None, help="UUID (default = all)"),
     dry_run: bool = typer.Option(False, help="No DB writes"),
     force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+    show_progress: bool = typer.Option(False, help="Show per-entity progress"),
 ) -> None:
     """Index process_search_index."""
     run_indexing_for_entity(
@@ -51,6 +56,7 @@ def processes_command(
         entity_id=process_id,
         dry_run=dry_run,
         force_index=force_index,
+        show_progress=show_progress,
     )
 
 
@@ -59,6 +65,7 @@ def workflows_command(
     workflow_id: str | None = typer.Option(None, help="UUID (default = all)"),
     dry_run: bool = typer.Option(False, help="No DB writes"),
     force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+    show_progress: bool = typer.Option(False, help="Show per-entity progress"),
 ) -> None:
     """Index workflow_search_index."""
     run_indexing_for_entity(
@@ -66,6 +73,7 @@ def workflows_command(
         entity_id=workflow_id,
         dry_run=dry_run,
         force_index=force_index,
+        show_progress=show_progress,
     )

orchestrator/search/filters/date_filters.py
CHANGED

@@ -14,7 +14,6 @@
 from datetime import date, datetime
 from typing import Annotated, Any, Literal
 
-from dateutil.parser import parse as dt_parse
 from pydantic import BaseModel, BeforeValidator, Field, model_validator
 from sqlalchemy import TIMESTAMP, and_
 from sqlalchemy import cast as sa_cast
@@ -27,10 +26,10 @@ def _validate_date_string(v: Any) -> Any:
     if not isinstance(v, str):
         return v
     try:
-        dt_parse(v)
+        datetime.fromisoformat(v)
         return v
     except Exception as exc:
-        raise ValueError("is not a valid date or datetime string") from exc
+        raise ValueError("is not a valid ISO-8601 date or datetime string") from exc
 
 
 DateValue = datetime | date | str
@@ -44,8 +43,8 @@ class DateRange(BaseModel):
 
     @model_validator(mode="after")
     def _order(self) -> "DateRange":
-        to_datetime = dt_parse(str(self.end))
-        from_datetime = dt_parse(str(self.start))
+        to_datetime = datetime.fromisoformat(str(self.end))
+        from_datetime = datetime.fromisoformat(str(self.start))
         if to_datetime <= from_datetime:
             raise ValueError("'to' must be after 'from'")
         return self

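Worth noting about the change above: `datetime.fromisoformat` is far stricter than dateutil's parser, so only ISO-8601 strings now pass validation. A minimal standalone sketch of the new behavior (stdlib only; the function below mirrors `_validate_date_string` purely for illustration):

from datetime import datetime

def validate_date_string(v: str) -> str:
    # ISO-8601 only, matching the new validator above.
    try:
        datetime.fromisoformat(v)
        return v
    except Exception as exc:
        raise ValueError("is not a valid ISO-8601 date or datetime string") from exc

print(validate_date_string("2025-01-31T12:00:00"))  # accepted
# validate_date_string("Jan 31, 2025") now raises ValueError, whereas
# dateutil's permissive parser would previously have accepted it.
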
orchestrator/search/indexing/indexer.py
CHANGED

@@ -45,6 +45,23 @@ def _maybe_begin(session: Session | None) -> Iterator[None]:
     yield
 
 
+@contextmanager
+def _maybe_progress(show_progress: bool, total_count: int | None, label: str) -> Iterator[Any]:
+    """Context manager that optionally creates a progress bar."""
+    if show_progress:
+        import typer
+
+        with typer.progressbar(
+            length=total_count,
+            label=label,
+            show_eta=True,
+            show_percent=bool(total_count),
+        ) as progress:
+            yield progress
+    else:
+        yield None
+
+
 class Indexer:
     """Index entities into `AiSearchIndex` using streaming reads and batched writes.
 
@@ -89,11 +106,21 @@ class Indexer:
     8) Repeat until the stream is exhausted.
     """
 
-    def __init__(self, config: EntityConfig, dry_run: bool, force_index: bool, chunk_size: int = 1000) -> None:
+    def __init__(
+        self,
+        config: EntityConfig,
+        dry_run: bool,
+        force_index: bool,
+        chunk_size: int = 1000,
+        show_progress: bool = False,
+        total_count: int | None = None,
+    ) -> None:
         self.config = config
         self.dry_run = dry_run
         self.force_index = force_index
         self.chunk_size = chunk_size
+        self.show_progress = show_progress
+        self.total_count = total_count
         self.embedding_model = llm_settings.EMBEDDING_MODEL
         self.logger = logger.bind(entity_kind=config.entity_kind.value)
         self._entity_titles: dict[str, str] = {}
@@ -116,13 +143,22 @@ class Indexer:
 
         with write_scope as database:
             session: Session | None = getattr(database, "session", None)
-            for entity in entities:
-                chunk.append(entity)
-                if len(chunk) >= self.chunk_size:
-                    flush()
 
-            if chunk:
-                flush()
+            with _maybe_progress(
+                self.show_progress, self.total_count, f"Indexing {self.config.entity_kind.value}"
+            ) as progress:
+                for entity in entities:
+                    chunk.append(entity)
+
+                    if len(chunk) >= self.chunk_size:
+                        flush()
+                        if progress:
+                            progress.update(self.chunk_size)
+
+                if chunk:
+                    flush()
+                    if progress:
+                        progress.update(len(chunk))
 
         final_log_message = (
             f"processed {total_records_processed} records and skipped {total_identical_records} identical records."

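The `_maybe_progress` helper keeps the indexing loop identical whether or not a bar was requested: it yields a `typer.progressbar` when enabled and `None` otherwise, so the caller only needs an `if progress:` guard. A minimal sketch of the same pattern in isolation (assumes `typer` is installed; the label and counts here are invented):

from contextlib import contextmanager
from typing import Any, Iterator

@contextmanager
def maybe_progress(show_progress: bool, total_count: int | None, label: str) -> Iterator[Any]:
    # Yield a typer progress bar when enabled, else None.
    if show_progress:
        import typer

        with typer.progressbar(length=total_count, label=label, show_eta=True) as progress:
            yield progress
    else:
        yield None

with maybe_progress(True, 50, "Indexing demo") as progress:
    for _batch in range(5):
        # ... flush a chunk of 10 records ...
        if progress:
            progress.update(10)  # advance by the chunk size
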
orchestrator/search/indexing/registry.py
CHANGED

@@ -66,6 +66,21 @@ class EntityConfig(Generic[ModelT]):
         return "UNKNOWN"
 
 
+@dataclass(frozen=True)
+class ProcessConfig(EntityConfig[ProcessTable]):
+    """Processes need to eager load workflow for workflow_name field."""
+
+    def get_all_query(self, entity_id: str | None = None) -> Query | Select:
+        from sqlalchemy.orm import selectinload
+
+        # Only load workflow, not subscriptions (keeps it lightweight)
+        query = self.table.query.options(selectinload(ProcessTable.workflow))
+        if entity_id:
+            pk_column = getattr(self.table, self.pk_name)
+            query = query.filter(pk_column == UUID(entity_id))
+        return query
+
+
 @dataclass(frozen=True)
 class WorkflowConfig(EntityConfig[WorkflowTable]):
     """Workflows have a custom select() function that filters out deleted workflows."""
@@ -95,7 +110,7 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
         root_name="product",
         title_paths=["product.description", "product.name"],
     ),
-    EntityType.PROCESS: EntityConfig(
+    EntityType.PROCESS: ProcessConfig(
         entity_kind=EntityType.PROCESS,
         table=ProcessTable,
         traverser=ProcessTraverser,

orchestrator/search/indexing/tasks.py
CHANGED

@@ -11,7 +11,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Any
+
 import structlog
+from sqlalchemy import func, select
 from sqlalchemy.orm import Query
 
 from orchestrator.db import db
@@ -23,12 +26,20 @@ from orchestrator.search.indexing.registry import ENTITY_CONFIG_REGISTRY
 logger = structlog.get_logger(__name__)
 
 
+def _get_entity_count(stmt: Any) -> int | None:
+    """Get total count of entities from a select statement."""
+
+    count_stmt = select(func.count()).select_from(stmt.subquery())
+    return db.session.execute(count_stmt).scalar()
+
+
 def run_indexing_for_entity(
     entity_kind: EntityType,
     entity_id: str | None = None,
     dry_run: bool = False,
     force_index: bool = False,
     chunk_size: int = 1000,
+    show_progress: bool = False,
 ) -> None:
     """Stream and index entities for the given kind.
 
@@ -46,6 +57,7 @@ def run_indexing_for_entity(
             existing hashes.
         chunk_size (int): Number of rows fetched per round-trip and passed to
             the indexer per batch.
+        show_progress (bool): When True, logs progress for each processed entity.
 
     Returns:
         None
@@ -60,10 +72,19 @@ def run_indexing_for_entity(
     else:
         stmt = q
 
+    total_count = _get_entity_count(stmt) if show_progress else None
+
     stmt = stmt.execution_options(stream_results=True, yield_per=chunk_size)
     entities = db.session.execute(stmt).scalars()
 
-    indexer = Indexer(config=config, dry_run=dry_run, force_index=force_index, chunk_size=chunk_size)
+    indexer = Indexer(
+        config=config,
+        dry_run=dry_run,
+        force_index=force_index,
+        chunk_size=chunk_size,
+        show_progress=show_progress,
+        total_count=total_count,
+    )
 
     with cache_subscription_models():
         indexer.run(entities)

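Putting the pieces together, the CLI commands shown earlier reduce to one call into this module. A hedged sketch of a programmatic invocation (assumes an application context with an initialised database session):

from orchestrator.search.core.types import EntityType
from orchestrator.search.indexing.tasks import run_indexing_for_entity

# Re-index all processes with a progress bar; the total count is computed
# via _get_entity_count only when show_progress is True.
run_indexing_for_entity(
    EntityType.PROCESS,
    dry_run=False,
    force_index=False,
    chunk_size=1000,
    show_progress=True,
)
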
orchestrator/search/indexing/traverse.py
CHANGED

@@ -29,7 +29,7 @@ from orchestrator.domain.base import ProductBlockModel, ProductModel
 from orchestrator.domain.lifecycle import (
     lookup_specialized_type,
 )
-from orchestrator.schemas.process import
+from orchestrator.schemas.process import ProcessBaseSchema
 from orchestrator.schemas.workflow import WorkflowSchema
 from orchestrator.search.core.exceptions import ModelLoadError, ProductNotInRegistryError
 from orchestrator.search.core.types import LTREE_SEPARATOR, ExtractedField, FieldType
@@ -307,17 +307,39 @@ class ProductTraverser(BaseTraverser):
 
 
 class ProcessTraverser(BaseTraverser):
-    """Traverser for process entities using
+    """Traverser for process entities using ProcessBaseSchema.
 
-
-
-    - Related workflow information beyond workflow_name
+    Only indexes top-level process fields (no subscriptions or steps)
+    to keep the index size manageable.
     """
 
+    EXCLUDED_FIELDS = {"traceback", "failed_reason"}
+
+    @classmethod
+    def _load_model(cls, entity: ProcessTable) -> ProcessBaseSchema | None:
+        return cls._load_model_with_schema(entity, ProcessBaseSchema, "process_id")
+
     @classmethod
-    def
-    """
-
+    def get_fields(cls, entity: ProcessTable, pk_name: str, root_name: str) -> list[ExtractedField]:  # type: ignore[override]
+        """Extract fields from process, excluding fields in EXCLUDED_FIELDS."""
+        try:
+            model = cls._load_model(entity)
+            if model is None:
+                return []
+
+            return sorted(
+                (
+                    field
+                    for field in cls.traverse(model, root_name)
+                    if field.path.split(LTREE_SEPARATOR)[-1] not in cls.EXCLUDED_FIELDS
+                ),
+                key=lambda f: f.path,
+            )
+
+        except (ProductNotInRegistryError, ModelLoadError) as e:
+            entity_id = getattr(entity, pk_name, "unknown")
+            logger.error(f"Failed to extract fields from {entity.__class__.__name__}", id=str(entity_id), error=str(e))
+            return []
 
 
 class WorkflowTraverser(BaseTraverser):

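The exclusion test in `get_fields` compares only the final ltree label of each extracted path. A toy illustration (the field paths are hypothetical, and LTREE_SEPARATOR is assumed to be "." as in Postgres ltree):

EXCLUDED_FIELDS = {"traceback", "failed_reason"}
LTREE_SEPARATOR = "."  # assumed value of the imported constant

paths = ["process.last_status", "process.failed_reason", "process.workflow_name"]
kept = [p for p in paths if p.split(LTREE_SEPARATOR)[-1] not in EXCLUDED_FIELDS]
print(kept)  # ['process.last_status', 'process.workflow_name']
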
orchestrator/search/query/results.py
CHANGED

@@ -139,6 +139,63 @@ def format_aggregation_response(
     )
 
 
+def truncate_text_with_highlights(
+    text: str, highlight_indices: list[tuple[int, int]] | None = None, max_length: int = 500, context_chars: int = 100
+) -> tuple[str, list[tuple[int, int]] | None]:
+    """Truncate text to max_length while preserving context around the first highlight.
+
+    Args:
+        text: The text to truncate
+        highlight_indices: List of (start, end) tuples indicating highlight positions, or None
+        max_length: Maximum length of the returned text
+        context_chars: Number of characters to show before and after the first highlight
+
+    Returns:
+        Tuple of (truncated_text, adjusted_highlight_indices)
+    """
+    # If text is short enough, return as-is
+    if len(text) <= max_length:
+        return text, highlight_indices
+
+    # If no highlights, truncate from beginning
+    if not highlight_indices:
+        truncated_text = text[:max_length]
+        suffix = "..." if len(text) > max_length else ""
+        return truncated_text + suffix, None
+
+    # Use first highlight to determine what to show
+    first_highlight_start = highlight_indices[0][0]
+
+    # Calculate start position: try to center around first highlight
+    start = max(0, first_highlight_start - context_chars)
+    end = min(len(text), start + max_length)
+
+    # Adjust start if we hit the end boundary
+    if end == len(text) and (end - start) < max_length:
+        start = max(0, end - max_length)
+
+    truncated_text = text[start:end]
+
+    # Add ellipsis to indicate truncation
+    truncated_from_start = start > 0
+    truncated_from_end = end < len(text)
+
+    if truncated_from_start:
+        truncated_text = "..." + truncated_text
+    if truncated_from_end:
+        truncated_text = truncated_text + "..."
+
+    # Adjust highlight indices to be relative to truncated text
+    offset = start - (3 if truncated_from_start else 0)  # Account for leading "..."
+    adjusted_indices = []
+    for hl_start, hl_end in highlight_indices:
+        # Only include highlights that are within the truncated range
+        if hl_start >= start and hl_end <= end:
+            adjusted_indices.append((hl_start - offset, hl_end - offset))
+
+    return truncated_text, adjusted_indices if adjusted_indices else None
+
+
 def generate_highlight_indices(text: str, term: str) -> list[tuple[int, int]]:
     """Finds all occurrences of individual words from the term, including both word boundary and substring matches."""
     import re
@@ -201,8 +258,9 @@ def format_search_response(
         if not isinstance(path, str):
             path = str(path)
 
-        highlight_indices = generate_highlight_indices(text, user_query)
-        matching_field = MatchingField(text=text, path=path, highlight_indices=highlight_indices)
+        highlight_indices = generate_highlight_indices(text, user_query)
+        truncated_text, adjusted_indices = truncate_text_with_highlights(text, highlight_indices)
+        matching_field = MatchingField(text=truncated_text, path=path, highlight_indices=adjusted_indices)
 
     elif not user_query and query.filters and metadata.search_type == "structured":
         # Structured search (filter-only)

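A worked example of the truncation helper added above, with the offsets computed by hand from the defaults max_length=500 and context_chars=100:

from orchestrator.search.query.results import truncate_text_with_highlights

text = "x" * 300 + "needle" + "y" * 300  # 606 chars, longer than max_length
highlights = [(300, 306)]                # position of "needle" in the original

snippet, adjusted = truncate_text_with_highlights(text, highlights)

print(len(snippet))      # 503: "..." prefix + 500 kept characters
print(adjusted)          # [(197, 203)]
print(snippet[197:203])  # "needle" -- the indices stay valid after truncation
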
orchestrator/search/retrieval/retrievers/__init__.py
CHANGED

@@ -14,6 +14,7 @@
 from .base import Retriever
 from .fuzzy import FuzzyRetriever
 from .hybrid import RrfHybridRetriever
+from .process import ProcessHybridRetriever
 from .semantic import SemanticRetriever
 from .structured import StructuredRetriever
 
@@ -21,6 +22,7 @@ __all__ = [
     "Retriever",
     "FuzzyRetriever",
     "RrfHybridRetriever",
+    "ProcessHybridRetriever",
     "SemanticRetriever",
     "StructuredRetriever",
 ]

orchestrator/search/retrieval/retrievers/base.py
CHANGED

@@ -17,7 +17,7 @@ from decimal import Decimal
 import structlog
 from sqlalchemy import BindParameter, Numeric, Select, literal
 
-from orchestrator.search.core.types import FieldType, SearchMetadata
+from orchestrator.search.core.types import EntityType, FieldType, SearchMetadata
 from orchestrator.search.query.queries import ExportQuery, SelectQuery
 
 from ..pagination import PageCursor
@@ -63,12 +63,15 @@ class Retriever(ABC):
         Returns:
             A concrete retriever instance based on available search criteria
         """
+
         from .fuzzy import FuzzyRetriever
         from .hybrid import RrfHybridRetriever
+        from .process import ProcessHybridRetriever
         from .semantic import SemanticRetriever
         from .structured import StructuredRetriever
 
         fuzzy_term = query.fuzzy_term
+        is_process = query.entity_type == EntityType.PROCESS
 
         # If vector_query exists but embedding generation failed, fall back to fuzzy search with full query text
         if query_embedding is None and query.vector_query is not None and query.query_text is not None:
@@ -76,10 +79,14 @@
 
         # Select retriever based on available search criteria
         if query_embedding is not None and fuzzy_term is not None:
+            if is_process:
+                return ProcessHybridRetriever(query_embedding, fuzzy_term, cursor)
             return RrfHybridRetriever(query_embedding, fuzzy_term, cursor)
         if query_embedding is not None:
             return SemanticRetriever(query_embedding, cursor)
         if fuzzy_term is not None:
+            if is_process:
+                return ProcessHybridRetriever(None, fuzzy_term, cursor)
             return FuzzyRetriever(fuzzy_term, cursor)
 
         return StructuredRetriever(cursor)

orchestrator/search/retrieval/retrievers/process.py
ADDED

@@ -0,0 +1,225 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+from sqlalchemy import BindParameter, Select, String, and_, case, cast, func, literal, select
+from sqlalchemy.sql.expression import ColumnElement, Label
+from sqlalchemy_utils import LtreeType
+
+from orchestrator.db.models import AiSearchIndex, ProcessStepTable
+from orchestrator.search.core.types import SearchMetadata
+
+from .hybrid import RrfHybridRetriever, compute_rrf_hybrid_score_sql
+
+
+class ProcessHybridRetriever(RrfHybridRetriever):
+    """Process-specific hybrid retriever with process.last_step JSONB search.
+
+    Extends RrfHybridRetriever to include fuzzy search over the process.last_step
+    (JSONB) column. For process searches:
+    - Indexed fields (from AiSearchIndex): semantic + fuzzy search
+    - Last step JSONB field: fuzzy search only (no embeddings for dynamic data)
+
+    The retriever:
+    1. Gets field candidates from AiSearchIndex
+    2. Uses process.last_step JSONB column directly for fuzzy matching
+    3. Combines both sources (indexed + JSONB) in unified ranking
+    """
+
+    q_vec: list[float] | None  # type: ignore[assignment]  # Override parent's type to allow None for fuzzy-only search
+
+    def __init__(self, q_vec: list[float] | None, *args: Any, **kwargs: Any) -> None:
+        # ProcessHybridRetriever accepts None for q_vec (fuzzy-only search)
+        # We pass empty list to parent to satisfy type requirements, but override behavior in _get_semantic_distance_expr
+        super().__init__(q_vec or [], *args, **kwargs)
+        self.q_vec = q_vec
+
+    def _get_semantic_distance_expr(self) -> Label[Any]:
+        """Get semantic distance expression, handling optional q_vec."""
+        if self.q_vec is None:
+            return literal(1.0).label("semantic_distance")
+
+        from sqlalchemy import bindparam
+
+        q_param: BindParameter[list[float]] = bindparam("q_vec", type_=AiSearchIndex.embedding.type)
+        sem_expr = case(
+            (AiSearchIndex.embedding.is_(None), None),
+            else_=AiSearchIndex.embedding.op("<->")(q_param),
+        )
+        return func.coalesce(sem_expr, literal(1.0)).label("semantic_distance")
+
+    def _build_indexed_candidates(
+        self, cand: Any, sem_val: Label[Any], best_similarity: ColumnElement[Any], filter_condition: ColumnElement[Any]
+    ) -> Select:
+        """Build candidates from indexed fields in AiSearchIndex."""
+        return (
+            select(
+                AiSearchIndex.entity_id,
+                AiSearchIndex.entity_title,
+                AiSearchIndex.path,
+                AiSearchIndex.value,
+                sem_val,
+                best_similarity.label("fuzzy_score"),
+            )
+            .select_from(AiSearchIndex)
+            .join(cand, cand.c.entity_id == AiSearchIndex.entity_id)
+            .where(
+                and_(
+                    AiSearchIndex.value_type.in_(self.SEARCHABLE_FIELD_TYPES),
+                    filter_condition,
+                )
+            )
+            .limit(self.field_candidates_limit)
+        )
+
+    def _build_jsonb_candidates(self, cand: Any) -> Select:
+        """Build candidates from last process_step.state JSONB column."""
+        # Get the last step per process using LATERAL subquery
+        last_step_subq = (
+            select(ProcessStepTable.process_id, ProcessStepTable.state)
+            .where(ProcessStepTable.process_id == cand.c.entity_id)
+            .order_by(ProcessStepTable.completed_at.desc())
+            .limit(1)
+            .lateral("last_step")
+        )
+
+        # Cast JSONB to text for substring search
+        state_text = cast(last_step_subq.c.state, String)
+        jsonb_fuzzy_score = func.word_similarity(self.fuzzy_term, state_text)
+        jsonb_filter = state_text.ilike(f"%{self.fuzzy_term}%")
+
+        return (
+            select(
+                cand.c.entity_id,
+                cand.c.entity_title,
+                cast(literal("process.last_step.state"), LtreeType).label("path"),
+                state_text.label("value"),
+                literal(1.0).label("semantic_distance"),
+                jsonb_fuzzy_score.label("fuzzy_score"),
+            )
+            .select_from(cand)
+            .join(last_step_subq, literal(True))
+            .where(and_(last_step_subq.c.state.isnot(None), jsonb_filter))
+            .limit(self.field_candidates_limit)
+        )
+
+    def apply(self, candidate_query: Select) -> Select:
+        """Apply process-specific hybrid search with process.last_step JSONB.
+
+        Args:
+            candidate_query: Base query returning process entity_id candidates
+
+        Returns:
+            Select statement with RRF scoring including last step JSONB fields
+        """
+        cand = candidate_query.subquery()
+
+        best_similarity = func.word_similarity(self.fuzzy_term, AiSearchIndex.value)
+        sem_val = self._get_semantic_distance_expr()
+        filter_condition = literal(self.fuzzy_term).op("<%")(AiSearchIndex.value)
+
+        indexed_candidates = self._build_indexed_candidates(cand, sem_val, best_similarity, filter_condition)
+        jsonb_candidates = self._build_jsonb_candidates(cand)
+
+        field_candidates = indexed_candidates.union_all(jsonb_candidates).cte("field_candidates")
+
+        entity_scores = (
+            select(
+                field_candidates.c.entity_id,
+                field_candidates.c.entity_title,
+                func.avg(field_candidates.c.semantic_distance).label("avg_semantic_distance"),
+                func.avg(field_candidates.c.fuzzy_score).label("avg_fuzzy_score"),
+            ).group_by(field_candidates.c.entity_id, field_candidates.c.entity_title)
+        ).cte("entity_scores")
+
+        entity_highlights = (
+            select(
+                field_candidates.c.entity_id,
+                func.first_value(field_candidates.c.value)
+                .over(
+                    partition_by=field_candidates.c.entity_id,
+                    order_by=[field_candidates.c.fuzzy_score.desc(), field_candidates.c.path.asc()],
+                )
+                .label(self.HIGHLIGHT_TEXT_LABEL),
+                func.first_value(field_candidates.c.path)
+                .over(
+                    partition_by=field_candidates.c.entity_id,
+                    order_by=[field_candidates.c.fuzzy_score.desc(), field_candidates.c.path.asc()],
+                )
+                .label(self.HIGHLIGHT_PATH_LABEL),
+            ).distinct(field_candidates.c.entity_id)
+        ).cte("entity_highlights")
+
+        ranked = (
+            select(
+                entity_scores.c.entity_id,
+                entity_scores.c.entity_title,
+                entity_scores.c.avg_semantic_distance,
+                entity_scores.c.avg_fuzzy_score,
+                entity_highlights.c.highlight_text,
+                entity_highlights.c.highlight_path,
+                func.dense_rank()
+                .over(
+                    order_by=[entity_scores.c.avg_semantic_distance.asc().nulls_last(), entity_scores.c.entity_id.asc()]
+                )
+                .label("sem_rank"),
+                func.dense_rank()
+                .over(order_by=[entity_scores.c.avg_fuzzy_score.desc().nulls_last(), entity_scores.c.entity_id.asc()])
+                .label("fuzzy_rank"),
+            ).select_from(
+                entity_scores.join(entity_highlights, entity_scores.c.entity_id == entity_highlights.c.entity_id)
+            )
+        ).cte("ranked_results")
+
+        score_components = compute_rrf_hybrid_score_sql(
+            sem_rank_col=ranked.c.sem_rank,
+            fuzzy_rank_col=ranked.c.fuzzy_rank,
+            avg_fuzzy_score_col=ranked.c.avg_fuzzy_score,
+            k=self.k,
+            perfect_threshold=0.9,
+            score_numeric_type=self.SCORE_NUMERIC_TYPE,
+        )
+
+        perfect = score_components["perfect"]
+        normalized_score = score_components["normalized_score"]
+
+        score = cast(
+            func.round(cast(normalized_score, self.SCORE_NUMERIC_TYPE), self.SCORE_PRECISION),
+            self.SCORE_NUMERIC_TYPE,
+        ).label(self.SCORE_LABEL)
+
+        stmt = select(
+            ranked.c.entity_id,
+            ranked.c.entity_title,
+            score,
+            ranked.c.highlight_text,
+            ranked.c.highlight_path,
+            perfect.label("perfect_match"),
+        ).select_from(ranked)
+
+        stmt = self._apply_fused_pagination(stmt, score, ranked.c.entity_id)
+
+        stmt = stmt.order_by(
+            score.desc().nulls_last(),
+            ranked.c.entity_id.asc(),
+        )
+
+        if self.q_vec is not None:
+            stmt = stmt.params(q_vec=self.q_vec)
+
+        return stmt
+
+    @property
+    def metadata(self) -> SearchMetadata:
+        return SearchMetadata.hybrid()

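`compute_rrf_hybrid_score_sql` (imported from .hybrid) fuses the semantic and fuzzy rank columns. Reciprocal Rank Fusion scores each entity by summing 1 / (k + rank) across ranking signals; a toy pure-Python illustration of that formula, independent of the SQL implementation (k and the ranks are made up, and the library version may weight or normalize differently):

def rrf_score(ranks: list[int], k: int = 60) -> float:
    # Reciprocal Rank Fusion: sum of 1 / (k + rank) over all signals.
    return sum(1.0 / (k + r) for r in ranks)

# Entity A: 1st semantically, 3rd on fuzzy; entity B: 2nd on both.
print(rrf_score([1, 3]))  # ~0.032266
print(rrf_score([2, 2]))  # ~0.032258 -- nearly tied, A edges ahead
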
{orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: orchestrator-core
-Version: 4.6.3
+Version: 4.6.4
 Summary: This is the orchestrator workflow engine.
 Author-email: SURF <automation-beheer@surf.nl>
 Requires-Python: >=3.11,<3.15
@@ -42,7 +42,7 @@ Requires-Dist: fastapi-etag==0.4.0
 Requires-Dist: itsdangerous>=2.2.0
 Requires-Dist: jinja2==3.1.6
 Requires-Dist: more-itertools~=10.8.0
-Requires-Dist: nwa-stdlib~=1.
+Requires-Dist: nwa-stdlib~=1.11.0
 Requires-Dist: oauth2-lib>=2.5.0
 Requires-Dist: orjson==3.11.4
 Requires-Dist: pgvector>=0.4.1
@@ -54,11 +54,11 @@ Requires-Dist: pydantic[email]~=2.12.4
 Requires-Dist: python-dateutil==2.9.0.post0
 Requires-Dist: python-rapidjson>=1.22,<1.23
 Requires-Dist: pytz==2025.2
-Requires-Dist: redis==5.
+Requires-Dist: redis==5.3.1
 Requires-Dist: semver==3.0.4
 Requires-Dist: sentry-sdk[fastapi]>=2.29.1
 Requires-Dist: sqlalchemy==2.0.44
-Requires-Dist: sqlalchemy-utils==0.
+Requires-Dist: sqlalchemy-utils==0.42.0
 Requires-Dist: strawberry-graphql>=0.281.0,<0.285.0
 Requires-Dist: structlog>=25.4.0
 Requires-Dist: tabulate==0.9.0

{orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-orchestrator/__init__.py,sha256=
+orchestrator/__init__.py,sha256=Lby3r_nYazHbsANmKYjUnfpWl5UDYWncINvMVs0RipU,1454
 orchestrator/agentic_app.py,sha256=ouiyyZiS4uS6Lox2DtbGGRnb2njJBMSHpSAGe-T5rX0,3028
 orchestrator/app.py,sha256=w8ubXaaogwjmwLM0TXqZaLkAhmaOTWzVlwiYbi5mHeE,13203
 orchestrator/exception_handlers.py,sha256=UsW3dw8q0QQlNLcV359bIotah8DYjMsj2Ts1LfX4ClY,1268
@@ -105,7 +105,7 @@ orchestrator/cli/helpers/input_helpers.py,sha256=pv5GTMuIWLzBE_bKNhn1XD_gxoqB0s1
 orchestrator/cli/helpers/print_helpers.py,sha256=b3ePg6HfBLKPYBBVr5XOA__JnFEMI5HBjbjov3CP8Po,859
 orchestrator/cli/search/__init__.py,sha256=K15_iW9ogR7xtX7qHDal4H09tmwVGnOBZWyPBLWhuzc,1274
 orchestrator/cli/search/display.py,sha256=PKy9sOTpq0WUdSfY2khLrIQ1OdAfsyl95ogF1Z6Dae0,3629
-orchestrator/cli/search/index_llm.py,sha256=
+orchestrator/cli/search/index_llm.py,sha256=VQlLNGXro4ZbehWZIMKPHGpGE4oF1at2bkWZssimWc4,2707
 orchestrator/cli/search/resize_embedding.py,sha256=iJdM7L6Kyq4CzRjXHWLwpGRiMnKK7xZ9133C0THebBE,4847
 orchestrator/cli/search/search_explore.py,sha256=LNAn6w13Q69fpv8CHcicHAbabrctrjGvwTjjJyC0AZY,8447
 orchestrator/cli/search/speedtest.py,sha256=J_l-8WxgN3YnqmwnbRhDyVbeqtvk3d2SfIpRBOJuhvE,4840
@@ -299,15 +299,15 @@ orchestrator/search/docs/index.md,sha256=zKzE2fbtHDfYTKaHg628wAsqCTOJ5yFUWV0ucFH
 orchestrator/search/docs/running_local_text_embedding_inference.md,sha256=OR0NVZMb8DqpgXYxlwDUrJwfRk0bYOk1-LkDMqsV6bU,1327
 orchestrator/search/filters/__init__.py,sha256=pmnHyq5SrqxS6eEiyhSIify776W9BIZ0gjOZFuYZ0nU,1335
 orchestrator/search/filters/base.py,sha256=lUr0eW0zi4oIMVUHuRD3GAQ9xEbHiFUl_EfAI6ABPVo,12456
-orchestrator/search/filters/date_filters.py,sha256=
+orchestrator/search/filters/date_filters.py,sha256=DrmOcjL3v7bh93xjC4_Q0kkZV-yanBaUetwnYTRcAGI,3045
 orchestrator/search/filters/definitions.py,sha256=k30Dp1bEr3CWMeuIcF1wPgmmF3rxI9Urx-sCaPfaE3c,4607
 orchestrator/search/filters/ltree_filters.py,sha256=1OOmM5K90NsGBQmTqyoDlphdAOGd9r2rmz1rNItm8yk,2341
 orchestrator/search/filters/numeric_filter.py,sha256=do52w5Dmb5Rt4ipfX1iEObNcdymbWHtgS2HI1Otq-JQ,2771
 orchestrator/search/indexing/__init__.py,sha256=Or78bizNPiuNOgwLGJQ0mspCF1G_gSe5C9Ap7qi0MZk,662
-orchestrator/search/indexing/indexer.py,sha256=
-orchestrator/search/indexing/registry.py,sha256=
-orchestrator/search/indexing/tasks.py,sha256=
-orchestrator/search/indexing/traverse.py,sha256=
+orchestrator/search/indexing/indexer.py,sha256=4Oh-gspJrjhyecw87TK68lvGb3inVy2Sa8RlD_FHo3c,16357
+orchestrator/search/indexing/registry.py,sha256=V6Q4aRXHON1gSE6wsavEIfwHwCPicSzFBS2mqNExFGs,4305
+orchestrator/search/indexing/tasks.py,sha256=0p68RNwJnHSGZQjfdpyFsS2Ma5Gr2PpZROZgal_R1wI,3064
+orchestrator/search/indexing/traverse.py,sha256=JLut9t4LoPCWhJ_63VgYhRKfjwyxRv-mTbQLC8mA_mU,15158
 orchestrator/search/query/__init__.py,sha256=nCjvK_n2WQdV_ACrncFXEfnvLcHtuI__J7KLlFIaQvo,2437
 orchestrator/search/query/builder.py,sha256=kgnJ93TOCm8UTL5k09nWLsG4NXAlvFFa65gbciOwZ8E,10153
 orchestrator/search/query/engine.py,sha256=TFdV_sSoSXCSDSpyhVA2S6YaJysDSW2WtPj7duAyomk,5745
@@ -315,15 +315,16 @@ orchestrator/search/query/exceptions.py,sha256=DrkNzXVbQAOi28FTHKimf_eTrXmhYwXrH
 orchestrator/search/query/export.py,sha256=_0ncVpTqN6AoQfW3WX0fWnDQX3hBz6ZGC31Beu4PVwQ,6678
 orchestrator/search/query/mixins.py,sha256=BdVDzCOFDXT6N9LI_WrbVzGrk61UNplX-UZPvD0rEV0,3019
 orchestrator/search/query/queries.py,sha256=j1uKSQgF_ifVaDJaxjs4h2z48KqGVEIKCXOoJ7Ur9Mk,3805
-orchestrator/search/query/results.py,sha256=
+orchestrator/search/query/results.py,sha256=5OgAs39oncDIBdpB3NJltPr-UvLvLlxTWw9sn-lyfQA,10989
 orchestrator/search/query/state.py,sha256=fMSBJs39kZTkpDE2T4h4x0x-51GqUvzAuePg2YUbO6I,3220
 orchestrator/search/query/validation.py,sha256=m0xJ71A0Qa5hm8b71zKRjSVpPrnkG7LbqPu4lv_GboI,8260
 orchestrator/search/retrieval/__init__.py,sha256=q5G0z3nKjIHKFs1PkEG3nvTUy3Wp4kCyBtCbqUITj3A,579
 orchestrator/search/retrieval/pagination.py,sha256=kcUzq1QQk4GrZq02M4hsKwAelUo1qDeCqsXImLUK6DA,3006
-orchestrator/search/retrieval/retrievers/__init__.py,sha256=
-orchestrator/search/retrieval/retrievers/base.py,sha256=
+orchestrator/search/retrieval/retrievers/__init__.py,sha256=dJlN6a0oHSquzjE5POYxrMGOXMx4Bx2khbJI-rA_qwg,971
+orchestrator/search/retrieval/retrievers/base.py,sha256=esdYrkyUjwjpg-fg7BurOMe7WCTUr2cjxHqdMKDc3DI,4490
 orchestrator/search/retrieval/retrievers/fuzzy.py,sha256=PLp_ANRLzmtGQP1t9X4jt43_JLKDnOxWU2xqlexSH1U,3779
 orchestrator/search/retrieval/retrievers/hybrid.py,sha256=l-7J4qct0h28wSi0KvdFJw2lyh3jyobbrCbg0PuX-4I,11141
+orchestrator/search/retrieval/retrievers/process.py,sha256=_nEEYex9iO4iBVrn6VCbvSIHf7Kb76c6id2krs-uef0,9255
 orchestrator/search/retrieval/retrievers/semantic.py,sha256=36ky_A_LNWs13IYe809qy1RPrd0Fab-G-9pf2ZDARhA,3905
 orchestrator/search/retrieval/retrievers/structured.py,sha256=13TxC52fpNGXHnPX40J2GczRYFk8LAvWn2a0HWZCd2Q,1426
 orchestrator/services/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
@@ -379,7 +380,7 @@ orchestrator/workflows/tasks/resume_workflows.py,sha256=T3iobSJjVgiupe0rClD34kUZ
 orchestrator/workflows/tasks/validate_product_type.py,sha256=lo2TX_MZOfcOmYFjLyD82FrJ5AAN3HOsE6BhDVFuy9Q,3210
 orchestrator/workflows/tasks/validate_products.py,sha256=GZJBoFF-WMphS7ghMs2-gqvV2iL1F0POhk0uSNt93n0,8510
 orchestrator/workflows/translations/en-GB.json,sha256=Gc5gy_RghZOeSNcJIntAsz_7DsCg8n_vzoHBPXxCn_U,908
-orchestrator_core-4.6.
-orchestrator_core-4.6.
-orchestrator_core-4.6.
-orchestrator_core-4.6.
+orchestrator_core-4.6.4.dist-info/licenses/LICENSE,sha256=b-aA5OZQuuBATmLKo_mln8CQrDPPhg3ghLzjPjLn4Tg,11409
+orchestrator_core-4.6.4.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+orchestrator_core-4.6.4.dist-info/METADATA,sha256=wtpq1zMwlyS21ZMcodbT338EwqmKBXWV28KAcyJRStg,6416
+orchestrator_core-4.6.4.dist-info/RECORD,,

{orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/WHEEL
File without changes

{orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/licenses/LICENSE
File without changes