PyPI - orchestrator-core - Versions diffs - 4.5.3__py3-none-any.whl → 4.6.0__py3-none-any.whl - Mend

orchestrator-core 4.5.3 py3-none-any.whl → 4.6.0 py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

orchestrator/__init__.py +2 -2
orchestrator/agentic_app.py +3 -23
orchestrator/api/api_v1/api.py +5 -0
orchestrator/api/api_v1/endpoints/agent.py +49 -0
orchestrator/api/api_v1/endpoints/search.py +120 -201
orchestrator/app.py +1 -1
orchestrator/cli/database.py +3 -0
orchestrator/cli/generate.py +11 -4
orchestrator/cli/generator/generator/migration.py +7 -3
orchestrator/cli/main.py +1 -1
orchestrator/cli/scheduler.py +15 -22
orchestrator/cli/search/resize_embedding.py +28 -22
orchestrator/cli/search/speedtest.py +4 -6
orchestrator/db/__init__.py +6 -0
orchestrator/db/models.py +75 -0
orchestrator/llm_settings.py +18 -1
orchestrator/migrations/helpers.py +47 -39
orchestrator/schedules/scheduler.py +32 -15
orchestrator/schedules/validate_products.py +1 -1
orchestrator/schemas/search.py +8 -85
orchestrator/search/agent/__init__.py +2 -2
orchestrator/search/agent/agent.py +26 -30
orchestrator/search/agent/json_patch.py +51 -0
orchestrator/search/agent/prompts.py +35 -9
orchestrator/search/agent/state.py +28 -2
orchestrator/search/agent/tools.py +192 -53
orchestrator/search/core/embedding.py +2 -2
orchestrator/search/core/exceptions.py +6 -0
orchestrator/search/core/types.py +1 -0
orchestrator/search/export.py +199 -0
orchestrator/search/indexing/indexer.py +13 -4
orchestrator/search/indexing/registry.py +14 -1
orchestrator/search/llm_migration.py +55 -0
orchestrator/search/retrieval/__init__.py +3 -2
orchestrator/search/retrieval/builder.py +5 -1
orchestrator/search/retrieval/engine.py +66 -23
orchestrator/search/retrieval/pagination.py +46 -56
orchestrator/search/retrieval/query_state.py +61 -0
orchestrator/search/retrieval/retrievers/base.py +26 -40
orchestrator/search/retrieval/retrievers/fuzzy.py +10 -9
orchestrator/search/retrieval/retrievers/hybrid.py +11 -8
orchestrator/search/retrieval/retrievers/semantic.py +9 -8
orchestrator/search/retrieval/retrievers/structured.py +6 -6
orchestrator/search/schemas/parameters.py +17 -13
orchestrator/search/schemas/results.py +4 -1
orchestrator/settings.py +1 -0
orchestrator/utils/auth.py +3 -2
orchestrator/workflow.py +23 -6
{orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0.dist-info}/METADATA +16 -11
{orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0.dist-info}/RECORD +52 -48
{orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0.dist-info}/WHEEL +0 -0
{orchestrator_core-4.5.3.dist-info → orchestrator_core-4.6.0.dist-info}/licenses/LICENSE +0 -0

orchestrator/search/indexing/registry.py CHANGED Viewed

@@ -25,7 +25,7 @@ from orchestrator.db import (
     WorkflowTable,
 )
 from orchestrator.db.database import BaseModel
-from orchestrator.search.core.types import EntityType
+from orchestrator.search.core.types import EntityType, ExtractedField
 from .traverse import (
     BaseTraverser,
@@ -48,6 +48,7 @@ class EntityConfig(Generic[ModelT]):
     traverser: "type[BaseTraverser]"
     pk_name: str
     root_name: str
+    title_paths: list[str]  # List of field paths to check for title (with fallback)
     def get_all_query(self, entity_id: str | None = None) -> Query | Select:
         query = self.table.query
@@ -56,6 +57,14 @@ class EntityConfig(Generic[ModelT]):
             query = query.filter(pk_column == UUID(entity_id))
         return query
+    def get_title_from_fields(self, fields: list[ExtractedField]) -> str:
+        """Extract title from fields using configured paths."""
+        for title_path in self.title_paths:
+            for field in fields:
+                if field.path == title_path and field.value:
+                    return str(field.value)
+        return "UNKNOWN"
 @dataclass(frozen=True)
 class WorkflowConfig(EntityConfig[WorkflowTable]):
@@ -76,6 +85,7 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
         traverser=SubscriptionTraverser,
         pk_name="subscription_id",
         root_name="subscription",
+        title_paths=["subscription.description"],
     ),
     EntityType.PRODUCT: EntityConfig(
         entity_kind=EntityType.PRODUCT,
@@ -83,6 +93,7 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
         traverser=ProductTraverser,
         pk_name="product_id",
         root_name="product",
+        title_paths=["product.description", "product.name"],
     ),
     EntityType.PROCESS: EntityConfig(
         entity_kind=EntityType.PROCESS,
@@ -90,6 +101,7 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
         traverser=ProcessTraverser,
         pk_name="process_id",
         root_name="process",
+        title_paths=["process.workflow_name"],
     ),
     EntityType.WORKFLOW: WorkflowConfig(
         entity_kind=EntityType.WORKFLOW,
@@ -97,5 +109,6 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
         traverser=WorkflowTraverser,
         pk_name="workflow_id",
         root_name="workflow",
+        title_paths=["workflow.description", "workflow.name"],
     ),
 }

orchestrator/search/llm_migration.py CHANGED Viewed

@@ -37,6 +37,7 @@ def run_migration(connection: Connection) -> None:
         if llm_settings.LLM_FORCE_EXTENTION_MIGRATION or res.rowcount == 0:
             # Create PostgreSQL extensions
             logger.info("Attempting to run the extention creation;")
+            connection.execute(text('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";'))
             connection.execute(text("CREATE EXTENSION IF NOT EXISTS ltree;"))
             connection.execute(text("CREATE EXTENSION IF NOT EXISTS unaccent;"))
             connection.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;"))
@@ -64,6 +65,7 @@ def run_migration(connection: Connection) -> None:
             CREATE TABLE IF NOT EXISTS {TABLE} (
                 entity_type TEXT NOT NULL,
                 entity_id UUID NOT NULL,
+                entity_title TEXT,
                 path LTREE NOT NULL,
                 value TEXT NOT NULL,
                 embedding VECTOR({TARGET_DIM}),
@@ -78,6 +80,23 @@ def run_migration(connection: Connection) -> None:
         # Drop default
         connection.execute(text(f"ALTER TABLE {TABLE} ALTER COLUMN value_type DROP DEFAULT;"))
+        # Add entity_title column if it doesn't exist (for existing installations)
+        connection.execute(
+            text(
+                f"""
+            DO $$
+            BEGIN
+                IF NOT EXISTS (
+                    SELECT 1 FROM information_schema.columns
+                    WHERE table_name = '{TABLE}' AND column_name = 'entity_title'
+                ) THEN
+                    ALTER TABLE {TABLE} ADD COLUMN entity_title TEXT;
+                END IF;
+            END $$;
+        """
+            )
+        )
         # Create indexes with IF NOT EXISTS
         connection.execute(text(f"CREATE INDEX IF NOT EXISTS ix_ai_search_index_entity_id ON {TABLE} (entity_id);"))
         connection.execute(
@@ -96,6 +115,42 @@ def run_migration(connection: Connection) -> None:
             )
         )
+        # Create agent_runs table
+        connection.execute(
+            text(
+                """
+            CREATE TABLE IF NOT EXISTS agent_runs (
+                run_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+                agent_type VARCHAR(50) NOT NULL,
+                created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL
+            );
+        """
+            )
+        )
+        connection.execute(text("CREATE INDEX IF NOT EXISTS ix_agent_runs_created_at ON agent_runs (created_at);"))
+        # Create search_queries table
+        connection.execute(
+            text(
+                f"""
+            CREATE TABLE IF NOT EXISTS search_queries (
+                query_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+                run_id UUID,
+                query_number INTEGER NOT NULL,
+                parameters JSONB NOT NULL,
+                query_embedding VECTOR({TARGET_DIM}),
+                executed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL,
+                CONSTRAINT fk_search_queries_run_id FOREIGN KEY (run_id) REFERENCES agent_runs(run_id) ON DELETE CASCADE
+            );
+        """
+            )
+        )
+        connection.execute(text("CREATE INDEX IF NOT EXISTS ix_search_queries_run_id ON search_queries (run_id);"))
+        connection.execute(
+            text("CREATE INDEX IF NOT EXISTS ix_search_queries_executed_at ON search_queries (executed_at);")
+        )
+        connection.execute(text("CREATE INDEX IF NOT EXISTS ix_search_queries_query_id ON search_queries (query_id);"))
         connection.commit()
         logger.info("LLM migration completed successfully")

orchestrator/search/retrieval/__init__.py CHANGED Viewed

@@ -11,6 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .engine import execute_search
+from .engine import execute_search, execute_search_for_export
+from .query_state import SearchQueryState
-__all__ = ["execute_search"]
+__all__ = ["execute_search", "execute_search_for_export", "SearchQueryState"]

orchestrator/search/retrieval/builder.py CHANGED Viewed

@@ -43,7 +43,11 @@ def build_candidate_query(params: BaseSearchParameters) -> Select:
         Select: The SQLAlchemy `Select` object representing the query.
     """
-    stmt = select(AiSearchIndex.entity_id).where(AiSearchIndex.entity_type == params.entity_type.value).distinct()
+    stmt = (
+        select(AiSearchIndex.entity_id, AiSearchIndex.entity_title)
+        .where(AiSearchIndex.entity_type == params.entity_type.value)
+        .distinct()
+    )
     if params.filters is not None:
         entity_id_col = AiSearchIndex.entity_id

orchestrator/search/retrieval/engine.py CHANGED Viewed

@@ -17,13 +17,15 @@ import structlog
 from sqlalchemy.engine.row import RowMapping
 from sqlalchemy.orm import Session
+from orchestrator.search.core.embedding import QueryEmbedder
 from orchestrator.search.core.types import FilterOp, SearchMetadata
 from orchestrator.search.filters import FilterTree, LtreeFilter
 from orchestrator.search.schemas.parameters import BaseSearchParameters
 from orchestrator.search.schemas.results import MatchingField, SearchResponse, SearchResult
 from .builder import build_candidate_query
-from .pagination import PaginationParams
+from .pagination import PageCursor
+from .query_state import SearchQueryState
 from .retrievers import Retriever
 from .utils import generate_highlight_indices
@@ -74,9 +76,15 @@ def _format_response(
             # Structured search (filter-only)
             matching_field = _extract_matching_field_from_filters(search_params.filters)
+        entity_title = row.get("entity_title", "")
+        if not isinstance(entity_title, str):
+            entity_title = str(entity_title) if entity_title is not None else ""
         results.append(
             SearchResult(
                 entity_id=str(row.entity_id),
+                entity_type=search_params.entity_type,
+                entity_title=entity_title,
                 score=row.score,
                 perfect_match=row.get("perfect_match", 0),
                 matching_field=matching_field,
@@ -110,45 +118,80 @@ def _extract_matching_field_from_filters(filters: FilterTree) -> MatchingField |
     return MatchingField(text=text, path=pf.path, highlight_indices=[(0, len(text))])
-async def execute_search(
+async def _execute_search_internal(
     search_params: BaseSearchParameters,
     db_session: Session,
-    pagination_params: PaginationParams | None = None,
+    limit: int,
+    cursor: PageCursor | None = None,
+    query_embedding: list[float] | None = None,
 ) -> SearchResponse:
-    """Execute a hybrid search and return ranked results.
-    Builds a candidate entity query based on the given search parameters,
-    applies the appropriate ranking strategy, and executes the final ranked
-    query to retrieve results.
+    """Internal function to execute search with specified parameters.
     Args:
-        search_params (BaseSearchParameters): The search parameters specifying vector, fuzzy, or filter criteria.
-        db_session (Session): The active SQLAlchemy session for executing the query.
-        pagination_params (PaginationParams): Parameters controlling pagination of the search results.
-        limit (int, optional): The maximum number of search results to return, by default 5.
+        search_params: The search parameters specifying vector, fuzzy, or filter criteria.
+        db_session: The active SQLAlchemy session for executing the query.
+        limit: Maximum number of results to return.
+        cursor: Optional pagination cursor.
+        query_embedding: Optional pre-computed query embedding to use instead of generating a new one.
     Returns:
-        SearchResponse: A list of `SearchResult` objects containing entity IDs, scores,
-        and optional highlight metadata.
-    Notes:
-        If no vector query, filters, or fuzzy term are provided, a warning is logged
-        and an empty result set is returned.
+        SearchResponse with results and embedding (for internal use).
     """
     if not search_params.vector_query and not search_params.filters and not search_params.fuzzy_term:
         logger.warning("No search criteria provided (vector_query, fuzzy_term, or filters).")
         return SearchResponse(results=[], metadata=SearchMetadata.empty())
     candidate_query = build_candidate_query(search_params)
-    pagination_params = pagination_params or PaginationParams()
-    retriever = await Retriever.from_params(search_params, pagination_params)
+    if search_params.vector_query and not query_embedding:
+        query_embedding = await QueryEmbedder.generate_for_text_async(search_params.vector_query)
+    retriever = await Retriever.route(search_params, cursor, query_embedding)
     logger.debug("Using retriever", retriever_type=retriever.__class__.__name__)
     final_stmt = retriever.apply(candidate_query)
-    final_stmt = final_stmt.limit(search_params.limit)
+    final_stmt = final_stmt.limit(limit)
     logger.debug(final_stmt)
     result = db_session.execute(final_stmt).mappings().all()
-    return _format_response(result, search_params, retriever.metadata)
+    response = _format_response(result, search_params, retriever.metadata)
+    # Store embedding in response for agent to save to DB
+    response.query_embedding = query_embedding
+    return response
+async def execute_search(
+    search_params: BaseSearchParameters,
+    db_session: Session,
+    cursor: PageCursor | None = None,
+    query_embedding: list[float] | None = None,
+) -> SearchResponse:
+    """Execute a search and return ranked results."""
+    return await _execute_search_internal(search_params, db_session, search_params.limit, cursor, query_embedding)
+async def execute_search_for_export(
+    query_state: SearchQueryState,
+    db_session: Session,
+) -> list[dict]:
+    """Execute a search for export and fetch flattened entity data.
+    Args:
+        query_state: Query state containing parameters and query_embedding.
+        db_session: The active SQLAlchemy session for executing the query.
+    Returns:
+        List of flattened entity records suitable for export.
+    """
+    from orchestrator.search.export import fetch_export_data
+    search_response = await _execute_search_internal(
+        search_params=query_state.parameters,
+        db_session=db_session,
+        limit=query_state.parameters.export_limit,
+        query_embedding=query_state.query_embedding,
+    )
+    entity_ids = [res.entity_id for res in search_response.results]
+    return fetch_export_data(query_state.parameters.entity_type, entity_ids)

orchestrator/search/retrieval/pagination.py CHANGED Viewed

@@ -11,42 +11,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import array
 import base64
-from dataclasses import dataclass
+from uuid import UUID
 from pydantic import BaseModel
+from orchestrator.db import SearchQueryTable, db
 from orchestrator.search.core.exceptions import InvalidCursorError
-from orchestrator.search.schemas.parameters import BaseSearchParameters
-from orchestrator.search.schemas.results import SearchResult
-@dataclass
-class PaginationParams:
-    """Parameters for pagination in search queries."""
-    page_after_score: float | None = None
-    page_after_id: str | None = None
-    q_vec_override: list[float] | None = None
-def floats_to_b64(v: list[float]) -> str:
-    a = array.array("f", v)
-    return base64.urlsafe_b64encode(a.tobytes()).decode("ascii")
-def b64_to_floats(s: str) -> list[float]:
-    raw = base64.urlsafe_b64decode(s.encode("ascii"))
-    a = array.array("f")
-    a.frombytes(raw)
-    return list(a)
+from orchestrator.search.schemas.parameters import SearchParameters
+from orchestrator.search.schemas.results import SearchResponse
 class PageCursor(BaseModel):
     score: float
     id: str
-    q_vec_b64: str
+    query_id: UUID
     def encode(self) -> str:
         """Encode the cursor data into a URL-safe Base64 string."""
@@ -63,34 +42,45 @@ class PageCursor(BaseModel):
             raise InvalidCursorError("Invalid pagination cursor") from e
-async def process_pagination_cursor(cursor: str | None, search_params: BaseSearchParameters) -> PaginationParams:
-    """Process pagination cursor and return pagination parameters."""
-    if cursor:
-        c = PageCursor.decode(cursor)
-        return PaginationParams(
-            page_after_score=c.score,
-            page_after_id=c.id,
-            q_vec_override=b64_to_floats(c.q_vec_b64),
-        )
-    if search_params.vector_query:
-        from orchestrator.search.core.embedding import QueryEmbedder
-        q_vec_override = await QueryEmbedder.generate_for_text_async(search_params.vector_query)
-        return PaginationParams(q_vec_override=q_vec_override)
-    return PaginationParams()
-def create_next_page_cursor(
-    search_results: list[SearchResult], pagination_params: PaginationParams, limit: int
+def encode_next_page_cursor(
+    search_response: SearchResponse,
+    cursor: PageCursor | None,
+    search_params: SearchParameters,
 ) -> str | None:
-    """Create next page cursor if there are more results."""
-    has_next_page = len(search_results) == limit and limit > 0
-    if has_next_page:
-        last_item = search_results[-1]
-        cursor_data = PageCursor(
-            score=float(last_item.score),
-            id=last_item.entity_id,
-            q_vec_b64=floats_to_b64(pagination_params.q_vec_override or []),
-        )
-        return cursor_data.encode()
-    return None
+    """Create next page cursor if there are more results.
+    On first page, saves the query to database and includes query_id in cursor
+    for subsequent pages to ensure consistent parameters across pagination.
+    Args:
+        search_response: SearchResponse containing results and query_embedding
+        cursor: Current page cursor (None for first page, PageCursor for subsequent pages)
+        search_params: Search parameters to save for pagination consistency
+    Returns:
+        Encoded cursor for next page, or None if no more results
+    """
+    from orchestrator.search.retrieval.query_state import SearchQueryState
+    has_next_page = len(search_response.results) == search_params.limit and search_params.limit > 0
+    if not has_next_page:
+        return None
+    # If this is the first page, save query state to database
+    if cursor is None:
+        query_state = SearchQueryState(parameters=search_params, query_embedding=search_response.query_embedding)
+        search_query = SearchQueryTable.from_state(state=query_state)
+        db.session.add(search_query)
+        db.session.commit()
+        query_id = search_query.query_id
+    else:
+        query_id = cursor.query_id
+    last_item = search_response.results[-1]
+    cursor_data = PageCursor(
+        score=float(last_item.score),
+        id=last_item.entity_id,
+        query_id=query_id,
+    )
+    return cursor_data.encode()

orchestrator/search/retrieval/query_state.py ADDED Viewed

@@ -0,0 +1,61 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from uuid import UUID
+from pydantic import BaseModel, ConfigDict, Field
+from orchestrator.db import SearchQueryTable, db
+from orchestrator.search.core.exceptions import QueryStateNotFoundError
+from orchestrator.search.schemas.parameters import SearchParameters
+class SearchQueryState(BaseModel):
+    """State of a search query including parameters and embedding.
+    This model provides a complete snapshot of what was searched and how.
+    Used for both agent and regular API searches.
+    """
+    parameters: SearchParameters = Field(discriminator="entity_type")
+    query_embedding: list[float] | None = Field(default=None, description="The embedding vector for semantic search")
+    model_config = ConfigDict(from_attributes=True)
+    @classmethod
+    def load_from_id(cls, query_id: UUID | str) -> "SearchQueryState":
+        """Load query state from database by query_id.
+        Args:
+            query_id: UUID or string UUID of the saved query
+        Returns:
+            SearchQueryState loaded from database
+        Raises:
+            ValueError: If query_id format is invalid
+            QueryStateNotFoundError: If query not found in database
+        """
+        if isinstance(query_id, UUID):
+            query_uuid = query_id
+        else:
+            try:
+                query_uuid = UUID(query_id)
+            except (ValueError, TypeError) as e:
+                raise ValueError(f"Invalid query_id format: {query_id}") from e
+        search_query = db.session.query(SearchQueryTable).filter_by(query_id=query_uuid).first()
+        if not search_query:
+            raise QueryStateNotFoundError(f"Query {query_uuid} not found in database")
+        return cls.model_validate(search_query)

orchestrator/search/retrieval/retrievers/base.py CHANGED Viewed

@@ -20,7 +20,7 @@ from sqlalchemy import BindParameter, Numeric, Select, literal
 from orchestrator.search.core.types import FieldType, SearchMetadata
 from orchestrator.search.schemas.parameters import BaseSearchParameters
-from ..pagination import PaginationParams
+from ..pagination import PageCursor
 logger = structlog.get_logger(__name__)
@@ -41,62 +41,48 @@ class Retriever(ABC):
     ]
     @classmethod
-    async def from_params(
+    async def route(
         cls,
         params: BaseSearchParameters,
-        pagination_params: PaginationParams,
+        cursor: PageCursor | None,
+        query_embedding: list[float] | None = None,
     ) -> "Retriever":
-        """Create the appropriate retriever instance from search parameters.
+        """Route to the appropriate retriever instance based on search parameters.
+        Selects the retriever type based on available search criteria:
+        - Hybrid: both embedding and fuzzy term available
+        - Semantic: only embedding available
+        - Fuzzy: only text term available (or fallback when embedding generation fails)
+        - Structured: only filters available
         Args:
-            params (BaseSearchParameters): Search parameters including vector queries, fuzzy terms, and filters.
-            pagination_params (PaginationParams): Pagination parameters for cursor-based paging.
+            params: Search parameters including vector queries, fuzzy terms, and filters
+            cursor: Pagination cursor for cursor-based paging
+            query_embedding: Query embedding for semantic search, or None if not available
         Returns:
-            Retriever: A concrete retriever instance (semantic, fuzzy, hybrid, or structured).
+            A concrete retriever instance based on available search criteria
         """
         from .fuzzy import FuzzyRetriever
         from .hybrid import RrfHybridRetriever
         from .semantic import SemanticRetriever
         from .structured import StructuredRetriever
         fuzzy_term = params.fuzzy_term
-        q_vec = await cls._get_query_vector(params.vector_query, pagination_params.q_vec_override)
-        # If semantic search was attempted but failed, fall back to fuzzy with the full query
-        fallback_fuzzy_term = fuzzy_term
-        if q_vec is None and params.vector_query is not None and params.query is not None:
-            fallback_fuzzy_term = params.query
-        if q_vec is not None and fallback_fuzzy_term is not None:
-            return RrfHybridRetriever(q_vec, fallback_fuzzy_term, pagination_params)
-        if q_vec is not None:
-            return SemanticRetriever(q_vec, pagination_params)
-        if fallback_fuzzy_term is not None:
-            return FuzzyRetriever(fallback_fuzzy_term, pagination_params)
-        return StructuredRetriever(pagination_params)
-    @classmethod
-    async def _get_query_vector(
-        cls, vector_query: str | None, q_vec_override: list[float] | None
-    ) -> list[float] | None:
-        """Get query vector either from override or by generating from text."""
-        if q_vec_override:
-            return q_vec_override
-        if not vector_query:
-            return None
-        from orchestrator.search.core.embedding import QueryEmbedder
+        # If vector_query exists but embedding generation failed, fall back to fuzzy search with full query
+        if query_embedding is None and params.vector_query is not None and params.query is not None:
+            fuzzy_term = params.query
-        q_vec = await QueryEmbedder.generate_for_text_async(vector_query)
-        if not q_vec:
-            logger.warning("Embedding generation failed; using non-semantic retriever")
-            return None
+        # Select retriever based on available search criteria
+        if query_embedding is not None and fuzzy_term is not None:
+            return RrfHybridRetriever(query_embedding, fuzzy_term, cursor)
+        if query_embedding is not None:
+            return SemanticRetriever(query_embedding, cursor)
+        if fuzzy_term is not None:
+            return FuzzyRetriever(fuzzy_term, cursor)
-        return q_vec
+        return StructuredRetriever(cursor)
     @abstractmethod
     def apply(self, candidate_query: Select) -> Select:

orchestrator/search/retrieval/retrievers/fuzzy.py CHANGED Viewed

@@ -17,17 +17,16 @@ from sqlalchemy.sql.expression import ColumnElement
 from orchestrator.db.models import AiSearchIndex
 from orchestrator.search.core.types import SearchMetadata
-from ..pagination import PaginationParams
+from ..pagination import PageCursor
 from .base import Retriever
 class FuzzyRetriever(Retriever):
     """Ranks results based on the max of fuzzy text similarity scores."""
-    def __init__(self, fuzzy_term: str, pagination_params: PaginationParams) -> None:
+    def __init__(self, fuzzy_term: str, cursor: PageCursor | None) -> None:
         self.fuzzy_term = fuzzy_term
-        self.page_after_score = pagination_params.page_after_score
-        self.page_after_id = pagination_params.page_after_id
+        self.cursor = cursor
     def apply(self, candidate_query: Select) -> Select:
         cand = candidate_query.subquery()
@@ -42,6 +41,7 @@ class FuzzyRetriever(Retriever):
         combined_query = (
             select(
                 AiSearchIndex.entity_id,
+                AiSearchIndex.entity_title,
                 score,
                 func.first_value(AiSearchIndex.value)
                 .over(partition_by=AiSearchIndex.entity_id, order_by=[similarity_expr.desc(), AiSearchIndex.path.asc()])
@@ -58,12 +58,13 @@ class FuzzyRetriever(Retriever):
                     literal(self.fuzzy_term).op("<%")(AiSearchIndex.value),
                 )
             )
-            .distinct(AiSearchIndex.entity_id)
+            .distinct(AiSearchIndex.entity_id, AiSearchIndex.entity_title)
         )
         final_query = combined_query.subquery("ranked_fuzzy")
         stmt = select(
             final_query.c.entity_id,
+            final_query.c.entity_title,
             final_query.c.score,
             final_query.c.highlight_text,
             final_query.c.highlight_path,
@@ -81,13 +82,13 @@ class FuzzyRetriever(Retriever):
         self, stmt: Select, score_column: ColumnElement, entity_id_column: ColumnElement
     ) -> Select:
         """Apply standard score + entity_id pagination."""
-        if self.page_after_score is not None and self.page_after_id is not None:
+        if self.cursor is not None:
             stmt = stmt.where(
                 or_(
-                    score_column < self.page_after_score,
+                    score_column < self.cursor.score,
                     and_(
-                        score_column == self.page_after_score,
-                        entity_id_column > self.page_after_id,
+                        score_column == self.cursor.score,
+                        entity_id_column > self.cursor.id,
                     ),
                 )
             )

orchestrator-core 4.5.3__py3-none-any.whl → 4.6.0__py3-none-any.whl

orchestrator-core 4.5.3 py3-none-any.whl → 4.6.0 py3-none-any.whl