orchestrator-core 4.5.2-py3-none-any.whl → 4.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +2 -2
- orchestrator/agentic_app.py +3 -23
- orchestrator/api/api_v1/api.py +5 -0
- orchestrator/api/api_v1/endpoints/agent.py +49 -0
- orchestrator/api/api_v1/endpoints/search.py +120 -201
- orchestrator/app.py +1 -1
- orchestrator/cli/database.py +3 -0
- orchestrator/cli/generate.py +11 -4
- orchestrator/cli/generator/generator/migration.py +7 -3
- orchestrator/cli/main.py +1 -1
- orchestrator/cli/scheduler.py +15 -22
- orchestrator/cli/search/resize_embedding.py +28 -22
- orchestrator/cli/search/speedtest.py +4 -6
- orchestrator/db/__init__.py +6 -0
- orchestrator/db/models.py +75 -0
- orchestrator/llm_settings.py +18 -1
- orchestrator/migrations/helpers.py +47 -39
- orchestrator/schedules/scheduler.py +32 -15
- orchestrator/schedules/validate_products.py +1 -1
- orchestrator/schemas/search.py +8 -85
- orchestrator/search/agent/__init__.py +2 -2
- orchestrator/search/agent/agent.py +26 -30
- orchestrator/search/agent/json_patch.py +51 -0
- orchestrator/search/agent/prompts.py +35 -9
- orchestrator/search/agent/state.py +28 -2
- orchestrator/search/agent/tools.py +192 -53
- orchestrator/search/core/embedding.py +2 -2
- orchestrator/search/core/exceptions.py +6 -0
- orchestrator/search/core/types.py +1 -0
- orchestrator/search/export.py +199 -0
- orchestrator/search/indexing/indexer.py +13 -4
- orchestrator/search/indexing/registry.py +14 -1
- orchestrator/search/llm_migration.py +55 -0
- orchestrator/search/retrieval/__init__.py +3 -2
- orchestrator/search/retrieval/builder.py +5 -1
- orchestrator/search/retrieval/engine.py +66 -23
- orchestrator/search/retrieval/pagination.py +46 -56
- orchestrator/search/retrieval/query_state.py +61 -0
- orchestrator/search/retrieval/retrievers/base.py +26 -40
- orchestrator/search/retrieval/retrievers/fuzzy.py +10 -9
- orchestrator/search/retrieval/retrievers/hybrid.py +11 -8
- orchestrator/search/retrieval/retrievers/semantic.py +9 -8
- orchestrator/search/retrieval/retrievers/structured.py +6 -6
- orchestrator/search/schemas/parameters.py +17 -13
- orchestrator/search/schemas/results.py +4 -1
- orchestrator/settings.py +1 -0
- orchestrator/utils/auth.py +3 -2
- orchestrator/workflow.py +23 -6
- orchestrator/workflows/tasks/validate_product_type.py +3 -3
- {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/METADATA +17 -12
- {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/RECORD +53 -49
- {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.5.2.dist-info → orchestrator_core-4.6.0.dist-info}/licenses/LICENSE +0 -0

orchestrator/search/agent/tools.py:

@@ -11,11 +11,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-from typing import Any
+import json
+from typing import Any
 
 import structlog
-from ag_ui.core import EventType, StateSnapshotEvent
+from ag_ui.core import EventType, StateDeltaEvent, StateSnapshotEvent
 from pydantic_ai import RunContext
 from pydantic_ai.ag_ui import StateDeps
 from pydantic_ai.exceptions import ModelRetry
@@ -25,34 +25,22 @@ from pydantic_ai.toolsets import FunctionToolset
 from orchestrator.api.api_v1.endpoints.search import (
     get_definitions,
     list_paths,
-    search_processes,
-    search_products,
-    search_subscriptions,
-    search_workflows,
 )
-from orchestrator.
+from orchestrator.db import AgentRunTable, SearchQueryTable, db
+from orchestrator.search.agent.json_patch import JSONPatchOp
+from orchestrator.search.agent.state import ExportData, SearchResultsData, SearchState
 from orchestrator.search.core.types import ActionType, EntityType, FilterOp
+from orchestrator.search.export import fetch_export_data
 from orchestrator.search.filters import FilterTree
+from orchestrator.search.retrieval.engine import execute_search
 from orchestrator.search.retrieval.exceptions import FilterValidationError, PathNotFoundError
+from orchestrator.search.retrieval.query_state import SearchQueryState
 from orchestrator.search.retrieval.validation import validate_filter_tree
-from orchestrator.search.schemas.parameters import
-
-from .state import SearchState
+from orchestrator.search.schemas.parameters import BaseSearchParameters
+from orchestrator.settings import app_settings
 
 logger = structlog.get_logger(__name__)
 
-
-P = TypeVar("P", bound=BaseSearchParameters)
-
-SearchFn = Callable[[P], Awaitable[SearchResultsSchema[Any]]]
-
-SEARCH_FN_MAP: dict[EntityType, SearchFn] = {
-    EntityType.SUBSCRIPTION: search_subscriptions,
-    EntityType.WORKFLOW: search_workflows,
-    EntityType.PRODUCT: search_products,
-    EntityType.PROCESS: search_processes,
-}
-
 search_toolset: FunctionToolset[StateDeps[SearchState]] = FunctionToolset(max_retries=1)
 
 
@@ -65,32 +53,50 @@ def last_user_message(ctx: RunContext[StateDeps[SearchState]]) -> str | None:
     return None
 
 
+def _set_parameters(
+    ctx: RunContext[StateDeps[SearchState]],
+    entity_type: EntityType,
+    action: str | ActionType,
+    query: str,
+    filters: Any | None,
+) -> None:
+    """Internal helper to set parameters."""
+    ctx.deps.state.parameters = {
+        "action": action,
+        "entity_type": entity_type,
+        "filters": filters,
+        "query": query,
+    }
+
+
 @search_toolset.tool
-async def
+async def start_new_search(
     ctx: RunContext[StateDeps[SearchState]],
     entity_type: EntityType,
     action: str | ActionType = ActionType.SELECT,
 ) -> StateSnapshotEvent:
-    """
+    """Starts a completely new search, clearing all previous state.
 
-    This MUST be the first tool called
-    Warning:
+    This MUST be the first tool called when the user asks for a NEW search.
+    Warning: This will erase any existing filters, results, and search state.
     """
-
-    is_new_search = params.get("entity_type") != entity_type.value
-    final_query = (last_user_message(ctx) or "") if is_new_search else params.get("query", "")
+    final_query = last_user_message(ctx) or ""
 
     logger.debug(
-        "
+        "Starting new search",
         entity_type=entity_type.value,
         action=action,
-        is_new_search=is_new_search,
         query=final_query,
     )
 
-
-    ctx.deps.state.
-
+    # Clear all state
+    ctx.deps.state.results_data = None
+    ctx.deps.state.export_data = None
+
+    # Set fresh parameters with no filters
+    _set_parameters(ctx, entity_type, action, final_query, None)
+
+    logger.debug("New search started", parameters=ctx.deps.state.parameters)
 
     return StateSnapshotEvent(
         type=EventType.STATE_SNAPSHOT,
@@ -102,7 +108,7 @@ async def set_search_parameters(
 async def set_filter_tree(
     ctx: RunContext[StateDeps[SearchState]],
     filters: FilterTree | None,
-) ->
+) -> StateDeltaEvent:
     """Replace current filters atomically with a full FilterTree, or clear with None.
 
     Requirements:
@@ -111,7 +117,7 @@ async def set_filter_tree(
     - See the FilterTree schema examples for the exact shape.
     """
     if ctx.deps.state.parameters is None:
-        raise ModelRetry("Search parameters are not initialized. Call
+        raise ModelRetry("Search parameters are not initialized. Call start_new_search first.")
 
     entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
 
@@ -136,28 +142,33 @@ async def set_filter_tree(
         raise ModelRetry(f"Filter validation failed: {str(e)}. Please check your filter structure and try again.")
 
     filter_data = None if filters is None else filters.model_dump(mode="json", by_alias=True)
+    filters_existed = "filters" in ctx.deps.state.parameters
     ctx.deps.state.parameters["filters"] = filter_data
-    return
+    return StateDeltaEvent(
+        type=EventType.STATE_DELTA,
+        delta=[
+            JSONPatchOp.upsert(
+                path="/parameters/filters",
+                value=filter_data,
+                existed=filters_existed,
+            )
+        ],
+    )
 
 
 @search_toolset.tool
-async def
+async def run_search(
     ctx: RunContext[StateDeps[SearchState]],
     limit: int = 10,
-) ->
-    """Execute the search with the current parameters."""
+) -> StateDeltaEvent:
+    """Execute the search with the current parameters and save to database."""
     if not ctx.deps.state.parameters:
         raise ValueError("No search parameters set")
 
-
-    param_class = PARAMETER_REGISTRY.get(entity_type)
-    if not param_class:
-        raise ValueError(f"Unknown entity type: {entity_type}")
-
-    params = param_class(**ctx.deps.state.parameters)
+    params = BaseSearchParameters.create(**ctx.deps.state.parameters)
     logger.debug(
         "Executing database search",
-        search_entity_type=entity_type.value,
+        search_entity_type=params.entity_type.value,
         limit=limit,
         has_filters=params.filters is not None,
         query=params.query,
@@ -169,17 +180,61 @@ async def execute_search(
 
     params.limit = limit
 
-
-
+    changes: list[JSONPatchOp] = []
+
+    if not ctx.deps.state.run_id:
+        agent_run = AgentRunTable(agent_type="search")
+
+        db.session.add(agent_run)
+        db.session.commit()
+        db.session.expire_all()  # Release connection to prevent stacking while agent runs
+
+        ctx.deps.state.run_id = agent_run.run_id
+        logger.debug("Created new agent run", run_id=str(agent_run.run_id))
+        changes.append(JSONPatchOp(op="add", path="/run_id", value=str(ctx.deps.state.run_id)))
+
+    # Get query with embedding and save to DB
+    search_response = await execute_search(params, db.session)
+    query_embedding = search_response.query_embedding
+    query_state = SearchQueryState(parameters=params, query_embedding=query_embedding)
+    query_number = db.session.query(SearchQueryTable).filter_by(run_id=ctx.deps.state.run_id).count() + 1
+    search_query = SearchQueryTable.from_state(
+        state=query_state,
+        run_id=ctx.deps.state.run_id,
+        query_number=query_number,
+    )
+    db.session.add(search_query)
+    db.session.commit()
+    db.session.expire_all()
+
+    query_id_existed = ctx.deps.state.query_id is not None
+    ctx.deps.state.query_id = search_query.query_id
+    logger.debug("Saved search query", query_id=str(search_query.query_id), query_number=query_number)
+    changes.append(JSONPatchOp.upsert(path="/query_id", value=str(ctx.deps.state.query_id), existed=query_id_existed))
 
     logger.debug(
         "Search completed",
-        total_results=len(
+        total_results=len(search_response.results),
     )
 
-
+    # Store results data for both frontend display and agent context
+    results_url = f"{app_settings.BASE_URL}/api/search/queries/{ctx.deps.state.query_id}"
+
+    results_data_existed = ctx.deps.state.results_data is not None
+    ctx.deps.state.results_data = SearchResultsData(
+        query_id=str(ctx.deps.state.query_id),
+        results_url=results_url,
+        total_count=len(search_response.results),
+        message=f"Found {len(search_response.results)} results.",
+        results=search_response.results,  # Include actual results in state
+    )
+    changes.append(
+        JSONPatchOp.upsert(
+            path="/results_data", value=ctx.deps.state.results_data.model_dump(), existed=results_data_existed
+        )
+    )
 
-    return
+    return StateDeltaEvent(type=EventType.STATE_DELTA, delta=changes)
 
 
 @search_toolset.tool
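
The `StateDeltaEvent`s returned by `set_filter_tree` and `run_search` carry RFC 6902 JSON Patch operations built by the new `orchestrator/search/agent/json_patch.py` module (+51 lines, not shown in this excerpt). Based purely on how `JSONPatchOp` is called above, a minimal sketch of such a helper could look like the following; the actual module may differ.

```python
# Hypothetical sketch of JSONPatchOp, inferred only from its usage in tools.py above.
from typing import Any

from pydantic import BaseModel


class JSONPatchOp(BaseModel):
    """A single RFC 6902 JSON Patch operation applied to the shared agent state."""

    op: str          # "add", "replace", "remove", ...
    path: str        # JSON Pointer into the state, e.g. "/parameters/filters"
    value: Any = None

    @classmethod
    def upsert(cls, path: str, value: Any, existed: bool) -> "JSONPatchOp":
        # RFC 6902 only allows "replace" on paths that already exist,
        # so fall back to "add" for new paths.
        return cls(op="replace" if existed else "add", path=path, value=value)
```

A helper along these lines would explain why the tools record `filters_existed`, `query_id_existed`, and `results_data_existed` before overwriting the corresponding state fields.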
@@ -256,3 +311,87 @@ async def get_valid_operators() -> dict[str, list[FilterOp]]:
         if hasattr(type_def, "operators"):
             operator_map[key] = type_def.operators
     return operator_map
+
+
+@search_toolset.tool
+async def fetch_entity_details(
+    ctx: RunContext[StateDeps[SearchState]],
+    limit: int = 10,
+) -> str:
+    """Fetch detailed entity information to answer user questions.
+
+    Use this tool when you need detailed information about entities from the search results
+    to answer the user's question. This provides the same detailed data that would be
+    included in an export (e.g., subscription status, product details, workflow info, etc.).
+
+    Args:
+        ctx: Runtime context for agent (injected).
+        limit: Maximum number of entities to fetch details for (default 10).
+
+    Returns:
+        JSON string containing detailed entity information.
+
+    Raises:
+        ValueError: If no search results are available.
+    """
+    if not ctx.deps.state.results_data or not ctx.deps.state.results_data.results:
+        raise ValueError("No search results available. Run a search first before fetching entity details.")
+
+    if not ctx.deps.state.parameters:
+        raise ValueError("No search parameters found.")
+
+    entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
+
+    entity_ids = [r.entity_id for r in ctx.deps.state.results_data.results[:limit]]
+
+    logger.debug(
+        "Fetching detailed entity data",
+        entity_type=entity_type.value,
+        entity_count=len(entity_ids),
+    )
+
+    detailed_data = fetch_export_data(entity_type, entity_ids)
+
+    return json.dumps(detailed_data, indent=2)
+
+
+@search_toolset.tool
+async def prepare_export(
+    ctx: RunContext[StateDeps[SearchState]],
+) -> StateSnapshotEvent:
+    """Prepares export URL using the last executed search query."""
+    if not ctx.deps.state.query_id or not ctx.deps.state.run_id:
+        raise ValueError("No search has been executed yet. Run a search first before exporting.")
+
+    if not ctx.deps.state.parameters:
+        raise ValueError("No search parameters found. Run a search first before exporting.")
+
+    # Validate that export is only available for SELECT actions
+    action = ctx.deps.state.parameters.get("action", ActionType.SELECT)
+    if action != ActionType.SELECT:
+        raise ValueError(
+            f"Export is only available for SELECT actions. Current action is '{action}'. "
+            "Please run a SELECT search first."
+        )
+
+    logger.debug(
+        "Prepared query for export",
+        query_id=str(ctx.deps.state.query_id),
+    )
+
+    download_url = f"{app_settings.BASE_URL}/api/search/queries/{ctx.deps.state.query_id}/export"
+
+    ctx.deps.state.export_data = ExportData(
+        query_id=str(ctx.deps.state.query_id),
+        download_url=download_url,
+        message="Export ready for download.",
+    )
+
+    logger.debug("Export data set in state", export_data=ctx.deps.state.export_data.model_dump())
+
+    # Should use StateDelta here? Use snapshot to workaround state persistence issue
+    # TODO: Fix root cause; state is empty on frontend when it should have data from run_search
+    return StateSnapshotEvent(
+        type=EventType.STATE_SNAPSHOT,
+        snapshot=ctx.deps.state.model_dump(),
+    )
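
`ExportData`, `SearchResultsData`, and the extended `SearchState` come from `orchestrator/search/agent/state.py` (+28 -2), which is not part of this excerpt. Judging only from the fields the tools read and write, the state models could be shaped roughly like this (a sketch, not the actual definitions):

```python
# Hypothetical sketch of the agent state models, inferred from run_search,
# fetch_entity_details and prepare_export above.
from typing import Any
from uuid import UUID

from pydantic import BaseModel


class SearchResultsData(BaseModel):
    query_id: str
    results_url: str
    total_count: int
    message: str
    results: list[Any] = []


class ExportData(BaseModel):
    query_id: str
    download_url: str
    message: str


class SearchState(BaseModel):
    parameters: dict[str, Any] | None = None
    run_id: UUID | None = None
    query_id: UUID | None = None
    results_data: SearchResultsData | None = None
    export_data: ExportData | None = None
```

Note that `prepare_export` deliberately returns a full `StateSnapshotEvent` rather than a delta; the inline TODO marks this as a workaround for a state-persistence issue on the frontend.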
orchestrator/search/core/embedding.py:

@@ -42,7 +42,7 @@ class EmbeddingIndexer:
                 max_retries=llm_settings.LLM_MAX_RETRIES,
             )
             data = sorted(resp.data, key=lambda e: e["index"])
-            return [row["embedding"] for row in data]
+            return [row["embedding"][: llm_settings.EMBEDDING_DIMENSION] for row in data]
         except (llm_exc.APIError, llm_exc.APIConnectionError, llm_exc.RateLimitError, llm_exc.Timeout) as e:
             logger.error("Embedding request failed", error=str(e))
             return [[] for _ in texts]
@@ -67,7 +67,7 @@ class QueryEmbedder:
                 timeout=5.0,
                 max_retries=0,  # No retries, prioritize speed.
             )
-            return resp.data[0]["embedding"]
+            return resp.data[0]["embedding"][: llm_settings.EMBEDDING_DIMENSION]
         except Exception as e:
             logger.error("Async embedding generation failed", error=str(e))
             return []
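
Both embedding paths now truncate the returned vector to `llm_settings.EMBEDDING_DIMENSION`, so the stored dimension can be smaller than what the model returns; this lines up with the `resize_embedding` CLI changes listed above. A small illustration of the slicing (the dimension and values below are made up):

```python
# Illustration of the truncation applied above; the dimension and vector
# values are invented for the example.
EMBEDDING_DIMENSION = 4

raw_embedding = [0.12, -0.03, 0.56, 0.44, 0.09, -0.21]  # model returned 6 dims
stored = raw_embedding[:EMBEDDING_DIMENSION]

assert stored == [0.12, -0.03, 0.56, 0.44]
# Slicing never pads: a model that returns fewer dimensions than
# EMBEDDING_DIMENSION is stored as-is.
```

Truncating rather than re-embedding only preserves quality for models whose leading dimensions carry most of the signal (Matryoshka-style embeddings); whether the shortened vectors are re-normalized downstream is not visible in this hunk.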
orchestrator/search/export.py (new file):

@@ -0,0 +1,199 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from uuid import UUID
+
+from sqlalchemy import select
+from sqlalchemy.orm import selectinload
+
+from orchestrator.db import (
+    ProcessTable,
+    ProductTable,
+    SubscriptionTable,
+    WorkflowTable,
+    db,
+)
+from orchestrator.search.core.types import EntityType
+
+
+def fetch_subscription_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch subscription data for export.
+
+    Args:
+        entity_ids: List of subscription IDs as strings
+
+    Returns:
+        List of flattened subscription dictionaries with fields:
+        subscription_id, description, status, insync, start_date, end_date,
+        note, product_name, tag, product_type, customer_id
+    """
+    stmt = (
+        select(
+            SubscriptionTable.subscription_id,
+            SubscriptionTable.description,
+            SubscriptionTable.status,
+            SubscriptionTable.insync,
+            SubscriptionTable.start_date,
+            SubscriptionTable.end_date,
+            SubscriptionTable.note,
+            SubscriptionTable.customer_id,
+            ProductTable.name.label("product_name"),
+            ProductTable.tag,
+            ProductTable.product_type,
+        )
+        .join(ProductTable, SubscriptionTable.product_id == ProductTable.product_id)
+        .filter(SubscriptionTable.subscription_id.in_([UUID(sid) for sid in entity_ids]))
+    )
+
+    rows = db.session.execute(stmt).all()
+
+    return [
+        {
+            "subscription_id": str(row.subscription_id),
+            "description": row.description,
+            "status": row.status,
+            "insync": row.insync,
+            "start_date": row.start_date.isoformat() if row.start_date else None,
+            "end_date": row.end_date.isoformat() if row.end_date else None,
+            "note": row.note,
+            "product_name": row.product_name,
+            "tag": row.tag,
+            "product_type": row.product_type,
+            "customer_id": row.customer_id,
+        }
+        for row in rows
+    ]
+
+
+def fetch_workflow_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch workflow data for export.
+
+    Args:
+        entity_ids: List of workflow names as strings
+
+    Returns:
+        List of flattened workflow dictionaries with fields:
+        name, description, created_at, product_names (comma-separated),
+        product_ids (comma-separated), product_types (comma-separated)
+    """
+    stmt = (
+        select(WorkflowTable).options(selectinload(WorkflowTable.products)).filter(WorkflowTable.name.in_(entity_ids))
+    )
+    workflows = db.session.scalars(stmt).all()
+
+    return [
+        {
+            "name": w.name,
+            "description": w.description,
+            "created_at": w.created_at.isoformat() if w.created_at else None,
+            "product_names": ", ".join(p.name for p in w.products),
+            "product_ids": ", ".join(str(p.product_id) for p in w.products),
+            "product_types": ", ".join(p.product_type for p in w.products),
+        }
+        for w in workflows
+    ]
+
+
+def fetch_product_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch product data for export.
+
+    Args:
+        entity_ids: List of product IDs as strings
+
+    Returns:
+        List of flattened product dictionaries with fields:
+        product_id, name, product_type, tag, description, status, created_at
+    """
+    stmt = (
+        select(ProductTable)
+        .options(
+            selectinload(ProductTable.workflows),
+            selectinload(ProductTable.fixed_inputs),
+            selectinload(ProductTable.product_blocks),
+        )
+        .filter(ProductTable.product_id.in_([UUID(pid) for pid in entity_ids]))
+    )
+    products = db.session.scalars(stmt).all()
+
+    return [
+        {
+            "product_id": str(p.product_id),
+            "name": p.name,
+            "product_type": p.product_type,
+            "tag": p.tag,
+            "description": p.description,
+            "status": p.status,
+            "created_at": p.created_at.isoformat() if p.created_at else None,
+        }
+        for p in products
+    ]
+
+
+def fetch_process_export_data(entity_ids: list[str]) -> list[dict]:
+    """Fetch process data for export.
+
+    Args:
+        entity_ids: List of process IDs as strings
+
+    Returns:
+        List of flattened process dictionaries with fields:
+        process_id, workflow_name, workflow_id, last_status, is_task,
+        created_by, started_at, last_modified_at, last_step
+    """
+    stmt = (
+        select(ProcessTable)
+        .options(selectinload(ProcessTable.workflow))
+        .filter(ProcessTable.process_id.in_([UUID(pid) for pid in entity_ids]))
+    )
+    processes = db.session.scalars(stmt).all()
+
+    return [
+        {
+            "process_id": str(p.process_id),
+            "workflow_name": p.workflow.name if p.workflow else None,
+            "workflow_id": str(p.workflow_id),
+            "last_status": p.last_status,
+            "is_task": p.is_task,
+            "created_by": p.created_by,
+            "started_at": p.started_at.isoformat() if p.started_at else None,
+            "last_modified_at": p.last_modified_at.isoformat() if p.last_modified_at else None,
+            "last_step": p.last_step,
+        }
+        for p in processes
+    ]
+
+
+def fetch_export_data(entity_type: EntityType, entity_ids: list[str]) -> list[dict]:
+    """Fetch export data for any entity type.
+
+    Args:
+        entity_type: The type of entities to fetch
+        entity_ids: List of entity IDs/names as strings
+
+    Returns:
+        List of flattened entity dictionaries ready for CSV export
+
+    Raises:
+        ValueError: If entity_type is not supported
+    """
+    match entity_type:
+        case EntityType.SUBSCRIPTION:
+            return fetch_subscription_export_data(entity_ids)
+        case EntityType.WORKFLOW:
+            return fetch_workflow_export_data(entity_ids)
+        case EntityType.PRODUCT:
+            return fetch_product_export_data(entity_ids)
+        case EntityType.PROCESS:
+            return fetch_process_export_data(entity_ids)
+        case _:
+            raise ValueError(f"Unsupported entity type: {entity_type}")
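
The export helpers return flat dictionaries described as "ready for CSV export"; note that workflows are looked up by name rather than UUID, matching how workflow results are identified. The actual `/export` endpoint lives in `orchestrator/api/api_v1/endpoints/search.py` and is not shown in this excerpt; purely as an illustration, a caller could serialize the result like this:

```python
# Illustrative CSV serialization of fetch_export_data output; this is NOT the
# actual /export endpoint implementation, which is not part of this excerpt.
import csv
import io

from orchestrator.search.core.types import EntityType
from orchestrator.search.export import fetch_export_data


def export_as_csv(entity_type: EntityType, entity_ids: list[str]) -> str:
    rows = fetch_export_data(entity_type, entity_ids)
    if not rows:
        return ""

    buffer = io.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=list(rows[0].keys()))
    writer.writeheader()
    writer.writerows(rows)
    return buffer.getvalue()
```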
orchestrator/search/indexing/indexer.py:

@@ -96,6 +96,7 @@ class Indexer:
         self.chunk_size = chunk_size
         self.embedding_model = llm_settings.EMBEDDING_MODEL
         self.logger = logger.bind(entity_kind=config.entity_kind.value)
+        self._entity_titles: dict[str, str] = {}
 
     def run(self, entities: Iterable[DatabaseEntity]) -> int:
         """Orchestrates the entire indexing process."""
@@ -138,6 +139,8 @@ class Indexer:
         if not entity_chunk:
             return 0, 0
 
+        self._entity_titles.clear()
+
         fields_to_upsert, paths_to_delete, identical_count = self._determine_changes(entity_chunk, session)
 
         if paths_to_delete and session is not None:
@@ -174,12 +177,15 @@ class Indexer:
                 entity, pk_name=self.config.pk_name, root_name=self.config.root_name
             )
 
+            entity_title = self.config.get_title_from_fields(current_fields)
+            self._entity_titles[entity_id] = entity_title
+
             entity_hashes = existing_hashes.get(entity_id, {})
             current_paths = set()
 
             for field in current_fields:
                 current_paths.add(field.path)
-                current_hash = self._compute_content_hash(field.path, field.value, field.value_type)
+                current_hash = self._compute_content_hash(field.path, field.value, field.value_type, entity_title)
                 if field.path not in entity_hashes or entity_hashes[field.path] != current_hash:
                     fields_to_upsert.append((entity_id, field))
                 else:
@@ -301,21 +307,23 @@ class Indexer:
         return f"{field.path}: {str(field.value)}"
 
     @staticmethod
-    def _compute_content_hash(path: str, value: Any, value_type: Any) -> str:
+    def _compute_content_hash(path: str, value: Any, value_type: Any, entity_title: str = "") -> str:
         v = "" if value is None else str(value)
-        content = f"{path}:{v}:{value_type}"
+        content = f"{path}:{v}:{value_type}:{entity_title}"
        return hashlib.sha256(content.encode("utf-8")).hexdigest()
 
     def _make_indexable_record(
         self, field: ExtractedField, entity_id: str, embedding: list[float] | None
     ) -> IndexableRecord:
+        entity_title = self._entity_titles[entity_id]
         return IndexableRecord(
             entity_id=entity_id,
             entity_type=self.config.entity_kind.value,
+            entity_title=entity_title,
             path=Ltree(field.path),
             value=field.value,
             value_type=field.value_type,
-            content_hash=self._compute_content_hash(field.path, field.value, field.value_type),
+            content_hash=self._compute_content_hash(field.path, field.value, field.value_type, entity_title),
             embedding=embedding if embedding else None,
         )
 
@@ -326,6 +334,7 @@ class Indexer:
         return stmt.on_conflict_do_update(
             index_elements=[AiSearchIndex.entity_id, AiSearchIndex.path],
             set_={
+                AiSearchIndex.entity_title: stmt.excluded.entity_title,
                 AiSearchIndex.value: stmt.excluded.value,
                 AiSearchIndex.value_type: stmt.excluded.value_type,
                 AiSearchIndex.content_hash: stmt.excluded.content_hash,
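
With the entity title folded into the content hash, a change to an entity's title invalidates the hashes of all of its indexed fields, so they are re-upserted (and typically re-embedded) on the next indexing run even when the field values themselves are unchanged. A quick illustration using the same hashing scheme as `_compute_content_hash` above:

```python
# Mirrors the _compute_content_hash logic shown in the diff above.
import hashlib


def compute_content_hash(path: str, value: object, value_type: object, entity_title: str = "") -> str:
    v = "" if value is None else str(value)
    content = f"{path}:{v}:{value_type}:{entity_title}"
    return hashlib.sha256(content.encode("utf-8")).hexdigest()


# The path, value and titles below are made-up example data.
before = compute_content_hash("subscription.status", "active", "string", "Old subscription title")
after = compute_content_hash("subscription.status", "active", "string", "New subscription title")
assert before != after  # same field, different title -> different hash
```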