PyPI - datahub-analytics-agent - Versions diffs - 0.1.0__py3-none-any.whl - Mend

datahub-analytics-agent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

analytics_agent/__init__.py +0 -0
analytics_agent/agent/__init__.py +0 -0
analytics_agent/agent/analysis.py +149 -0
analytics_agent/agent/chart_generator.py +70 -0
analytics_agent/agent/chart_tool.py +103 -0
analytics_agent/agent/compaction.py +57 -0
analytics_agent/agent/compactor_registry.py +22 -0
analytics_agent/agent/graph.py +121 -0
analytics_agent/agent/history.py +159 -0
analytics_agent/agent/llm.py +87 -0
analytics_agent/agent/mock_llm.py +111 -0
analytics_agent/agent/state.py +13 -0
analytics_agent/agent/streaming.py +304 -0
analytics_agent/api/__init__.py +135 -0
analytics_agent/api/chat.py +439 -0
analytics_agent/api/conversations.py +244 -0
analytics_agent/api/oauth.py +741 -0
analytics_agent/api/settings.py +1947 -0
analytics_agent/config.py +236 -0
analytics_agent/context/__init__.py +0 -0
analytics_agent/context/base.py +26 -0
analytics_agent/context/datahub.py +242 -0
analytics_agent/context/mcp_platform.py +123 -0
analytics_agent/context/native_datahub.py +58 -0
analytics_agent/context/registry.py +84 -0
analytics_agent/db/__init__.py +0 -0
analytics_agent/db/alembic/env.py +49 -0
analytics_agent/db/alembic/script.py.mako +25 -0
analytics_agent/db/alembic/versions/001_init.py +47 -0
analytics_agent/db/alembic/versions/002_settings_table.py +30 -0
analytics_agent/db/alembic/versions/003_integrations.py +52 -0
analytics_agent/db/alembic/versions/004_conversation_quality.py +28 -0
analytics_agent/db/alembic/versions/005_context_platforms.py +36 -0
analytics_agent/db/base.py +33 -0
analytics_agent/db/models.py +137 -0
analytics_agent/db/repository.py +294 -0
analytics_agent/db/types.py +69 -0
analytics_agent/engines/__init__.py +0 -0
analytics_agent/engines/base.py +30 -0
analytics_agent/engines/factory.py +95 -0
analytics_agent/engines/mcp/__init__.py +0 -0
analytics_agent/engines/mcp/engine.py +78 -0
analytics_agent/engines/resolver.py +84 -0
analytics_agent/engines/snowflake/__init__.py +0 -0
analytics_agent/engines/snowflake/engine.py +304 -0
analytics_agent/engines/sqlalchemy/__init__.py +0 -0
analytics_agent/engines/sqlalchemy/engine.py +163 -0
analytics_agent/main.py +536 -0
analytics_agent/prompts/__init__.py +0 -0
analytics_agent/prompts/chart.py +101 -0
analytics_agent/prompts/system.py +33 -0
analytics_agent/prompts/system_prompt.md +184 -0
analytics_agent/skills/__init__.py +0 -0
analytics_agent/skills/datahub_skills.py +409 -0
analytics_agent/skills/improve-context/SKILL.md +73 -0
analytics_agent/skills/loader.py +162 -0
analytics_agent/skills/publish-analysis/SKILL.md +99 -0
analytics_agent/skills/save-correction/SKILL.md +161 -0
analytics_agent/skills/search-business-context/SKILL.md +109 -0
analytics_agent/tracing.py +88 -0
datahub_analytics_agent-0.1.0.dist-info/METADATA +328 -0
datahub_analytics_agent-0.1.0.dist-info/RECORD +63 -0
datahub_analytics_agent-0.1.0.dist-info/WHEEL +4 -0

analytics_agent/__init__.py ADDED Viewed

File without changes

analytics_agent/agent/__init__.py ADDED Viewed

File without changes

analytics_agent/agent/analysis.py ADDED Viewed

@@ -0,0 +1,149 @@
+from __future__ import annotations
+import json
+import logging
+from dataclasses import dataclass
+import orjson
+# Module-level logger; used below to report failed quality assessments.
+logger = logging.getLogger(__name__)
+# Names of the tools whose TOOL_RESULT events compute_context_quality() counts
+# as context lookups.
+CONTEXT_TOOLS: frozenset[str] = frozenset(
+    {"search_documents", "grep_documents", "search", "get_entities", "search_business_context"}
+)
+_CONTEXT_TOOLS = CONTEXT_TOOLS  # backward compat alias
+# Fallback labels keyed by score; used when the model's JSON reply omits "label".
+_SCORE_LABELS = {1: "Poor", 2: "Poor", 3: "Fair", 4: "Good", 5: "Excellent"}
+# Prompt template filled with str.format(): {context_calls} and {agent_reasoning}
+# are substituted, while the doubled braces on the final line are literal JSON braces.
+_ASSESSMENT_PROMPT = """\
+You are assessing the **context quality** of a data assistant conversation.
+Context quality measures how well the DataHub knowledge base (documentation, \
+definitions, dataset descriptions) supported the agent's work.
+Score 1–5:
+5 Excellent — DataHub had rich, accurate documentation that fully covered the \
+question. Agent applied the definition directly with no improvisation.
+4 Good — Useful docs found; definition was mostly complete; agent made one minor \
+stated assumption that didn't change the answer meaningfully.
+3 Fair — Definition found but incomplete or ambiguous; agent had to fill gaps, \
+deviate from the definition, or ask the user for clarification about what the \
+context should have made clear.
+2 Poor — Docs mostly missing or returned empty results; agent improvised \
+substantially and the answer depended heavily on undocumented choices.
+1 Very Poor — No useful context; agent expressed significant uncertainty, made \
+conflicting assumptions, or produced an answer that contradicts available definitions.
+Key signals that push the score DOWN:
+- Agent says "the definition doesn't cover this" or "I'll interpret this as…"
+- Agent switches columns, tables, or date anchors not mentioned in the definition
+- Agent produces a result that varies based on an undocumented assumption
+- Agent asks the user to clarify something the glossary/docs should have defined
+--- CONTEXT TOOL CALLS AND RESULTS ---
+{context_calls}
+--- END CONTEXT ---
+--- AGENT REASONING (what the agent said and concluded) ---
+{agent_reasoning}
+--- END REASONING ---
+Respond with ONLY valid JSON, no explanation outside it:
+{{"score": <1-5>, "label": "<Excellent|Good|Fair|Poor>", "reason": "<one sentence that names the specific gap or strength>"}}"""
+async def compute_context_quality(messages: list) -> QualityScore:
+    """
+    LLM-assessed context quality score (1–5).
+    Extracts DataHub context tool calls + results and the agent's own reasoning
+    text, then asks a cheap model to judge whether the returned context was
+    actually useful and complete — penalising cases where the agent had to
+    improvise or deviate from the definition.
+    Returns Neutral (3) immediately when no context tool calls have occurred yet.
+    """
+    context_calls: list[dict] = []
+    agent_text_chunks: list[str] = []
+    for msg in messages:
+        try:
+            payload = (
+                orjson.loads(msg.payload) if isinstance(msg.payload, (str, bytes)) else msg.payload
+            )
+        except Exception:
+            continue
+        if msg.event_type == "TOOL_RESULT":
+            tool_name = payload.get("tool_name", "")
+            if tool_name not in _CONTEXT_TOOLS:
+                continue
+            result_raw = payload.get("result", "")
+            result_str = str(result_raw)[:800] + ("…" if len(str(result_raw)) > 800 else "")
+            context_calls.append(
+                {
+                    "tool": tool_name,
+                    "is_error": bool(payload.get("is_error", False)),
+                    "result": result_str,
+                }
+            )
+        elif msg.event_type in ("TEXT", "COMPLETE"):
+            text = payload.get("text", "")
+            if text:
+                agent_text_chunks.append(text[:400])
+    if not context_calls:
+        return QualityScore(
+            score=3, label="Neutral", breakdown={"reason": "No context lookups yet"}
+        )
+    calls_text = "\n\n".join(
+        f"Tool: {c['tool']}\nError: {c['is_error']}\nResult: {c['result']}" for c in context_calls
+    )
+    # Deduplicate and cap agent reasoning (TEXT events stream token-by-token)
+    reasoning = " ".join(dict.fromkeys(agent_text_chunks))[:1200]
+    prompt = _ASSESSMENT_PROMPT.format(context_calls=calls_text, agent_reasoning=reasoning)
+    try:
+        from langchain_core.messages import HumanMessage, SystemMessage
+        from analytics_agent.agent.llm import get_quality_llm
+        llm = get_quality_llm()
+        response = await llm.ainvoke(
+            [
+                SystemMessage(
+                    content="You assess data assistant context quality. Reply only with the requested JSON."
+                ),
+                HumanMessage(content=prompt),
+            ]
+        )
+        raw = response.content
+        if isinstance(raw, list):
+            raw = next(
+                (b.get("text", "") for b in raw if isinstance(b, dict) and b.get("type") == "text"),
+                "",
+            )
+        raw = raw.strip()
+        # Strip markdown code fences if present
+        if raw.startswith("```"):
+            raw = raw.split("```")[1]
+            if raw.startswith("json"):
+                raw = raw[4:]
+        data = json.loads(raw.strip())
+        score = max(1, min(5, int(data.get("score", 3))))
+        label = data.get("label", _SCORE_LABELS[score])
+        reason = data.get("reason", "")
+        return QualityScore(score=score, label=label, breakdown={"reason": reason})
+    except Exception as exc:
+        logger.warning("Context quality LLM assessment failed: %s", exc)
+        return QualityScore(
+            score=3, label="Neutral", breakdown={"reason": "Assessment unavailable"}
+        )
+@dataclass
+class QualityScore:
+    """Context-quality verdict produced by compute_context_quality()."""
+    score: int  # 1–5
+    label: str  # model-supplied label, or "Neutral" on the fallback paths
+    breakdown: dict  # built in this module as {"reason": <str>}

analytics_agent/agent/chart_generator.py ADDED Viewed

@@ -0,0 +1,70 @@
+from __future__ import annotations
+import logging
+import orjson
+from langchain_core.messages import HumanMessage, SystemMessage
+from analytics_agent.agent.llm import get_chart_llm
+from analytics_agent.agent.state import AgentState
+from analytics_agent.prompts.chart import CHART_SYSTEM_PROMPT, build_chart_user_prompt
+logger = logging.getLogger(__name__)
+async def chart_node(state: AgentState) -> dict:
+    """
+    Generate a Vega-Lite chart spec from the last SQL result and store it in state.
+    streaming.py reads it from state after the graph completes.
+    """
+    from analytics_agent.agent.graph import get_last_sql_result
+    sql_result = get_last_sql_result(state)
+    if not sql_result or not sql_result.get("rows"):
+        return {}
+    llm = get_chart_llm()
+    user_prompt = build_chart_user_prompt(
+        question=state.get("user_question", ""),
+        sql=sql_result.get("sql", ""),
+        columns=sql_result.get("columns", []),
+        sample_rows=sql_result.get("rows", []),
+    )
+    try:
+        response = await llm.ainvoke(
+            [SystemMessage(content=CHART_SYSTEM_PROMPT), HumanMessage(content=user_prompt)]
+        )
+        raw = response.content
+        if isinstance(raw, list):
+            # Anthropic returns list of content blocks
+            raw = next(
+                (b.get("text", "") for b in raw if isinstance(b, dict) and b.get("type") == "text"),
+                "",
+            )
+        if "```" in raw:
+            raw = raw.split("```")[1]
+            if raw.startswith("json"):
+                raw = raw[4:]
+        result = orjson.loads(raw.strip())
+        chart_schema = result.get("chart_schema", {})
+        chart_type = result.get("chart_type", "")
+        if chart_schema and chart_type:
+            chart_schema["data"] = {"values": sql_result.get("rows", [])}
+        # Store in state so streaming.py can emit it after graph completion
+        return {
+            "pending_chart": {
+                "vega_lite_spec": chart_schema,
+                "reasoning": result.get("reasoning", ""),
+                "chart_type": chart_type,
+            }
+        }
+    except Exception:
+        logger.exception("Chart generation failed (non-fatal)")
+    return {}

analytics_agent/agent/chart_tool.py ADDED Viewed

@@ -0,0 +1,103 @@
+from __future__ import annotations
+import logging
+import uuid
+import orjson
+from langchain_core.tools import tool
+logger = logging.getLogger(__name__)
+# Side-channel: keyed by chart_id so streaming.py can fetch the spec
+# without the model ever seeing the full JSON.
+# NOTE(review): entries are only ever added in this module; presumably the
+# consumer removes them after emitting the chart — confirm, otherwise this grows.
+_pending_charts: dict[str, dict] = {}
+# NOTE: LangChain's @tool uses the docstring below as the tool description the
+# model sees, so its wording is runtime behaviour rather than plain documentation.
+@tool
+async def create_chart(
+    data: list[dict] | None = None,
+    question: str = "",
+    title: str = "",
+    color_scheme: str = "",
+) -> str:
+    """
+    Generate a Vega-Lite chart from structured data. Call this when the user asks
+    for a chart, graph, or visualization. The chart renders automatically in the UI.
+    Args:
+        data: list of dicts with consistent keys (e.g. [{"platform": "snowflake", "count": 2290}])
+        question: the user's question or description of what to visualize
+        title: optional chart title
+        color_scheme: optional color instruction e.g. "rainbow", "blue", "categorical", "green"
+    On follow-up requests to change chart colors or style, call this again with the
+    same data and the new color_scheme.
+    Example: create_chart(data=[...], question="datasets by platform", color_scheme="rainbow")
+    """
+    # Imported lazily inside the function rather than at module import time.
+    from analytics_agent.agent.llm import get_chart_llm
+    from analytics_agent.prompts.chart import CHART_SYSTEM_PROMPT, build_chart_user_prompt
+    if not data:
+        return "No data provided — cannot create chart."
+    # Column names are taken from the first row only.
+    columns = list(data[0].keys()) if data else []
+    llm = get_chart_llm()
+    # Fall back to the title when no question was given, then append the colour hint.
+    full_question = question or title
+    if color_scheme:
+        full_question = f"{full_question} (use {color_scheme} color scheme)"
+    # Only the first 50 rows are sent to the model as samples; the full data set
+    # is attached to the spec further down.
+    user_prompt = build_chart_user_prompt(
+        question=full_question,
+        sql="",
+        columns=columns,
+        sample_rows=data[:50],
+    )
+    try:
+        from langchain_core.messages import HumanMessage, SystemMessage
+        response = await llm.ainvoke(
+            [SystemMessage(content=CHART_SYSTEM_PROMPT), HumanMessage(content=user_prompt)]
+        )
+        raw = response.content
+        if isinstance(raw, list):
+            # Content may be a list of blocks; use the first text block.
+            raw = next(
+                (b.get("text", "") for b in raw if isinstance(b, dict) and b.get("type") == "text"),
+                "",
+            )
+        if "```" in raw:
+            # Keep only what sits between the first pair of code fences.
+            raw = raw.split("```")[1]
+            if raw.startswith("json"):
+                raw = raw[4:]
+        result = orjson.loads(raw.strip())
+        chart_schema = result.get("chart_schema", {})
+        chart_type = result.get("chart_type", "")
+        if chart_schema and chart_type:
+            chart_schema["data"] = {"values": data}
+        # Store spec in side-channel — return a short human-readable summary so
+        # the model retains context for follow-up requests (e.g. "change color")
+        chart_id = str(uuid.uuid4())
+        _pending_charts[chart_id] = {
+            "vega_lite_spec": chart_schema,
+            "reasoning": result.get("reasoning", ""),
+            "chart_type": chart_type,
+        }
+        color_note = f", color_scheme={color_scheme!r}" if color_scheme else ""
+        # Include the full data inline so the model can reuse it on follow-up requests
+        # (e.g. "redraw with different colors")
+        data_summary = orjson.dumps(data).decode()
+        return (
+            f"CHART_READY:{chart_id} "
+            f"({chart_type} chart, {len(data)} rows{color_note})\n"
+            f"data={data_summary}"
+        )
+    except Exception as e:
+        # Failures are reported back to the model as text instead of being raised.
+        logger.exception("create_chart failed")
+        return f"Chart generation failed: {e}"

analytics_agent/agent/compaction.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""
+Pluggable chat history compaction.
+OSS default: TurnWindowCompactor drops oldest turns by token budget.
+DataHub Cloud (or other extensions) can register a SummarizingCompactor
+via compactor_registry.register_compactor() at app startup.
+"""
+from __future__ import annotations
+from typing import Protocol, runtime_checkable
+from langchain_core.messages import AIMessage, BaseMessage
+# Structural interface for history compaction strategies; @runtime_checkable
+# allows isinstance() checks against it.
+@runtime_checkable
+class HistoryCompactor(Protocol):
+    def compact(
+        self,
+        turns: list[list[BaseMessage]],
+        max_tokens: int,
+    ) -> list[list[BaseMessage]]:
+        """Return a (possibly shorter) list of turns that fits within max_tokens.
+        Turns are in chronological order; always keep the most recent turn.
+        Never return an empty list when given a non-empty input.
+        Args:
+            turns: one inner list of messages per conversation turn, oldest first.
+            max_tokens: token budget the returned history should fit within.
+        """
+        ...
+def estimate_tokens(msgs: list[BaseMessage]) -> int:
+    """Fast character-based token estimate (~4 chars per token)."""
+    total = 0
+    for msg in msgs:
+        total += 100  # per-message overhead (role, metadata, framing)
+        content = msg.content if isinstance(msg.content, str) else str(msg.content)
+        total += len(content) // 4
+        if isinstance(msg, AIMessage):
+            for tc in msg.tool_calls or []:
+                total += len(str(tc.get("args", ""))) // 4
+    return total
+class TurnWindowCompactor:
+    """Drop oldest turns until the flattened history fits within max_tokens."""
+    def compact(
+        self,
+        turns: list[list[BaseMessage]],
+        max_tokens: int,
+    ) -> list[list[BaseMessage]]:
+        while len(turns) > 1:
+            flat = [msg for turn in turns for msg in turn]
+            if estimate_tokens(flat) <= max_tokens:
+                break
+            turns = turns[1:]
+        return turns

analytics_agent/agent/compactor_registry.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""
+Module-level registry for the active HistoryCompactor.
+DataHub Cloud (or any extension) can call register_compactor() at app startup
+to swap in a more sophisticated strategy (e.g. LLM summarization) without
+modifying core files.
+"""
+from __future__ import annotations
+from analytics_agent.agent.compaction import HistoryCompactor, TurnWindowCompactor
+# The active compactor; starts out as the turn-window strategy.
+_compactor: HistoryCompactor = TurnWindowCompactor()
+def register_compactor(c: HistoryCompactor) -> None:
+    """Replace the module-wide active compactor with ``c``."""
+    global _compactor
+    _compactor = c
+def get_compactor() -> HistoryCompactor:
+    """Return the currently registered compactor."""
+    return _compactor

analytics_agent/agent/graph.py ADDED Viewed

@@ -0,0 +1,121 @@
+from __future__ import annotations
+from typing import Literal
+import orjson
+from langchain.agents import create_agent
+from langchain_core.messages import ToolMessage
+from langgraph.graph import END, START, StateGraph
+from analytics_agent.agent.llm import get_llm
+from analytics_agent.agent.state import AgentState
+from analytics_agent.prompts.system import build_system_prompt
+# Write-back skills are opt-in; only included when explicitly enabled by the user
+_SKILL_TOOL_NAMES: frozenset[str] = frozenset({"publish_analysis", "save_correction"})
+# Alias of _SKILL_TOOL_NAMES.
+# NOTE(review): no code visible in this module references either name — confirm
+# whether another module still imports them before removing.
+_MUTATION_TOOL_NAMES = _SKILL_TOOL_NAMES
+def get_last_sql_result(state: AgentState) -> dict | None:
+    """Scan message history for the last execute_sql ToolMessage and parse its content."""
+    for msg in reversed(state["messages"]):
+        if isinstance(msg, ToolMessage) and getattr(msg, "name", None) == "execute_sql":
+            try:
+                if isinstance(msg.content, str):
+                    return orjson.loads(msg.content)
+            except Exception:
+                pass
+    return None
+def _route_after_agent(state: AgentState) -> Literal["chart", "__end__"]:
+    result = get_last_sql_result(state)
+    if result and result.get("rows"):
+        return "chart"
+    return "__end__"
+def build_graph(
+    engine_name: str,
+    engine=None,  # pre-resolved engine from resolver.py; if None falls back to registry
+    system_prompt_override: str | None = None,
+    disabled_tools: set[str] | None = None,
+    enabled_mutations: set[str] | None = None,
+    context_tools: list | None = None,  # pre-built from DB context platforms at request time
+    engine_tools: list | None = None,  # pre-built for MCP data sources (bypasses QueryEngine)
+):
+    """Assemble and compile the agent graph for one request.
+    The tool set is the concatenation of context tools, skill tools, engine tools
+    and the chart tool, with any tool whose name is in ``disabled_tools`` removed
+    from the context, engine and chart groups. The compiled graph runs
+    START → agent, then either → chart → END or straight to END, as decided by
+    ``_route_after_agent``.
+    Args:
+        engine_name: name used for the registry lookup and in the system prompt.
+        engine: optional pre-resolved engine; looked up in the registry when None
+            and ``engine_tools`` is not supplied.
+        system_prompt_override: optional template formatted with ``engine_name``.
+        disabled_tools: tool names to leave out.
+        enabled_mutations: names of opt-in write-back skills to include.
+        context_tools: optional pre-built context tools.
+        engine_tools: optional pre-built engine tools.
+    Raises:
+        ValueError: when the engine has to be looked up and is not registered.
+    """
+    from analytics_agent.agent.chart_generator import chart_node
+    from analytics_agent.engines.factory import get_registry
+    disabled = disabled_tools or set()
+    llm = get_llm(streaming=True)
+    from analytics_agent.agent.chart_tool import create_chart
+    # Context platform tools — built dynamically from DB at request time.
+    # Falls back to env-var based build only when caller doesn't provide them.
+    if context_tools is not None:
+        datahub_tools = [t for t in context_tools if t.name not in disabled]
+    else:
+        from analytics_agent.context.datahub import build_datahub_tools
+        datahub_tools = [t for t in build_datahub_tools() if t.name not in disabled]
+    # Always-on skills (context search etc.) + opt-in write-back skills
+    # Note: skill tools are not filtered against ``disabled``.
+    from analytics_agent.skills.loader import build_always_on_skill_tools, build_skill_tools
+    skill_tools = build_always_on_skill_tools() + build_skill_tools(enabled_mutations or set())
+    # Engine tools — MCP data sources supply pre-built tools; native engines use QueryEngine
+    if engine_tools is not None:
+        engine_tools = [t for t in engine_tools if t.name not in disabled]
+    else:
+        if engine is None:
+            registry = get_registry()
+            engine = registry.get(engine_name)
+            if not engine:
+                raise ValueError(f"Engine '{engine_name}' not found.")
+        engine_tools = [t for t in engine.get_tools() if t.name not in disabled]
+    chart_tools = [] if "create_chart" in disabled else [create_chart]
+    all_tools = datahub_tools + skill_tools + engine_tools + chart_tools
+    if system_prompt_override:
+        from analytics_agent.skills.loader import (
+            get_improve_context_prompt_section,
+            get_search_business_context_section,
+            get_skill_system_prompt_section,
+        )
+        # NOTE(review): str.format() raises on an override that contains other
+        # literal braces (e.g. a JSON example) — confirm overrides escape them.
+        system_prompt = system_prompt_override.format(engine_name=engine_name)
+        system_prompt += get_search_business_context_section()
+        system_prompt += get_improve_context_prompt_section()
+        if enabled_mutations:
+            system_prompt += get_skill_system_prompt_section(enabled_mutations)
+    else:
+        system_prompt = build_system_prompt(engine_name, enabled_skills=enabled_mutations)
+    # Enable per-tool error handling so validation errors (e.g. hallucinated
+    # arguments like filter= on get_entities) are returned as tool messages
+    # the agent can read and recover from, rather than crashing the loop.
+    for tool in all_tools:
+        tool.handle_tool_error = True
+    react_agent = create_agent(
+        model=llm,
+        tools=all_tools,
+        state_schema=AgentState,
+        system_prompt=system_prompt,
+    )
+    # Outer graph: the agent runs first; the chart node runs only when routed to.
+    graph = StateGraph(AgentState)
+    graph.add_node("agent", react_agent)
+    graph.add_node("chart", chart_node)
+    graph.add_edge(START, "agent")
+    graph.add_conditional_edges(
+        "agent",
+        _route_after_agent,
+        {"chart": "chart", "__end__": END},
+    )
+    graph.add_edge("chart", END)
+    return graph.compile()

analytics_agent/agent/history.py ADDED Viewed

@@ -0,0 +1,159 @@
+"""
+Reconstruct LangChain-compatible message history from DB-persisted events.
+Strategy:
+- Group stored messages by user turn (each user TEXT starts a new turn).
+- Each turn emits: HumanMessage → [tool call/result pairs] → AIMessage (final text).
+- Tool calls and results are matched by sequence order within a turn.
+- If a turn had no tool calls and no COMPLETE text, we fall back to TEXT chunks.
+- Turns that have no assistant response at all (e.g. error turns) are skipped entirely
+  to avoid consecutive HumanMessages which LangGraph rejects.
+- An optional HistoryCompactor drops the oldest turns to stay within the token budget.
+"""
+from __future__ import annotations
+import re
+import orjson
+from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage
+from analytics_agent.agent.compaction import HistoryCompactor
+def build_history(
+    stored_messages: list,
+    current_user_text: str,
+    compactor: HistoryCompactor | None = None,
+    max_history_tokens: int = 120_000,
+) -> list[BaseMessage]:
+    """
+    Convert persisted message rows into a LangChain message list ending with
+    the current user turn.
+    If a compactor is provided, oldest turns are dropped to stay within
+    max_history_tokens before returning.
+    """
+    # Split into turns at each user TEXT message
+    raw_turns: list[list] = []
+    current_turn: list = []
+    for msg in stored_messages:
+        payload = orjson.loads(msg.payload) if isinstance(msg.payload, str) else msg.payload
+        if msg.role == "user" and msg.event_type == "TEXT":
+            if current_turn:
+                raw_turns.append(current_turn)
+            current_turn = [("user", payload.get("text", ""), msg)]
+        else:
+            current_turn.append((msg.role, payload, msg))
+    if current_turn:
+        raw_turns.append(current_turn)
+    # Build LangChain messages per turn
+    lc_turns: list[list[BaseMessage]] = []
+    for turn in raw_turns:
+        role0, content0, _ = turn[0]
+        if role0 != "user":
+            continue
+        tool_calls: list[dict] = []
+        tool_results: list[dict] = []
+        text_chunks: list[str] = []
+        final_text = ""
+        has_chart = False
+        for role, payload, msg in turn[1:]:
+            if role != "assistant":
+                continue
+            evt = msg.event_type
+            if evt == "TOOL_CALL":
+                tool_calls.append(
+                    {
+                        "id": msg.id,
+                        "name": payload.get("tool_name", ""),
+                        "input": payload.get("tool_input", {}),
+                    }
+                )
+            elif evt in ("TOOL_RESULT", "SQL"):
+                idx = len(tool_results)
+                call_id = tool_calls[idx]["id"] if idx < len(tool_calls) else msg.id
+                tool_results.append(
+                    {
+                        "id": call_id,
+                        "name": payload.get("tool_name", ""),
+                        "result": payload.get("result", payload.get("sql", ""))[:4000],
+                    }
+                )
+            elif evt == "TEXT":
+                chunk = payload.get("text", "")
+                if chunk:
+                    text_chunks.append(chunk)
+            elif evt == "COMPLETE":
+                final_text = payload.get("text", "")
+            elif evt == "CHART":
+                has_chart = True
+        if not final_text:
+            assembled = "".join(text_chunks)
+            assembled = re.sub(
+                r"```(?:json)?\s*\{.*?\"chart_schema\".*?\}\s*```", "", assembled, flags=re.DOTALL
+            ).strip()
+            final_text = assembled[:500] if assembled else ""
+        if not final_text and has_chart:
+            final_text = "[Chart rendered]"
+        has_any_assistant_content = tool_calls or final_text or has_chart
+        if not has_any_assistant_content:
+            continue
+        turn_msgs: list[BaseMessage] = []
+        turn_msgs.append(HumanMessage(content=content0))
+        if tool_calls:
+            lc_tool_calls = [
+                {
+                    "id": tc["id"],
+                    "name": tc["name"],
+                    "args": tc["input"],
+                    "type": "tool_call",
+                }
+                for tc in tool_calls
+            ]
+            turn_msgs.append(AIMessage(content="", tool_calls=lc_tool_calls))
+            # Every tool_call must have a ToolMessage with its exact ID.
+            # Pad missing results; always use tc["id"] as tool_call_id so
+            # the IDs are guaranteed to match the AIMessage (avoids Anthropic
+            # "unexpected tool_use_id" errors from orphaned DB records).
+            for i, tc in enumerate(tool_calls):
+                if i < len(tool_results):
+                    tr = tool_results[i]
+                    turn_msgs.append(
+                        ToolMessage(
+                            content=str(tr["result"]),
+                            tool_call_id=tc["id"],
+                            name=tr["name"],
+                        )
+                    )
+                else:
+                    turn_msgs.append(
+                        ToolMessage(
+                            content="[Tool did not return a result]",
+                            tool_call_id=tc["id"],
+                            name=tc["name"],
+                        )
+                    )
+        if final_text or not tool_calls:
+            turn_msgs.append(AIMessage(content=final_text or "Done."))
+        lc_turns.append(turn_msgs)
+    # Drop oldest turns if needed
+    if compactor is not None and lc_turns:
+        lc_turns = compactor.compact(lc_turns, max_tokens=max_history_tokens)
+    result = [msg for turn in lc_turns for msg in turn]
+    result.append(HumanMessage(content=current_user_text))
+    return result