dao-ai 0.1.2-py3-none-any.whl → 0.1.20-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. dao_ai/apps/__init__.py +24 -0
  2. dao_ai/apps/handlers.py +105 -0
  3. dao_ai/apps/model_serving.py +29 -0
  4. dao_ai/apps/resources.py +1122 -0
  5. dao_ai/apps/server.py +39 -0
  6. dao_ai/cli.py +546 -37
  7. dao_ai/config.py +1179 -139
  8. dao_ai/evaluation.py +543 -0
  9. dao_ai/genie/__init__.py +55 -7
  10. dao_ai/genie/cache/__init__.py +34 -7
  11. dao_ai/genie/cache/base.py +143 -2
  12. dao_ai/genie/cache/context_aware/__init__.py +31 -0
  13. dao_ai/genie/cache/context_aware/base.py +1151 -0
  14. dao_ai/genie/cache/context_aware/in_memory.py +609 -0
  15. dao_ai/genie/cache/context_aware/persistent.py +802 -0
  16. dao_ai/genie/cache/context_aware/postgres.py +1166 -0
  17. dao_ai/genie/cache/core.py +1 -1
  18. dao_ai/genie/cache/lru.py +257 -75
  19. dao_ai/genie/cache/optimization.py +890 -0
  20. dao_ai/genie/core.py +235 -11
  21. dao_ai/memory/postgres.py +175 -39
  22. dao_ai/middleware/__init__.py +38 -0
  23. dao_ai/middleware/assertions.py +3 -3
  24. dao_ai/middleware/context_editing.py +230 -0
  25. dao_ai/middleware/core.py +4 -4
  26. dao_ai/middleware/guardrails.py +3 -3
  27. dao_ai/middleware/human_in_the_loop.py +3 -2
  28. dao_ai/middleware/message_validation.py +4 -4
  29. dao_ai/middleware/model_call_limit.py +77 -0
  30. dao_ai/middleware/model_retry.py +121 -0
  31. dao_ai/middleware/pii.py +157 -0
  32. dao_ai/middleware/summarization.py +1 -1
  33. dao_ai/middleware/tool_call_limit.py +210 -0
  34. dao_ai/middleware/tool_retry.py +174 -0
  35. dao_ai/middleware/tool_selector.py +129 -0
  36. dao_ai/models.py +327 -370
  37. dao_ai/nodes.py +9 -16
  38. dao_ai/orchestration/core.py +33 -9
  39. dao_ai/orchestration/supervisor.py +29 -13
  40. dao_ai/orchestration/swarm.py +6 -1
  41. dao_ai/{prompts.py → prompts/__init__.py} +12 -61
  42. dao_ai/prompts/instructed_retriever_decomposition.yaml +58 -0
  43. dao_ai/prompts/instruction_reranker.yaml +14 -0
  44. dao_ai/prompts/router.yaml +37 -0
  45. dao_ai/prompts/verifier.yaml +46 -0
  46. dao_ai/providers/base.py +28 -2
  47. dao_ai/providers/databricks.py +363 -33
  48. dao_ai/state.py +1 -0
  49. dao_ai/tools/__init__.py +5 -3
  50. dao_ai/tools/genie.py +103 -26
  51. dao_ai/tools/instructed_retriever.py +366 -0
  52. dao_ai/tools/instruction_reranker.py +202 -0
  53. dao_ai/tools/mcp.py +539 -97
  54. dao_ai/tools/router.py +89 -0
  55. dao_ai/tools/slack.py +13 -2
  56. dao_ai/tools/sql.py +7 -3
  57. dao_ai/tools/unity_catalog.py +32 -10
  58. dao_ai/tools/vector_search.py +493 -160
  59. dao_ai/tools/verifier.py +159 -0
  60. dao_ai/utils.py +182 -2
  61. dao_ai/vector_search.py +46 -1
  62. {dao_ai-0.1.2.dist-info → dao_ai-0.1.20.dist-info}/METADATA +45 -9
  63. dao_ai-0.1.20.dist-info/RECORD +89 -0
  64. dao_ai/agent_as_code.py +0 -22
  65. dao_ai/genie/cache/semantic.py +0 -970
  66. dao_ai-0.1.2.dist-info/RECORD +0 -64
  67. {dao_ai-0.1.2.dist-info → dao_ai-0.1.20.dist-info}/WHEEL +0 -0
  68. {dao_ai-0.1.2.dist-info → dao_ai-0.1.20.dist-info}/entry_points.txt +0 -0
  69. {dao_ai-0.1.2.dist-info → dao_ai-0.1.20.dist-info}/licenses/LICENSE +0 -0
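The largest functional change in this release is in dao_ai/models.py, shown in the diff below: LanggraphResponsesAgent becomes async-first, with apredict() and apredict_stream() as the primary implementations and predict()/predict_stream() reduced to thin asyncio.run()-based wrappers. The following sketch illustrates how that new surface might be driven from an async context; it assumes MLflow's ResponsesAgent request/response types (the import path and the `input` payload shape may vary by MLflow version), and the `agent` object is a placeholder rather than code from this package.

# Hedged sketch: exercising the async-first API visible in the dao_ai/models.py diff.
# Assumption: the request type comes from MLflow's ResponsesAgent interface; the
# exact import path and payload field names may differ across MLflow versions.
from mlflow.types.responses import ResponsesAgentRequest


async def drive(agent) -> None:
    """Call the async methods of a LanggraphResponsesAgent (placeholder `agent`)."""
    request = ResponsesAgentRequest(
        # Payload shape follows MLflow's Responses schema (assumed here).
        input=[{"role": "user", "content": "How many stores opened last quarter?"}],
        custom_inputs={"configurable": {"thread_id": "abc-123"}},
    )

    # Non-streaming: apredict() can be awaited directly inside a running event loop
    # (e.g. Databricks Apps / MLflow AgentServer).
    response = await agent.apredict(request)
    print(response.output[-1], response.custom_outputs)

    # Streaming: apredict_stream() is an async generator of ResponsesAgentStreamEvent.
    async for event in agent.apredict_stream(request):
        print(event.type)


# Outside any running event loop, the sync wrappers still work; per the diff they
# call asyncio.run() internally, so they must not be used under uvloop or inside
# an already-running loop. Example: asyncio.run(drive(agent)).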
dao_ai/models.py CHANGED
@@ -1,7 +1,16 @@
 import uuid
 from os import PathLike
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Generator, Literal, Optional, Sequence, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    AsyncGenerator,
+    Generator,
+    Literal,
+    Optional,
+    Sequence,
+    Union,
+)
 
 from databricks_langchain import ChatDatabricks
 
@@ -825,13 +834,16 @@ class LanggraphResponsesAgent(ResponsesAgent):
     ) -> None:
         self.graph = graph
 
-    def predict(self, request: ResponsesAgentRequest) -> ResponsesAgentResponse:
+    async def apredict(self, request: ResponsesAgentRequest) -> ResponsesAgentResponse:
         """
+        Async version of predict - primary implementation for Databricks Apps.
+
         Process a ResponsesAgentRequest and return a ResponsesAgentResponse.
+        This method can be awaited directly in async contexts (e.g., MLflow AgentServer).
 
         Input structure (custom_inputs):
             configurable:
-                thread_id: "abc-123"  # Or conversation_id (aliases, conversation_id takes precedence)
+                thread_id: "abc-123"  # Or conversation_id (aliases)
                 user_id: "nate.fleming"
                 store_num: "87887"
             session:  # Paste from previous output
@@ -846,11 +858,11 @@ class LanggraphResponsesAgent(ResponsesAgent):
 
         Output structure (custom_outputs):
             configurable:
-                thread_id: "abc-123"  # Only thread_id in configurable
+                thread_id: "abc-123"
                 user_id: "nate.fleming"
                 store_num: "87887"
             session:
-                conversation_id: "abc-123"  # conversation_id in session
+                conversation_id: "abc-123"
             genie:
                 spaces:
                     space_123: {conversation_id: "conv_456", ...}
@@ -859,12 +871,13 @@ class LanggraphResponsesAgent(ResponsesAgent):
             arguments: {...}
             description: "..."
         """
-        # Extract conversation_id for logging (from context or custom_inputs)
+        from langgraph.types import Command
+
+        # Extract conversation_id for logging
         conversation_id_for_log: str | None = None
         if request.context and hasattr(request.context, "conversation_id"):
             conversation_id_for_log = request.context.conversation_id
         elif request.custom_inputs:
-            # Check configurable or session for conversation_id
             if "configurable" in request.custom_inputs and isinstance(
                 request.custom_inputs["configurable"], dict
             ):
@@ -881,7 +894,7 @@ class LanggraphResponsesAgent(ResponsesAgent):
             )
 
         logger.debug(
-            "ResponsesAgent predict called",
+            "ResponsesAgent apredict called",
             conversation_id=conversation_id_for_log
             if conversation_id_for_log
             else "new",
@@ -899,130 +912,106 @@ class LanggraphResponsesAgent(ResponsesAgent):
         # Extract session state from request
         session_input: dict[str, Any] = self._extract_session_from_request(request)
 
-        # Use async ainvoke internally for parallel execution
-        import asyncio
-
-        from langgraph.types import Command
+        try:
+            # Check if this is a resume request (HITL)
+            if request.custom_inputs and "decisions" in request.custom_inputs:
+                # Explicit structured decisions
+                decisions: list[Decision] = request.custom_inputs["decisions"]
+                logger.info(
+                    "HITL: Resuming with explicit decisions",
+                    decisions_count=len(decisions),
+                )
 
-        async def _async_invoke():
-            try:
-                # Check if this is a resume request (HITL)
-                # Two ways to resume:
-                # 1. Explicit decisions in custom_inputs (structured)
-                # 2. Natural language message when graph is interrupted (LLM-parsed)
-
-                if request.custom_inputs and "decisions" in request.custom_inputs:
-                    # Explicit structured decisions
-                    decisions: list[Decision] = request.custom_inputs["decisions"]
-                    logger.info(
-                        "HITL: Resuming with explicit decisions",
-                        decisions_count=len(decisions),
-                    )
+                # Resume interrupted graph with decisions
+                response = await self.graph.ainvoke(
+                    Command(resume={"decisions": decisions}),
+                    context=context,
+                    config=custom_inputs,
+                )
+            elif self.graph.checkpointer:
+                # Check if graph is currently interrupted
+                snapshot: StateSnapshot = await self.graph.aget_state(
+                    config=custom_inputs
+                )
+                if is_interrupted(snapshot):
+                    logger.info("HITL: Graph interrupted, checking for user response")
 
-                    # Resume interrupted graph with decisions
-                    return await self.graph.ainvoke(
-                        Command(resume={"decisions": decisions}),
-                        context=context,
-                        config=custom_inputs,
+                    # Convert message dicts to BaseMessage objects
+                    message_objects: list[BaseMessage] = convert_openai_messages(
+                        messages
                     )
 
-                # Check if graph is currently interrupted (only if checkpointer is configured)
-                # aget_state requires a checkpointer
-                if self.graph.checkpointer:
-                    snapshot: StateSnapshot = await self.graph.aget_state(
-                        config=custom_inputs
+                    # Parse user's message with LLM to extract decisions
+                    parsed_result: dict[str, Any] = handle_interrupt_response(
+                        snapshot=snapshot,
+                        messages=message_objects,
+                        model=None,
                     )
-                    if is_interrupted(snapshot):
-                        logger.info(
-                            "HITL: Graph interrupted, checking for user response"
-                        )
 
-                        # Convert message dicts to BaseMessage objects
-                        message_objects: list[BaseMessage] = convert_openai_messages(
-                            messages
+                    if not parsed_result.get("is_valid", False):
+                        validation_message: str = parsed_result.get(
+                            "validation_message",
+                            "Your response was unclear. Please provide a clear decision for each action.",
                         )
-
-                        # Parse user's message with LLM to extract decisions
-                        parsed_result: dict[str, Any] = handle_interrupt_response(
-                            snapshot=snapshot,
-                            messages=message_objects,
-                            model=None,  # Uses default model
+                        logger.warning(
+                            "HITL: Invalid response from user",
+                            validation_message=validation_message,
                         )
 
-                        # Check if the response was valid
-                        if not parsed_result.get("is_valid", False):
-                            validation_message: str = parsed_result.get(
-                                "validation_message",
-                                "Your response was unclear. Please provide a clear decision for each action.",
-                            )
-                            logger.warning(
-                                "HITL: Invalid response from user",
-                                validation_message=validation_message,
-                            )
-
-                            # Return error message to user instead of resuming
-                            # Don't resume the graph - stay interrupted so user can try again
-                            return {
-                                "messages": [
-                                    AIMessage(
-                                        content=f"❌ **Invalid Response**\n\n{validation_message}"
-                                    )
-                                ]
-                            }
-
-                        decisions: list[Decision] = parsed_result.get("decisions", [])
+                        # Return error message without resuming
+                        response = {
+                            "messages": [
+                                AIMessage(
+                                    content=f"❌ **Invalid Response**\n\n{validation_message}"
+                                )
+                            ]
+                        }
+                    else:
+                        decisions = parsed_result.get("decisions", [])
                         logger.info(
                             "HITL: LLM parsed valid decisions from user message",
                             decisions_count=len(decisions),
                         )
 
                         # Resume interrupted graph with parsed decisions
-                        return await self.graph.ainvoke(
+                        response = await self.graph.ainvoke(
                             Command(resume={"decisions": decisions}),
                             context=context,
                             config=custom_inputs,
                         )
+                else:
+                    # Normal invocation
+                    graph_input: dict[str, Any] = {"messages": messages}
+                    if "genie_conversation_ids" in session_input:
+                        graph_input["genie_conversation_ids"] = session_input[
+                            "genie_conversation_ids"
+                        ]
 
-                # Normal invocation - build the graph input state
-                graph_input: dict[str, Any] = {"messages": messages}
+                    response = await self.graph.ainvoke(
+                        graph_input, context=context, config=custom_inputs
+                    )
+            else:
+                # No checkpointer, use normal invocation
+                graph_input = {"messages": messages}
                 if "genie_conversation_ids" in session_input:
                     graph_input["genie_conversation_ids"] = session_input[
                         "genie_conversation_ids"
                     ]
-                    logger.trace(
-                        "Including genie conversation IDs in graph input",
-                        count=len(graph_input["genie_conversation_ids"]),
-                    )
 
-                return await self.graph.ainvoke(
+                response = await self.graph.ainvoke(
                     graph_input, context=context, config=custom_inputs
                 )
-            except Exception as e:
-                logger.error("Error in graph invocation", error=str(e))
-                raise
-
-        try:
-            loop = asyncio.get_event_loop()
-        except RuntimeError:
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-
-        try:
-            response: dict[str, Sequence[BaseMessage]] = loop.run_until_complete(
-                _async_invoke()
-            )
         except Exception as e:
-            logger.error("Error in async execution", error=str(e))
+            logger.error("Error in graph invocation", error=str(e))
             raise
 
         # Convert response to ResponsesAgent format
         last_message: BaseMessage = response["messages"][-1]
 
-        # Build custom_outputs that can be copy-pasted as next request's custom_inputs
-        custom_outputs: dict[str, Any] = self._build_custom_outputs(
+        # Build custom_outputs
+        custom_outputs: dict[str, Any] = await self._build_custom_outputs_async(
            context=context,
            thread_id=context.thread_id,
-            loop=loop,
         )
 
         # Handle structured_response if present
@@ -1037,25 +1026,19 @@ class LanggraphResponsesAgent(ResponsesAgent):
             response_type=type(structured_response).__name__,
         )
 
-        # Serialize to dict for JSON compatibility using type hints
         if isinstance(structured_response, BaseModel):
-            # Pydantic model
             serialized: dict[str, Any] = structured_response.model_dump()
         elif is_dataclass(structured_response):
-            # Dataclass
             serialized = asdict(structured_response)
         elif isinstance(structured_response, dict):
-            # Already a dict
             serialized = structured_response
         else:
-            # Unknown type, convert to dict if possible
             serialized = (
                 dict(structured_response)
                 if hasattr(structured_response, "__dict__")
                 else structured_response
             )
 
-        # Place structured output in message content as JSON
         import json
 
         structured_text: str = json.dumps(serialized, indent=2)
@@ -1064,22 +1047,18 @@ class LanggraphResponsesAgent(ResponsesAgent):
             )
             logger.trace("Structured response placed in message content")
         else:
-            # No structured response, use text content
             output_item = self.create_text_output_item(
                 text=last_message.content, id=f"msg_{uuid.uuid4().hex[:8]}"
             )
 
-        # Include interrupt structure if HITL occurred (following LangChain pattern)
+        # Include interrupt structure if HITL occurred
         if "__interrupt__" in response:
             interrupts: list[Interrupt] = response["__interrupt__"]
             logger.info("HITL: Interrupts detected", interrupts_count=len(interrupts))
 
-            # Extract HITLRequest structures from interrupts (deduplicate by ID)
             seen_interrupt_ids: set[str] = set()
             interrupt_data: list[HITLRequest] = []
-            interrupt: Interrupt
             for interrupt in interrupts:
-                # Only process each unique interrupt once
                 if interrupt.id not in seen_interrupt_ids:
                     seen_interrupt_ids.add(interrupt.id)
                     interrupt_data.append(_extract_interrupt_value(interrupt))
@@ -1093,7 +1072,6 @@ class LanggraphResponsesAgent(ResponsesAgent):
                 interrupts_count=len(interrupt_data),
             )
 
-            # Add user-facing message about the pending actions
             action_message: str = _format_action_requests_message(interrupt_data)
             if action_message:
                 output_item = self.create_text_output_item(
@@ -1104,21 +1082,25 @@ class LanggraphResponsesAgent(ResponsesAgent):
             output=[output_item], custom_outputs=custom_outputs
         )
 
-    def predict_stream(
+    async def apredict_stream(
         self, request: ResponsesAgentRequest
-    ) -> Generator[ResponsesAgentStreamEvent, None, None]:
+    ) -> AsyncGenerator[ResponsesAgentStreamEvent, None]:
         """
+        Async version of predict_stream - primary implementation for Databricks Apps.
+
         Process a ResponsesAgentRequest and yield ResponsesAgentStreamEvent objects.
+        This method can be used directly with async for loops in async contexts.
 
-        Uses same input/output structure as predict() for consistency.
+        Uses same input/output structure as apredict() for consistency.
         Supports Human-in-the-Loop (HITL) interrupts.
         """
-        # Extract conversation_id for logging (from context or custom_inputs)
+        from langgraph.types import Command
+
+        # Extract conversation_id for logging
         conversation_id_for_log: str | None = None
         if request.context and hasattr(request.context, "conversation_id"):
             conversation_id_for_log = request.context.conversation_id
         elif request.custom_inputs:
-            # Check configurable or session for conversation_id
             if "configurable" in request.custom_inputs and isinstance(
                 request.custom_inputs["configurable"], dict
             ):
@@ -1135,7 +1117,7 @@ class LanggraphResponsesAgent(ResponsesAgent):
             )
 
         logger.debug(
-            "ResponsesAgent predict_stream called",
+            "ResponsesAgent apredict_stream called",
             conversation_id=conversation_id_for_log
             if conversation_id_for_log
             else "new",
@@ -1153,305 +1135,280 @@ class LanggraphResponsesAgent(ResponsesAgent):
         # Extract session state from request
         session_input: dict[str, Any] = self._extract_session_from_request(request)
 
-        # Use async astream internally for parallel execution
-        import asyncio
-
-        from langgraph.types import Command
-
-        async def _async_stream():
-            item_id: str = f"msg_{uuid.uuid4().hex[:8]}"
-            accumulated_content: str = ""
-            interrupt_data: list[HITLRequest] = []
-            seen_interrupt_ids: set[str] = set()  # Track processed interrupt IDs
-            structured_response: Any = None  # Track structured output from stream
+        item_id: str = f"msg_{uuid.uuid4().hex[:8]}"
+        accumulated_content: str = ""
+        interrupt_data: list[HITLRequest] = []
+        seen_interrupt_ids: set[str] = set()
+        structured_response: Any = None
 
-            try:
-                # Check if this is a resume request (HITL)
-                # Two ways to resume:
-                # 1. Explicit decisions in custom_inputs (structured)
-                # 2. Natural language message when graph is interrupted (LLM-parsed)
-
-                if request.custom_inputs and "decisions" in request.custom_inputs:
-                    # Explicit structured decisions
-                    decisions: list[Decision] = request.custom_inputs["decisions"]
+        try:
+            # Check if this is a resume request (HITL)
+            if request.custom_inputs and "decisions" in request.custom_inputs:
+                decisions: list[Decision] = request.custom_inputs["decisions"]
+                logger.info(
+                    "HITL: Resuming stream with explicit decisions",
+                    decisions_count=len(decisions),
+                )
+                stream_input: Command | dict[str, Any] = Command(
+                    resume={"decisions": decisions}
+                )
+            elif self.graph.checkpointer:
+                snapshot: StateSnapshot = await self.graph.aget_state(
+                    config=custom_inputs
+                )
+                if is_interrupted(snapshot):
                     logger.info(
-                        "HITL: Resuming stream with explicit decisions",
-                        decisions_count=len(decisions),
+                        "HITL: Graph interrupted, checking for user response in stream"
                     )
-                    stream_input: Command | dict[str, Any] = Command(
-                        resume={"decisions": decisions}
+
+                    message_objects: list[BaseMessage] = convert_openai_messages(
+                        messages
                     )
-                elif self.graph.checkpointer:
-                    # Check if graph is currently interrupted (only if checkpointer is configured)
-                    # aget_state requires a checkpointer
-                    snapshot: StateSnapshot = await self.graph.aget_state(
-                        config=custom_inputs
+
+                    parsed_result: dict[str, Any] = handle_interrupt_response(
+                        snapshot=snapshot,
+                        messages=message_objects,
+                        model=None,
                     )
-                    if is_interrupted(snapshot):
-                        logger.info(
-                            "HITL: Graph interrupted, checking for user response in stream"
-                        )
 
-                        # Convert message dicts to BaseMessage objects
-                        message_objects: list[BaseMessage] = convert_openai_messages(
-                            messages
+                    if not parsed_result.get("is_valid", False):
+                        validation_message: str = parsed_result.get(
+                            "validation_message",
+                            "Your response was unclear. Please provide a clear decision for each action.",
                         )
-
-                        # Parse user's message with LLM to extract decisions
-                        parsed_result: dict[str, Any] = handle_interrupt_response(
-                            snapshot=snapshot,
-                            messages=message_objects,
-                            model=None,  # Uses default model
+                        logger.warning(
+                            "HITL: Invalid response from user in stream",
+                            validation_message=validation_message,
                         )
 
-                        # Check if the response was valid
-                        if not parsed_result.get("is_valid", False):
-                            validation_message: str = parsed_result.get(
-                                "validation_message",
-                                "Your response was unclear. Please provide a clear decision for each action.",
-                            )
-                            logger.warning(
-                                "HITL: Invalid response from user in stream",
-                                validation_message=validation_message,
-                            )
-
-                            # Build custom_outputs before returning
-                            custom_outputs: dict[
-                                str, Any
-                            ] = await self._build_custom_outputs_async(
-                                context=context,
-                                thread_id=context.thread_id,
-                            )
-
-                            # Yield error message to user - don't resume graph
-                            error_message: str = (
-                                f"❌ **Invalid Response**\n\n{validation_message}"
-                            )
-                            accumulated_content = error_message
-                            yield ResponsesAgentStreamEvent(
-                                type="response.output_item.done",
-                                item=self.create_text_output_item(
-                                    text=error_message, id=item_id
-                                ),
-                                custom_outputs=custom_outputs,
-                            )
-                            return  # Don't resume - stay interrupted
-
-                        decisions: list[Decision] = parsed_result.get("decisions", [])
-                        logger.info(
-                            "HITL: LLM parsed valid decisions from user message in stream",
-                            decisions_count=len(decisions),
+                        custom_outputs: dict[
+                            str, Any
+                        ] = await self._build_custom_outputs_async(
+                            context=context,
+                            thread_id=context.thread_id,
                         )
 
-                        # Resume interrupted graph with parsed decisions
-                        stream_input: Command | dict[str, Any] = Command(
-                            resume={"decisions": decisions}
+                        error_message: str = (
+                            f"❌ **Invalid Response**\n\n{validation_message}"
                         )
-                    else:
-                        # Graph not interrupted, use normal invocation
-                        graph_input: dict[str, Any] = {"messages": messages}
-                        if "genie_conversation_ids" in session_input:
-                            graph_input["genie_conversation_ids"] = session_input[
-                                "genie_conversation_ids"
-                            ]
-                        stream_input: Command | dict[str, Any] = graph_input
+                        yield ResponsesAgentStreamEvent(
+                            type="response.output_item.done",
+                            item=self.create_text_output_item(
+                                text=error_message, id=item_id
+                            ),
+                            custom_outputs=custom_outputs,
+                        )
+                        return
+
+                    decisions = parsed_result.get("decisions", [])
+                    logger.info(
+                        "HITL: LLM parsed valid decisions from user message in stream",
+                        decisions_count=len(decisions),
+                    )
+
+                    stream_input = Command(resume={"decisions": decisions})
                 else:
-                    # No checkpointer, use normal invocation
                     graph_input: dict[str, Any] = {"messages": messages}
                     if "genie_conversation_ids" in session_input:
                         graph_input["genie_conversation_ids"] = session_input[
                             "genie_conversation_ids"
                         ]
-                    stream_input: Command | dict[str, Any] = graph_input
+                    stream_input = graph_input
+            else:
+                graph_input = {"messages": messages}
+                if "genie_conversation_ids" in session_input:
+                    graph_input["genie_conversation_ids"] = session_input[
+                        "genie_conversation_ids"
+                    ]
+                stream_input = graph_input
 
-                # Stream the graph execution with both messages and updates modes to capture interrupts
-                async for nodes, stream_mode, data in self.graph.astream(
-                    stream_input,
-                    context=context,
-                    config=custom_inputs,
-                    stream_mode=["messages", "updates"],
-                    subgraphs=True,
-                ):
-                    nodes: tuple[str, ...]
-                    stream_mode: str
-
-                    # Handle message streaming
-                    if stream_mode == "messages":
-                        messages_batch: Sequence[BaseMessage] = data
-                        message: BaseMessage
-                        for message in messages_batch:
-                            if (
-                                isinstance(
-                                    message,
-                                    (
-                                        AIMessageChunk,
-                                        AIMessage,
-                                    ),
-                                )
-                                and message.content
-                                and "summarization" not in nodes
-                            ):
-                                content: str = message.content
-                                accumulated_content += content
-
-                                # Yield streaming delta
-                                yield ResponsesAgentStreamEvent(
-                                    **self.create_text_delta(
-                                        delta=content, item_id=item_id
-                                    )
-                                )
+            # Stream the graph execution
+            async for nodes, stream_mode, data in self.graph.astream(
+                stream_input,
+                context=context,
+                config=custom_inputs,
+                stream_mode=["messages", "updates"],
+                subgraphs=True,
+            ):
+                nodes: tuple[str, ...]
+                stream_mode: str
 
-                    # Handle interrupts (HITL) and state updates
-                    elif stream_mode == "updates":
-                        updates: dict[str, Any] = data
-                        source: str
-                        update: Any
-                        for source, update in updates.items():
-                            if source == "__interrupt__":
-                                interrupts: list[Interrupt] = update
-                                logger.info(
-                                    "HITL: Interrupts detected during streaming",
-                                    interrupts_count=len(interrupts),
-                                )
+                if stream_mode == "messages":
+                    messages_batch: Sequence[BaseMessage] = data
+                    for message in messages_batch:
+                        if (
+                            isinstance(message, (AIMessageChunk, AIMessage))
+                            and message.content
+                            and "summarization" not in nodes
+                        ):
+                            content: str = message.content
+                            accumulated_content += content
 
-                                # Extract interrupt values (deduplicate by ID)
-                                interrupt: Interrupt
-                                for interrupt in interrupts:
-                                    # Only process each unique interrupt once
-                                    if interrupt.id not in seen_interrupt_ids:
-                                        seen_interrupt_ids.add(interrupt.id)
-                                        interrupt_data.append(
-                                            _extract_interrupt_value(interrupt)
-                                        )
-                                        logger.trace(
-                                            "HITL: Added interrupt to response",
-                                            interrupt_id=interrupt.id,
-                                        )
-                            elif (
-                                isinstance(update, dict)
-                                and "structured_response" in update
-                            ):
-                                # Capture structured_response from stream updates
-                                structured_response = update["structured_response"]
-                                logger.trace(
-                                    "Captured structured response from stream",
-                                    response_type=type(structured_response).__name__,
-                                )
+                            yield ResponsesAgentStreamEvent(
+                                **self.create_text_delta(delta=content, item_id=item_id)
+                            )
 
-                # Get final state to extract structured_response (only if checkpointer available)
-                if self.graph.checkpointer:
-                    final_state: StateSnapshot = await self.graph.aget_state(
-                        config=custom_inputs
-                    )
-                    # Extract structured_response from state if not already captured
-                    if (
-                        "structured_response" in final_state.values
-                        and not structured_response
-                    ):
-                        structured_response = final_state.values["structured_response"]
+                elif stream_mode == "updates":
+                    updates: dict[str, Any] = data
+                    for source, update in updates.items():
+                        if source == "__interrupt__":
+                            interrupts: list[Interrupt] = update
+                            logger.info(
+                                "HITL: Interrupts detected during streaming",
+                                interrupts_count=len(interrupts),
+                            )
 
-                # Build custom_outputs
-                custom_outputs: dict[str, Any] = await self._build_custom_outputs_async(
-                    context=context,
-                    thread_id=context.thread_id,
+                            for interrupt in interrupts:
+                                if interrupt.id not in seen_interrupt_ids:
+                                    seen_interrupt_ids.add(interrupt.id)
+                                    interrupt_data.append(
+                                        _extract_interrupt_value(interrupt)
+                                    )
+                                    logger.trace(
+                                        "HITL: Added interrupt to response",
+                                        interrupt_id=interrupt.id,
+                                    )
+                        elif (
+                            isinstance(update, dict) and "structured_response" in update
+                        ):
+                            structured_response = update["structured_response"]
+                            logger.trace(
+                                "Captured structured response from stream",
+                                response_type=type(structured_response).__name__,
+                            )
+
+            # Get final state if checkpointer available
+            if self.graph.checkpointer:
+                final_state: StateSnapshot = await self.graph.aget_state(
+                    config=custom_inputs
                 )
+                if (
+                    "structured_response" in final_state.values
+                    and not structured_response
+                ):
+                    structured_response = final_state.values["structured_response"]
+
+            # Build custom_outputs
+            custom_outputs = await self._build_custom_outputs_async(
+                context=context,
+                thread_id=context.thread_id,
+            )
 
-                # Handle structured_response in streaming if present
-                output_text: str = accumulated_content
-                if structured_response:
-                    from dataclasses import asdict, is_dataclass
+            # Handle structured_response in streaming
+            output_text: str = accumulated_content
+            if structured_response:
+                from dataclasses import asdict, is_dataclass
 
-                    from pydantic import BaseModel
+                from pydantic import BaseModel
 
-                    logger.trace(
-                        "Processing structured response in streaming",
-                        response_type=type(structured_response).__name__,
+                logger.trace(
+                    "Processing structured response in streaming",
+                    response_type=type(structured_response).__name__,
+                )
+
+                if isinstance(structured_response, BaseModel):
+                    serialized: dict[str, Any] = structured_response.model_dump()
+                elif is_dataclass(structured_response):
+                    serialized = asdict(structured_response)
+                elif isinstance(structured_response, dict):
+                    serialized = structured_response
+                else:
+                    serialized = (
+                        dict(structured_response)
+                        if hasattr(structured_response, "__dict__")
+                        else structured_response
                     )
 
-                    # Serialize to dict for JSON compatibility using type hints
-                    if isinstance(structured_response, BaseModel):
-                        serialized: dict[str, Any] = structured_response.model_dump()
-                    elif is_dataclass(structured_response):
-                        serialized = asdict(structured_response)
-                    elif isinstance(structured_response, dict):
-                        serialized = structured_response
-                    else:
-                        serialized = (
-                            dict(structured_response)
-                            if hasattr(structured_response, "__dict__")
-                            else structured_response
-                        )
+                import json
 
-                    # Place structured output in message content - stream as JSON
-                    import json
+                structured_text: str = json.dumps(serialized, indent=2)
 
-                    structured_text: str = json.dumps(serialized, indent=2)
+                if accumulated_content.strip():
+                    yield ResponsesAgentStreamEvent(
+                        **self.create_text_delta(delta="\n\n", item_id=item_id)
+                    )
+                    yield ResponsesAgentStreamEvent(
+                        **self.create_text_delta(delta=structured_text, item_id=item_id)
+                    )
+                    output_text = f"{accumulated_content}\n\n{structured_text}"
+                else:
+                    yield ResponsesAgentStreamEvent(
+                        **self.create_text_delta(delta=structured_text, item_id=item_id)
+                    )
+                    output_text = structured_text
 
-                    # If we streamed text, append structured; if no text, use structured only
-                    if accumulated_content.strip():
-                        # Stream separator and structured output
-                        yield ResponsesAgentStreamEvent(
-                            **self.create_text_delta(delta="\n\n", item_id=item_id)
-                        )
+                logger.trace("Streamed structured response in message content")
+
+            # Include interrupt structure if HITL occurred
+            if interrupt_data:
+                custom_outputs["interrupts"] = interrupt_data
+                logger.info(
+                    "HITL: Included interrupts in streaming response",
+                    interrupts_count=len(interrupt_data),
+                )
+
+                action_message = _format_action_requests_message(interrupt_data)
+                if action_message:
+                    if not accumulated_content:
+                        output_text = action_message
                         yield ResponsesAgentStreamEvent(
                             **self.create_text_delta(
-                                delta=structured_text, item_id=item_id
+                                delta=action_message, item_id=item_id
                             )
                         )
-                        output_text = f"{accumulated_content}\n\n{structured_text}"
                     else:
-                        # No text content, stream structured output
+                        output_text = f"{accumulated_content}\n\n{action_message}"
+                        yield ResponsesAgentStreamEvent(
+                            **self.create_text_delta(delta="\n\n", item_id=item_id)
+                        )
                         yield ResponsesAgentStreamEvent(
                             **self.create_text_delta(
-                                delta=structured_text, item_id=item_id
+                                delta=action_message, item_id=item_id
                             )
                         )
-                        output_text = structured_text
 
-                    logger.trace("Streamed structured response in message content")
+            # Yield final output item
+            yield ResponsesAgentStreamEvent(
+                type="response.output_item.done",
+                item=self.create_text_output_item(text=output_text, id=item_id),
+                custom_outputs=custom_outputs,
+            )
+        except Exception as e:
+            logger.error("Error in graph streaming", error=str(e))
+            raise
 
-                # Include interrupt structure if HITL occurred
-                if interrupt_data:
-                    custom_outputs["interrupts"] = interrupt_data
-                    logger.info(
-                        "HITL: Included interrupts in streaming response",
-                        interrupts_count=len(interrupt_data),
-                    )
+    def predict(self, request: ResponsesAgentRequest) -> ResponsesAgentResponse:
+        """
+        Synchronous wrapper for apredict().
 
-                    # Add user-facing message about the pending actions
-                    action_message = _format_action_requests_message(interrupt_data)
-                    if action_message:
-                        # If we haven't streamed any content yet, stream the action message
-                        if not accumulated_content:
-                            output_text = action_message
-                            # Stream the action message
-                            yield ResponsesAgentStreamEvent(
-                                **self.create_text_delta(
-                                    delta=action_message, item_id=item_id
-                                )
-                            )
-                        else:
-                            # Append action message after accumulated content
-                            output_text = f"{accumulated_content}\n\n{action_message}"
-                            # Stream the separator and action message
-                            yield ResponsesAgentStreamEvent(
-                                **self.create_text_delta(delta="\n\n", item_id=item_id)
-                            )
-                            yield ResponsesAgentStreamEvent(
-                                **self.create_text_delta(
-                                    delta=action_message, item_id=item_id
-                                )
-                            )
+        Process a ResponsesAgentRequest and return a ResponsesAgentResponse.
+        For async contexts (e.g., Databricks Apps), use apredict() directly.
 
-                # Yield final output item
-                yield ResponsesAgentStreamEvent(
-                    type="response.output_item.done",
-                    item=self.create_text_output_item(text=output_text, id=item_id),
-                    custom_outputs=custom_outputs,
-                )
-            except Exception as e:
-                logger.error("Error in graph streaming", error=str(e))
-                raise
+        Note: This method uses asyncio.run() internally, which will fail in contexts
+        where an event loop is already running (e.g., uvloop). For those cases,
+        use apredict() instead.
+        """
+        import asyncio
+
+        logger.debug("ResponsesAgent predict called (sync wrapper)")
+        return asyncio.run(self.apredict(request))
+
+    def predict_stream(
+        self, request: ResponsesAgentRequest
+    ) -> Generator[ResponsesAgentStreamEvent, None, None]:
+        """
+        Synchronous wrapper for apredict_stream().
+
+        Process a ResponsesAgentRequest and yield ResponsesAgentStreamEvent objects.
+        For async contexts (e.g., Databricks Apps), use apredict_stream() directly.
+
+        Note: This method converts the async generator to a sync generator using
+        event loop manipulation. For contexts where an event loop is already running
+        (e.g., uvloop), use apredict_stream() instead.
+        """
+        import asyncio
+
+        logger.debug("ResponsesAgent predict_stream called (sync wrapper)")
 
         # Convert async generator to sync generator
         try:
@@ -1460,7 +1417,7 @@ class LanggraphResponsesAgent(ResponsesAgent):
             loop = asyncio.new_event_loop()
             asyncio.set_event_loop(loop)
 
-        async_gen = _async_stream()
+        async_gen = self.apredict_stream(request)
 
         try:
             while True: