hdsp-jupyter-extension 2.0.27__py3-none-any.whl → 2.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. agent_server/config/__init__.py +5 -0
  2. agent_server/config/server_config.py +213 -0
  3. agent_server/context_providers/__init__.py +4 -2
  4. agent_server/context_providers/actions.py +73 -7
  5. agent_server/context_providers/file.py +23 -23
  6. agent_server/core/__init__.py +2 -2
  7. agent_server/core/llm_service.py +2 -3
  8. agent_server/langchain/__init__.py +2 -2
  9. agent_server/langchain/agent.py +18 -251
  10. agent_server/langchain/agent_factory.py +26 -4
  11. agent_server/langchain/agent_prompts/planner_prompt.py +22 -31
  12. agent_server/langchain/custom_middleware.py +268 -43
  13. agent_server/langchain/llm_factory.py +102 -54
  14. agent_server/langchain/logging_utils.py +1 -1
  15. agent_server/langchain/middleware/__init__.py +5 -0
  16. agent_server/langchain/middleware/content_injection_middleware.py +110 -0
  17. agent_server/langchain/middleware/subagent_events.py +88 -9
  18. agent_server/langchain/middleware/subagent_middleware.py +501 -245
  19. agent_server/langchain/prompts.py +5 -22
  20. agent_server/langchain/state_schema.py +44 -0
  21. agent_server/langchain/tools/jupyter_tools.py +4 -5
  22. agent_server/langchain/tools/tool_registry.py +6 -0
  23. agent_server/main.py +4 -4
  24. agent_server/routers/agent.py +2 -2
  25. agent_server/routers/chat.py +334 -28
  26. agent_server/routers/config.py +197 -11
  27. agent_server/routers/config_schema.py +254 -0
  28. agent_server/routers/context.py +31 -8
  29. agent_server/routers/langchain_agent.py +348 -209
  30. hdsp_agent_core/managers/config_manager.py +60 -11
  31. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  32. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
  33. hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js → hdsp_jupyter_extension-2.0.29.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.f2eca2f8fa682eb21f72.js +488 -25
  34. hdsp_jupyter_extension-2.0.29.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.f2eca2f8fa682eb21f72.js.map +1 -0
  35. jupyter_ext/labextension/static/lib_index_js.67505497667f9c0a763d.js → hdsp_jupyter_extension-2.0.29.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.cc0a7158a5e3de7f22f7.js +1327 -1054
  36. hdsp_jupyter_extension-2.0.29.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.cc0a7158a5e3de7f22f7.js.map +1 -0
  37. hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4ab73bb5068405670214.js → hdsp_jupyter_extension-2.0.29.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.bfff374b5cc6a57e16d2.js +3 -3
  38. jupyter_ext/labextension/static/remoteEntry.4ab73bb5068405670214.js.map → hdsp_jupyter_extension-2.0.29.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.bfff374b5cc6a57e16d2.js.map +1 -1
  39. {hdsp_jupyter_extension-2.0.27.dist-info → hdsp_jupyter_extension-2.0.29.dist-info}/METADATA +1 -1
  40. {hdsp_jupyter_extension-2.0.27.dist-info → hdsp_jupyter_extension-2.0.29.dist-info}/RECORD +71 -67
  41. jupyter_ext/_version.py +1 -1
  42. jupyter_ext/handlers.py +41 -0
  43. jupyter_ext/labextension/build_log.json +1 -1
  44. jupyter_ext/labextension/package.json +2 -2
  45. jupyter_ext/labextension/static/{frontend_styles_index_js.b5e4416b4e07ec087aad.js → frontend_styles_index_js.f2eca2f8fa682eb21f72.js} +488 -25
  46. jupyter_ext/labextension/static/frontend_styles_index_js.f2eca2f8fa682eb21f72.js.map +1 -0
  47. hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.67505497667f9c0a763d.js → jupyter_ext/labextension/static/lib_index_js.cc0a7158a5e3de7f22f7.js +1327 -1054
  48. jupyter_ext/labextension/static/lib_index_js.cc0a7158a5e3de7f22f7.js.map +1 -0
  49. jupyter_ext/labextension/static/{remoteEntry.4ab73bb5068405670214.js → remoteEntry.bfff374b5cc6a57e16d2.js} +3 -3
  50. hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4ab73bb5068405670214.js.map → jupyter_ext/labextension/static/remoteEntry.bfff374b5cc6a57e16d2.js.map +1 -1
  51. agent_server/langchain/middleware/description_injector.py +0 -150
  52. hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js.map +0 -1
  53. hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.67505497667f9c0a763d.js.map +0 -1
  54. jupyter_ext/labextension/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js.map +0 -1
  55. jupyter_ext/labextension/static/lib_index_js.67505497667f9c0a763d.js.map +0 -1
  56. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  57. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  58. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  59. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  60. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  61. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  62. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  63. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  64. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  65. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
  66. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
  67. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
  68. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
  69. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  70. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  71. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  72. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  73. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
  74. {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.29.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
  75. {hdsp_jupyter_extension-2.0.27.dist-info → hdsp_jupyter_extension-2.0.29.dist-info}/WHEEL +0 -0
  76. {hdsp_jupyter_extension-2.0.27.dist-info → hdsp_jupyter_extension-2.0.29.dist-info}/licenses/LICENSE +0 -0
agent_server/langchain/custom_middleware.py
@@ -12,7 +12,9 @@ import uuid
  from typing import Any, Dict, Optional
 
  from json_repair import repair_json
- from langchain_core.messages import AIMessage, HumanMessage
+ from langchain.agents.middleware import AgentMiddleware
+ from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
+ from langgraph.types import Command
 
  from agent_server.langchain.logging_utils import (
      _format_middleware_marker,
@@ -25,6 +27,92 @@ from agent_server.langchain.prompts import JSON_TOOL_SCHEMA, NON_HITL_TOOLS
  logger = logging.getLogger(__name__)
 
 
+ # ---------------------------------------------------------------------------
+ # TodoActiveMiddleware — manages todo_active state field
+ # ---------------------------------------------------------------------------
+
+
+ class TodoActiveMiddleware(AgentMiddleware):
+     """Middleware that manages the `todo_active` state field.
+
+     Intercepts write_todos and final_summary_tool calls to set/clear
+     the todo_active flag in LangGraph state via Command.
+
+     - write_todos called → todo_active = True
+     - final_summary_tool called → todo_active = False
+
+     This flag is checked by the handle_empty_response and continuation_control
+     middlewares to decide whether to force continuation or let the LLM
+     terminate naturally (for simple 1-2 step tasks).
+     """
+
+     def wrap_tool_call(self, request, handler):
+         """Intercept tool calls to manage todo_active state."""
+         result = handler(request)
+         tool_name = request.tool_call.get("name", "")
+
+         if tool_name == "write_todos":
+             return self._wrap_with_todo_active(request, result, active=True)
+         elif tool_name in ("final_summary_tool", "final_summary"):
+             return self._wrap_with_todo_active(request, result, active=False)
+
+         return result
+
+     def _wrap_with_todo_active(self, request, result, active: bool):
+         """Wrap tool result in a Command that updates todo_active state.
+
+         Handles two cases:
+         1. Result is already a Command (e.g., from TodoListMiddleware) → merge
+         2. Result is a ToolMessage → wrap in new Command
+         """
+         try:
+             if isinstance(result, Command):
+                 # Merge todo_active into existing Command's update dict
+                 existing_update = (
+                     result.update if hasattr(result, "update") and result.update else {}
+                 )
+                 merged_update = {**existing_update, "todo_active": active}
+                 logger.info(
+                     "[TodoActive] Merged todo_active=%s into Command for tool '%s'",
+                     active,
+                     request.tool_call.get("name", ""),
+                 )
+                 return Command(update=merged_update)
+             elif isinstance(result, ToolMessage):
+                 # Wrap ToolMessage in a new Command
+                 logger.info(
+                     "[TodoActive] Wrapped ToolMessage in Command with todo_active=%s for tool '%s'",
+                     active,
+                     request.tool_call.get("name", ""),
+                 )
+                 return Command(
+                     update={
+                         "todo_active": active,
+                         "messages": [result],
+                     }
+                 )
+             else:
+                 # Unknown result type — wrap as ToolMessage
+                 tool_call_id = request.tool_call.get("id", "")
+                 content = str(result) if result else ""
+                 logger.info(
+                     "[TodoActive] Wrapped unknown result type (%s) in Command with todo_active=%s",
+                     type(result).__name__,
+                     active,
+                 )
+                 return Command(
+                     update={
+                         "todo_active": active,
+                         "messages": [
+                             ToolMessage(content=content, tool_call_id=tool_call_id)
+                         ],
+                     }
+                 )
+         except Exception as e:
+             logger.warning("[TodoActive] Failed to set todo_active=%s: %s", active, e)
+             return result
+
+
  def parse_json_tool_call(text) -> Optional[Dict[str, Any]]:
      """Parse JSON tool call from text response.
 
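The merge case in _wrap_with_todo_active above is the subtle one: write_todos is already handled by a todo middleware (TodoListMiddleware, per the docstring) that returns its own Command carrying the updated todos, so wrapping that result naively would drop its state update. A minimal standalone sketch of both wrapping cases, assuming only langgraph and langchain-core are installed (payload values are invented):

from langchain_core.messages import ToolMessage
from langgraph.types import Command

# Case 1: the tool already returned a Command (e.g. write_todos) - merge the
# todo_active flag into its existing update dict without losing "todos".
existing = Command(update={"todos": [{"content": "analyze data", "status": "in_progress"}]})
merged = Command(update={**(existing.update or {}), "todo_active": True})
assert merged.update["todos"] and merged.update["todo_active"] is True

# Case 2: the tool returned a plain ToolMessage - wrap it in a fresh Command
# so the message still reaches conversation state alongside the flag.
msg = ToolMessage(content="summary written", tool_call_id="call_123")
wrapped = Command(update={"todo_active": False, "messages": [msg]})
assert wrapped.update["messages"][0] is msg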
@@ -262,6 +350,31 @@ def create_handle_empty_response_middleware(wrap_model_call):
      def handle_empty_response(request, handler):
          max_retries = 2
 
+         # Guard: If final_summary_tool was already called, stop the agent immediately.
+         # This is independent of todo status (LLM may call final_summary before
+         # marking all todos as completed).
+         todo_active = request.state.get("todo_active", False)
+         if not todo_active:
+             messages = request.messages
+             # Find last REAL HumanMessage index
+             _last_human = -1
+             for _i, _msg in enumerate(messages):
+                 _mtype = getattr(_msg, "type", "") or type(_msg).__name__
+                 if _mtype in ("human", "HumanMessage"):
+                     _mcontent = getattr(_msg, "content", "") or ""
+                     if not _mcontent.startswith("[SYSTEM]"):
+                         _last_human = _i
+             _msgs_after = (
+                 messages[_last_human + 1 :] if _last_human >= 0 else messages[-10:]
+             )
+             for _msg in _msgs_after:
+                 _name = getattr(_msg, "name", "") or ""
+                 if _name in ("final_summary_tool", "final_summary"):
+                     logger.info(
+                         "final_summary_tool already executed and todo_active=False - stopping agent (no LLM call)"
+                     )
+                     return AIMessage(content="", tool_calls=[])
+
          # Check if all todos are completed - if so, return empty response to stop agent
          # Method 1: Check state.todos
          todos = request.state.get("todos", [])
@@ -297,8 +410,15 @@ def create_handle_empty_response_middleware(wrap_model_call):
              else messages[-10:]
          )
          for msg in messages_to_check:
+             # Check ToolMessage name for final_summary_tool
+             msg_name = getattr(msg, "name", "") or ""
+             if msg_name in ("final_summary_tool", "final_summary"):
+                 summary_exists = True
+                 break
              content = getattr(msg, "content", "") or ""
-             if '"summary"' in content and '"next_items"' in content:
+             if ('"summary"' in content and '"next_items"' in content) or (
+                 "'summary'" in content and "'next_items'" in content
+             ):
                  summary_exists = True
                  break
 
@@ -343,8 +463,15 @@ def create_handle_empty_response_middleware(wrap_model_call):
          messages = request.messages
          summary_exists = False
          for msg in messages[-15:]:
+             # Check ToolMessage name for final_summary_tool
+             msg_name = getattr(msg, "name", "") or ""
+             if msg_name in ("final_summary_tool", "final_summary"):
+                 summary_exists = True
+                 break
              msg_content = getattr(msg, "content", "") or ""
-             if '"summary"' in msg_content and '"next_items"' in msg_content:
+             if ('"summary"' in msg_content and '"next_items"' in msg_content) or (
+                 "'summary'" in msg_content and "'next_items'" in msg_content
+             ):
                  summary_exists = True
                  break
          if any(
@@ -583,6 +710,14 @@ def create_handle_empty_response_middleware(wrap_model_call):
 
          # Invalid response - retry with JSON schema prompt
          if response_message and attempt < max_retries:
+             # todo_active=False → LLM can terminate naturally (simple tasks)
+             todo_active = request.state.get("todo_active", False)
+             if not todo_active:
+                 logger.info(
+                     "todo_active=False - skipping retry, allowing LLM natural termination"
+                 )
+                 return response
+
              reason = "text-only" if has_content else "empty"
 
              json_prompt = _build_json_prompt(request, response_message, has_content)
@@ -776,23 +911,38 @@ def _build_json_prompt(request, response_message, has_content):
              f"Example: {example_json}"
          )
      elif not todos:
-         # No todos yet = new task starting, LLM must create todos or call a tool
-         # This happens when LLM returns empty response at the start of a new task
-         logger.info("No todos exist yet - forcing retry to create todos or call tool")
-         return (
-             f"{JSON_TOOL_SCHEMA}\n\n"
-             f"Your response was empty. You MUST call a tool to proceed.\n"
-             f"한국어로 응답하고, write_todos로 작업 목록을 만들거나 jupyter_cell_tool/read_file_tool을 호출하세요.\n"
-             f'Example: {{"tool": "write_todos", "arguments": {{"todos": [{{"content": "데이터 분석", "status": "in_progress"}}]}}}}'
+         # No todos = simple task (1-2 steps), don't force write_todos creation
+         # This was the DIRECT CAUSE of the simple-task infinite loop:
+         # LLM completes simple task → empty response → forced to create todos → loop
+         logger.info(
+             "No todos exist - simple task, skipping retry (no write_todos forcing)"
          )
+         return None  # Signal to skip retry — LLM terminates naturally
      else:
-         # Todos exist but all completed - ask for summary
-         logger.info("All todos completed but response empty - asking for summary")
+         # Todos exist but all completed
+         # Check if final_summary_tool was already called in message history
+         messages = getattr(request, "messages", [])
+         final_summary_already_called = any(
+             getattr(msg, "name", "") in ("final_summary_tool", "final_summary")
+             for msg in messages
+         )
+         if final_summary_already_called:
+             logger.info(
+                 "All todos completed and final_summary_tool already called - "
+                 "signaling skip (no more retries needed)"
+             )
+             return None  # Signal to skip retry and synthesize completion
+
+         logger.info(
+             "All todos completed but response empty - asking for final_summary_tool"
+         )
          return (
              f"{JSON_TOOL_SCHEMA}\n\n"
-             f"All tasks completed. Call markdown_tool to provide a summary in Korean.\n"
-             f"한국어로 작업 요약을 작성하세요.\n"
-             f'Example: {{"tool": "markdown_tool", "arguments": {{"content": "작업이 완료되었습니다."}}}}'
+             f"All tasks completed. Call final_summary_tool to provide a summary.\n"
+             f"final_summary_tool(summary='완료된 작업 요약', "
+             f"next_items=[{{'subject': '제목', 'description': '설명'}}, ...]) "
+             f"(next_items 3개 이상 필수).\n"
+             f"텍스트로 JSON을 출력하지 말고, 반드시 도구 호출로 실행하세요."
          )
 
 
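The return None signal above only works if the caller treats None as "stop retrying". The diff does not show that caller-side branch, but given the hunk at -583 above, the handling presumably looks roughly like this (hypothetical sketch; variable names are taken from the surrounding context):

json_prompt = _build_json_prompt(request, response_message, has_content)
if json_prompt is None:
    # None now means "simple task, or summary already delivered":
    # return the response as-is instead of injecting a retry prompt.
    return response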
@@ -1020,8 +1170,31 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
                      tool_call["args"], dict
                  ):
                      args = tool_call["args"]
-                     # Normalize list arguments to strings for str-typed params
+                     # Normalize non-string arguments for str-typed params
                      for key, value in args.items():
+                         # Convert dict to string/None for str-typed params
+                         # LLM sometimes sends {} instead of null for Optional[str]
+                         if key in string_params and isinstance(value, dict):
+                             if not value:  # Empty dict {}
+                                 logger.info(
+                                     "Converted empty dict to None for '%s' in tool '%s'",
+                                     key,
+                                     tool_name,
+                                 )
+                                 args[key] = None
+                             else:
+                                 # Non-empty dict → JSON string
+                                 json_str = json.dumps(
+                                     value, ensure_ascii=False
+                                 )
+                                 logger.info(
+                                     "Converted dict to JSON string for '%s' in tool '%s': %s",
+                                     key,
+                                     tool_name,
+                                     json_str[:100],
+                                 )
+                                 args[key] = json_str
+
                          if key in string_params and isinstance(value, list):
                              # Join list items into a single string
                              text_parts = []
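The dict handling above targets a specific failure mode: models emitting {} instead of null for Optional[str] parameters. The same conversion rules, extracted from the middleware plumbing into a standalone sketch (the helper name is illustrative, not part of the package):

import json

def normalize_str_arg(value):
    # Mirrors the diff's rules for str-typed params: empty dict -> None,
    # non-empty dict -> JSON string; other types fall through to the
    # list/str handling that follows in the middleware.
    if isinstance(value, dict):
        return None if not value else json.dumps(value, ensure_ascii=False)
    return value

assert normalize_str_arg({}) is None
assert normalize_str_arg({"path": "a.csv"}) == '{"path": "a.csv"}'
assert normalize_str_arg("already a string") == "already a string"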
@@ -1150,10 +1323,18 @@ def create_continuation_control_middleware(wrap_model_call):
              else messages[-15:]
          )
          for msg in messages_to_check:
+             # Check if this is a ToolMessage from final_summary_tool
+             msg_name = getattr(msg, "name", "") or ""
+             if msg_name in ("final_summary_tool", "final_summary"):
+                 return True
+
              msg_content = getattr(msg, "content", "") or ""
-             # Check for summary JSON
+             # Check for summary JSON (double quotes)
              if '"summary"' in msg_content and '"next_items"' in msg_content:
                  return True
+             # Check for summary Python str (single quotes from tool output)
+             if "'summary'" in msg_content and "'next_items'" in msg_content:
+                 return True
              # Check for markdown summary (common patterns)
              if any(
                  kw in msg_content
@@ -1203,6 +1384,24 @@ def create_continuation_control_middleware(wrap_model_call):
                  pass
 
          if tool_name in NON_HITL_TOOLS:
+             # GUARD: Skip forcing when final_summary_tool already ran
+             if tool_name in ("final_summary_tool", "final_summary"):
+                 logger.info(
+                     "final_summary_tool already executed - "
+                     "skipping continuation (preventing infinite loop)"
+                 )
+                 return handler(request)
+
+             # GUARD: todo_active=False → simple task, skip continuation
+             todo_active = request.state.get("todo_active", False)
+             if not todo_active:
+                 logger.info(
+                     "todo_active=False after tool '%s' - "
+                     "simple task, skipping continuation",
+                     tool_name,
+                 )
+                 return handler(request)
+
              todos = request.state.get("todos", [])
 
              last_real_human_idx = _find_last_real_human_idx(messages)
@@ -1237,36 +1436,60 @@ def create_continuation_control_middleware(wrap_model_call):
                      tool_name,
                  )
 
-             # Skip continuation injection for write_todos
-             # This prevents auto-continuation to next task after completing one
-             # Agent will decide next action based on its own reasoning
-             if tool_name == "write_todos":
+             # === State-based branching on whether todos exist ===
+             #
+             # (1) No todos → simple 1-2 step task → no continuation needed
+             # (2) Todos exist, some pending → steer toward the next task
+             # (3) Todos exist, all completed → prompt a final_summary_tool call
+             #
+             if not todos:
+                 # No todos in state → simple task (1-2 steps)
+                 # Don't inject any continuation — LLM finishes naturally.
                  logger.info(
-                     "Skipping continuation prompt after write_todos - "
-                     "agent decides next action (pending: %d)",
-                     len(pending_todos) if pending_todos else 0,
+                     "No todos in state after tool: %s - "
+                     "simple task, skipping continuation",
+                     tool_name,
                  )
-                 # Don't inject continuation - let agent naturally continue or stop
              elif pending_todos:
-                 pending_list = ", ".join(
-                     t.get("content", "")[:30] for t in pending_todos[:3]
-                 )
-                 continuation = (
-                     f"Tool '{tool_name}' completed. "
-                     f"Continue with pending tasks: {pending_list}. "
-                     f"Call jupyter_cell_tool or the next appropriate tool."
-                 )
-                 new_messages = list(messages) + [
-                     HumanMessage(content=f"[SYSTEM] {continuation}")
-                 ]
-                 request = request.override(messages=new_messages)
+                 # Todos exist with pending items → guide to next task
+                 if tool_name == "write_todos":
+                     # write_todos with pending items → agent manages its own flow
+                     logger.info(
+                         "write_todos with %d pending todos - "
+                         "agent manages own flow",
+                         len(pending_todos),
+                     )
+                 else:
+                     pending_list = ", ".join(
+                         t.get("content", "")[:30] for t in pending_todos[:3]
+                     )
+                     continuation = (
+                         f"Tool '{tool_name}' completed. "
+                         f"Continue with pending tasks: {pending_list}. "
+                         f"Call jupyter_cell_tool or the next appropriate tool."
+                     )
+                     new_messages = list(messages) + [
+                         HumanMessage(content=f"[SYSTEM] {continuation}")
+                     ]
+                     request = request.override(messages=new_messages)
              else:
+                 # All todos completed → prompt for final_summary_tool
+                 logger.info(
+                     "All %d todos completed after tool: %s - "
+                     "prompting for final_summary_tool",
+                     len(todos),
+                     tool_name,
+                 )
                  continuation = (
-                     f"Tool '{tool_name}' completed. "
-                     f"Create a todo list with write_todos if needed."
+                     "[SYSTEM] 모든 작업이 완료되었습니다. "
+                     "반드시 final_summary_tool을 호출하여 작업 요약과 다음 단계를 제시하세요. "
+                     "final_summary_tool(summary='완료된 작업 요약', "
+                     "next_items=[{'subject': '제목', 'description': '설명'}, ...]) "
+                     "(next_items 3개 이상 필수). "
+                     "텍스트로 JSON을 출력하지 말고, 반드시 도구 호출로 실행하세요."
                  )
                  new_messages = list(messages) + [
-                     HumanMessage(content=f"[SYSTEM] {continuation}")
+                     HumanMessage(content=continuation)
                  ]
                  request = request.override(messages=new_messages)
 
@@ -1287,8 +1510,10 @@ def create_continuation_control_middleware(wrap_model_call):
              if isinstance(p, (str, dict))
          )
 
-         # Check if content contains summary JSON pattern
-         has_summary_json = '"summary"' in content and '"next_items"' in content
+         # Check if content contains summary JSON pattern (double or single quotes)
+         has_summary_json = ('"summary"' in content and '"next_items"' in content) or (
+             "'summary'" in content and "'next_items'" in content
+         )
 
          if has_summary_json:
              tool_calls = getattr(response_message, "tool_calls", []) or []
agent_server/langchain/llm_factory.py
@@ -93,7 +93,6 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
      from langchain_openai import ChatOpenAI
 
      vllm_config = llm_config.get("vllm", {})
-     # User provides full base URL (e.g., https://openrouter.ai/api/v1)
      endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
      model = vllm_config.get("model", "default")
      api_key = vllm_config.get("apiKey", "dummy")
@@ -140,9 +139,11 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
 
 
  def create_summarization_llm(llm_config: Dict[str, Any]):
-     """Create LLM for summarization middleware.
+     """Create LLM for summarization middleware and /compact feature.
 
-     Uses the same provider as the main LLM but with simpler configuration.
+     Priority:
+     1. If llm_config["summarization"]["enabled"] is True, use that config
+     2. Otherwise, fall back to main provider with default summarization model
 
      Args:
          llm_config: Configuration dictionary
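A hypothetical llm_config illustrating both branches of this priority order (the key names "summarization", "enabled", "provider", "model", and "apiKey" come from the code below; all values are placeholders):

llm_config = {
    "provider": "vllm",  # main provider
    "vllm": {
        "endpoint": "http://localhost:8000/v1",
        "model": "default",
        "apiKey": "dummy",
    },
    "gemini": {"apiKey": "PLACEHOLDER_KEY"},
    "summarization": {
        "enabled": True,       # when True, this block takes priority
        "provider": "gemini",  # may differ from the main provider
        "model": "gemini-2.5-flash",
    },
}
# enabled=True  -> _create_llm_for_provider(llm_config, "gemini", "gemini-2.5-flash", for_summarization=True)
# enabled absent/False -> _create_llm_for_provider(llm_config, "vllm", None, for_summarization=True)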
@@ -150,60 +151,107 @@ def create_summarization_llm(llm_config: Dict[str, Any]):
      Returns:
          LLM instance suitable for summarization, or None if unavailable
      """
-     provider = llm_config.get("provider", "gemini")
-
      try:
-         if provider == "gemini":
-             from langchain_google_genai import ChatGoogleGenerativeAI
-
-             gemini_config = llm_config.get("gemini", {})
-             api_key = gemini_config.get("apiKey")
-             if api_key:
-                 return ChatGoogleGenerativeAI(
-                     model="gemini-2.5-flash",
-                     google_api_key=api_key,
-                     temperature=0.0,
-                 )
-         elif provider == "openai":
-             from langchain_openai import ChatOpenAI
-
-             openai_config = llm_config.get("openai", {})
-             api_key = openai_config.get("apiKey")
-             if api_key:
-                 return ChatOpenAI(
-                     model="gpt-4o-mini",
-                     api_key=api_key,
-                     temperature=0.0,
-                 )
-         elif provider == "vllm":
-             vllm_config = llm_config.get("vllm", {})
-             # User provides full base URL (e.g., https://openrouter.ai/api/v1)
-             endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
-             model = vllm_config.get("model", "default")
-             api_key = vllm_config.get("apiKey", "dummy")
-
-             # Use ChatGPTOSS for gpt-oss models (but not via OpenRouter)
-             is_openrouter = "openrouter" in endpoint.lower()
-             if "gpt-oss" in model.lower() and not is_openrouter:
-                 from agent_server.langchain.models import ChatGPTOSS
-
-                 return ChatGPTOSS(
-                     model=model,
-                     base_url=endpoint,
-                     api_key=api_key,
-                     temperature=0.0,
-                 )
-
-             from langchain_openai import ChatOpenAI
-
-             return ChatOpenAI(
-                 model=model,
-                 api_key=api_key,
-                 base_url=endpoint,  # Use endpoint as-is
-                 temperature=0.0,
+         # 1. Check for dedicated summarization config
+         summarization_config = llm_config.get("summarization", {})
+         if summarization_config.get("enabled"):
+             sum_provider = summarization_config.get("provider", "gemini")
+             sum_model = summarization_config.get("model")
+             logger.info(
+                 f"Using dedicated summarization LLM: provider={sum_provider}, model={sum_model or 'default'}"
+             )
+             return _create_llm_for_provider(
+                 llm_config, sum_provider, sum_model, for_summarization=True
              )
+
+         # 2. Fall back to main provider with default summarization model
+         provider = llm_config.get("provider", "gemini")
+         logger.info(f"Using main provider for summarization: {provider}")
+         return _create_llm_for_provider(
+             llm_config, provider, None, for_summarization=True
+         )
+
      except Exception as e:
          logger.warning(f"Failed to create summarization LLM: {e}")
          return None
 
-     return None
+
+ def _create_llm_for_provider(
+     llm_config: Dict[str, Any],
+     provider: str,
+     model_override: str = None,
+     for_summarization: bool = False,
+ ):
+     """Create LLM instance for a specific provider.
+
+     Args:
+         llm_config: Full configuration dictionary (for credentials)
+         provider: Provider to use ('gemini', 'openai', 'vllm')
+         model_override: Optional model name override
+         for_summarization: If True, use lightweight default models
+
+     Returns:
+         LLM instance or None
+     """
+     if provider == "gemini":
+         from langchain_google_genai import ChatGoogleGenerativeAI
+
+         gemini_config = llm_config.get("gemini", {})
+         api_key = gemini_config.get("apiKey")
+         if not api_key:
+             logger.warning("No Gemini API key found")
+             return None
+
+         model = model_override or ("gemini-2.5-flash" if for_summarization else gemini_config.get("model", "gemini-2.5-flash"))
+         return ChatGoogleGenerativeAI(
+             model=model,
+             google_api_key=api_key,
+             temperature=0.0,
+         )
+
+     elif provider == "openai":
+         from langchain_openai import ChatOpenAI
+
+         openai_config = llm_config.get("openai", {})
+         api_key = openai_config.get("apiKey")
+         if not api_key:
+             logger.warning("No OpenAI API key found")
+             return None
+
+         model = model_override or ("gpt-4o-mini" if for_summarization else openai_config.get("model", "gpt-4"))
+         return ChatOpenAI(
+             model=model,
+             api_key=api_key,
+             temperature=0.0,
+         )
+
+     elif provider == "vllm":
+         vllm_config = llm_config.get("vllm", {})
+         endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
+         api_key = vllm_config.get("apiKey", "dummy")
+         model = model_override or vllm_config.get("model", "default")
+
+         # Use ChatGPTOSS for gpt-oss models (but not via OpenRouter)
+         is_openrouter = "openrouter" in endpoint.lower()
+         if "gpt-oss" in model.lower() and not is_openrouter:
+             from agent_server.langchain.models import ChatGPTOSS
+
+             return ChatGPTOSS(
+                 model=model,
+                 base_url=endpoint,
+                 api_key=api_key,
+                 temperature=0.0,
+             )
+
+         from langchain_openai import ChatOpenAI
+
+         return ChatOpenAI(
+             model=model,
+             api_key=api_key,
+             base_url=endpoint,
+             temperature=0.0,
+         )
+
+     else:
+         logger.warning(f"Unknown provider: {provider}")
+         return None
agent_server/langchain/logging_utils.py
@@ -37,7 +37,7 @@ def disable_langchain_logging():
 
 
  # Auto-disable on import (comment this line to re-enable all logs)
- disable_langchain_logging()
+ # disable_langchain_logging()  # TEMPORARILY ENABLED FOR DEBUGGING
 
  LOG_SEPARATOR = "=" * 96
  LOG_SUBSECTION = "-" * 96
agent_server/langchain/middleware/__init__.py
@@ -3,10 +3,14 @@ Middleware Module
 
  Custom middleware for the multi-agent architecture:
  - SubAgentMiddleware: Handles subagent delegation via task tool
+ - ContentInjectionMiddleware: Injects generated code/SQL into tool args
  - SkillMiddleware: Progressive skill loading for code generation agents
  - Existing middleware from custom_middleware.py is also available
  """
 
+ from agent_server.langchain.middleware.content_injection_middleware import (
+     ContentInjectionMiddleware,
+ )
  from agent_server.langchain.middleware.skill_middleware import (
      SkillMiddleware,
      get_skill_middleware,
@@ -18,6 +22,7 @@ from agent_server.langchain.middleware.subagent_middleware import (
 
  __all__ = [
      "SubAgentMiddleware",
+     "ContentInjectionMiddleware",
      "create_task_tool",
      "SkillMiddleware",
      "get_skill_middleware",