hdsp-jupyter-extension 2.0.11__py3-none-any.whl → 2.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. agent_server/langchain/MULTI_AGENT_ARCHITECTURE.md +1114 -0
  2. agent_server/langchain/__init__.py +2 -2
  3. agent_server/langchain/agent.py +72 -33
  4. agent_server/langchain/agent_factory.py +400 -0
  5. agent_server/langchain/agent_prompts/__init__.py +25 -0
  6. agent_server/langchain/agent_prompts/athena_query_prompt.py +71 -0
  7. agent_server/langchain/agent_prompts/planner_prompt.py +85 -0
  8. agent_server/langchain/agent_prompts/python_developer_prompt.py +123 -0
  9. agent_server/langchain/agent_prompts/researcher_prompt.py +38 -0
  10. agent_server/langchain/custom_middleware.py +652 -195
  11. agent_server/langchain/hitl_config.py +34 -10
  12. agent_server/langchain/middleware/__init__.py +24 -0
  13. agent_server/langchain/middleware/code_history_middleware.py +412 -0
  14. agent_server/langchain/middleware/description_injector.py +150 -0
  15. agent_server/langchain/middleware/skill_middleware.py +298 -0
  16. agent_server/langchain/middleware/subagent_events.py +171 -0
  17. agent_server/langchain/middleware/subagent_middleware.py +329 -0
  18. agent_server/langchain/prompts.py +96 -101
  19. agent_server/langchain/skills/data_analysis.md +236 -0
  20. agent_server/langchain/skills/data_loading.md +158 -0
  21. agent_server/langchain/skills/inference.md +392 -0
  22. agent_server/langchain/skills/model_training.md +318 -0
  23. agent_server/langchain/skills/pyspark.md +352 -0
  24. agent_server/langchain/subagents/__init__.py +20 -0
  25. agent_server/langchain/subagents/base.py +173 -0
  26. agent_server/langchain/tools/__init__.py +3 -0
  27. agent_server/langchain/tools/jupyter_tools.py +58 -20
  28. agent_server/langchain/tools/lsp_tools.py +1 -1
  29. agent_server/langchain/tools/shared/__init__.py +26 -0
  30. agent_server/langchain/tools/shared/qdrant_search.py +175 -0
  31. agent_server/langchain/tools/tool_registry.py +219 -0
  32. agent_server/langchain/tools/workspace_tools.py +197 -0
  33. agent_server/routers/config.py +40 -1
  34. agent_server/routers/langchain_agent.py +818 -337
  35. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  36. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +7 -2
  37. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js +1108 -179
  38. hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +1 -0
  39. jupyter_ext/labextension/static/lib_index_js.58c1e128ba0b76f41f04.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.5449ba3c7e25177d2987.js +3916 -8128
  40. hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.5449ba3c7e25177d2987.js.map +1 -0
  41. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.9da31d1134a53b0c4af5.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.a8e0b064eb9b1c1ff463.js +17 -17
  42. hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.a8e0b064eb9b1c1ff463.js.map +1 -0
  43. {hdsp_jupyter_extension-2.0.11.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/METADATA +1 -1
  44. {hdsp_jupyter_extension-2.0.11.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/RECORD +75 -51
  45. jupyter_ext/_version.py +1 -1
  46. jupyter_ext/handlers.py +59 -8
  47. jupyter_ext/labextension/build_log.json +1 -1
  48. jupyter_ext/labextension/package.json +7 -2
  49. jupyter_ext/labextension/static/{frontend_styles_index_js.2d9fb488c82498c45c2d.js → frontend_styles_index_js.037b3c8e5d6a92b63b16.js} +1108 -179
  50. jupyter_ext/labextension/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +1 -0
  51. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.58c1e128ba0b76f41f04.js → jupyter_ext/labextension/static/lib_index_js.5449ba3c7e25177d2987.js +3916 -8128
  52. jupyter_ext/labextension/static/lib_index_js.5449ba3c7e25177d2987.js.map +1 -0
  53. jupyter_ext/labextension/static/{remoteEntry.9da31d1134a53b0c4af5.js → remoteEntry.a8e0b064eb9b1c1ff463.js} +17 -17
  54. jupyter_ext/labextension/static/remoteEntry.a8e0b064eb9b1c1ff463.js.map +1 -0
  55. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -1
  56. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.58c1e128ba0b76f41f04.js.map +0 -1
  57. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.9da31d1134a53b0c4af5.js.map +0 -1
  58. jupyter_ext/labextension/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -1
  59. jupyter_ext/labextension/static/lib_index_js.58c1e128ba0b76f41f04.js.map +0 -1
  60. jupyter_ext/labextension/static/remoteEntry.9da31d1134a53b0c4af5.js.map +0 -1
  61. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  62. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  63. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  64. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  65. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  66. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  67. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  68. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  69. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  70. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
  71. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
  72. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
  73. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
  74. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  75. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  76. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  77. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  78. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
  79. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
  80. {hdsp_jupyter_extension-2.0.11.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/WHEEL +0 -0
  81. {hdsp_jupyter_extension-2.0.11.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/licenses/LICENSE +0 -0
@@ -11,10 +11,13 @@ import re
11
11
  import uuid
12
12
  from typing import Any, Dict, Optional
13
13
 
14
+ from json_repair import repair_json
14
15
  from langchain_core.messages import AIMessage, HumanMessage
15
16
 
16
17
  from agent_server.langchain.logging_utils import (
17
18
  _format_middleware_marker,
19
+ _pretty_json,
20
+ _serialize_message,
18
21
  _with_middleware_logging,
19
22
  )
20
23
  from agent_server.langchain.prompts import JSON_TOOL_SCHEMA, NON_HITL_TOOLS
@@ -75,6 +78,15 @@ def parse_json_tool_call(text) -> Optional[Dict[str, Any]]:
75
78
  except json.JSONDecodeError:
76
79
  pass
77
80
 
81
+ # Try json-repair for malformed JSON from LLMs
82
+ try:
83
+ repaired = repair_json(text, return_objects=True)
84
+ if isinstance(repaired, dict) and "tool" in repaired:
85
+ logger.info(f"Repaired malformed JSON tool call: {repaired.get('tool')}")
86
+ return repaired
87
+ except Exception as e:
88
+ logger.debug(f"json-repair failed: {e}")
89
+
78
90
  return None
79
91
 
80
92
 
@@ -83,15 +95,110 @@ def normalize_tool_name(tool_name: str) -> str:
83
95
 
84
96
  Rules:
85
97
  - write_todos_tool → write_todos (TodoListMiddleware exception)
98
+ - task → task_tool (SubAgentMiddleware uses task_tool)
86
99
  - other tools without _tool suffix → add _tool suffix
87
100
  """
88
101
  if tool_name == "write_todos_tool":
89
102
  return "write_todos"
90
- if not tool_name.endswith("_tool") and tool_name != "write_todos":
103
+ if tool_name == "task":
104
+ return "task_tool"
105
+ if not tool_name.endswith("_tool") and tool_name not in ("write_todos",):
91
106
  return f"{tool_name}_tool"
92
107
  return tool_name
93
108
 
94
109
 
110
def repair_tool_call_arguments(arguments: str) -> Optional[Dict[str, Any]]:
    """Best-effort parse of an LLM tool-call ``arguments`` string.

    Some models (e.g. gpt-oss-120b) emit argument payloads that are not
    valid JSON (missing leading '{', trailing commas, etc.). Strict
    parsing is attempted first; on failure the payload is handed to
    ``json_repair.repair_json``.

    Args:
        arguments: Raw arguments string emitted by the model.

    Returns:
        The parsed dictionary, or None when neither strict parsing nor
        repair produced a usable result.
    """
    if not isinstance(arguments, str) or not arguments:
        return None

    raw = arguments.strip()

    # Fast path: payload is already valid JSON.
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        pass

    # Slow path: let json-repair fix up the malformed payload.
    try:
        fixed = repair_json(raw, return_objects=True)
    except Exception as e:
        logger.debug(f"json-repair failed for arguments: {e}")
        return None

    if isinstance(fixed, dict):
        logger.info("Repaired malformed tool arguments")
        return fixed

    return None
143
+
144
+
145
def try_extract_tool_calls_from_additional_kwargs(
    response_message,
) -> Optional[AIMessage]:
    """Recover tool calls stashed in ``additional_kwargs`` by some LLMs.

    Certain models place their tool invocations under
    ``additional_kwargs["tool_calls"]`` with malformed argument JSON
    instead of populating ``tool_calls`` directly. Each entry is repaired
    via ``repair_tool_call_arguments`` and re-emitted as a proper
    LangChain-style tool call.

    Args:
        response_message: AIMessage whose ``additional_kwargs`` may hold
            raw tool calls; falsy values are ignored.

    Returns:
        A new AIMessage carrying the repaired tool_calls, or None when
        nothing could be recovered.
    """
    if not response_message:
        return None

    extras = getattr(response_message, "additional_kwargs", {})
    raw_calls = extras.get("tool_calls", [])
    if not raw_calls:
        return None

    recovered = []
    for raw in raw_calls:
        spec = raw.get("function", {})
        tool_name = spec.get("name")
        if not tool_name:
            continue

        # Attempt to repair the (possibly malformed) arguments payload.
        parsed_args = repair_tool_call_arguments(spec.get("arguments", ""))
        if parsed_args is None:
            continue

        recovered.append(
            {
                "name": normalize_tool_name(tool_name),
                "args": parsed_args,
                "id": raw.get("id", str(uuid.uuid4())),
                "type": "tool_call",
            }
        )

    if not recovered:
        return None

    logger.info(
        f"Extracted {len(recovered)} tool calls from additional_kwargs"
    )
    return AIMessage(
        content=getattr(response_message, "content", "") or "",
        tool_calls=recovered,
    )
200
+
201
+
95
202
  def create_tool_call_message(tool_name: str, arguments: Dict[str, Any]) -> AIMessage:
96
203
  """Create AIMessage with tool_calls from parsed JSON.
97
204
 
@@ -125,7 +232,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
125
232
  1. Detects empty or text-only responses (no tool_calls)
126
233
  2. Retries with JSON schema prompt to force structured output
127
234
  3. Parses JSON response and injects tool_calls into AIMessage
128
- 4. Falls back to synthetic final_answer if all else fails
235
+ 4. Falls back to synthetic write_todos completion if all else fails
129
236
 
130
237
  Args:
131
238
  wrap_model_call: LangChain's wrap_model_call decorator
@@ -142,65 +249,129 @@ def create_handle_empty_response_middleware(wrap_model_call):
142
249
  # Check if all todos are completed - if so, return empty response to stop agent
143
250
  # Method 1: Check state.todos
144
251
  todos = request.state.get("todos", [])
252
+ logger.debug(
253
+ "handle_empty_response: state.todos=%s",
254
+ json.dumps(todos, ensure_ascii=False) if todos else "[]",
255
+ )
145
256
  if todos:
146
257
  pending_todos = [
147
258
  t for t in todos if t.get("status") in ("pending", "in_progress")
148
259
  ]
149
260
  if not pending_todos:
150
- logger.info(
151
- "All %d todos completed (from state) - stopping agent (no LLM call)",
152
- len(todos),
261
+ # Check if summary already exists AFTER the last REAL HumanMessage
262
+ # (to avoid false positives from previous task summaries)
263
+ # Note: Skip system-injected messages like "[SYSTEM] Tool completed..."
264
+ summary_exists = False
265
+ messages = request.messages
266
+
267
+ # Find index of last REAL HumanMessage (not system-injected)
268
+ last_human_idx = -1
269
+ for i, msg in enumerate(messages):
270
+ msg_type = getattr(msg, "type", "") or type(msg).__name__
271
+ if msg_type in ("human", "HumanMessage"):
272
+ msg_content = getattr(msg, "content", "") or ""
273
+ # Skip system-injected messages
274
+ if not msg_content.startswith("[SYSTEM]"):
275
+ last_human_idx = i
276
+
277
+ # Only check messages after last REAL HumanMessage for summary
278
+ messages_to_check = (
279
+ messages[last_human_idx + 1 :]
280
+ if last_human_idx >= 0
281
+ else messages[-10:]
153
282
  )
154
- return AIMessage(content="", tool_calls=[])
283
+ for msg in messages_to_check:
284
+ content = getattr(msg, "content", "") or ""
285
+ if '"summary"' in content and '"next_items"' in content:
286
+ summary_exists = True
287
+ break
288
+
289
+ if summary_exists:
290
+ logger.info(
291
+ "All %d todos completed and summary exists after last user message - stopping agent (no LLM call)",
292
+ len(todos),
293
+ )
294
+ return AIMessage(content="", tool_calls=[])
295
+ else:
296
+ # Allow one more LLM call for summary generation
297
+ logger.info(
298
+ "All %d todos completed but no summary yet after last user message - allowing LLM call for summary",
299
+ len(todos),
300
+ )
155
301
 
156
302
  # Method 2: Check last message if it's a write_todos ToolMessage with all completed
157
- messages = request.messages
158
- if messages:
159
- last_msg = messages[-1]
160
- if getattr(last_msg, "type", "") == "tool":
161
- tool_name = getattr(last_msg, "name", "") or ""
162
- content = getattr(last_msg, "content", "") or ""
163
- if tool_name == "write_todos" or "Updated todo list to" in content:
164
- # Extract todos from ToolMessage content
165
- try:
166
- import ast
167
- if "Updated todo list to" in content:
168
- list_text = content.split("Updated todo list to", 1)[1].strip()
169
- todos_from_msg = ast.literal_eval(list_text)
170
- if isinstance(todos_from_msg, list) and len(todos_from_msg) > 0:
171
- pending = [t for t in todos_from_msg if t.get("status") in ("pending", "in_progress")]
172
- if not pending:
173
- logger.info(
174
- "All %d todos completed (from ToolMessage) - stopping agent (no LLM call)",
175
- len(todos_from_msg),
176
- )
177
- return AIMessage(content="", tool_calls=[])
178
- except Exception as e:
179
- logger.debug("Failed to parse todos from ToolMessage: %s", e)
303
+ # Note: We now allow one more LLM call for summary generation when all todos are completed
304
+ # This check is skipped to let the agent produce a summary
305
+
306
+ # Check if summary todo is completed
307
+ # IMPORTANT: Only consider summary completed if it's the LAST todo item and ALL todos are done
308
+ # This prevents false positives when a previous summary is completed but new tasks are added
309
+ all_todos_completed = all(t.get("status") == "completed" for t in todos)
310
+ last_todo_is_summary = (
311
+ len(todos) > 0
312
+ and "작업 요약" in todos[-1].get("content", "")
313
+ and "다음 단계" in todos[-1].get("content", "")
314
+ and todos[-1].get("status") == "completed"
315
+ )
316
+ summary_todo_completed = all_todos_completed and last_todo_is_summary
317
+
318
+ if not summary_todo_completed and any(
319
+ t.get("status") == "completed"
320
+ and "작업 요약" in t.get("content", "")
321
+ for t in todos
322
+ ):
323
+ logger.debug(
324
+ "Previous summary todo completed but new tasks exist - NOT treating as final summary"
325
+ )
180
326
 
181
- # Check if last message is final_answer_tool result - if so, don't retry/synthesize
182
- # This allows agent to naturally terminate after final_answer_tool
327
+ # Check if summary content exists in messages
183
328
  messages = request.messages
184
- if messages:
185
- last_msg = messages[-1]
186
- if getattr(last_msg, "type", "") == "tool":
187
- tool_name = getattr(last_msg, "name", "") or ""
188
- if not tool_name:
189
- try:
190
- content_json = json.loads(last_msg.content)
191
- tool_name = content_json.get("tool", "")
192
- except (json.JSONDecodeError, TypeError, AttributeError):
193
- pass
194
- if tool_name in ("final_answer_tool", "final_answer"):
195
- logger.info(
196
- "Last message is final_answer_tool result - allowing natural termination"
197
- )
198
- # Just call handler and return response as-is (no retry/synthesize)
199
- return handler(request)
329
+ summary_exists = False
330
+ for msg in messages[-15:]:
331
+ msg_content = getattr(msg, "content", "") or ""
332
+ if '"summary"' in msg_content and '"next_items"' in msg_content:
333
+ summary_exists = True
334
+ break
335
+ if any(
336
+ kw in msg_content
337
+ for kw in [
338
+ "다음 단계 제안",
339
+ "다음 단계:",
340
+ "### 다음 단계",
341
+ "## 다음 단계",
342
+ "**다음 단계**",
343
+ "모든 작업이 완료",
344
+ "**작업 요약**",
345
+ "### 작업 요약",
346
+ "## 작업 요약",
347
+ ]
348
+ ):
349
+ summary_exists = True
350
+ break
200
351
 
201
352
  for attempt in range(max_retries + 1):
202
353
  response = handler(request)
203
354
 
355
+ # If summary todo is completed AND summary content exists, accept empty response
356
+ # This prevents infinite loop when inject_continuation_middleware returns empty AIMessage
357
+ response_message = _extract_ai_message(response)
358
+ if summary_todo_completed and summary_exists:
359
+ has_content_check = (
360
+ bool(getattr(response_message, "content", None))
361
+ if response_message
362
+ else False
363
+ )
364
+ has_tool_calls_check = (
365
+ bool(getattr(response_message, "tool_calls", None))
366
+ if response_message
367
+ else False
368
+ )
369
+ if not has_content_check and not has_tool_calls_check:
370
+ logger.info(
371
+ "Summary todo completed AND summary exists - accepting empty response (agent should stop)"
372
+ )
373
+ return response
374
+
204
375
  # Extract AIMessage from response
205
376
  response_message = _extract_ai_message(response)
206
377
 
@@ -227,6 +398,23 @@ def create_handle_empty_response_middleware(wrap_model_call):
227
398
  if has_tool_calls:
228
399
  return response
229
400
 
401
+ # Try to extract and repair tool_calls from additional_kwargs
402
+ # Some LLMs (e.g., gpt-oss-120b) put tool_calls in additional_kwargs
403
+ # but with malformed arguments (missing '{', broken JSON, etc.)
404
+ if response_message and not has_tool_calls:
405
+ repaired_message = try_extract_tool_calls_from_additional_kwargs(
406
+ response_message
407
+ )
408
+ if repaired_message and repaired_message.tool_calls:
409
+ logger.info(
410
+ "Repaired tool_calls from additional_kwargs: %d calls",
411
+ len(repaired_message.tool_calls),
412
+ )
413
+ response = _replace_ai_message_in_response(
414
+ response, repaired_message
415
+ )
416
+ return response
417
+
230
418
  # Try to parse JSON from content
231
419
  if has_content and response_message:
232
420
  parsed = parse_json_tool_call(response_message.content)
@@ -242,6 +430,117 @@ def create_handle_empty_response_middleware(wrap_model_call):
242
430
  response = _replace_ai_message_in_response(response, new_message)
243
431
  return response
244
432
 
433
+ # Check if content is summary JSON (for summary todo)
434
+ # Summary JSON has "summary" and "next_items" but no "tool"
435
+ # IMPORTANT: Check for summary JSON pattern FIRST, regardless of current todo
436
+ # This handles cases where LLM outputs summary JSON mixed with other content
437
+ content = response_message.content
438
+ if isinstance(content, list):
439
+ content = " ".join(str(p) for p in content)
440
+
441
+ # Check if content contains summary JSON pattern
442
+ has_summary_pattern = ('"summary"' in content or "'summary'" in content) and (
443
+ '"next_items"' in content or "'next_items'" in content
444
+ )
445
+
446
+ if has_summary_pattern:
447
+ # Try to extract and repair summary JSON from mixed content
448
+ try:
449
+ # Try to find JSON object containing summary
450
+ import re
451
+ json_match = re.search(r'\{[^{}]*"summary"[^{}]*"next_items"[^{}]*\}', content, re.DOTALL)
452
+ if json_match:
453
+ repaired_summary = repair_json(
454
+ json_match.group(), return_objects=True
455
+ )
456
+ else:
457
+ repaired_summary = repair_json(
458
+ content, return_objects=True
459
+ )
460
+
461
+ if (
462
+ isinstance(repaired_summary, dict)
463
+ and "summary" in repaired_summary
464
+ and "next_items" in repaired_summary
465
+ ):
466
+ # Create new message with repaired JSON content
467
+ repaired_content = json.dumps(
468
+ repaired_summary, ensure_ascii=False
469
+ )
470
+ logger.info(
471
+ "Detected and repaired summary JSON in content (pattern-based detection)"
472
+ )
473
+ # Create message with repaired content
474
+ repaired_response_message = AIMessage(
475
+ content=repaired_content,
476
+ tool_calls=getattr(
477
+ response_message, "tool_calls", []
478
+ )
479
+ or [],
480
+ )
481
+ synthetic_message = _create_synthetic_completion(
482
+ request,
483
+ repaired_response_message,
484
+ has_content=True,
485
+ )
486
+ response = _replace_ai_message_in_response(
487
+ response, synthetic_message
488
+ )
489
+ return response
490
+ except Exception as e:
491
+ logger.debug(f"Failed to extract summary JSON from mixed content: {e}")
492
+
493
+ # Fallback: accept as-is if repair failed but looks like summary
494
+ logger.info(
495
+ "Detected summary JSON pattern in content - accepting and synthesizing write_todos"
496
+ )
497
+ synthetic_message = _create_synthetic_completion(
498
+ request, response_message, has_content=True
499
+ )
500
+ response = _replace_ai_message_in_response(
501
+ response, synthetic_message
502
+ )
503
+ return response
504
+
505
+ # Legacy: Also check if current todo is a summary todo (backward compatibility)
506
+ todos = request.state.get("todos", [])
507
+ in_progress_todos = [
508
+ t for t in todos if t.get("status") == "in_progress"
509
+ ]
510
+ pending_todos = [t for t in todos if t.get("status") == "pending"]
511
+ current_todo = (
512
+ in_progress_todos[0]
513
+ if in_progress_todos
514
+ else pending_todos[0]
515
+ if pending_todos
516
+ else None
517
+ )
518
+ if current_todo:
519
+ summary_keywords = [
520
+ "작업 요약",
521
+ "결과 요약",
522
+ "분석 요약",
523
+ "요약 및",
524
+ "다음단계",
525
+ "다음 단계",
526
+ "next step",
527
+ ]
528
+ is_summary_todo = any(
529
+ kw in current_todo.get("content", "") for kw in summary_keywords
530
+ )
531
+ if is_summary_todo and ('"summary"' in content or "'summary'" in content):
532
+ # This is a summary todo with summary content - accept it
533
+ logger.info(
534
+ "Summary todo with summary content detected - accepting"
535
+ )
536
+ synthetic_message = _create_synthetic_completion(
537
+ request, response_message, has_content=True
538
+ )
539
+ response = _replace_ai_message_in_response(
540
+ response, synthetic_message
541
+ )
542
+ return response
543
+
245
544
  # Invalid response - retry with JSON schema prompt
246
545
  if response_message and attempt < max_retries:
247
546
  reason = "text-only" if has_content else "empty"
@@ -256,7 +555,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
256
555
  reason,
257
556
  )
258
557
  # Synthesize write_todos while preserving the content (summary)
259
- synthetic_message = _create_synthetic_final_answer(
558
+ synthetic_message = _create_synthetic_completion(
260
559
  request, response_message, has_content
261
560
  )
262
561
  response = _replace_ai_message_in_response(
@@ -271,6 +570,8 @@ def create_handle_empty_response_middleware(wrap_model_call):
271
570
  attempt + 1,
272
571
  max_retries + 1,
273
572
  )
573
+ if reason == "text-only":
574
+ _log_invalid_ai_message(response_message, reason)
274
575
 
275
576
  request = request.override(
276
577
  messages=request.messages + [HumanMessage(content=json_prompt)]
@@ -293,7 +594,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
293
594
  logger.warning(
294
595
  "Max retries exhausted. Synthesizing write_todos to complete."
295
596
  )
296
- synthetic_message = _create_synthetic_final_answer(
597
+ synthetic_message = _create_synthetic_completion(
297
598
  request, response_message, has_content
298
599
  )
299
600
  response = _replace_ai_message_in_response(response, synthetic_message)
@@ -325,6 +626,23 @@ def _extract_ai_message(response):
325
626
  return None
326
627
 
327
628
 
629
+ def _log_invalid_ai_message(response_message, reason: str) -> None:
630
+ """Log full AIMessage details for invalid (text-only) responses."""
631
+ if not response_message:
632
+ return
633
+ try:
634
+ payload = _serialize_message(response_message)
635
+ except Exception as exc:
636
+ logger.warning(
637
+ "Invalid AIMessage detail (%s): failed to serialize (%s). Raw=%r",
638
+ reason,
639
+ exc,
640
+ response_message,
641
+ )
642
+ return
643
+ logger.warning("Invalid AIMessage detail (%s): %s", reason, _pretty_json(payload))
644
+
645
+
328
646
  def _replace_ai_message_in_response(response, new_message):
329
647
  """Replace AIMessage in response with a new one."""
330
648
  if hasattr(response, "result"):
@@ -344,25 +662,51 @@ def _build_json_prompt(request, response_message, has_content):
344
662
  pending_todos = [t for t in todos if t.get("status") in ("pending", "in_progress")]
345
663
  in_progress_todos = [t for t in todos if t.get("status") == "in_progress"]
346
664
 
665
+ # Check if CURRENT todo (first in_progress or first pending) is summary/next_steps
666
+ # Not checking ALL pending todos - only the one we should be working on now
667
+ summary_keywords = [
668
+ "작업 요약",
669
+ "결과 요약",
670
+ "분석 요약",
671
+ "요약 및",
672
+ "다음단계",
673
+ "다음 단계",
674
+ "next step",
675
+ ]
676
+ current_todo = (
677
+ in_progress_todos[0]
678
+ if in_progress_todos
679
+ else pending_todos[0]
680
+ if pending_todos
681
+ else None
682
+ )
683
+ is_summary_todo = current_todo is not None and any(
684
+ kw in current_todo.get("content", "") for kw in summary_keywords
685
+ )
686
+
347
687
  if has_content:
348
688
  # If all todos completed, don't force another tool call
349
689
  if todos and not pending_todos:
350
690
  return None # Signal to skip retry
351
691
 
352
- # If current in_progress todo is "작업 요약 및 다음단계 제시", accept text-only response
692
+ # If current todo is "작업 요약 및 다음단계 제시", accept text-only response
353
693
  # The LLM is outputting the summary, we'll synthesize write_todos
354
- if in_progress_todos:
355
- current_todo = in_progress_todos[0].get("content", "")
356
- if (
357
- "작업 요약" in current_todo
358
- or "다음단계" in current_todo
359
- or "다음 단계" in current_todo
360
- ):
361
- logger.info(
362
- "Current todo is summary/next steps ('%s'), accepting text-only response",
363
- current_todo[:30],
364
- )
365
- return None # Signal to skip retry - will synthesize write_todos with content
694
+ if is_summary_todo:
695
+ summary_todo = next(
696
+ (
697
+ t
698
+ for t in pending_todos
699
+ if any(kw in t.get("content", "") for kw in summary_keywords)
700
+ ),
701
+ {"content": "summary"},
702
+ )
703
+ logger.info(
704
+ "Current todo is summary/next steps ('%s'), accepting text-only response",
705
+ summary_todo.get("content", "")[:30],
706
+ )
707
+ return (
708
+ None # Signal to skip retry - will synthesize write_todos with content
709
+ )
366
710
 
367
711
  return (
368
712
  f"{JSON_TOOL_SCHEMA}\n\n"
@@ -370,6 +714,18 @@ def _build_json_prompt(request, response_message, has_content):
370
714
  f"Call the next appropriate tool to continue.\n"
371
715
  f'Example: {{"tool": "jupyter_cell_tool", "arguments": {{"code": "print(\'hello\')"}}}}'
372
716
  )
717
+ elif is_summary_todo:
718
+ # Empty response but current todo is summary - force summary JSON output
719
+ logger.info(
720
+ "Empty response but current todo is summary/next steps - forcing summary JSON prompt"
721
+ )
722
+ return (
723
+ f"{JSON_TOOL_SCHEMA}\n\n"
724
+ f"You MUST output a summary JSON with next_items. This is the final step.\n"
725
+ f"출력 형식 (반드시 이 형식으로 출력):\n"
726
+ f'{{"summary": "완료된 작업 요약 (한국어)", "next_items": [{{"subject": "다음 작업 제목", "description": "설명"}}]}}\n\n'
727
+ f"Do NOT call any tool. Just output the summary JSON directly in your response."
728
+ )
373
729
  elif pending_todos:
374
730
  todo_list = ", ".join(t.get("content", "")[:20] for t in pending_todos[:3])
375
731
  example_json = '{"tool": "jupyter_cell_tool", "arguments": {"code": "import pandas as pd\\ndf = pd.read_csv(\'titanic.csv\')\\nprint(df.head())"}}'
@@ -400,7 +756,7 @@ def _build_json_prompt(request, response_message, has_content):
400
756
  )
401
757
 
402
758
 
403
- def _create_synthetic_final_answer(request, response_message, has_content):
759
+ def _create_synthetic_completion(request, response_message, has_content):
404
760
  """Create synthetic write_todos call to mark all todos as completed.
405
761
 
406
762
  This triggers automatic session termination via router's all_todos_completed check.
@@ -408,6 +764,15 @@ def _create_synthetic_final_answer(request, response_message, has_content):
408
764
  """
409
765
  todos = request.state.get("todos", [])
410
766
 
767
+ # Warn if there are pending todos being force-completed
768
+ pending_count = sum(1 for t in todos if t.get("status") == "pending")
769
+ if pending_count > 0:
770
+ logger.warning(
771
+ "Force-completing %d pending todos that were never started: %s",
772
+ pending_count,
773
+ [t.get("content") for t in todos if t.get("status") == "pending"]
774
+ )
775
+
411
776
  # Mark all todos as completed
412
777
  completed_todos = (
413
778
  [{**todo, "status": "completed"} for todo in todos]
@@ -614,80 +979,63 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
614
979
  )
615
980
  args[key] = normalized_value
616
981
 
617
- # Ensure write_todos includes summary todo as last item
982
+ # Validate write_todos: Only ONE item should be in_progress at a time
618
983
  if tool_name == "write_todos" and "todos" in args:
619
984
  todos = args["todos"]
620
985
  if isinstance(todos, list) and len(todos) > 0:
621
- # Check if any todo contains summary keywords
622
- summary_keywords = [
623
- "작업 요약",
624
- "다음단계",
625
- "다음 단계",
626
- "요약 및",
627
- ]
628
- has_summary = any(
629
- any(
630
- kw in todo.get("content", "")
631
- for kw in summary_keywords
632
- )
633
- for todo in todos
634
- if isinstance(todo, dict)
986
+ # Validate: Only ONE item should be in_progress at a time
987
+ # If multiple in_progress, keep only the first one
988
+ in_progress_count = sum(
989
+ 1
990
+ for t in todos
991
+ if isinstance(t, dict)
992
+ and t.get("status") == "in_progress"
635
993
  )
636
-
637
- if not has_summary:
638
- # Add summary todo as last item
639
- summary_todo = {
640
- "content": "작업 요약 및 다음단계 제시",
641
- "status": "pending",
642
- }
643
- todos.append(summary_todo)
644
- logger.info(
645
- "Auto-added '작업 요약 및 다음단계 제시' to write_todos (total: %d todos)",
646
- len(todos),
647
- )
648
-
649
- # Log warning if summary todo is completed without JSON (but don't block)
650
- for todo in todos:
651
- if not isinstance(todo, dict):
652
- continue
653
- content = todo.get("content", "")
654
- status = todo.get("status", "")
655
- is_summary_todo = any(
656
- kw in content for kw in summary_keywords
657
- )
658
- if is_summary_todo and status == "completed":
659
- # Check if AIMessage content has summary JSON
660
- msg_content = getattr(msg, "content", "") or ""
661
- if isinstance(msg_content, list):
662
- msg_content = " ".join(
663
- str(p) for p in msg_content
664
- )
665
- has_summary_json = (
666
- '"summary"' in msg_content
667
- and '"next_items"' in msg_content
668
- ) or (
669
- "'summary'" in msg_content
670
- and "'next_items'" in msg_content
671
- )
672
- if not has_summary_json:
673
- # Just log warning - don't block completion to avoid infinite loop
674
- logger.warning(
675
- "Summary todo marked completed but no summary JSON in content. "
676
- "Allowing completion to proceed. Content: %s",
677
- msg_content[:200],
678
- )
994
+ if in_progress_count > 1:
995
+ found_first = False
996
+ for todo in todos:
997
+ if not isinstance(todo, dict):
998
+ continue
999
+ if todo.get("status") == "in_progress":
1000
+ if found_first:
1001
+ # Reset subsequent in_progress to pending
1002
+ todo["status"] = "pending"
1003
+ logger.info(
1004
+ "Reset duplicate in_progress todo to pending: %s",
1005
+ todo.get("content", "")[
1006
+ :30
1007
+ ],
1008
+ )
1009
+ else:
1010
+ found_first = True
1011
+
1012
+ # NOTE: Previously had logic to revert summary todo to in_progress
1013
+ # if no summary JSON was found. This caused infinite loops
1014
+ # where LLM kept calling write_todos repeatedly.
1015
+ # Now we let the natural termination logic handle this.
1016
+ #
1017
+ # NOTE: Also removed logic to preserve todos when LLM tries to delete them.
1018
+ # The LLM should be able to modify todos freely when:
1019
+ # - User rejects code approval
1020
+ # - User changes their request
1021
+ # - Code execution fails
1022
+ # We rely on prompts to guide proper todo management.
679
1023
 
680
1024
  return response
681
1025
 
682
1026
  return normalize_tool_args
683
1027
 
684
1028
 
685
- def create_inject_continuation_middleware(wrap_model_call):
686
- """Create middleware to inject continuation prompt after non-HITL tool execution.
1029
+ def create_continuation_control_middleware(wrap_model_call):
1030
+ """Create unified middleware for continuation control.
687
1031
 
688
- Non-HITL tools execute immediately without user approval, which can cause
689
- Gemini to produce empty responses. This middleware injects a system message
690
- to remind the LLM to continue with the next action.
1032
+ This middleware combines two functions:
1033
+ 1. BEFORE handler: Inject continuation prompt after non-HITL tool execution
1034
+ - Checks for summary completion and stops if done
1035
+ - Injects "[SYSTEM] Tool completed..." messages to guide LLM
1036
+ 2. AFTER handler: Prevent auto-continuation after summary JSON output
1037
+ - Strips write_todos from responses containing summary JSON
1038
+ - Prevents agent from auto-creating new todos after task completion
691
1039
 
692
1040
  Args:
693
1041
  wrap_model_call: LangChain's wrap_model_call decorator
@@ -696,89 +1044,198 @@ def create_inject_continuation_middleware(wrap_model_call):
696
1044
  Middleware function
697
1045
  """
698
1046
 
1047
+ def _check_summary_exists(messages, last_real_human_idx: int) -> bool:
1048
+ """Check if summary content exists in messages after last real user message."""
1049
+ messages_to_check = (
1050
+ messages[last_real_human_idx + 1 :]
1051
+ if last_real_human_idx >= 0
1052
+ else messages[-15:]
1053
+ )
1054
+ for msg in messages_to_check:
1055
+ msg_content = getattr(msg, "content", "") or ""
1056
+ # Check for summary JSON
1057
+ if '"summary"' in msg_content and '"next_items"' in msg_content:
1058
+ return True
1059
+ # Check for markdown summary (common patterns)
1060
+ if any(
1061
+ kw in msg_content
1062
+ for kw in [
1063
+ "다음 단계 제안",
1064
+ "다음 단계:",
1065
+ "### 다음 단계",
1066
+ "## 다음 단계",
1067
+ "**다음 단계**",
1068
+ "모든 작업이 완료",
1069
+ "**작업 요약**",
1070
+ "### 작업 요약",
1071
+ "## 작업 요약",
1072
+ ]
1073
+ ):
1074
+ return True
1075
+ return False
1076
+
1077
+ def _find_last_real_human_idx(messages) -> int:
1078
+ """Find index of last real HumanMessage (not system-injected)."""
1079
+ last_real_human_idx = -1
1080
+ for i, msg in enumerate(messages):
1081
+ msg_type = getattr(msg, "type", "") or type(msg).__name__
1082
+ if msg_type in ("human", "HumanMessage"):
1083
+ msg_content = getattr(msg, "content", "") or ""
1084
+ if not msg_content.startswith("[SYSTEM]"):
1085
+ last_real_human_idx = i
1086
+ return last_real_human_idx
1087
+
699
1088
  @wrap_model_call
700
- @_with_middleware_logging("inject_continuation_after_non_hitl_tool")
701
- def inject_continuation_after_non_hitl_tool(request, handler):
1089
+ @_with_middleware_logging("continuation_control")
1090
+ def continuation_control(request, handler):
702
1091
  messages = request.messages
703
- if not messages:
704
- return handler(request)
705
-
706
- last_msg = messages[-1]
707
- if getattr(last_msg, "type", "") == "tool":
708
- tool_name = getattr(last_msg, "name", "") or ""
709
-
710
- # Try to extract tool name from content
711
- if not tool_name:
712
- try:
713
- content_json = json.loads(last_msg.content)
714
- tool_name = content_json.get("tool", "")
715
- except (json.JSONDecodeError, TypeError, AttributeError):
716
- pass
717
-
718
- if tool_name in NON_HITL_TOOLS:
719
- # Method 1: Check state.todos
720
- todos = request.state.get("todos", [])
721
- pending_todos = [
722
- t for t in todos if t.get("status") in ("pending", "in_progress")
723
- ]
724
1092
 
725
- # If all todos are completed, DON'T call LLM - return empty response to stop agent
726
- if not pending_todos and todos:
1093
+ # ===== BEFORE HANDLER: Inject continuation prompt =====
1094
+ if messages:
1095
+ last_msg = messages[-1]
1096
+ if getattr(last_msg, "type", "") == "tool":
1097
+ tool_name = getattr(last_msg, "name", "") or ""
1098
+
1099
+ # Try to extract tool name from content
1100
+ if not tool_name:
1101
+ try:
1102
+ content_json = json.loads(last_msg.content)
1103
+ tool_name = content_json.get("tool", "")
1104
+ except (json.JSONDecodeError, TypeError, AttributeError):
1105
+ pass
1106
+
1107
+ if tool_name in NON_HITL_TOOLS:
1108
+ todos = request.state.get("todos", [])
1109
+
1110
+ last_real_human_idx = _find_last_real_human_idx(messages)
1111
+ summary_exists = _check_summary_exists(
1112
+ messages, last_real_human_idx
1113
+ )
1114
+
1115
+ # STOP if summary exists (regardless of todo status)
1116
+ if summary_exists:
1117
+ logger.info(
1118
+ "Summary exists after tool: %s - stopping agent (user must request next steps)",
1119
+ tool_name,
1120
+ )
1121
+ return AIMessage(content="", tool_calls=[])
1122
+
1123
+ pending_todos = [
1124
+ t
1125
+ for t in todos
1126
+ if t.get("status") in ("pending", "in_progress")
1127
+ ]
1128
+
1129
+ # If all todos completed but no summary yet, allow LLM call for summary
1130
+ if not pending_todos and todos:
1131
+ logger.info(
1132
+ "All %d todos completed, no summary yet after tool: %s - allowing LLM for summary",
1133
+ len(todos),
1134
+ tool_name,
1135
+ )
1136
+
727
1137
  logger.info(
728
- "All todos completed (from state) after tool: %s - stopping agent (no LLM call)",
1138
+ "Injecting continuation prompt after non-HITL tool: %s",
729
1139
  tool_name,
730
1140
  )
731
- return AIMessage(content="", tool_calls=[])
732
1141
 
733
- # Method 2: Check ToolMessage content for write_todos
734
- if tool_name == "write_todos" or "Updated todo list to" in (last_msg.content or ""):
735
- try:
736
- import ast
737
- content = last_msg.content or ""
738
- if "Updated todo list to" in content:
739
- list_text = content.split("Updated todo list to", 1)[1].strip()
740
- todos_from_msg = ast.literal_eval(list_text)
741
- if isinstance(todos_from_msg, list) and len(todos_from_msg) > 0:
742
- pending = [t for t in todos_from_msg if t.get("status") in ("pending", "in_progress")]
743
- if not pending:
744
- logger.info(
745
- "All %d todos completed (from ToolMessage) after tool: %s - stopping agent",
746
- len(todos_from_msg),
747
- tool_name,
748
- )
749
- return AIMessage(content="", tool_calls=[])
750
- except Exception as e:
751
- logger.debug("Failed to parse todos from ToolMessage: %s", e)
1142
+ # Skip continuation injection for write_todos
1143
+ # This prevents auto-continuation to next task after completing one
1144
+ # Agent will decide next action based on its own reasoning
1145
+ if tool_name == "write_todos":
1146
+ logger.info(
1147
+ "Skipping continuation prompt after write_todos - "
1148
+ "agent decides next action (pending: %d)",
1149
+ len(pending_todos) if pending_todos else 0,
1150
+ )
1151
+ # Don't inject continuation - let agent naturally continue or stop
1152
+ elif pending_todos:
1153
+ pending_list = ", ".join(
1154
+ t.get("content", "")[:30] for t in pending_todos[:3]
1155
+ )
1156
+ continuation = (
1157
+ f"Tool '{tool_name}' completed. "
1158
+ f"Continue with pending tasks: {pending_list}. "
1159
+ f"Call jupyter_cell_tool or the next appropriate tool."
1160
+ )
1161
+ new_messages = list(messages) + [
1162
+ HumanMessage(content=f"[SYSTEM] {continuation}")
1163
+ ]
1164
+ request = request.override(messages=new_messages)
1165
+ else:
1166
+ continuation = (
1167
+ f"Tool '{tool_name}' completed. "
1168
+ f"Create a todo list with write_todos if needed."
1169
+ )
1170
+ new_messages = list(messages) + [
1171
+ HumanMessage(content=f"[SYSTEM] {continuation}")
1172
+ ]
1173
+ request = request.override(messages=new_messages)
1174
+
1175
+ # ===== CALL HANDLER =====
1176
+ response = handler(request)
1177
+
1178
+ # ===== AFTER HANDLER: Strip write_todos if summary JSON present =====
1179
+ response_message = _extract_ai_message(response)
1180
+ if not response_message:
1181
+ return response
1182
+
1183
+ # Get content - handle both string and list formats
1184
+ content = getattr(response_message, "content", "") or ""
1185
+ if isinstance(content, list):
1186
+ content = " ".join(
1187
+ str(p) if isinstance(p, str) else p.get("text", "")
1188
+ for p in content
1189
+ if isinstance(p, (str, dict))
1190
+ )
752
1191
 
1192
+ # Check if content contains summary JSON pattern
1193
+ has_summary_json = '"summary"' in content and '"next_items"' in content
1194
+
1195
+ if has_summary_json:
1196
+ tool_calls = getattr(response_message, "tool_calls", []) or []
1197
+ write_todos_calls = [
1198
+ tc for tc in tool_calls if tc.get("name") == "write_todos"
1199
+ ]
1200
+
1201
+ if write_todos_calls:
753
1202
  logger.info(
754
- "Injecting continuation prompt after non-HITL tool: %s",
755
- tool_name,
1203
+ "Summary JSON 감지 - write_todos 호출 제거 (자동 계속 방지). "
1204
+ "제거된 write_todos 호출 수: %d",
1205
+ len(write_todos_calls),
756
1206
  )
757
1207
 
758
- if pending_todos:
759
- pending_list = ", ".join(
760
- t.get("content", "")[:30] for t in pending_todos[:3]
761
- )
762
- continuation = (
763
- f"Tool '{tool_name}' completed. "
764
- f"Continue with pending tasks: {pending_list}. "
765
- f"Call jupyter_cell_tool or the next appropriate tool."
766
- )
767
- else:
768
- # No todos yet - let agent create them
769
- continuation = (
770
- f"Tool '{tool_name}' completed. "
771
- f"Create a todo list with write_todos if needed."
772
- )
773
-
774
- new_messages = list(messages) + [
775
- HumanMessage(content=f"[SYSTEM] {continuation}")
1208
+ filtered_tool_calls = [
1209
+ tc for tc in tool_calls if tc.get("name") != "write_todos"
776
1210
  ]
777
- request = request.override(messages=new_messages)
778
1211
 
779
- return handler(request)
1212
+ new_message = AIMessage(
1213
+ content=response_message.content,
1214
+ tool_calls=filtered_tool_calls,
1215
+ additional_kwargs=getattr(
1216
+ response_message, "additional_kwargs", {}
1217
+ ),
1218
+ response_metadata=getattr(
1219
+ response_message, "response_metadata", {}
1220
+ ),
1221
+ )
1222
+
1223
+ response = _replace_ai_message_in_response(response, new_message)
1224
+
1225
+ return response
1226
+
1227
+ return continuation_control
1228
+
1229
+
1230
+ # Backward compatibility aliases
1231
+ def create_inject_continuation_middleware(wrap_model_call):
1232
+ """Deprecated: Use create_continuation_control_middleware instead."""
1233
+ return create_continuation_control_middleware(wrap_model_call)
1234
+
780
1235
 
781
- return inject_continuation_after_non_hitl_tool
1236
+ def create_prevent_auto_continuation_middleware(wrap_model_call):
1237
+ """Deprecated: Use create_continuation_control_middleware instead."""
1238
+ return create_continuation_control_middleware(wrap_model_call)
782
1239
 
783
1240
 
784
1241
  def create_patch_tool_calls_middleware(AgentMiddleware, ToolMessage, Overwrite):