hdsp-jupyter-extension 2.0.26__py3-none-any.whl → 2.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. agent_server/context_providers/__init__.py +4 -2
  2. agent_server/context_providers/actions.py +73 -7
  3. agent_server/context_providers/file.py +23 -23
  4. agent_server/langchain/__init__.py +2 -2
  5. agent_server/langchain/agent.py +18 -251
  6. agent_server/langchain/agent_factory.py +26 -4
  7. agent_server/langchain/agent_prompts/planner_prompt.py +22 -35
  8. agent_server/langchain/custom_middleware.py +278 -43
  9. agent_server/langchain/llm_factory.py +102 -54
  10. agent_server/langchain/logging_utils.py +1 -1
  11. agent_server/langchain/middleware/__init__.py +5 -0
  12. agent_server/langchain/middleware/code_history_middleware.py +126 -37
  13. agent_server/langchain/middleware/content_injection_middleware.py +110 -0
  14. agent_server/langchain/middleware/subagent_events.py +88 -9
  15. agent_server/langchain/middleware/subagent_middleware.py +518 -240
  16. agent_server/langchain/prompts.py +5 -22
  17. agent_server/langchain/state_schema.py +44 -0
  18. agent_server/langchain/tools/jupyter_tools.py +4 -5
  19. agent_server/langchain/tools/tool_registry.py +6 -0
  20. agent_server/routers/chat.py +305 -2
  21. agent_server/routers/config.py +193 -8
  22. agent_server/routers/config_schema.py +254 -0
  23. agent_server/routers/context.py +31 -8
  24. agent_server/routers/langchain_agent.py +310 -153
  25. hdsp_agent_core/managers/config_manager.py +100 -1
  26. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  27. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
  28. hdsp_jupyter_extension-2.0.26.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js → hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.55727265b00191e68d9a.js +479 -15
  29. hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.55727265b00191e68d9a.js.map +1 -0
  30. jupyter_ext/labextension/static/lib_index_js.67505497667f9c0a763d.js → hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.df05d90f366bfd5fa023.js +1287 -190
  31. hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.df05d90f366bfd5fa023.js.map +1 -0
  32. hdsp_jupyter_extension-2.0.26.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.0fe2dcbbd176ee0efceb.js → hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.08fce819ee32e9d25175.js +3 -3
  33. jupyter_ext/labextension/static/remoteEntry.0fe2dcbbd176ee0efceb.js.map → hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.08fce819ee32e9d25175.js.map +1 -1
  34. {hdsp_jupyter_extension-2.0.26.dist-info → hdsp_jupyter_extension-2.0.28.dist-info}/METADATA +1 -1
  35. {hdsp_jupyter_extension-2.0.26.dist-info → hdsp_jupyter_extension-2.0.28.dist-info}/RECORD +66 -64
  36. jupyter_ext/_version.py +1 -1
  37. jupyter_ext/handlers.py +41 -0
  38. jupyter_ext/labextension/build_log.json +1 -1
  39. jupyter_ext/labextension/package.json +2 -2
  40. jupyter_ext/labextension/static/{frontend_styles_index_js.b5e4416b4e07ec087aad.js → frontend_styles_index_js.55727265b00191e68d9a.js} +479 -15
  41. jupyter_ext/labextension/static/frontend_styles_index_js.55727265b00191e68d9a.js.map +1 -0
  42. hdsp_jupyter_extension-2.0.26.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.67505497667f9c0a763d.js → jupyter_ext/labextension/static/lib_index_js.df05d90f366bfd5fa023.js +1287 -190
  43. jupyter_ext/labextension/static/lib_index_js.df05d90f366bfd5fa023.js.map +1 -0
  44. jupyter_ext/labextension/static/{remoteEntry.0fe2dcbbd176ee0efceb.js → remoteEntry.08fce819ee32e9d25175.js} +3 -3
  45. hdsp_jupyter_extension-2.0.26.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.0fe2dcbbd176ee0efceb.js.map → jupyter_ext/labextension/static/remoteEntry.08fce819ee32e9d25175.js.map +1 -1
  46. agent_server/langchain/middleware/description_injector.py +0 -150
  47. hdsp_jupyter_extension-2.0.26.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js.map +0 -1
  48. hdsp_jupyter_extension-2.0.26.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.67505497667f9c0a763d.js.map +0 -1
  49. jupyter_ext/labextension/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js.map +0 -1
  50. jupyter_ext/labextension/static/lib_index_js.67505497667f9c0a763d.js.map +0 -1
  51. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  52. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  53. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  54. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  55. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  56. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  57. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  58. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  59. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  60. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
  61. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
  62. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
  63. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
  64. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  65. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  66. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  67. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  68. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
  69. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
  70. {hdsp_jupyter_extension-2.0.26.dist-info → hdsp_jupyter_extension-2.0.28.dist-info}/WHEEL +0 -0
  71. {hdsp_jupyter_extension-2.0.26.dist-info → hdsp_jupyter_extension-2.0.28.dist-info}/licenses/LICENSE +0 -0

agent_server/routers/langchain_agent.py +310 -153

@@ -19,10 +19,7 @@ from langgraph.checkpoint.memory import InMemorySaver
 from pydantic import BaseModel, ConfigDict, Field
 from sse_starlette.sse import EventSourceResponse
 
-from agent_server.langchain.agent import (
-    _get_all_tools,
-    create_agent_system,
-)
+from agent_server.langchain.agent import create_agent_system
 from agent_server.langchain.llm_factory import create_llm
 from agent_server.langchain.logging_utils import (
     LOG_RESPONSE_END,
@@ -33,7 +30,10 @@ from agent_server.langchain.middleware.code_history_middleware import (
 )
 
 # Note: Subagent middleware is used by agent_factory, not directly by router
-from agent_server.langchain.middleware.subagent_events import drain_subagent_events
+from agent_server.langchain.middleware.subagent_events import (
+    drain_subagent_events,
+    drain_summarization_events,
+)
 
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/langchain", tags=["langchain-agent"])
@@ -63,11 +63,43 @@ def get_subagent_debug_events():
     """
     Drain subagent events and convert to SSE debug events.
 
+    If any subagent_complete event is found, appends a "LLM 응답 대기 중"
+    event so the UI doesn't show "완료" during the next LLM call.
+
     Returns:
         List of SSE event dicts for debug display
     """
     events = drain_subagent_events()
     sse_events = []
+    has_complete = False
+    for event in events:
+        sse_events.append(
+            {
+                "event": "debug",
+                "data": json.dumps(event.to_status_dict()),
+            }
+        )
+        if event.event_type == "subagent_complete":
+            has_complete = True
+    if has_complete:
+        sse_events.append(
+            {
+                "event": "debug",
+                "data": json.dumps({"status": "LLM 응답 대기 중", "icon": "thinking"}),
+            }
+        )
+    return sse_events
+
+
+def get_summarization_debug_events():
+    """
+    Drain summarization events and convert to SSE debug events.
+
+    Returns:
+        List of SSE event dicts for debug display
+    """
+    events = drain_summarization_events()
+    sse_events = []
     for event in events:
         sse_events.append(
             {
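
The new `get_summarization_debug_events` mirrors the existing subagent helper: middleware pushes events onto a module-level queue and the router drains it between stream steps. A minimal sketch of the drain pattern these helpers assume (the `subagent_events` internals are not part of this diff, so the queue name and push function below are illustrative):

```python
# Illustrative sketch of the queue-and-drain pattern behind
# drain_subagent_events / drain_summarization_events.
from collections import deque
from typing import Dict, List

_SUMMARIZATION_EVENTS: deque = deque()  # hypothetical module-level queue

def push_summarization_event(event: Dict) -> None:
    """Called from middleware when the conversation gets compressed."""
    _SUMMARIZATION_EVENTS.append(event)

def drain_summarization_events() -> List[Dict]:
    """Return and clear all queued events; the router calls this once per step."""
    events = list(_SUMMARIZATION_EVENTS)
    _SUMMARIZATION_EVENTS.clear()
    return events
```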
@@ -104,15 +136,19 @@ def _get_tool_status_message(
         }
     elif tool_name_normalized in ("task", "task_tool"):
         # Show subagent delegation details with expand support
-        agent_name = tool_args.get("agent_name", "unknown")
+        agent_name = tool_args.get("subagent_type") or tool_args.get(
+            "agent_name", "unknown"
+        )
         description = tool_args.get("description", "")
         short_desc = description[:50] + "..." if len(description) > 50 else description
         return {
-            "status": f"{agent_name} Agent 실행: {short_desc}",
+            "status": f"Subagent-{agent_name} 실행: {short_desc}",
             "icon": "agent",
             "expandable": len(description) > 50,
-            "full_text": f"{agent_name} Agent 실행: {description}",
+            "full_text": f"Subagent-{agent_name} 실행: {description}",
         }
+    elif tool_name_normalized in ("final_summary_tool", "final_summary"):
+        return {"status": "작업 마무리중...", "icon": "summary"}
     elif tool_name_normalized in ("list_workspace_tool", "list_workspace"):
         path = tool_args.get("path", ".")
         pattern = tool_args.get("pattern", "*")
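
For illustration, the reworked task branch above now prefers the `subagent_type` key over the legacy `agent_name` and truncates long descriptions for the collapsed status line. A hypothetical input and the resulting status dict:

```python
# Hypothetical example of the status dict built by the task-tool branch.
tool_args = {
    "subagent_type": "python_developer",
    "description": "Load the sales CSV, clean null rows, and plot monthly revenue",
}
description = tool_args.get("description", "")
short_desc = description[:50] + "..." if len(description) > 50 else description
status = {
    "status": f"Subagent-{tool_args['subagent_type']} 실행: {short_desc}",
    "icon": "agent",
    "expandable": len(description) > 50,  # True here, so the UI offers expand
    "full_text": f"Subagent-{tool_args['subagent_type']} 실행: {description}",
}
```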
@@ -224,12 +260,12 @@ class LLMConfig(BaseModel):
     system_prompt: Optional[str] = Field(
         default=None,
         alias="systemPrompt",
-        description="Override system prompt for single agent mode",
+        description="Override system prompt",
     )
     agent_prompts: Optional[AgentPromptsConfig] = Field(
         default=None,
         alias="agentPrompts",
-        description="Per-agent system prompts for multi-agent mode",
+        description="Per-agent system prompts",
     )
     resource_context: Optional[Union[Dict[str, Any], str]] = Field(
         default=None,
@@ -273,10 +309,6 @@ class AgentRequest(BaseModel):
         default=None,
         description="Thread ID for conversation persistence (required for HITL)",
     )
-    agentMode: Optional[str] = Field(
-        default="single",
-        description="Agent mode: 'single' (all tools) or 'multi' (Planner + Subagents)",
-    )
 
 
 class ResumeDecision(BaseModel):
@@ -304,10 +336,6 @@ class ResumeRequest(BaseModel):
     workspaceRoot: Optional[str] = Field(
         default=".", description="Workspace root directory"
     )
-    agentMode: Optional[str] = Field(
-        default="single",
-        description="Agent mode: 'single' (all tools) or 'multi' (Planner + Subagents)",
-    )
 
 
 class ExecutionResult(BaseModel):
@@ -354,20 +382,18 @@ def _get_agent_cache_key(
     llm_config: Dict[str, Any],
     workspace_root: str,
     system_prompt_override: Optional[str] = None,
-    agent_mode: str = "single",
     agent_prompts: Optional[Dict[str, str]] = None,
 ) -> str:
     """Generate cache key for agent instance.
 
     Agent instances are cached based on LLM config, workspace root, system prompt,
-    agent mode, and agent prompts. Different configurations require different agent instances.
+    and agent prompts. Different configurations require different agent instances.
 
     Args:
         llm_config: LLM configuration dictionary
         workspace_root: Workspace root directory
         system_prompt_override: Optional custom system prompt
-        agent_mode: "single" or "multi" agent mode
-        agent_prompts: Optional dict of per-agent prompts (for multi-agent mode)
+        agent_prompts: Optional dict of per-agent prompts
 
     Returns:
         MD5 hash of the configuration as cache key
@@ -382,15 +408,23 @@
     )
 
     cache_data = (
-        f"{config_str}|{workspace_root}|{prompt_str}|{agent_mode}|{agent_prompts_str}"
+        f"{config_str}|{workspace_root}|{prompt_str}|{agent_prompts_str}"
     )
     cache_key = hashlib.md5(cache_data.encode()).hexdigest()
 
     return cache_key
 
 
-def _normalize_action_request(action: Dict[str, Any]) -> Dict[str, Any]:
-    """Normalize HITL action request payload across LangChain versions."""
+def _normalize_action_request(
+    action: Dict[str, Any],
+    state: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+    """Normalize HITL action request payload across LangChain versions.
+
+    Also injects generated_content from state into tool args for display
+    (ContentInjectionMiddleware.wrap_tool_call runs at execution time,
+    but HITL interrupts before execution — so we inject here for display).
+    """
     logger.info(f"[_normalize_action_request] Called with action: {str(action)[:200]}")
     name = (
         action.get("name")
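
With `agent_mode` dropped, the cache key reduces to four pipe-joined components hashed with MD5, so any change to the LLM config, workspace, prompt override, or per-agent prompts produces a new agent instance. A self-contained sketch of the scheme (the construction of `config_str`, `prompt_str`, and `agent_prompts_str` happens outside this hunk, so they appear as plain parameters here):

```python
import hashlib

def make_cache_key(config_str: str, workspace_root: str,
                   prompt_str: str, agent_prompts_str: str) -> str:
    # Same four-component scheme as _get_agent_cache_key after this change.
    # MD5 is used only as a cache key here, not for security.
    cache_data = f"{config_str}|{workspace_root}|{prompt_str}|{agent_prompts_str}"
    return hashlib.md5(cache_data.encode()).hexdigest()
```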
@@ -407,39 +441,32 @@ def _normalize_action_request(action: Dict[str, Any]) -> Dict[str, Any]:
         or action.get("parameters")
         or {}
     )
+
+    # Inject generated_content from state into tool args for HITL display
+    if state and isinstance(args, dict):
+        content = state.get("generated_content")
+        content_type = state.get("generated_content_type")
+        desc = state.get("content_description")
+        if content and content_type:
+            if content_type == "python":
+                if name == "jupyter_cell_tool" and not args.get("code"):
+                    args = {**args, "code": content}
+                    if desc and not args.get("description"):
+                        args["description"] = desc
+                elif name == "write_file_tool" and not args.get("content"):
+                    args = {**args, "content": content}
+            elif content_type == "sql":
+                if name == "markdown_tool" and not args.get("content"):
+                    sql_md = f"```sql\n{content}\n```"
+                    if desc:
+                        sql_md = f"{desc}\n\n{sql_md}"
+                    args = {**args, "content": sql_md}
+
     # Try to get description from action first, then from args (for jupyter_cell_tool etc)
     description = action.get("description", "") or (
         args.get("description", "") if isinstance(args, dict) else ""
     )
 
-    # Auto-inject description for jupyter_cell_tool from python_developer's response
-    # Only inject into args.description, keep top-level description as HITL default
-    if name == "jupyter_cell_tool":
-        logger.info(
-            f"[HITL] jupyter_cell_tool detected, current description: '{description[:50] if description else 'None'}'"
-        )
-        try:
-            from agent_server.langchain.middleware.description_injector import (
-                clear_pending_description,
-                get_pending_description,
-            )
-
-            pending = get_pending_description()
-            if pending:
-                # Inject into args.description only (for detailed description display)
-                # Keep top-level description as HITL approval message
-                if isinstance(args, dict):
-                    args = dict(args)
-                    args["description"] = pending
-                clear_pending_description()
-                logger.info(
-                    f"[HITL] Auto-injected description into args: {pending[:80]}..."
-                )
-            else:
-                logger.info("[HITL] No pending description from python_developer")
-        except Exception as e:
-            logger.warning(f"Failed to inject description: {e}")
-
     return {"name": name, "arguments": args, "description": description}
@@ -695,6 +722,19 @@ async def stream_agent(request: AgentRequest):
         thread_id,
     )
 
+    # Handle /reset command
+    if "/reset" in request.request:
+        logger.info(f"/reset command detected for thread: {thread_id}")
+        from agent_server.langchain.middleware.code_history_middleware import (
+            clear_code_history,
+        )
+
+        # Clear code history and checkpointer for this thread
+        clear_code_history(thread_id)
+        if thread_id in _simple_agent_checkpointers:
+            del _simple_agent_checkpointers[thread_id]
+        logger.info(f"Session reset complete for thread: {thread_id}")
+
     async def event_generator():
         try:
             # Use simple agent with HITL
@@ -739,59 +779,48 @@
                 len(_simple_agent_checkpointers),
             )
 
-            resolved_workspace_root = _resolve_workspace_root(request.workspaceRoot)
+            # Clear code history for new threads
+            if not is_existing_thread:
+                from agent_server.langchain.middleware.code_history_middleware import (
+                    clear_code_history,
+                )
+
+                clear_code_history(thread_id)
+                logger.info(f"Code history cleared for new thread: {thread_id}")
 
-            # Get agent mode (single or multi)
-            agent_mode = getattr(request, "agentMode", "single") or "single"
-            logger.info("Agent mode: %s", agent_mode)
+            resolved_workspace_root = _resolve_workspace_root(request.workspaceRoot)
 
-            # Get agent prompts (for multi-agent mode)
+            # Get agent prompts for per-agent customization
             agent_prompts = None
-            if agent_mode == "multi":
-                # Multi-agent mode: Use agentPrompts for per-agent customization
-                # systemPrompt is for single-agent mode only (DEFAULT_SYSTEM_PROMPT)
-                if request.llmConfig and request.llmConfig.agent_prompts:
-                    agent_prompts = {
-                        "planner": request.llmConfig.agent_prompts.planner,
-                        "python_developer": (
-                            request.llmConfig.agent_prompts.python_developer
-                        ),
-                        "researcher": request.llmConfig.agent_prompts.researcher,
-                        "athena_query": request.llmConfig.agent_prompts.athena_query,
-                    }
-                    agent_prompts = {k: v for k, v in agent_prompts.items() if v}
-                    logger.info(
-                        "Multi-agent mode: Using agentPrompts (%s)",
-                        list(agent_prompts.keys()),
-                    )
-                # In multi-agent mode, DON'T use systemPrompt as override
-                # (systemPrompt = single-agent prompt, not planner prompt)
-                # Use agentPrompts.planner instead (handled by agent_factory)
-                if system_prompt_override:
-                    logger.info(
-                        "Multi-agent mode: Ignoring systemPrompt override (len=%d) - "
-                        "use agentPrompts.planner instead",
-                        len(system_prompt_override),
-                    )
-                    system_prompt_override = None
-            elif request.llmConfig and request.llmConfig.agent_prompts:
-                # Single-agent mode: can use custom prompts (not applicable currently)
+            if request.llmConfig and request.llmConfig.agent_prompts:
                 agent_prompts = {
                     "planner": request.llmConfig.agent_prompts.planner,
-                    "python_developer": request.llmConfig.agent_prompts.python_developer,
+                    "python_developer": (
+                        request.llmConfig.agent_prompts.python_developer
+                    ),
                     "researcher": request.llmConfig.agent_prompts.researcher,
                     "athena_query": request.llmConfig.agent_prompts.athena_query,
                 }
                 agent_prompts = {k: v for k, v in agent_prompts.items() if v}
+                logger.info(
+                    "Using agentPrompts (%s)",
+                    list(agent_prompts.keys()),
+                )
+                # Don't use systemPrompt as override — use agentPrompts.planner instead
+                if system_prompt_override:
+                    logger.info(
+                        "Ignoring systemPrompt override (len=%d) - "
+                        "use agentPrompts.planner instead",
+                        len(system_prompt_override),
+                    )
+                    system_prompt_override = None
 
             # Get or create cached agent
-            # DEBUG: Log cache key components
             logger.info(
-                "DEBUG: Cache key components - provider=%s, workspace=%s, mode=%s, "
+                "Cache key components - provider=%s, workspace=%s, "
                 "has_system_prompt=%s, has_agent_prompts=%s",
                 config_dict.get("provider"),
                 resolved_workspace_root[:50] if resolved_workspace_root else None,
-                agent_mode,
                 bool(system_prompt_override),
                 bool(agent_prompts),
             )
@@ -800,23 +829,20 @@
                 llm_config=config_dict,
                 workspace_root=resolved_workspace_root,
                 system_prompt_override=system_prompt_override,
-                agent_mode=agent_mode,
                 agent_prompts=agent_prompts,
             )
 
             if agent_cache_key in _simple_agent_instances:
                 agent = _simple_agent_instances[agent_cache_key]
                 logger.info(
-                    "Using cached agent for key %s (mode=%s, total cached: %d)",
+                    "Using cached agent for key %s (total cached: %d)",
                     agent_cache_key[:8],
-                    agent_mode,
                     len(_simple_agent_instances),
                 )
             else:
                 logger.info(
-                    "Creating new agent for key %s (mode=%s)",
+                    "Creating new agent for key %s",
                     agent_cache_key[:8],
-                    agent_mode,
                 )
                 agent = create_agent_system(
                     llm_config=config_dict,
@@ -824,20 +850,25 @@
                     enable_hitl=True,
                     checkpointer=checkpointer,
                     system_prompt_override=system_prompt_override,
-                    agent_mode=agent_mode,
                     agent_prompts=agent_prompts,
                 )
                 _simple_agent_instances[agent_cache_key] = agent
                 logger.info(
-                    "Agent cached for key %s (mode=%s, total cached: %d)",
+                    "Agent cached for key %s (total cached: %d)",
                     agent_cache_key[:8],
-                    agent_mode,
                     len(_simple_agent_instances),
                 )
 
             # Prepare config with thread_id
             config = {"configurable": {"thread_id": thread_id}}
 
+            # Set current thread_id for code history tracking
+            from agent_server.langchain.middleware.subagent_middleware import (
+                set_current_thread_id,
+            )
+
+            set_current_thread_id(thread_id)
+
             # Check existing state and ALWAYS reset todos for new request
             # Each new user request starts a fresh todo list
             should_reset_todos = False
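
`set_current_thread_id` implies a module-level holder that lets middleware attribute code history to the right conversation. A sketch assuming a contextvar-based implementation (the real `subagent_middleware` internals are not shown in this diff):

```python
# Illustrative thread-id holder; names follow the calls above but the
# actual implementation in subagent_middleware may differ.
from contextvars import ContextVar
from typing import Optional

_current_thread_id: ContextVar[Optional[str]] = ContextVar(
    "current_thread_id", default=None
)

def set_current_thread_id(thread_id: str) -> None:
    _current_thread_id.set(thread_id)

def get_current_thread_id() -> Optional[str]:
    return _current_thread_id.get()
```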
@@ -871,8 +902,11 @@
             previous_todos_context = None
             if should_reset_todos:
                 try:
-                    agent.update_state(config, {"todos": []})
-                    logger.info("Reset todos in agent state for thread %s", thread_id)
+                    agent.update_state(config, {"todos": [], "todo_active": False})
+                    logger.info(
+                        "Reset todos and todo_active in agent state for thread %s",
+                        thread_id,
+                    )
                     # Prepare event to notify frontend (will be yielded after function setup)
                     todos_reset_event = {
                         "event": "todos",
@@ -891,7 +925,8 @@
                             items_summary += "..."
                         previous_todos_context = (
                             f"[SYSTEM] 이전 todo list가 완료 혹은 취소되었습니다. 완료된 작업: {items_summary}. "
-                            f"새 작업을 시작합니다. 이전 todo list 신규 작업을 append 하지 말고 새로운 todo list를 생성하세요."
+                            f"새 작업을 시작합니다. 이전 todo list 초기화되었습니다. "
+                            f"간단한 작업(1-2단계)이면 write_todos 없이 바로 실행하세요."
                         )
                         logger.info(
                             "Injecting previous todos context: %s",
@@ -930,6 +965,11 @@
                 "data": json.dumps({"status": "LLM 응답 대기 중", "icon": "thinking"}),
             }
 
+            # Track message count for summarization detection
+            # SummarizationMiddleware keeps ~3 messages after compression
+            previous_message_count = 0
+            summarization_detected = False
+
             # Main streaming loop
             async for step in _async_stream_wrapper(
                 agent, agent_input, config, stream_mode="values"
@@ -940,6 +980,11 @@
                         f"Thread {thread_id} cancelled by user, stopping stream"
                     )
                     clear_cancelled_thread(thread_id)
+                    # Reset todo_active on cancellation
+                    try:
+                        agent.update_state(config, {"todo_active": False})
+                    except Exception:
+                        pass
                     yield {
                         "event": "cancelled",
                         "data": json.dumps(
@@ -976,6 +1021,30 @@
                 # Process messages (no continue statements to ensure interrupt check always runs)
                 if isinstance(step, dict) and "messages" in step:
                     messages = step["messages"]
+                    current_message_count = len(messages) if messages else 0
+
+                    # Detect summarization by checking for lc_source: "summarization" marker
+                    # SummarizationMiddleware injects summary into system prompt with this marker
+                    if not summarization_detected and messages:
+                        for msg in messages:
+                            # Check additional_kwargs for lc_source
+                            additional_kwargs = getattr(msg, "additional_kwargs", {}) or {}
+                            if additional_kwargs.get("lc_source") == "summarization":
+                                summarization_detected = True
+                                logger.info(
+                                    f"[Agent] Summarization detected via lc_source marker"
+                                )
+                                yield {
+                                    "event": "debug",
+                                    "data": json.dumps({
+                                        "status": "대화가 자동으로 압축되었습니다.",
+                                        "icon": "check"
+                                    }),
+                                }
+                                break
+
+                    previous_message_count = current_message_count
+
                     should_process_message = False
                     if messages:
                         last_message = messages[-1]
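
The detection above keys off `additional_kwargs["lc_source"] == "summarization"` on any message in the step. As a standalone check, assuming LangChain-core message objects and that the middleware tags its injected summary exactly this way:

```python
from langchain_core.messages import SystemMessage

def was_summarized(messages) -> bool:
    # Mirrors the loop above: scan additional_kwargs for the marker.
    for msg in messages:
        kwargs = getattr(msg, "additional_kwargs", {}) or {}
        if kwargs.get("lc_source") == "summarization":
            return True
    return False

# Example: a summary message carrying the marker is detected.
msgs = [SystemMessage(
    content="Summary of earlier turns...",
    additional_kwargs={"lc_source": "summarization"},
)]
assert was_summarized(msgs)
```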
@@ -1118,10 +1187,12 @@
                             }
                             return  # Exit the generator
                         else:
-                            logger.warning(
-                                "All %d todos completed but no summary JSON in current step - NOT auto-terminating",
+                            logger.info(
+                                "All %d todos completed (no summary in step) - "
+                                "continuing to wait for final_summary_tool",
                                 len(todos),
                             )
+                            # Don't auto-terminate — let agent call final_summary_tool
 
                     tool_name = getattr(last_message, "name", "") or ""
                     logger.info(
@@ -1309,7 +1380,14 @@
                             has_summary_json = (
                                 '"summary"' in msg_content
                                 and '"next_items"' in msg_content
+                            ) or (
+                                "'summary'" in msg_content
+                                and "'next_items'" in msg_content
                             )
+                            # Check if last_message is a ToolMessage from final_summary_tool
+                            is_final_summary_tool_msg = (
+                                getattr(last_message, "name", "") or ""
+                            ) in ("final_summary_tool", "final_summary")
                             # Also check for markdown summary format
                             has_markdown_summary = any(
                                 kw in msg_content
@@ -1323,15 +1401,18 @@
                                 ]
                             )
                             has_summary = (
-                                has_summary_json or has_markdown_summary
+                                has_summary_json
+                                or has_markdown_summary
+                                or is_final_summary_tool_msg
                             )
 
                             # Only check current AIMessage for summary (not history, to avoid false positives)
                             if not has_summary:
-                                logger.warning(
-                                    "All todos completed but no summary JSON in current message - NOT auto-terminating"
+                                logger.info(
+                                    "All todos completed (no summary in message) - "
+                                    "continuing to wait for final_summary_tool"
                                 )
-                                # Don't terminate - let agent continue to generate summary
+                                # Don't auto-terminate — let agent call final_summary_tool
                             else:
                                 logger.info(
                                     "All %d todos completed and summary exists in current message, auto-terminating",
@@ -1587,6 +1668,10 @@
                 for subagent_event in get_subagent_debug_events():
                     yield subagent_event
 
+                # Drain and emit any summarization events (context compression)
+                for summarization_event in get_summarization_debug_events():
+                    yield summarization_event
+
                 # Check for interrupt AFTER processing todos and messages
                 # This ensures todos/debug events are emitted even in interrupt steps
                 if isinstance(step, dict) and "__interrupt__" in step:
@@ -1621,7 +1706,10 @@
                         f"[INTERRUPT] action_requests count: {len(action_requests)}, first: {str(action_requests[0])[:200] if action_requests else 'none'}"
                     )
                     normalized_actions = [
-                        _normalize_action_request(a) for a in action_requests
+                        _normalize_action_request(
+                            a, state=step if isinstance(step, dict) else None
+                        )
+                        for a in action_requests
                     ]
                     if normalized_actions:
                         _simple_agent_pending_actions[thread_id] = (
@@ -1682,7 +1770,27 @@
                 )
 
                 llm = create_llm(fallback_config)
-                tools = _get_all_tools()
+                from agent_server.langchain.tools import (
+                    jupyter_cell_tool,
+                    markdown_tool,
+                    ask_user_tool,
+                    read_file_tool,
+                    write_file_tool,
+                    edit_file_tool,
+                    multiedit_file_tool,
+                    search_notebook_cells_tool,
+                    execute_command_tool,
+                    check_resource_tool,
+                    diagnostics_tool,
+                    references_tool,
+                )
+                tools = [
+                    jupyter_cell_tool, markdown_tool, ask_user_tool,
+                    read_file_tool, write_file_tool, edit_file_tool,
+                    multiedit_file_tool, search_notebook_cells_tool,
+                    execute_command_tool, check_resource_tool,
+                    diagnostics_tool, references_tool,
+                ]
                 # Force tool calling - use tool_config for Gemini, tool_choice for others
                 provider = config_dict.get("provider", "gemini")
                 if provider == "gemini":
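
The fallback path now binds an explicit tool list and, per the comment, forces a tool call differently by provider. A hedged sketch of that branching; the exact kwargs depend on the installed LangChain provider packages, so treat these as indicative rather than the project's actual code:

```python
# Indicative sketch of provider-specific forced tool calling.
if provider == "gemini":
    # Gemini-style binding commonly goes through a tool_config with a
    # function_calling_config mode such as "ANY".
    llm_with_tools = llm.bind_tools(
        tools,
        tool_config={"function_calling_config": {"mode": "ANY"}},
    )
else:
    # OpenAI/Anthropic-style bindings accept a tool_choice argument
    # (e.g. "any" or "required") to force some tool call.
    llm_with_tools = llm.bind_tools(tools, tool_choice="any")
```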
@@ -2045,56 +2153,48 @@
 
     checkpointer = _simple_agent_checkpointers.get(request.threadId)
 
-    # Get agent mode (single or multi)
-    agent_mode = getattr(request, "agentMode", "single") or "single"
-    logger.info("Resume: Agent mode: %s", agent_mode)
-
-    # Get agent prompts (for multi-agent mode)
+    # Get agent prompts for per-agent customization
     agent_prompts = None
-    if agent_mode == "multi":
-        if request.llmConfig and request.llmConfig.agent_prompts:
-            agent_prompts = {
-                "planner": request.llmConfig.agent_prompts.planner,
-                "python_developer": (
-                    request.llmConfig.agent_prompts.python_developer
-                ),
-                "researcher": request.llmConfig.agent_prompts.researcher,
-                "athena_query": request.llmConfig.agent_prompts.athena_query,
-            }
-            agent_prompts = {k: v for k, v in agent_prompts.items() if v}
-            logger.info(
-                "Resume: Multi-agent mode - using agentPrompts (%s)",
-                list(agent_prompts.keys()),
-            )
-        # In multi-agent mode, DON'T use systemPrompt as override
-        if system_prompt_override:
-            logger.info(
-                "Resume: Multi-agent mode - ignoring systemPrompt (len=%d)",
-                len(system_prompt_override),
-            )
-            system_prompt_override = None
+    if request.llmConfig and request.llmConfig.agent_prompts:
+        agent_prompts = {
+            "planner": request.llmConfig.agent_prompts.planner,
+            "python_developer": (
+                request.llmConfig.agent_prompts.python_developer
+            ),
+            "researcher": request.llmConfig.agent_prompts.researcher,
+            "athena_query": request.llmConfig.agent_prompts.athena_query,
+        }
+        agent_prompts = {k: v for k, v in agent_prompts.items() if v}
+        logger.info(
+            "Resume: Using agentPrompts (%s)",
+            list(agent_prompts.keys()),
+        )
+        # Don't use systemPrompt as override — use agentPrompts.planner instead
+        if system_prompt_override:
+            logger.info(
+                "Resume: Ignoring systemPrompt (len=%d)",
+                len(system_prompt_override),
+            )
+            system_prompt_override = None
 
     agent_cache_key = _get_agent_cache_key(
         llm_config=config_dict,
         workspace_root=resolved_workspace_root,
         system_prompt_override=system_prompt_override,
-        agent_mode=agent_mode,
         agent_prompts=agent_prompts,
     )
 
     if agent_cache_key in _simple_agent_instances:
        agent = _simple_agent_instances[agent_cache_key]
        logger.info(
-            "Resume: Using cached agent for key %s (mode=%s, total cached: %d)",
+            "Resume: Using cached agent for key %s (total cached: %d)",
            agent_cache_key[:8],
-            agent_mode,
            len(_simple_agent_instances),
        )
     else:
        logger.info(
-            "Resume: Creating new agent for key %s (mode=%s)",
+            "Resume: Creating new agent for key %s",
            agent_cache_key[:8],
-            agent_mode,
        )
        agent = create_agent_system(
            llm_config=config_dict,
@@ -2102,20 +2202,25 @@
            enable_hitl=True,
            checkpointer=checkpointer,
            system_prompt_override=system_prompt_override,
-            agent_mode=agent_mode,
            agent_prompts=agent_prompts,
        )
        _simple_agent_instances[agent_cache_key] = agent
        logger.info(
-            "Resume: Agent cached for key %s (mode=%s, total cached: %d)",
+            "Resume: Agent cached for key %s (total cached: %d)",
            agent_cache_key[:8],
-            agent_mode,
            len(_simple_agent_instances),
        )
 
     # Prepare config with thread_id
     config = {"configurable": {"thread_id": request.threadId}}
 
+    # Set current thread_id for code history tracking
+    from agent_server.langchain.middleware.subagent_middleware import (
+        set_current_thread_id,
+    )
+
+    set_current_thread_id(request.threadId)
+
     pending_actions = _simple_agent_pending_actions.get(request.threadId, [])
 
     # Convert decisions to LangChain format
@@ -2152,7 +2257,7 @@
             "edit_file_tool",
             "multiedit_file_tool",
         ):
-            track_tool_execution(tool_name, args)
+            track_tool_execution(tool_name, args, request.threadId)
         langgraph_decisions.append(
             {
                 "type": "edit",
@@ -2205,6 +2310,10 @@
 
     step_count = 0
 
+    # Track message count for summarization detection
+    previous_message_count = 0
+    summarization_detected = False
+
     async for step in _async_stream_wrapper(
         agent,
         Command(resume={"decisions": langgraph_decisions}),
@@ -2217,6 +2326,11 @@
                 f"Thread {request.threadId} cancelled by user, stopping resume stream"
             )
             clear_cancelled_thread(request.threadId)
+            # Reset todo_active on cancellation
+            try:
+                agent.update_state(config, {"todo_active": False})
+            except Exception:
+                pass
             yield {
                 "event": "cancelled",
                 "data": json.dumps(
@@ -2252,6 +2366,30 @@
         # Process messages (no continue statements to ensure interrupt check always runs)
         if isinstance(step, dict) and "messages" in step:
             messages = step["messages"]
+            current_message_count = len(messages) if messages else 0
+
+            # Detect summarization by checking for lc_source: "summarization" marker
+            # SummarizationMiddleware injects summary into system prompt with this marker
+            if not summarization_detected and messages:
+                for msg in messages:
+                    # Check additional_kwargs for lc_source
+                    additional_kwargs = getattr(msg, "additional_kwargs", {}) or {}
+                    if additional_kwargs.get("lc_source") == "summarization":
+                        summarization_detected = True
+                        logger.info(
+                            f"[Agent-Resume] Summarization detected via lc_source marker"
+                        )
+                        yield {
+                            "event": "debug",
+                            "data": json.dumps({
+                                "status": "대화가 자동으로 압축되었습니다.",
+                                "icon": "check"
+                            }),
+                        }
+                        break
+
+            previous_message_count = current_message_count
+
             should_process_message = False
             if messages:
                 last_message = messages[-1]
@@ -2421,10 +2559,12 @@
                     }
                     return  # Exit the generator
                 else:
-                    logger.warning(
-                        "Resume: All %d todos completed but no summary JSON in current step - NOT auto-terminating",
+                    logger.info(
+                        "Resume: All %d todos completed (no summary in step) - "
+                        "continuing to wait for final_summary_tool",
                         len(todos),
                     )
+                    # Don't auto-terminate — let agent call final_summary_tool
 
             tool_name = getattr(last_message, "name", "") or ""
             logger.info(
@@ -2628,7 +2768,14 @@
                     has_summary_json = (
                         '"summary"' in msg_content
                         and '"next_items"' in msg_content
+                    ) or (
+                        "'summary'" in msg_content
+                        and "'next_items'" in msg_content
                     )
+                    # Check if last_message is a ToolMessage from final_summary_tool
+                    is_final_summary_tool_msg = (
+                        getattr(last_message, "name", "") or ""
+                    ) in ("final_summary_tool", "final_summary")
                     # Also check for markdown summary format
                     has_markdown_summary = any(
                         kw in msg_content
@@ -2642,15 +2789,18 @@
                         ]
                     )
                     has_summary = (
-                        has_summary_json or has_markdown_summary
+                        has_summary_json
+                        or has_markdown_summary
+                        or is_final_summary_tool_msg
                     )
 
                     # Only check current AIMessage for summary (not history, to avoid false positives)
                     if not has_summary:
-                        logger.warning(
-                            "Resume: All todos completed but no summary JSON in current message - NOT auto-terminating"
+                        logger.info(
+                            "Resume: All todos completed (no summary in message) - "
+                            "continuing to wait for final_summary_tool"
                        )
-                        # Don't terminate - let agent continue to generate summary
+                        # Don't auto-terminate — let agent call final_summary_tool
                     else:
                         logger.info(
                             "Resume: All %d todos completed and summary exists in current message, auto-terminating",
@@ -2805,6 +2955,10 @@
         for subagent_event in get_subagent_debug_events():
             yield subagent_event
 
+        # Drain and emit any summarization events (context compression)
+        for summarization_event in get_summarization_debug_events():
+            yield summarization_event
+
         # Check for interrupt AFTER processing todos and messages
         # This ensures todos/debug events are emitted even in interrupt steps
         if isinstance(step, dict) and "__interrupt__" in step:
@@ -2828,7 +2982,10 @@
             f"[RESUME INTERRUPT] action_requests count: {len(action_requests)}, first: {str(action_requests[0])[:200] if action_requests else 'none'}"
             )
             normalized_actions = [
-                _normalize_action_request(a) for a in action_requests
+                _normalize_action_request(
+                    a, state=step if isinstance(step, dict) else None
+                )
+                for a in action_requests
             ]
             if normalized_actions:
                 _simple_agent_pending_actions[request.threadId] = (