hdsp-jupyter-extension 2.0.19__py3-none-any.whl → 2.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. agent_server/langchain/agent_prompts/planner_prompt.py +3 -0
  2. agent_server/langchain/custom_middleware.py +0 -17
  3. agent_server/langchain/llm_factory.py +45 -5
  4. agent_server/langchain/logging_utils.py +108 -30
  5. agent_server/langchain/middleware/subagent_middleware.py +80 -11
  6. agent_server/langchain/models/__init__.py +5 -0
  7. agent_server/langchain/models/gpt_oss_chat.py +351 -0
  8. agent_server/langchain/prompts.py +1 -0
  9. agent_server/routers/langchain_agent.py +10 -0
  10. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  11. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
  12. jupyter_ext/labextension/static/lib_index_js.1917fbaea37d75dc69b3.js → hdsp_jupyter_extension-2.0.21.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js +71 -6
  13. hdsp_jupyter_extension-2.0.21.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js.map +1 -0
  14. hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.d686ab71eb65b5ef8f15.js → hdsp_jupyter_extension-2.0.21.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.93b1c499786ecd47b837.js +3 -3
  15. hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.d686ab71eb65b5ef8f15.js.map → hdsp_jupyter_extension-2.0.21.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.93b1c499786ecd47b837.js.map +1 -1
  16. {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.21.dist-info}/METADATA +1 -1
  17. {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.21.dist-info}/RECORD +47 -45
  18. jupyter_ext/_version.py +1 -1
  19. jupyter_ext/labextension/build_log.json +1 -1
  20. jupyter_ext/labextension/package.json +2 -2
  21. hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.1917fbaea37d75dc69b3.js → jupyter_ext/labextension/static/lib_index_js.90f80cb80187de8c5ae5.js +71 -6
  22. jupyter_ext/labextension/static/lib_index_js.90f80cb80187de8c5ae5.js.map +1 -0
  23. jupyter_ext/labextension/static/{remoteEntry.d686ab71eb65b5ef8f15.js → remoteEntry.93b1c499786ecd47b837.js} +3 -3
  24. jupyter_ext/labextension/static/{remoteEntry.d686ab71eb65b5ef8f15.js.map → remoteEntry.93b1c499786ecd47b837.js.map} +1 -1
  25. hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.1917fbaea37d75dc69b3.js.map +0 -1
  26. jupyter_ext/labextension/static/lib_index_js.1917fbaea37d75dc69b3.js.map +0 -1
  27. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  28. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  29. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js +0 -0
  30. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js.map +0 -0
  31. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  32. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  33. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  34. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  35. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  36. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  37. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  38. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
  39. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
  40. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
  41. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
  42. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  43. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  44. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  45. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  46. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
  47. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
  48. {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.21.dist-info}/WHEEL +0 -0
  49. {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.21.dist-info}/licenses/LICENSE +0 -0
agent_server/langchain/agent_prompts/planner_prompt.py
@@ -40,6 +40,9 @@ PLANNER_SYSTEM_PROMPT = """당신은 작업을 조율하는 Main Agent입니다.
 
  **중요**: task_tool 결과를 받은 후 바로 write_todos로 완료 처리하지 말고, 반드시 위 도구로 결과를 먼저 적용!
 
+ **🔴 KeyboardInterrupt 발생 시**: jupyter_cell_tool 실행 중 KeyboardInterrupt가 발생하면 ask_user_tool로 중단 사유를 사용자에게 확인
+ - 예: ask_user_tool(question="코드 실행이 중단되었습니다. 중단 사유를 알려주시면 다음 진행에 참고하겠습니다.", input_type="text")
+
  # write_todos 규칙 [필수]
  - 한국어로 작성
  - **🔴 기존 todo 절대 삭제 금지**: 전체 리스트를 항상 포함하고 status만 변경
agent_server/langchain/custom_middleware.py
@@ -1053,23 +1053,6 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
  break
  break
 
- # Clean AIMessage content when write_todos is called
- # Remove redundant todos JSON from content (keep summary JSON)
- if tool_name == "write_todos":
- msg_content = getattr(msg, "content", "") or ""
- if msg_content and '"todos"' in msg_content:
- # Keep content only if it's summary JSON
- is_summary_json = (
- '"summary"' in msg_content
- and '"next_items"' in msg_content
- )
- if not is_summary_json:
- # Clear redundant todos content
- msg.content = ""
- logger.info(
- "Cleared redundant todos JSON from AIMessage content (write_todos tool_call exists)"
- )
-
  return response
 
  return normalize_tool_args
agent_server/langchain/llm_factory.py
@@ -97,16 +97,44 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
  endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
  model = vllm_config.get("model", "default")
  api_key = vllm_config.get("apiKey", "dummy")
+ use_responses_api = vllm_config.get("useResponsesApi", False)
+ temperature = vllm_config.get("temperature", 0.0)
 
- logger.info(f"Creating vLLM LLM with model: {model}, endpoint: {endpoint}")
+ logger.info(
+ f"Creating vLLM LLM with model: {model}, endpoint: {endpoint}, "
+ f"use_responses_api: {use_responses_api}, temperature: {temperature}"
+ )
+
+ # Use ChatGPTOSS for gpt-oss models (Harmony format with developer role)
+ # NOTE: OpenRouter doesn't support 'developer' role - only use for direct gpt-oss endpoints
+ is_openrouter = "openrouter" in endpoint.lower()
+ if "gpt-oss" in model.lower() and not is_openrouter:
+ from agent_server.langchain.models import ChatGPTOSS
+
+ logger.info("Using ChatGPTOSS for gpt-oss model (developer role support)")
+ return ChatGPTOSS(
+ model=model,
+ base_url=endpoint,
+ api_key=api_key,
+ temperature=temperature,
+ max_tokens=8192,
+ streaming=False,
+ callbacks=callbacks,
+ )
+ elif "gpt-oss" in model.lower() and is_openrouter:
+ logger.warning(
+ "gpt-oss model via OpenRouter - using standard ChatOpenAI "
+ "(developer role not supported by OpenRouter)"
+ )
 
  return ChatOpenAI(
  model=model,
  api_key=api_key,
  base_url=endpoint, # Use endpoint as-is (no /v1 suffix added)
  streaming=False, # Agent mode: disable LLM streaming (SSE handled by agent server)
- temperature=0.0,
- max_tokens=32768,
+ temperature=temperature,
+ max_tokens=8192,
+ use_responses_api=use_responses_api, # Use /v1/responses endpoint if True
  callbacks=callbacks,
  )
 
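The new routing in _create_vllm_llm is driven entirely by the vllm config block. Below is a minimal sketch of a config that would take the ChatGPTOSS branch; the key names are the ones read above, while the endpoint and model values are placeholders.

    # Hypothetical llm_config; only the keys read by _create_vllm_llm are shown.
    llm_config = {
        "provider": "vllm",
        "vllm": {
            "endpoint": "http://localhost:8000/v1",  # not an OpenRouter URL -> ChatGPTOSS branch
            "model": "gpt-oss-20b",                  # contains "gpt-oss"
            "apiKey": "dummy",
            "useResponsesApi": False,                # forwarded to ChatOpenAI as use_responses_api
            "temperature": 0.0,
        },
    }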
@@ -148,14 +176,26 @@ def create_summarization_llm(llm_config: Dict[str, Any]):
  temperature=0.0,
  )
  elif provider == "vllm":
- from langchain_openai import ChatOpenAI
-
  vllm_config = llm_config.get("vllm", {})
  # User provides full base URL (e.g., https://openrouter.ai/api/v1)
  endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
  model = vllm_config.get("model", "default")
  api_key = vllm_config.get("apiKey", "dummy")
 
+ # Use ChatGPTOSS for gpt-oss models (but not via OpenRouter)
+ is_openrouter = "openrouter" in endpoint.lower()
+ if "gpt-oss" in model.lower() and not is_openrouter:
+ from agent_server.langchain.models import ChatGPTOSS
+
+ return ChatGPTOSS(
+ model=model,
+ base_url=endpoint,
+ api_key=api_key,
+ temperature=0.0,
+ )
+
+ from langchain_openai import ChatOpenAI
+
  return ChatOpenAI(
  model=model,
  api_key=api_key,
agent_server/langchain/logging_utils.py
@@ -14,8 +14,40 @@ from langchain_core.callbacks import BaseCallbackHandler
 
  logger = logging.getLogger(__name__)
 
+ # Dedicated logger for LLM responses - always enabled with its own handler
+ llm_response_logger = logging.getLogger("agent_server.llm_response")
+ llm_response_logger.setLevel(logging.INFO)
+ llm_response_logger.propagate = True # Propagate to root logger
+
+ # Ensure it has a handler if running standalone
+ if not llm_response_logger.handlers and not logging.getLogger().handlers:
+ _handler = logging.StreamHandler()
+ _handler.setFormatter(logging.Formatter("%(message)s"))
+ llm_response_logger.addHandler(_handler)
+
+
+ def disable_langchain_logging():
+ """Disable all langchain logging except LLM responses."""
+ # Set all langchain loggers to CRITICAL
+ for name in list(logging.Logger.manager.loggerDict.keys()):
+ if "langchain" in name.lower() or name.startswith("agent_server.langchain"):
+ logging.getLogger(name).setLevel(logging.CRITICAL)
+ # Keep LLM response logger at INFO
+ llm_response_logger.setLevel(logging.INFO)
+
+
+ # Auto-disable on import (comment this line to re-enable all logs)
+ disable_langchain_logging()
+
  LOG_SEPARATOR = "=" * 96
  LOG_SUBSECTION = "-" * 96
+ LOG_EMOJI_LINE = "🔵" * 48
+ LOG_REQUEST_START = f"\n\n{'🟢' * 48}\n{'=' * 96}\n 📤 LLM REQUEST START\n{'=' * 96}"
+ LOG_REQUEST_END = f"{'=' * 96}\n 📤 LLM REQUEST END\n{'=' * 96}\n{'🟢' * 48}\n"
+ LOG_RESPONSE_START = (
+ f"\n\n{LOG_EMOJI_LINE}\n{'=' * 96}\n ✨ LLM RESPONSE START\n{'=' * 96}"
+ )
+ LOG_RESPONSE_END = f"{'=' * 96}\n ✅ LLM RESPONSE END\n{'=' * 96}\n{LOG_EMOJI_LINE}\n"
 
 
  def _format_system_prompt_for_log(messages) -> tuple[int, int, str]:
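Since disable_langchain_logging() runs at import time and only adjusts loggers that already exist in logging.Logger.manager.loggerDict, its effect can be illustrated with a short sketch (not part of the diff; it assumes the package is importable as agent_server.langchain.logging_utils):

    import logging

    logging.basicConfig(level=logging.INFO)

    # A langchain logger that already exists when the module is imported
    noisy = logging.getLogger("langchain_core.tracers")

    from agent_server.langchain import logging_utils  # import triggers disable_langchain_logging()

    noisy.info("dropped - this logger was raised to CRITICAL on import")
    logging_utils.llm_response_logger.info("kept - the dedicated response logger stays at INFO")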
@@ -179,45 +211,91 @@ class LLMTraceLogger(BaseCallbackHandler):
  logger.info("%s", "\n".join(lines))
 
  def on_chat_model_start(self, serialized, messages, **kwargs) -> None:
- if not messages:
- logger.info(
- "%s",
- _format_messages_block("AGENT -> LLM PROMPT (<none>)", []),
- )
- return
- self._log_prompt_batches("AGENT -> LLM PROMPT", messages)
+ """Log LLM request messages as raw structured JSON."""
+ print(LOG_REQUEST_START, flush=True)
+
+ # Build raw structured request data
+ request_data = {
+ "model": serialized.get("name", "unknown") if serialized else "unknown",
+ "kwargs": {k: str(v)[:200] for k, v in kwargs.items() if k != "messages"},
+ "messages": [],
+ }
+
+ for batch in self._normalize_batches(messages):
+ batch_messages = []
+ for msg in batch:
+ batch_messages.append(_serialize_message(msg))
+ request_data["messages"].append(batch_messages)
+
+ # Output beautified JSON
+ print(_pretty_json(request_data), flush=True)
+
+ print(LOG_REQUEST_END, flush=True)
+
+ # --- OLD TEXT-PARSED LOGGING (commented out) ---
+ # for batch_idx, batch in enumerate(self._normalize_batches(messages)):
+ # msg_types = {}
+ # for msg in batch:
+ # msg_type = msg.__class__.__name__
+ # msg_types[msg_type] = msg_types.get(msg_type, 0) + 1
+ # print(f"\nBatch {batch_idx}: {len(batch)} messages - {msg_types}", flush=True)
+ # recent_count = min(5, len(batch))
+ # if len(batch) > recent_count:
+ # print(f"... ({len(batch) - recent_count} earlier messages omitted)", flush=True)
+ # for idx, message in enumerate(batch[-recent_count:], start=len(batch) - recent_count):
+ # lines = [LOG_SUBSECTION]
+ # lines.append(f"[{idx}] {message.__class__.__name__}")
+ # lines.append(_pretty_json(_serialize_message(message)))
+ # print("\n".join(lines), flush=True)
 
  def on_chat_model_end(self, response, **kwargs) -> None:
+ """Log LLM response as raw structured JSON."""
+ print(LOG_RESPONSE_START, flush=True)
+
+ # Build raw structured response data
+ response_data = {
+ "llm_output": getattr(response, "llm_output", None),
+ "generations": [],
+ }
+
  generations = getattr(response, "generations", None) or []
  if generations and isinstance(generations[0], list):
  batches = generations
  else:
  batches = [generations]
 
- for batch_idx, batch in enumerate(batches):
- for gen_idx, generation in enumerate(batch):
+ for batch in batches:
+ batch_data = []
+ for generation in batch:
+ gen_data = {}
  message = getattr(generation, "message", None)
- if not message:
- continue
-
- title = (
- f"LLM -> AGENT RESPONSE (batch={batch_idx}, generation={gen_idx})"
+ if message:
+ gen_data["message"] = _serialize_message(message)
+ gen_data["text"] = getattr(generation, "text", None)
+ gen_data["generation_info"] = getattr(
+ generation, "generation_info", None
  )
- logger.info("%s", _format_messages_block(title, [message]))
-
- tool_calls = getattr(message, "tool_calls", None)
- if tool_calls:
- tool_title = (
- "LLM -> AGENT TOOL CALLS "
- f"(batch={batch_idx}, generation={gen_idx})"
- )
- logger.info("%s", _format_json_block(tool_title, tool_calls))
+ batch_data.append(gen_data)
+ response_data["generations"].append(batch_data)
+
+ # Output beautified JSON
+ print(_pretty_json(response_data), flush=True)
+
+ print(LOG_RESPONSE_END, flush=True)
+
+ # --- OLD TEXT-PARSED LOGGING (commented out) ---
+ # for batch_idx, batch in enumerate(batches):
+ # for gen_idx, generation in enumerate(batch):
+ # message = getattr(generation, "message", None)
+ # if not message:
+ # continue
+ # title = f"LLM -> AGENT RESPONSE (batch={batch_idx}, generation={gen_idx})"
+ # print(_format_messages_block(title, [message]), flush=True)
+ # tool_calls = getattr(message, "tool_calls", None)
+ # if tool_calls:
+ # tool_title = f"LLM -> AGENT TOOL CALLS (batch={batch_idx}, generation={gen_idx})"
+ # print(_format_json_block(tool_title, tool_calls), flush=True)
 
  def on_llm_start(self, serialized, prompts, **kwargs) -> None:
- if not prompts:
- logger.info("%s", _format_json_block("LLM PROMPT (<none>)", ""))
- return
-
- for idx, prompt in enumerate(prompts):
- title = f"LLM PROMPT (batch={idx}, length={len(prompt)})"
- logger.info("%s", _format_json_block(title, prompt))
+ # Request logging disabled - only log responses
+ pass
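For orientation, the payload printed by on_chat_model_end now has roughly the shape sketched below (illustrative only; the per-message fields depend on _serialize_message, which is not shown in this diff):

    # Illustrative shape of response_data before it is passed to _pretty_json()
    response_data = {
        "llm_output": {"token_usage": "..."},  # whatever the provider returns, or None
        "generations": [                        # one inner list per batch
            [
                {
                    "message": {"type": "AIMessage", "content": "...", "tool_calls": []},
                    "text": "...",
                    "generation_info": {"finish_reason": "stop"},
                }
            ]
        ],
    }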
agent_server/langchain/middleware/subagent_middleware.py
@@ -9,8 +9,11 @@ Key features:
  - Context isolation: subagents run in clean context
  - Synchronous execution: subagent returns result directly to caller
  - Nested subagent support: python_developer can call athena_query
+ - Subagent caching: compiled agents are cached to avoid recompilation overhead
  """
 
+ import hashlib
+ import json
  import logging
  from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
@@ -25,6 +28,8 @@ logger = logging.getLogger(__name__)
  # Global registry for subagent factories (set by AgentFactory)
  _subagent_factory = None
  _current_llm_config = None
+ # Subagent cache: key = "{agent_name}_{config_hash}" -> compiled agent
+ _subagent_cache: Dict[str, Any] = {}
 
 
  def set_subagent_factory(factory_func, llm_config: Dict[str, Any]):
@@ -32,10 +37,12 @@ def set_subagent_factory(factory_func, llm_config: Dict[str, Any]):
  Set the subagent factory function.
  Called by AgentFactory during initialization.
  """
- global _subagent_factory, _current_llm_config
+ global _subagent_factory, _current_llm_config, _subagent_cache
  _subagent_factory = factory_func
  _current_llm_config = llm_config
- logger.info("SubAgentMiddleware factory initialized")
+ # Clear cache when factory changes (new LLM config)
+ _subagent_cache.clear()
+ logger.info("SubAgentMiddleware factory initialized (cache cleared)")
 
 
  def get_subagent_factory():
@@ -43,6 +50,48 @@ def get_subagent_factory():
  return _subagent_factory, _current_llm_config
 
 
+ def _get_config_hash(llm_config: Dict[str, Any]) -> str:
+ """Generate a hash of llm_config for caching."""
+ config_str = json.dumps(llm_config, sort_keys=True, default=str)
+ return hashlib.md5(config_str.encode()).hexdigest()[:12]
+
+
+ def get_or_create_subagent(
+ agent_name: str, factory_func, llm_config: Dict[str, Any]
+ ) -> Any:
+ """
+ Get cached subagent or create new one.
+
+ Caching avoids expensive recompilation of LangGraph agents.
+ Cache key = "{agent_name}_{config_hash}" to handle different LLM configs.
+ """
+ global _subagent_cache
+
+ config_hash = _get_config_hash(llm_config)
+ cache_key = f"{agent_name}_{config_hash}"
+
+ if cache_key in _subagent_cache:
+ logger.info(f"Using cached subagent '{agent_name}' (key={cache_key})")
+ return _subagent_cache[cache_key]
+
+ logger.info(f"Creating new subagent '{agent_name}' (key={cache_key})...")
+ subagent = factory_func(agent_name, llm_config)
+ _subagent_cache[cache_key] = subagent
+ logger.info(
+ f"Cached subagent '{agent_name}' (total cached: {len(_subagent_cache)})"
+ )
+
+ return subagent
+
+
+ def clear_subagent_cache():
+ """Clear the subagent cache. Useful for testing or config changes."""
+ global _subagent_cache
+ count = len(_subagent_cache)
+ _subagent_cache.clear()
+ logger.info(f"Subagent cache cleared ({count} entries removed)")
+
+
  def create_task_tool(
  caller_name: str,
  allowed_subagents: Optional[List[str]] = None,
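A small usage sketch of the new cache helpers (the factory below is a stand-in; real factories are registered by AgentFactory through set_subagent_factory):

    from agent_server.langchain.middleware.subagent_middleware import (
        clear_subagent_cache,
        get_or_create_subagent,
    )

    def fake_factory(agent_name, llm_config):
        # Stand-in for a real factory; returns a dummy object instead of a compiled LangGraph agent
        return object()

    cfg = {"provider": "vllm", "vllm": {"model": "default"}}

    a1 = get_or_create_subagent("python_developer", fake_factory, cfg)
    a2 = get_or_create_subagent("python_developer", fake_factory, cfg)
    assert a1 is a2  # identical config -> same MD5 config hash -> cache hit, no re-creation

    cfg2 = {"provider": "vllm", "vllm": {"model": "gpt-oss-20b"}}
    a3 = get_or_create_subagent("python_developer", fake_factory, cfg2)
    assert a3 is not a1  # different config hash -> separate cache entry

    clear_subagent_cache()  # drops every cached agent (e.g. after a config change)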
@@ -96,11 +145,13 @@ create_task_tool(
  )
  context: Optional[str] = Field(
  default=None,
- description="Additional context for the subagent: resource info (file sizes, memory), previous code, variable state, etc."
+ description="Additional context for the subagent: resource info (file sizes, memory), previous code, variable state, etc.",
  )
 
  @tool(args_schema=TaskInput)
- def task_tool(agent_name: str, description: str, context: Optional[str] = None) -> str:
+ def task_tool(
+ agent_name: str, description: str, context: Optional[str] = None
+ ) -> str:
  """
  Delegate a task to a specialized subagent.
 
@@ -133,10 +184,10 @@
 
  # Import subagent event emitters
  from agent_server.langchain.middleware.subagent_events import (
- emit_subagent_start,
+ clear_current_subagent,
  emit_subagent_complete,
+ emit_subagent_start,
  set_current_subagent,
- clear_current_subagent,
  )
 
  # Emit subagent start event for UI
@@ -148,11 +199,17 @@
  return "Error: SubAgentMiddleware not initialized. Call set_subagent_factory first."
 
  try:
+ import time
+
  # Set current subagent context for tool call tracking
  set_current_subagent(agent_name)
 
- # Create the subagent
- subagent = factory_func(agent_name, llm_config)
+ # Get or create the subagent (cached for performance)
+ # Avoids expensive LangGraph recompilation on each call
+ t0 = time.time()
+ subagent = get_or_create_subagent(agent_name, factory_func, llm_config)
+ t1 = time.time()
+ logger.info(f"[TIMING] get_or_create_subagent took {t1-t0:.2f}s")
 
  # Execute subagent synchronously with clean context
  # The subagent runs in isolation, receiving task description + optional context
@@ -169,15 +226,18 @@
  enhanced_context = context
  if agent_name == "python_developer":
  try:
+ t2 = time.time()
  from agent_server.langchain.middleware.code_history_middleware import (
- get_context_with_history,
  get_code_history_tracker,
+ get_context_with_history,
  )
+
  tracker = get_code_history_tracker()
  if tracker.get_entry_count() > 0:
  enhanced_context = get_context_with_history(context)
+ t3 = time.time()
  logger.info(
- f"[{caller_name}] Injected code history into context "
+ f"[TIMING] code history injection took {t3-t2:.2f}s "
  f"(entries={tracker.get_entry_count()}, "
  f"context_len={len(enhanced_context) if enhanced_context else 0})"
  )
@@ -194,13 +254,21 @@
  else:
  message_content = description
 
- logger.info(f"[{caller_name}] Subagent message length: {len(message_content)}")
+ logger.info(
+ f"[{caller_name}] Subagent message length: {len(message_content)}"
+ )
 
  # Execute the subagent
+ t_invoke_start = time.time()
+ logger.info(f"[TIMING] About to invoke subagent '{agent_name}'...")
  result = subagent.invoke(
  {"messages": [{"role": "user", "content": message_content}]},
  config=subagent_config,
  )
+ t_invoke_end = time.time()
+ logger.info(
+ f"[TIMING] subagent.invoke() took {t_invoke_end-t_invoke_start:.2f}s"
+ )
 
  # Extract the final message from the result
  messages = result.get("messages", [])
@@ -223,6 +291,7 @@
  from agent_server.langchain.middleware.description_injector import (
  process_task_tool_response,
  )
+
  process_task_tool_response(agent_name, str(response))
  except Exception as e:
  logger.warning(f"Failed to extract description: {e}")
agent_server/langchain/models/__init__.py (new file)
@@ -0,0 +1,5 @@
+ """Custom LangChain chat models."""
+
+ from agent_server.langchain.models.gpt_oss_chat import ChatGPTOSS
+
+ __all__ = ["ChatGPTOSS"]