hdsp-jupyter-extension 2.0.19__py3-none-any.whl → 2.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/langchain/agent_prompts/planner_prompt.py +3 -0
- agent_server/langchain/custom_middleware.py +0 -17
- agent_server/langchain/llm_factory.py +37 -5
- agent_server/langchain/logging_utils.py +41 -16
- agent_server/langchain/models/__init__.py +5 -0
- agent_server/langchain/models/gpt_oss_chat.py +351 -0
- agent_server/langchain/prompts.py +1 -0
- agent_server/routers/langchain_agent.py +10 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
- jupyter_ext/labextension/static/lib_index_js.1917fbaea37d75dc69b3.js → hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js +71 -6
- hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js.map +1 -0
- hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.d686ab71eb65b5ef8f15.js → hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.586bf5521d043cdd37b8.js +3 -3
- jupyter_ext/labextension/static/remoteEntry.d686ab71eb65b5ef8f15.js.map → hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.586bf5521d043cdd37b8.js.map +1 -1
- {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.20.dist-info}/METADATA +1 -1
- {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.20.dist-info}/RECORD +46 -44
- jupyter_ext/_version.py +1 -1
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +2 -2
- hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.1917fbaea37d75dc69b3.js → jupyter_ext/labextension/static/lib_index_js.90f80cb80187de8c5ae5.js +71 -6
- jupyter_ext/labextension/static/lib_index_js.90f80cb80187de8c5ae5.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.d686ab71eb65b5ef8f15.js → remoteEntry.586bf5521d043cdd37b8.js} +3 -3
- hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.d686ab71eb65b5ef8f15.js.map → jupyter_ext/labextension/static/remoteEntry.586bf5521d043cdd37b8.js.map +1 -1
- hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.1917fbaea37d75dc69b3.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.1917fbaea37d75dc69b3.js.map +0 -1
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.20.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.20.dist-info}/licenses/LICENSE +0 -0
agent_server/langchain/agent_prompts/planner_prompt.py
@@ -40,6 +40,9 @@ PLANNER_SYSTEM_PROMPT = """당신은 작업을 조율하는 Main Agent입니다.
 
 **중요**: task_tool 결과를 받은 후 바로 write_todos로 완료 처리하지 말고, 반드시 위 도구로 결과를 먼저 적용!
 
+**🔴 KeyboardInterrupt 발생 시**: jupyter_cell_tool 실행 중 KeyboardInterrupt가 발생하면 ask_user_tool로 중단 사유를 사용자에게 확인
+- 예: ask_user_tool(question="코드 실행이 중단되었습니다. 중단 사유를 알려주시면 다음 진행에 참고하겠습니다.", input_type="text")
+
 # write_todos 규칙 [필수]
 - 한국어로 작성
 - **🔴 기존 todo 절대 삭제 금지**: 전체 리스트를 항상 포함하고 status만 변경

agent_server/langchain/custom_middleware.py
@@ -1053,23 +1053,6 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
                    break
            break
 
-        # Clean AIMessage content when write_todos is called
-        # Remove redundant todos JSON from content (keep summary JSON)
-        if tool_name == "write_todos":
-            msg_content = getattr(msg, "content", "") or ""
-            if msg_content and '"todos"' in msg_content:
-                # Keep content only if it's summary JSON
-                is_summary_json = (
-                    '"summary"' in msg_content
-                    and '"next_items"' in msg_content
-                )
-                if not is_summary_json:
-                    # Clear redundant todos content
-                    msg.content = ""
-                    logger.info(
-                        "Cleared redundant todos JSON from AIMessage content (write_todos tool_call exists)"
-                    )
-
         return response
 
     return normalize_tool_args

agent_server/langchain/llm_factory.py
@@ -97,16 +97,37 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
     endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
     model = vllm_config.get("model", "default")
     api_key = vllm_config.get("apiKey", "dummy")
+    use_responses_api = vllm_config.get("useResponsesApi", False)
+    temperature = vllm_config.get("temperature", 0.0)
 
-    logger.info(
+    logger.info(
+        f"Creating vLLM LLM with model: {model}, endpoint: {endpoint}, "
+        f"use_responses_api: {use_responses_api}, temperature: {temperature}"
+    )
+
+    # Use ChatGPTOSS for gpt-oss models (Harmony format with developer role)
+    if "gpt-oss" in model.lower():
+        from agent_server.langchain.models import ChatGPTOSS
+
+        logger.info(f"Using ChatGPTOSS for gpt-oss model (developer role support)")
+        return ChatGPTOSS(
+            model=model,
+            base_url=endpoint,
+            api_key=api_key,
+            temperature=temperature,
+            max_tokens=8192,
+            streaming=False,
+            callbacks=callbacks,
+        )
 
     return ChatOpenAI(
         model=model,
         api_key=api_key,
         base_url=endpoint,  # Use endpoint as-is (no /v1 suffix added)
         streaming=False,  # Agent mode: disable LLM streaming (SSE handled by agent server)
-        temperature=
-        max_tokens=
+        temperature=temperature,
+        max_tokens=8192,
+        use_responses_api=use_responses_api,  # Use /v1/responses endpoint if True
        callbacks=callbacks,
     )
 

agent_server/langchain/llm_factory.py
@@ -148,14 +169,25 @@ def create_summarization_llm(llm_config: Dict[str, Any]):
             temperature=0.0,
         )
     elif provider == "vllm":
-        from langchain_openai import ChatOpenAI
-
         vllm_config = llm_config.get("vllm", {})
         # User provides full base URL (e.g., https://openrouter.ai/api/v1)
         endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
         model = vllm_config.get("model", "default")
         api_key = vllm_config.get("apiKey", "dummy")
 
+        # Use ChatGPTOSS for gpt-oss models
+        if "gpt-oss" in model.lower():
+            from agent_server.langchain.models import ChatGPTOSS
+
+            return ChatGPTOSS(
+                model=model,
+                base_url=endpoint,
+                api_key=api_key,
+                temperature=0.0,
+            )
+
+        from langchain_openai import ChatOpenAI
+
         return ChatOpenAI(
             model=model,
             api_key=api_key,
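
Both llm_factory.py hunks route on the configured model name before falling back to ChatOpenAI. Below is a minimal illustrative sketch of that routing decision only; it is not part of the diff, `pick_chat_model` is a hypothetical helper, and the model names in the calls are example values.

```python
# Illustrative sketch: mirrors the branch added in _create_vllm_llm /
# create_summarization_llm. Not part of the published package.
def pick_chat_model(vllm_config: dict) -> str:
    model = vllm_config.get("model", "default")
    # gpt-oss models go through the new ChatGPTOSS wrapper (Harmony format,
    # SystemMessage forwarded as the 'developer' role); everything else keeps
    # using langchain_openai.ChatOpenAI with the new temperature/max_tokens args.
    if "gpt-oss" in model.lower():
        return "ChatGPTOSS"
    return "ChatOpenAI"


print(pick_chat_model({"model": "openai/gpt-oss-120b"}))        # -> ChatGPTOSS
print(pick_chat_model({"model": "Qwen/Qwen2.5-72B-Instruct"}))  # -> ChatOpenAI
```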

agent_server/langchain/logging_utils.py
@@ -14,8 +14,36 @@ from langchain_core.callbacks import BaseCallbackHandler
 
 logger = logging.getLogger(__name__)
 
+# Dedicated logger for LLM responses - always enabled with its own handler
+llm_response_logger = logging.getLogger("agent_server.llm_response")
+llm_response_logger.setLevel(logging.INFO)
+llm_response_logger.propagate = True  # Propagate to root logger
+
+# Ensure it has a handler if running standalone
+if not llm_response_logger.handlers and not logging.getLogger().handlers:
+    _handler = logging.StreamHandler()
+    _handler.setFormatter(logging.Formatter('%(message)s'))
+    llm_response_logger.addHandler(_handler)
+
+
+def disable_langchain_logging():
+    """Disable all langchain logging except LLM responses."""
+    # Set all langchain loggers to CRITICAL
+    for name in list(logging.Logger.manager.loggerDict.keys()):
+        if "langchain" in name.lower() or name.startswith("agent_server.langchain"):
+            logging.getLogger(name).setLevel(logging.CRITICAL)
+    # Keep LLM response logger at INFO
+    llm_response_logger.setLevel(logging.INFO)
+
+
+# Auto-disable on import (comment this line to re-enable all logs)
+disable_langchain_logging()
+
 LOG_SEPARATOR = "=" * 96
 LOG_SUBSECTION = "-" * 96
+LOG_EMOJI_LINE = "🔵" * 48
+LOG_RESPONSE_START = f"\n\n{LOG_EMOJI_LINE}\n{'=' * 96}\n ✨ LLM RESPONSE START\n{'=' * 96}"
+LOG_RESPONSE_END = f"{'=' * 96}\n ✅ LLM RESPONSE END\n{'=' * 96}\n{LOG_EMOJI_LINE}\n"
 
 
 def _format_system_prompt_for_log(messages) -> tuple[int, int, str]:
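
The setup added above silences every langchain-related logger at import time while keeping a dedicated response channel at INFO. A small illustrative sketch of that effect follows; it is not part of the diff, and the `langchain_core.runnables` logger name is just an example stand-in for the loggers the loop matches.

```python
import logging

logging.basicConfig(level=logging.INFO, format="%(message)s")

# Dedicated response channel, as configured in logging_utils.py.
llm_response_logger = logging.getLogger("agent_server.llm_response")
llm_response_logger.setLevel(logging.INFO)

# What disable_langchain_logging() does to each matching logger.
noisy = logging.getLogger("langchain_core.runnables")  # example name
noisy.setLevel(logging.CRITICAL)

noisy.info("suppressed - below CRITICAL")            # not printed
llm_response_logger.info("LLM RESPONSE: printed")    # printed
```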

agent_server/langchain/logging_utils.py
@@ -179,15 +207,15 @@ class LLMTraceLogger(BaseCallbackHandler):
         logger.info("%s", "\n".join(lines))
 
     def on_chat_model_start(self, serialized, messages, **kwargs) -> None:
-
-
-                "%s",
-                _format_messages_block("AGENT -> LLM PROMPT (<none>)", []),
-            )
-            return
-        self._log_prompt_batches("AGENT -> LLM PROMPT", messages)
+        # Request logging disabled - only log responses
+        pass
 
     def on_chat_model_end(self, response, **kwargs) -> None:
+        # Debug: Check if callback is even called
+        print("[DEBUG] on_chat_model_end CALLED!", flush=True)
+        # Use print for guaranteed visibility
+        print(LOG_RESPONSE_START, flush=True)
+
         generations = getattr(response, "generations", None) or []
         if generations and isinstance(generations[0], list):
             batches = generations

agent_server/langchain/logging_utils.py
@@ -203,7 +231,7 @@ class LLMTraceLogger(BaseCallbackHandler):
                title = (
                    f"LLM -> AGENT RESPONSE (batch={batch_idx}, generation={gen_idx})"
                )
-
+                print(_format_messages_block(title, [message]), flush=True)
 
                tool_calls = getattr(message, "tool_calls", None)
                if tool_calls:

agent_server/langchain/logging_utils.py
@@ -211,13 +239,10 @@ class LLMTraceLogger(BaseCallbackHandler):
                        "LLM -> AGENT TOOL CALLS "
                        f"(batch={batch_idx}, generation={gen_idx})"
                    )
-
+                    print(_format_json_block(tool_title, tool_calls), flush=True)
 
-
-        if not prompts:
-            logger.info("%s", _format_json_block("LLM PROMPT (<none>)", ""))
-            return
+        print(LOG_RESPONSE_END, flush=True)
 
-
-
-
+    def on_llm_start(self, serialized, prompts, **kwargs) -> None:
+        # Request logging disabled - only log responses
+        pass

agent_server/langchain/models/gpt_oss_chat.py (new file)
@@ -0,0 +1,351 @@
+"""
+ChatGPTOSS: Custom ChatModel for gpt-oss (Harmony format).
+
+gpt-oss uses a different instruction hierarchy:
+- developer: behavioral rules/instructions (highest priority)
+- system: metadata (date, cutoff, tools)
+- user: actual questions
+
+LangChain's ChatOpenAI sends everything as 'system', which gpt-oss treats as low-priority metadata.
+This class converts SystemMessage to 'developer' role for proper instruction following.
+"""
+
+import json
+import logging
+import uuid
+from typing import Any, Dict, Iterator, List, Optional, Union
+
+from langchain_core.callbacks import CallbackManagerForLLMRun
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.messages import (
+    AIMessage,
+    AIMessageChunk,
+    BaseMessage,
+    HumanMessage,
+    SystemMessage,
+    ToolMessage,
+)
+from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
+from langchain_core.tools import BaseTool
+from openai import OpenAI
+from pydantic import Field
+
+logger = logging.getLogger(__name__)
+
+
+class ChatGPTOSS(BaseChatModel):
+    """ChatModel for gpt-oss with developer role support.
+
+    Converts SystemMessage to 'developer' role for proper instruction hierarchy
+    in gpt-oss (Harmony format) models.
+    """
+
+    client: Any = Field(default=None, exclude=True)
+    model: str = Field(default="openai/gpt-oss-120b")
+    base_url: str = Field(default="http://localhost:8000/v1")
+    api_key: str = Field(default="dummy")
+    temperature: float = Field(default=0.0)
+    max_tokens: int = Field(default=8192)
+    streaming: bool = Field(default=False)
+
+    # Tool-related fields (private, not exposed to pydantic)
+    _tools: Optional[List[Dict[str, Any]]] = None
+    _tool_choice: Optional[Union[str, Dict[str, Any]]] = None
+
+    def __init__(self, callbacks=None, **kwargs):
+        # Remove callbacks from kwargs before super().__init__ if present
+        # BaseChatModel handles callbacks through its own mechanism
+        super().__init__(callbacks=callbacks, **kwargs)
+        # Initialize OpenAI client
+        self.client = OpenAI(
+            base_url=self.base_url,
+            api_key=self.api_key,
+        )
+        self._tools = None
+        self._tool_choice = None
+
+    @property
+    def _llm_type(self) -> str:
+        return "gpt-oss"
+
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        return {
+            "model": self.model,
+            "base_url": self.base_url,
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+        }
+
+    def bind_tools(
+        self,
+        tools: List[Union[BaseTool, Dict[str, Any]]],
+        *,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        **kwargs,
+    ) -> "ChatGPTOSS":
+        """Bind tools to the model.
+
+        Returns a new instance with tools bound.
+        """
+        # Convert tools to OpenAI format
+        formatted_tools = []
+        for tool in tools:
+            if isinstance(tool, BaseTool):
+                # Convert LangChain tool to OpenAI format
+                tool_schema = {
+                    "type": "function",
+                    "function": {
+                        "name": tool.name,
+                        "description": tool.description or "",
+                        "parameters": tool.args_schema.schema() if tool.args_schema else {"type": "object", "properties": {}},
+                    },
+                }
+                formatted_tools.append(tool_schema)
+            elif isinstance(tool, dict):
+                # Already in dict format, ensure it has correct structure
+                if "type" not in tool:
+                    tool = {"type": "function", "function": tool}
+                formatted_tools.append(tool)
+
+        # Create new instance with tools bound
+        new_instance = ChatGPTOSS(
+            model=self.model,
+            base_url=self.base_url,
+            api_key=self.api_key,
+            temperature=self.temperature,
+            max_tokens=self.max_tokens,
+            streaming=self.streaming,
+        )
+        new_instance._tools = formatted_tools
+        new_instance._tool_choice = tool_choice
+        return new_instance
+
+    def _convert_messages(self, messages: List[BaseMessage]) -> List[Dict[str, Any]]:
+        """Convert LangChain messages to OpenAI format with developer role.
+
+        Key conversion: SystemMessage -> role=developer
+        """
+        result = []
+
+        for msg in messages:
+            if isinstance(msg, SystemMessage):
+                # Convert system to developer for gpt-oss instruction hierarchy
+                result.append({
+                    "role": "developer",
+                    "content": msg.content,
+                })
+            elif isinstance(msg, HumanMessage):
+                result.append({
+                    "role": "user",
+                    "content": msg.content,
+                })
+            elif isinstance(msg, AIMessage):
+                ai_msg: Dict[str, Any] = {
+                    "role": "assistant",
+                    "content": msg.content or "",
+                }
+                # Include tool calls if present
+                tool_calls = getattr(msg, "tool_calls", None)
+                if tool_calls:
+                    ai_msg["tool_calls"] = [
+                        {
+                            "id": tc.get("id", str(uuid.uuid4())[:8]),
+                            "type": "function",
+                            "function": {
+                                "name": tc["name"],
+                                "arguments": json.dumps(tc["args"]) if isinstance(tc["args"], dict) else tc["args"],
+                            },
+                        }
+                        for tc in tool_calls
+                    ]
+                result.append(ai_msg)
+            elif isinstance(msg, ToolMessage):
+                result.append({
+                    "role": "tool",
+                    "tool_call_id": msg.tool_call_id,
+                    "content": msg.content,
+                })
+            else:
+                # Fallback for other message types
+                role = getattr(msg, "role", "user")
+                result.append({
+                    "role": role,
+                    "content": msg.content,
+                })
+
+        return result
+
+    def _create_chat_result(self, response) -> ChatResult:
+        """Convert OpenAI response to LangChain ChatResult."""
+        choice = response.choices[0]
+        message = choice.message
+
+        # Build AIMessage
+        content = message.content or ""
+        additional_kwargs: Dict[str, Any] = {}
+        tool_calls_list = []
+
+        if message.tool_calls:
+            additional_kwargs["tool_calls"] = [
+                {
+                    "id": tc.id,
+                    "type": "function",
+                    "function": {
+                        "name": tc.function.name,
+                        "arguments": tc.function.arguments,
+                    },
+                }
+                for tc in message.tool_calls
+            ]
+            # Also convert to LangChain tool_calls format
+            for tc in message.tool_calls:
+                try:
+                    args = json.loads(tc.function.arguments)
+                except json.JSONDecodeError:
+                    args = {"raw": tc.function.arguments}
+                tool_calls_list.append({
+                    "name": tc.function.name,
+                    "args": args,
+                    "id": tc.id,
+                    "type": "tool_call",
+                })
+
+        ai_message = AIMessage(
+            content=content,
+            additional_kwargs=additional_kwargs,
+            tool_calls=tool_calls_list if tool_calls_list else [],
+            response_metadata={
+                "model_name": response.model,
+                "finish_reason": choice.finish_reason,
+                "id": response.id,
+            },
+        )
+
+        # Add usage metadata if available
+        if response.usage:
+            ai_message.usage_metadata = {
+                "input_tokens": response.usage.prompt_tokens,
+                "output_tokens": response.usage.completion_tokens,
+                "total_tokens": response.usage.total_tokens,
+            }
+
+        generation = ChatGeneration(message=ai_message)
+        return ChatResult(generations=[generation])
+
+    def _generate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs,
+    ) -> ChatResult:
+        """Generate a response from the model."""
+        openai_messages = self._convert_messages(messages)
+
+        # Build request kwargs
+        request_kwargs: Dict[str, Any] = {
+            "model": self.model,
+            "messages": openai_messages,
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+        }
+
+        if stop:
+            request_kwargs["stop"] = stop
+
+        if self._tools:
+            request_kwargs["tools"] = self._tools
+        if self._tool_choice:
+            request_kwargs["tool_choice"] = self._tool_choice
+
+        # Make API call
+        logger.debug(f"ChatGPTOSS request: model={self.model}, messages_count={len(openai_messages)}")
+        response = self.client.chat.completions.create(**request_kwargs)
+
+        return self._create_chat_result(response)
+
+    def _stream(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs,
+    ) -> Iterator[ChatGenerationChunk]:
+        """Stream responses from the model."""
+        openai_messages = self._convert_messages(messages)
+
+        # Build request kwargs
+        request_kwargs: Dict[str, Any] = {
+            "model": self.model,
+            "messages": openai_messages,
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+            "stream": True,
+        }
+
+        if stop:
+            request_kwargs["stop"] = stop
+
+        if self._tools:
+            request_kwargs["tools"] = self._tools
+        if self._tool_choice:
+            request_kwargs["tool_choice"] = self._tool_choice
+
+        # Make streaming API call
+        response = self.client.chat.completions.create(**request_kwargs)
+
+        # Accumulate tool calls across chunks
+        tool_calls_accum: Dict[int, Dict[str, Any]] = {}
+
+        for chunk in response:
+            if not chunk.choices:
+                continue
+
+            choice = chunk.choices[0]
+            delta = choice.delta
+
+            content = delta.content or ""
+            additional_kwargs: Dict[str, Any] = {}
+            tool_call_chunks = []
+
+            # Handle tool calls in streaming
+            if delta.tool_calls:
+                for tc in delta.tool_calls:
+                    idx = tc.index
+                    if idx not in tool_calls_accum:
+                        tool_calls_accum[idx] = {
+                            "id": tc.id or "",
+                            "name": "",
+                            "arguments": "",
+                        }
+                    if tc.id:
+                        tool_calls_accum[idx]["id"] = tc.id
+                    if tc.function:
+                        if tc.function.name:
+                            tool_calls_accum[idx]["name"] = tc.function.name
+                        if tc.function.arguments:
+                            tool_calls_accum[idx]["arguments"] += tc.function.arguments
+
+                    # Build tool call chunk for LangChain
+                    tool_call_chunks.append({
+                        "index": idx,
+                        "id": tool_calls_accum[idx]["id"],
+                        "name": tool_calls_accum[idx]["name"],
+                        "args": tool_calls_accum[idx]["arguments"],
+                    })
+
+            # Create chunk message
+            chunk_message = AIMessageChunk(
+                content=content,
+                additional_kwargs=additional_kwargs,
+                tool_call_chunks=tool_call_chunks if tool_call_chunks else [],
+            )
+
+            # Add finish reason on last chunk
+            if choice.finish_reason:
+                chunk_message.response_metadata = {
+                    "finish_reason": choice.finish_reason,
+                }
+
+            yield ChatGenerationChunk(message=chunk_message)
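
The module docstring above is the core of this change: gpt-oss expects behavioral instructions under the developer role, and ChatGPTOSS performs that mapping before calling an OpenAI-compatible endpoint. The following usage sketch is illustrative only; it assumes the wheel plus langchain-core and openai are installed, and the endpoint URL is a placeholder.

```python
from langchain_core.messages import HumanMessage, SystemMessage

from agent_server.langchain.models import ChatGPTOSS

llm = ChatGPTOSS(
    model="openai/gpt-oss-120b",
    base_url="http://localhost:8000/v1",  # placeholder vLLM endpoint
    api_key="dummy",
)

messages = [
    SystemMessage(content="Always answer in JSON."),
    HumanMessage(content="What is 2 + 2?"),
]

# The behavior this class adds: SystemMessage is forwarded as role=developer,
# which gpt-oss treats as high-priority instructions rather than metadata.
print(llm._convert_messages(messages))
# [{'role': 'developer', 'content': 'Always answer in JSON.'},
#  {'role': 'user', 'content': 'What is 2 + 2?'}]

# llm.invoke(messages) would then POST to the /v1/chat/completions endpoint.
```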

agent_server/langchain/prompts.py
@@ -42,6 +42,7 @@ DEFAULT_SYSTEM_PROMPT = """You are an expert Python data scientist and Jupyter n
 - check_resource_tool: 대용량 파일/데이터프레임 작업 전 필수
 - read_file_tool: 대용량 파일은 limit=100으로 먼저 확인
 - jupyter_cell_tool: 차트 라벨은 영어로
+- **KeyboardInterrupt 발생 시**: ask_user_tool로 중단 사유를 사용자에게 확인 (예: "코드 실행이 중단되었습니다. 중단 사유를 알려주시면 도움이 됩니다.")
 - **파일 수정 후**: diagnostics_tool로 오류 확인 필수
 
 # 사용자 입력 요청 [중요]

agent_server/routers/langchain_agent.py
@@ -1082,6 +1082,11 @@ async def stream_agent(request: AgentRequest):
 
         # Handle AIMessage
         elif isinstance(last_message, AIMessage):
+            # LLM Response separator for easy log reading
+            print("\n" + "🔵" * 48, flush=True)
+            print("=" * 96, flush=True)
+            print(" ✨ LLM RESPONSE", flush=True)
+            print("=" * 96, flush=True)
             logger.info(
                 "SimpleAgent AIMessage content: %s",
                 last_message.content or "",

agent_server/routers/langchain_agent.py
@@ -1115,6 +1120,11 @@ async def stream_agent(request: AgentRequest):
                     ensure_ascii=False,
                 ),
             )
+            # LLM Response end separator
+            print("=" * 96, flush=True)
+            print(" ✅ LLM RESPONSE END", flush=True)
+            print("=" * 96, flush=True)
+            print("🔵" * 48 + "\n", flush=True)
             last_finish_reason = (
                 getattr(last_message, "response_metadata", {}) or {}
             ).get("finish_reason")

package.json (hdsp-agent labextension)
@@ -1,6 +1,6 @@
 {
   "name": "hdsp-agent",
-  "version": "2.0.19",
+  "version": "2.0.20",
   "description": "HDSP Agent JupyterLab Extension - Thin client for Agent Server",
   "keywords": [
     "jupyter",

@@ -132,7 +132,7 @@
     }
   },
   "_build": {
-    "load": "static/remoteEntry.d686ab71eb65b5ef8f15.js",
+    "load": "static/remoteEntry.586bf5521d043cdd37b8.js",
     "extension": "./extension",
     "style": "./style"
   }