PyPI - hdsp-jupyter-extension - Versions diffs - 2.0.8__py3-none-any.whl → 2.0.10__py3-none-any.whl - Mend

hdsp-jupyter-extension 2.0.8__py3-none-any.whl → 2.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

agent_server/core/rag_manager.py CHANGED Viewed

@@ -90,16 +90,25 @@ class RAGManager:
             # 2. Initialize embedding service (local or vLLM backend)
             import os
-            embedding_backend = os.environ.get("HDSP_EMBEDDING_BACKEND", "local").lower()
+            embedding_backend = os.environ.get(
+                "HDSP_EMBEDDING_BACKEND", "local"
+            ).lower()
             if embedding_backend == "vllm":
-                from agent_server.core.vllm_embedding_service import get_vllm_embedding_service
-                self._embedding_service = get_vllm_embedding_service(self._config.embedding)
+                from agent_server.core.vllm_embedding_service import (
+                    get_vllm_embedding_service,
+                )
+                self._embedding_service = get_vllm_embedding_service(
+                    self._config.embedding
+                )
                 logger.info(
                     f"vLLM Embedding service initialized (dim={self._embedding_service.dimension})"
                 )
             else:
                 from agent_server.core.embedding_service import get_embedding_service
                 self._embedding_service = get_embedding_service(self._config.embedding)
                 # Load model to get dimension
                 await self._embedding_service._ensure_model_loaded()

agent_server/core/retriever.py CHANGED Viewed

@@ -96,7 +96,8 @@ class Retriever:
                 query=query_embedding,
                 query_filter=qdrant_filter,
                 limit=effective_top_k,
-                score_threshold=effective_threshold * 0.5,  # Lower for initial retrieval
+                score_threshold=effective_threshold
+                * 0.5,  # Lower for initial retrieval
                 with_payload=True,
                 with_vectors=False,
             )

agent_server/core/vllm_embedding_service.py CHANGED Viewed

@@ -17,7 +17,6 @@ import os
 from typing import TYPE_CHECKING, List, Optional
 import httpx
-import time
 if TYPE_CHECKING:
     from hdsp_agent_core.models.rag import EmbeddingConfig
@@ -66,7 +65,7 @@ class VLLMEmbeddingService:
         self._client = httpx.AsyncClient(
             base_url=self._endpoint,
             timeout=httpx.Timeout(30.0),
-            limits=httpx.Limits(max_keepalive_connections=5, max_connections=10)
+            limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
         )
         logger.info(
@@ -79,7 +78,9 @@ class VLLMEmbeddingService:
         """Get embedding dimension"""
         return self._dimension
-    async def _call_vllm_api(self, texts: List[str], max_retries: int = 3) -> List[List[float]]:
+    async def _call_vllm_api(
+        self, texts: List[str], max_retries: int = 3
+    ) -> List[List[float]]:
         """
         Call vLLM embedding API with retry logic.
@@ -126,7 +127,9 @@ class VLLMEmbeddingService:
                 logger.error(f"Unexpected error calling vLLM API: {e}")
                 break
-        raise Exception(f"Failed to connect to vLLM after {max_retries} attempts: {last_error}")
+        raise Exception(
+            f"Failed to connect to vLLM after {max_retries} attempts: {last_error}"
+        )
     async def embed_texts(self, texts: List[str]) -> List[List[float]]:
         """
@@ -240,4 +243,4 @@ def reset_vllm_embedding_service() -> None:
         _vllm_embedding_service._initialized = False
         _vllm_embedding_service = None
     VLLMEmbeddingService._instance = None
-    VLLMEmbeddingService._initialized = False
+    VLLMEmbeddingService._initialized = False

agent_server/langchain/ARCHITECTURE.md CHANGED Viewed

@@ -151,13 +151,10 @@ jupyter_ext/
 ```python
 - jupyter_cell_tool            # Python 코드 실행
 - markdown_tool                # 마크다운 셀 추가
-- final_answer_tool            # 작업 완료 및 요약
 - read_file_tool               # 파일 읽기
 - write_file_tool              # 파일 쓰기
-- list_files_tool              # 디렉토리 목록
-- search_workspace_tool        # 워크스페이스 검색 (grep/rg)
 - search_notebook_cells_tool   # 노트북 셀 검색
-- execute_command_tool         # 쉘 명령 실행
+- execute_command_tool         # 쉘 명령 실행 (파일 검색은 find/grep 사용)
 - check_resource_tool          # 리소스 확인
 ```
@@ -441,8 +438,6 @@ non-HITL 도구 실행 후 continuation 프롬프트를 주입합니다.
 NON_HITL_TOOLS = {
     "markdown_tool",
     "read_file_tool",
-    "list_files_tool",
-    "search_workspace_tool",
     "search_notebook_cells_tool",
     "write_todos",
 }
@@ -508,8 +503,7 @@ LLM 호출 횟수를 제한합니다.
 **설정**:
 ```python
-- write_todos: run_limit=5, exit_behavior="continue"
-- list_files_tool: run_limit=5, exit_behavior="continue"
+- write_todos: run_limit=20, exit_behavior="continue"
 ```
 ### 9. `SummarizationMiddleware` (LangChain 내장)
@@ -634,52 +628,14 @@ Python 코드를 Jupyter 셀에서 실행합니다.
 **특징**:
 - HITL 대상 (사용자 승인 필요)
-#### `list_files_tool`
-디렉토리 목록을 가져옵니다.
-**파라미터**:
-- `path`: 디렉토리 경로 (기본 ".")
-- `recursive`: 재귀 탐색 여부 (기본 False)
-**반환**:
-```python
-{
-    "tool": "list_files",
-    "parameters": {"path": ".", "recursive": False},
-    "status": "completed",
-    "files": ["file1.py", "file2.csv", ...]
-}
-```
 ---
 ### Search Tools (`search_tools.py`)
-#### `search_workspace_tool`
-워크스페이스에서 패턴을 검색합니다 (grep/ripgrep).
-**파라미터**:
-- `pattern`: 정규식 패턴
-- `file_types`: 파일 타입 필터 (예: ["py", "md"])
-- `path`: 검색 경로 (기본 ".")
-**반환**:
-```python
-{
-    "tool": "search_workspace",
-    "parameters": {"pattern": "...", "file_types": ["py"], "path": "."},
-    "status": "completed",
-    "results": [
-        {"file": "file1.py", "line_number": 10, "line": "..."},
-        ...
-    ],
-    "command": "rg ... (또는 grep ...)"
-}
-```
-**특징**:
-- ripgrep 우선 사용 (속도)
-- 없으면 grep 사용
+> **Note**: 파일 검색 기능은 `execute_command_tool`을 통해 `find`/`grep` 명령을 직접 사용합니다.
+>
+> - 파일명 검색: `execute_command_tool(command="find . -iname '*pattern*' 2>/dev/null")`
+> - 파일 내용 검색: `execute_command_tool(command="grep -rn 'pattern' --include='*.py' .")`
 #### `search_notebook_cells_tool`
 Jupyter 노트북 셀에서 패턴을 검색합니다.
@@ -961,7 +917,7 @@ return
 - **HITL**: 사용자 승인 필요
   - `jupyter_cell_tool`, `execute_command_tool`, `write_file_tool`
 - **non-HITL**: 즉시 실행
-  - `markdown_tool`, `read_file_tool`, `list_files_tool`, `search_*_tool`
+  - `markdown_tool`, `read_file_tool`, `search_*_tool`
 - **클라이언트 실행**: 서버에서 실행하지 않음
   - `check_resource_tool`: CheckResourceHandler에서 처리

agent_server/langchain/agent.py CHANGED Viewed

@@ -26,15 +26,12 @@ from agent_server.langchain.tools import (
     diagnostics_tool,
     edit_file_tool,
     execute_command_tool,
-    final_answer_tool,
     jupyter_cell_tool,
-    list_files_tool,
     markdown_tool,
     multiedit_file_tool,
     read_file_tool,
     references_tool,
     search_notebook_cells_tool,
-    search_workspace_tool,
     write_file_tool,
 )
@@ -46,13 +43,10 @@ def _get_all_tools():
     return [
         jupyter_cell_tool,
         markdown_tool,
-        final_answer_tool,
         read_file_tool,
         write_file_tool,
         edit_file_tool,
         multiedit_file_tool,
-        list_files_tool,
-        search_workspace_tool,
         search_notebook_cells_tool,
         execute_command_tool,
         check_resource_tool,
@@ -115,7 +109,6 @@ def create_simple_chat_agent(
     # Configure middleware
     middleware = []
     # Add empty response handler middleware
     handle_empty_response = create_handle_empty_response_middleware(wrap_model_call)
     middleware.append(handle_empty_response)
@@ -125,7 +118,9 @@ def create_simple_chat_agent(
     middleware.append(limit_tool_calls)
     # Add tool args normalization middleware (convert list args to strings based on schema)
-    normalize_tool_args = create_normalize_tool_args_middleware(wrap_model_call, tools=tools)
+    normalize_tool_args = create_normalize_tool_args_middleware(
+        wrap_model_call, tools=tools
+    )
     middleware.append(normalize_tool_args)
     # Add continuation prompt middleware
@@ -164,22 +159,14 @@ def create_simple_chat_agent(
     logger.info("Added ModelCallLimitMiddleware with run_limit=30")
     # ToolCallLimitMiddleware: Prevent specific tools from being called too many times
-    # Limit write_todos to prevent loops
+    # run_limit resets automatically per user message
     write_todos_limit = ToolCallLimitMiddleware(
         tool_name="write_todos",
-        run_limit=5,  # Max 5 write_todos calls per user message
-        exit_behavior="continue",  # Let agent continue with other tools
-    )
-    middleware.append(write_todos_limit)
-    # Limit list_files_tool to prevent excessive directory listing
-    list_files_limit = ToolCallLimitMiddleware(
-        tool_name="list_files_tool",
-        run_limit=5,  # Max 5 list_files calls per user message
+        run_limit=20,  # Max 20 write_todos calls per user message
         exit_behavior="continue",
     )
-    middleware.append(list_files_limit)
-    logger.info("Added ToolCallLimitMiddleware for write_todos and list_files_tool")
+    middleware.append(write_todos_limit)
+    logger.info("Added ToolCallLimitMiddleware for write_todos (20/msg)")
     # Add SummarizationMiddleware to maintain context across cycles
     summary_llm = create_summarization_llm(llm_config)
@@ -218,6 +205,30 @@ Example: "데이터를 로드하겠습니다." then call jupyter_cell_tool.
         system_prompt = system_prompt + "\n" + gemini_content_prompt
         logger.info("Added Gemini 2.5 Flash specific prompt for content inclusion")
+    # Add vLLM/gpt-oss specific prompt for Korean responses and proper todo structure
+    provider = llm_config.get("provider", "")
+    if provider == "vllm":
+        vllm_prompt = """
+## 🔴 중요: 한국어로 응답하세요
+- 모든 응답, 설명, todo 항목은 반드시 한국어로 작성하세요.
+- 코드 주석과 출력 설명도 한국어로 작성하세요.
+- 영어로 응답하지 마세요.
+## 🔴 MANDATORY: Todo List Structure
+When creating todos with write_todos, you MUST:
+1. Write all todo items in Korean
+2. ALWAYS include "작업 요약 및 다음단계 제시" as the LAST todo item
+3. Example structure:
+   - 데이터 로드 및 확인
+   - 데이터 분석 수행
+   - 작업 요약 및 다음단계 제시  ← 반드시 마지막에 포함!
+## 🔴 IMPORTANT: Never return empty responses
+If you have nothing to say, call a tool instead. NEVER return an empty response.
+"""
+        system_prompt = system_prompt + "\n" + vllm_prompt
+        logger.info("Added vLLM/gpt-oss specific prompt for Korean responses")
     logger.info("SimpleChatAgent system_prompt: %s", system_prompt)
     # Create agent with checkpointer (required for HITL)

agent_server/langchain/custom_middleware.py CHANGED Viewed

@@ -139,6 +139,19 @@ def create_handle_empty_response_middleware(wrap_model_call):
     def handle_empty_response(request, handler):
         max_retries = 2
+        # Check if all todos are completed - if so, skip processing entirely
+        todos = request.state.get("todos", [])
+        if todos:
+            pending_todos = [
+                t for t in todos if t.get("status") in ("pending", "in_progress")
+            ]
+            if not pending_todos:
+                logger.info(
+                    "All %d todos completed - skipping handle_empty_response middleware",
+                    len(todos),
+                )
+                return handler(request)
         # Check if last message is final_answer_tool result - if so, don't retry/synthesize
         # This allows agent to naturally terminate after final_answer_tool
         messages = request.messages
@@ -206,6 +219,25 @@ def create_handle_empty_response_middleware(wrap_model_call):
             # Invalid response - retry with JSON schema prompt
             if response_message and attempt < max_retries:
                 reason = "text-only" if has_content else "empty"
+                json_prompt = _build_json_prompt(request, response_message, has_content)
+                # If _build_json_prompt returns None, skip retry and synthesize write_todos
+                # This happens when: all todos completed OR current todo is summary/next_steps
+                if json_prompt is None:
+                    logger.info(
+                        "Skipping retry for %s response, synthesizing write_todos with content",
+                        reason,
+                    )
+                    # Synthesize write_todos while preserving the content (summary)
+                    synthetic_message = _create_synthetic_final_answer(
+                        request, response_message, has_content
+                    )
+                    response = _replace_ai_message_in_response(
+                        response, synthetic_message
+                    )
+                    return response
                 logger.warning(
                     "Invalid AIMessage (%s) detected (attempt %d/%d). "
                     "Retrying with JSON schema prompt...",
@@ -214,16 +246,26 @@ def create_handle_empty_response_middleware(wrap_model_call):
                     max_retries + 1,
                 )
-                json_prompt = _build_json_prompt(request, response_message, has_content)
                 request = request.override(
                     messages=request.messages + [HumanMessage(content=json_prompt)]
                 )
                 continue
-            # Max retries exhausted - synthesize final_answer
+            # Max retries exhausted - synthesize write_todos to complete
             if response_message:
+                # Check if todos are already all completed - if so, just return
+                todos = request.state.get("todos", [])
+                pending_todos = [
+                    t for t in todos if t.get("status") in ("pending", "in_progress")
+                ]
+                if todos and not pending_todos:
+                    logger.info(
+                        "Max retries exhausted but all todos completed - returning response as-is"
+                    )
+                    return response
                 logger.warning(
-                    "Max retries exhausted. Synthesizing final_answer response."
+                    "Max retries exhausted. Synthesizing write_todos to complete."
                 )
                 synthetic_message = _create_synthetic_final_answer(
                     request, response_message, has_content
@@ -274,14 +316,33 @@ def _build_json_prompt(request, response_message, has_content):
     """Build JSON-forcing prompt based on context."""
     todos = request.state.get("todos", [])
     pending_todos = [t for t in todos if t.get("status") in ("pending", "in_progress")]
+    in_progress_todos = [t for t in todos if t.get("status") == "in_progress"]
     if has_content:
-        content_preview = response_message.content[:300]
+        # If all todos completed, don't force another tool call
+        if todos and not pending_todos:
+            return None  # Signal to skip retry
+        # If current in_progress todo is "작업 요약 및 다음단계 제시", accept text-only response
+        # The LLM is outputting the summary, we'll synthesize write_todos
+        if in_progress_todos:
+            current_todo = in_progress_todos[0].get("content", "")
+            if (
+                "작업 요약" in current_todo
+                or "다음단계" in current_todo
+                or "다음 단계" in current_todo
+            ):
+                logger.info(
+                    "Current todo is summary/next steps ('%s'), accepting text-only response",
+                    current_todo[:30],
+                )
+                return None  # Signal to skip retry - will synthesize write_todos with content
         return (
             f"{JSON_TOOL_SCHEMA}\n\n"
             f"Your previous response was text, not JSON. "
-            f"Wrap your answer in final_answer_tool:\n"
-            f'{{"tool": "final_answer_tool", "arguments": {{"answer": "{content_preview}..."}}}}'
+            f"Call the next appropriate tool to continue.\n"
+            f'Example: {{"tool": "jupyter_cell_tool", "arguments": {{"code": "print(\'hello\')"}}}}'
         )
     elif pending_todos:
         todo_list = ", ".join(t.get("content", "")[:20] for t in pending_todos[:3])
@@ -292,39 +353,62 @@ def _build_json_prompt(request, response_message, has_content):
             f"Call jupyter_cell_tool with Python code to complete the next task.\n"
             f"Example: {example_json}"
         )
+    elif not todos:
+        # No todos yet = new task starting, LLM must create todos or call a tool
+        # This happens when LLM returns empty response at the start of a new task
+        logger.info("No todos exist yet - forcing retry to create todos or call tool")
+        return (
+            f"{JSON_TOOL_SCHEMA}\n\n"
+            f"Your response was empty. You MUST call a tool to proceed.\n"
+            f"한국어로 응답하고, write_todos로 작업 목록을 만들거나 jupyter_cell_tool/read_file_tool을 호출하세요.\n"
+            f'Example: {{"tool": "write_todos", "arguments": {{"todos": [{{"content": "데이터 분석", "status": "in_progress"}}]}}}}'
+        )
     else:
+        # Todos exist but all completed - ask for summary
+        logger.info("All todos completed but response empty - asking for summary")
         return (
             f"{JSON_TOOL_SCHEMA}\n\n"
-            f"All tasks completed. Call final_answer_tool:\n"
-            f'{{"tool": "final_answer_tool", "arguments": {{"answer": "작업이 완료되었습니다."}}}}'
+            f"All tasks completed. Call markdown_tool to provide a summary in Korean.\n"
+            f"한국어로 작업 요약을 작성하세요.\n"
+            f'Example: {{"tool": "markdown_tool", "arguments": {{"content": "작업이 완료되었습니다."}}}}'
         )
 def _create_synthetic_final_answer(request, response_message, has_content):
-    """Create synthetic final_answer message."""
-    if has_content and response_message.content:
-        summary = response_message.content
+    """Create synthetic write_todos call to mark all todos as completed.
+    This triggers automatic session termination via router's all_todos_completed check.
+    Preserves the LLM's text content (summary) if present.
+    """
+    todos = request.state.get("todos", [])
+    # Mark all todos as completed
+    completed_todos = (
+        [{**todo, "status": "completed"} for todo in todos]
+        if todos
+        else [{"content": "작업 완료", "status": "completed"}]
+    )
+    # Preserve original content (summary JSON) if present
+    original_content = ""
+    if has_content and response_message and response_message.content:
+        original_content = response_message.content
         logger.info(
-            "Using LLM's text content as final answer (length=%d)",
-            len(summary),
+            "Creating synthetic write_todos with preserved content (length=%d)",
+            len(original_content),
         )
     else:
-        todos = request.state.get("todos", [])
-        completed_todos = [
-            t.get("content", "") for t in todos if t.get("status") == "completed"
-        ]
-        summary = (
-            f"작업이 완료되었습니다. 완료된 항목: {', '.join(completed_todos[:5])}"
-            if completed_todos
-            else "작업이 완료되었습니다."
+        logger.info(
+            "Creating synthetic write_todos to mark %d todos as completed",
+            len(completed_todos),
         )
     return AIMessage(
-        content="",
+        content=original_content,  # Preserve the summary content for UI
         tool_calls=[
             {
-                "name": "final_answer_tool",
-                "args": {"answer": summary},
+                "name": "write_todos",
+                "args": {"todos": completed_todos},
                 "id": str(uuid.uuid4()),
                 "type": "tool_call",
             }
@@ -504,6 +588,30 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
                                             )
                                             args[key] = normalized_value
+                                # Ensure write_todos includes summary todo as last item
+                                if tool_name == "write_todos" and "todos" in args:
+                                    todos = args["todos"]
+                                    if isinstance(todos, list) and len(todos) > 0:
+                                        # Check if any todo contains summary keywords
+                                        summary_keywords = ["작업 요약", "다음단계", "다음 단계", "요약 및"]
+                                        has_summary = any(
+                                            any(kw in todo.get("content", "") for kw in summary_keywords)
+                                            for todo in todos
+                                            if isinstance(todo, dict)
+                                        )
+                                        if not has_summary:
+                                            # Add summary todo as last item
+                                            summary_todo = {
+                                                "content": "작업 요약 및 다음단계 제시",
+                                                "status": "pending"
+                                            }
+                                            todos.append(summary_todo)
+                                            logger.info(
+                                                "Auto-added '작업 요약 및 다음단계 제시' to write_todos (total: %d todos)",
+                                                len(todos),
+                                            )
         return response
     return normalize_tool_args
@@ -543,16 +651,24 @@ def create_inject_continuation_middleware(wrap_model_call):
                     pass
             if tool_name in NON_HITL_TOOLS:
-                logger.info(
-                    "Injecting continuation prompt after non-HITL tool: %s",
-                    tool_name,
-                )
                 todos = request.state.get("todos", [])
                 pending_todos = [
                     t for t in todos if t.get("status") in ("pending", "in_progress")
                 ]
+                # If all todos are completed, don't inject continuation - let router handle termination
+                if not pending_todos and todos:
+                    logger.info(
+                        "All todos completed, skipping continuation for tool: %s",
+                        tool_name,
+                    )
+                    return handler(request)
+                logger.info(
+                    "Injecting continuation prompt after non-HITL tool: %s",
+                    tool_name,
+                )
                 if pending_todos:
                     pending_list = ", ".join(
                         t.get("content", "")[:30] for t in pending_todos[:3]
@@ -563,9 +679,10 @@ def create_inject_continuation_middleware(wrap_model_call):
                         f"Call jupyter_cell_tool or the next appropriate tool."
                     )
                 else:
+                    # No todos yet - let agent create them
                     continuation = (
-                        f"Tool '{tool_name}' completed. All tasks done. "
-                        f"Call final_answer_tool with a summary NOW."
+                        f"Tool '{tool_name}' completed. "
+                        f"Create a todo list with write_todos if needed."
                     )
                 new_messages = list(messages) + [

agent_server/langchain/hitl_config.py CHANGED Viewed

@@ -32,17 +32,9 @@ def get_hitl_interrupt_config() -> Dict[str, Any]:
             "allowed_decisions": ["approve", "edit"],
             "description": "📄 파일 읽기 실행 중",
         },
-        "list_files_tool": {
-            "allowed_decisions": ["approve", "edit"],
-            "description": "📂 파일 목록 조회 중",
-        },
         "write_todos": False,  # Todo updates don't need approval
         # Search tools need HITL for client-side execution (auto-approved by frontend)
         # Uses 'edit' decision to pass execution_result back
-        "search_workspace_tool": {
-            "allowed_decisions": ["approve", "edit"],
-            "description": "🔍 Searching workspace files",
-        },
         "search_notebook_cells_tool": {
             "allowed_decisions": ["approve", "edit"],
             "description": "🔍 Searching notebook cells",

hdsp-jupyter-extension 2.0.8__py3-none-any.whl → 2.0.10__py3-none-any.whl

hdsp-jupyter-extension 2.0.8__py3-none-any.whl → 2.0.10__py3-none-any.whl