hdsp-jupyter-extension 2.0.10__py3-none-any.whl → 2.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/core/notebook_generator.py +4 -4
- agent_server/langchain/MULTI_AGENT_ARCHITECTURE.md +1114 -0
- agent_server/langchain/__init__.py +2 -2
- agent_server/langchain/agent.py +72 -33
- agent_server/langchain/agent_factory.py +400 -0
- agent_server/langchain/agent_prompts/__init__.py +25 -0
- agent_server/langchain/agent_prompts/athena_query_prompt.py +71 -0
- agent_server/langchain/agent_prompts/planner_prompt.py +85 -0
- agent_server/langchain/agent_prompts/python_developer_prompt.py +123 -0
- agent_server/langchain/agent_prompts/researcher_prompt.py +38 -0
- agent_server/langchain/custom_middleware.py +656 -113
- agent_server/langchain/hitl_config.py +38 -9
- agent_server/langchain/llm_factory.py +1 -85
- agent_server/langchain/middleware/__init__.py +24 -0
- agent_server/langchain/middleware/code_history_middleware.py +412 -0
- agent_server/langchain/middleware/description_injector.py +150 -0
- agent_server/langchain/middleware/skill_middleware.py +298 -0
- agent_server/langchain/middleware/subagent_events.py +171 -0
- agent_server/langchain/middleware/subagent_middleware.py +329 -0
- agent_server/langchain/prompts.py +107 -135
- agent_server/langchain/skills/data_analysis.md +236 -0
- agent_server/langchain/skills/data_loading.md +158 -0
- agent_server/langchain/skills/inference.md +392 -0
- agent_server/langchain/skills/model_training.md +318 -0
- agent_server/langchain/skills/pyspark.md +352 -0
- agent_server/langchain/subagents/__init__.py +20 -0
- agent_server/langchain/subagents/base.py +173 -0
- agent_server/langchain/tools/__init__.py +3 -0
- agent_server/langchain/tools/jupyter_tools.py +58 -20
- agent_server/langchain/tools/lsp_tools.py +1 -1
- agent_server/langchain/tools/shared/__init__.py +26 -0
- agent_server/langchain/tools/shared/qdrant_search.py +175 -0
- agent_server/langchain/tools/tool_registry.py +219 -0
- agent_server/langchain/tools/workspace_tools.py +197 -0
- agent_server/prompts/file_action_prompts.py +8 -8
- agent_server/routers/config.py +40 -1
- agent_server/routers/langchain_agent.py +868 -321
- hdsp_agent_core/__init__.py +46 -47
- hdsp_agent_core/factory.py +6 -10
- hdsp_agent_core/interfaces.py +4 -2
- hdsp_agent_core/knowledge/__init__.py +5 -5
- hdsp_agent_core/knowledge/chunking.py +87 -61
- hdsp_agent_core/knowledge/loader.py +103 -101
- hdsp_agent_core/llm/service.py +192 -107
- hdsp_agent_core/managers/config_manager.py +16 -22
- hdsp_agent_core/managers/session_manager.py +5 -4
- hdsp_agent_core/models/__init__.py +12 -12
- hdsp_agent_core/models/agent.py +15 -8
- hdsp_agent_core/models/common.py +1 -2
- hdsp_agent_core/models/rag.py +48 -111
- hdsp_agent_core/prompts/__init__.py +12 -12
- hdsp_agent_core/prompts/cell_action_prompts.py +9 -7
- hdsp_agent_core/services/agent_service.py +10 -8
- hdsp_agent_core/services/chat_service.py +10 -6
- hdsp_agent_core/services/rag_service.py +3 -6
- hdsp_agent_core/tests/conftest.py +4 -1
- hdsp_agent_core/tests/test_factory.py +2 -2
- hdsp_agent_core/tests/test_services.py +12 -19
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +7 -2
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js +1108 -179
- hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +1 -0
- jupyter_ext/labextension/static/lib_index_js.dc6434bee96ab03a0539.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.5449ba3c7e25177d2987.js +3936 -8144
- hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.5449ba3c7e25177d2987.js.map +1 -0
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.a8e0b064eb9b1c1ff463.js +17 -17
- hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.a8e0b064eb9b1c1ff463.js.map +1 -0
- {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/METADATA +1 -1
- {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/RECORD +100 -76
- jupyter_ext/__init__.py +21 -11
- jupyter_ext/_version.py +1 -1
- jupyter_ext/handlers.py +128 -58
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +7 -2
- jupyter_ext/labextension/static/{frontend_styles_index_js.2d9fb488c82498c45c2d.js → frontend_styles_index_js.037b3c8e5d6a92b63b16.js} +1108 -179
- jupyter_ext/labextension/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +1 -0
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js → jupyter_ext/labextension/static/lib_index_js.5449ba3c7e25177d2987.js +3936 -8144
- jupyter_ext/labextension/static/lib_index_js.5449ba3c7e25177d2987.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.4a252df3ade74efee8d6.js → remoteEntry.a8e0b064eb9b1c1ff463.js} +17 -17
- jupyter_ext/labextension/static/remoteEntry.a8e0b064eb9b1c1ff463.js.map +1 -0
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -1
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js.map +0 -1
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js.map +0 -1
- jupyter_ext/labextension/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.dc6434bee96ab03a0539.js.map +0 -1
- jupyter_ext/labextension/static/remoteEntry.4a252df3ade74efee8d6.js.map +0 -1
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Description Injector Middleware
|
|
3
|
+
|
|
4
|
+
Automatically extracts description from python_developer's JSON response
|
|
5
|
+
and injects it into jupyter_cell_tool calls when description is missing.
|
|
6
|
+
|
|
7
|
+
This middleware only activates when:
|
|
8
|
+
1. python_developer returns a response (to extract description)
|
|
9
|
+
2. jupyter_cell_tool is called without description (to inject)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
import re
|
|
14
|
+
from typing import Any, Dict, Optional
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
# Thread-local storage for pending description
|
|
19
|
+
_pending_description: Optional[str] = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_pending_description() -> Optional[str]:
    """Return the description captured from the last python_developer call.

    Returns:
        The pending description string, or ``None`` if nothing is pending.
    """
    # Reading a module global needs no ``global`` statement; lazy %-style
    # logging args avoid building the preview string when INFO is disabled.
    preview = _pending_description[:50] if _pending_description else "None"
    logger.info("[DescriptionInjector] GET pending description: %s...", preview)
    return _pending_description
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def set_pending_description(description: Optional[str]) -> None:
    """Record *description* so a later jupyter_cell_tool call can consume it."""
    global _pending_description
    _pending_description = description
    if description:
        logger.info(f"[DescriptionInjector] SET pending description: {description[:80]}...")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def clear_pending_description() -> None:
    """Reset the pending description once it has been consumed."""
    global _pending_description
    _pending_description = None
    logger.debug("[DescriptionInjector] Cleared pending description")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def extract_description_from_python_developer(response: str) -> Optional[str]:
    """
    Extract the human-readable description from python_developer's response.

    Expected format:
        [DESCRIPTION]
        description text (2-3 lines)

        [CODE]
        ```python
        code
        ```

    Several regex patterns are tried (strictest first) to tolerate formatting
    variations; if no marker matches, the leading non-code lines are used as
    a fallback description.

    Args:
        response: Raw text returned by the python_developer subagent.

    Returns:
        The extracted description, or None when nothing meaningful is found.
    """
    if not response:
        return None

    patterns = [
        # Standard format: [DESCRIPTION]\n...\n[CODE]
        r'\[DESCRIPTION\]\s*\n(.*?)(?=\n\s*\[CODE\])',
        # With extra whitespace: [DESCRIPTION] \n...\n [CODE]
        r'\[DESCRIPTION\]\s*(.*?)(?=\s*\[CODE\])',
        # Until code block: [DESCRIPTION]\n...\n```
        r'\[DESCRIPTION\]\s*\n(.*?)(?=\n\s*```)',
        # Until end of text if no [CODE] marker
        r'\[DESCRIPTION\]\s*\n(.+?)(?=\n\n|\Z)',
    ]

    for pattern in patterns:
        match = re.search(pattern, response, re.DOTALL | re.IGNORECASE)
        if match:
            description = match.group(1).strip()
            # Long extracts (>= 5 newlines) have their blank lines collapsed;
            # shorter ones keep their internal spacing.  The original inlined
            # this loop-invariant count() into the filter condition, which
            # obscured the intent.
            if description.count('\n') >= 5:
                description = '\n'.join(
                    line for line in description.split('\n') if line.strip()
                )
            description = description.strip()
            if description and len(description) > 5:  # minimum meaningful length
                logger.info(f"[DescriptionInjector] Extracted description: {description[:80]}...")
                return description

    # Fallback: when the format markers are missing entirely, treat the
    # leading lines (up to the first code-like line) as the description.
    lines = response.strip().split('\n')
    if lines:
        potential_desc = []
        for line in lines[:5]:
            line = line.strip()
            if line.startswith(('```', 'import ', 'def ', 'class ')):
                break
            if line and not line.startswith('['):
                potential_desc.append(line)
        if potential_desc:
            description = '\n'.join(potential_desc)
            if 10 < len(description) < 500:  # reasonable description length
                logger.info(f"[DescriptionInjector] Extracted description (fallback): {description[:80]}...")
                return description

    logger.debug("[DescriptionInjector] No description found in response")
    return None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def process_task_tool_response(agent_name: str, response: str) -> str:
    """
    Inspect a task_tool response and stash its description when it came
    from python_developer; responses from other agents pass through untouched.
    """
    if agent_name == "python_developer":
        extracted = extract_description_from_python_developer(response)
        if extracted:
            set_pending_description(extracted)
    return response
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def inject_description_if_needed(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
    """
    Fill in jupyter_cell_tool's missing ``description`` argument from the
    pending description captured earlier.

    Activates only when the tool is jupyter_cell_tool, its description is
    absent or empty, and a pending description exists; in every other case
    *args* is returned unchanged.
    """
    # Wrong tool, or a description already supplied: nothing to do.
    if tool_name != "jupyter_cell_tool" or args.get("description"):
        return args

    pending = get_pending_description()
    if not pending:
        return args

    logger.info(f"[DescriptionInjector] Injecting description into jupyter_cell_tool: {pending[:50]}...")
    injected = {**args, "description": pending}  # never mutate the caller's dict
    clear_pending_description()  # a pending description is consumed exactly once
    return injected
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SkillMiddleware
|
|
3
|
+
|
|
4
|
+
Middleware that provides progressive skill loading for code generation agents.
|
|
5
|
+
Based on LangChain Skills pattern for token-efficient context injection.
|
|
6
|
+
|
|
7
|
+
Key features:
|
|
8
|
+
- Injects skill metadata into system prompt (~250 tokens)
|
|
9
|
+
- Provides load_skill tool for on-demand full content loading
|
|
10
|
+
- Reads skill definitions from markdown files
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import os
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any, Dict, List, Optional
|
|
17
|
+
|
|
18
|
+
import yaml
|
|
19
|
+
from langchain_core.tools import tool
|
|
20
|
+
from pydantic import BaseModel, Field
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
# Skills directory path
|
|
25
|
+
SKILLS_DIR = Path(__file__).parent.parent / "skills"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _parse_skill_metadata(file_path: Path) -> Optional[Dict[str, str]]:
    """
    Read a skill markdown file and return the metadata from its YAML frontmatter.

    Expected layout::

        ---
        name: skill-name
        description: Skill description
        ---

    Args:
        file_path: Path to the skill markdown file.

    Returns:
        Dict with 'name', 'description' and 'path' keys, or None when the
        frontmatter is missing or malformed.
    """
    try:
        raw = file_path.read_text(encoding="utf-8")

        # The file must begin with a "---" frontmatter fence.
        if not raw.startswith("---"):
            logger.warning(f"Skill file missing YAML frontmatter: {file_path}")
            return None

        # split("---", 2) -> ["", frontmatter, body] when both fences exist.
        segments = raw.split("---", 2)
        if len(segments) < 3:
            logger.warning(f"Invalid YAML frontmatter format: {file_path}")
            return None

        meta = yaml.safe_load(segments[1])
        if not meta or "name" not in meta:
            logger.warning(f"Missing 'name' in frontmatter: {file_path}")
            return None

        return {
            "name": meta.get("name"),
            "description": meta.get("description", "No description"),
            "path": str(file_path),
        }
    except Exception as e:
        # Best-effort loading: a broken skill file must not break the agent.
        logger.error(f"Failed to parse skill file {file_path}: {e}")
        return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _load_all_skills() -> Dict[str, Dict[str, str]]:
    """
    Scan SKILLS_DIR for ``*.md`` files and collect their metadata.

    Returns:
        Mapping of skill name -> metadata dict ('name', 'description', 'path').
        Empty when the directory does not exist or holds no valid skill files.
    """
    registry: Dict[str, Dict[str, str]] = {}

    if not SKILLS_DIR.exists():
        logger.warning(f"Skills directory not found: {SKILLS_DIR}")
        return registry

    for md_file in SKILLS_DIR.glob("*.md"):
        meta = _parse_skill_metadata(md_file)
        if meta is None:
            continue  # malformed file already logged by the parser
        registry[meta["name"]] = meta
        logger.debug(f"Loaded skill: {meta['name']}")

    logger.info(f"Loaded {len(registry)} skills from {SKILLS_DIR}")
    return registry
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _get_skill_content(skill_name: str, skills: Dict[str, Dict[str, str]]) -> str:
|
|
94
|
+
"""
|
|
95
|
+
Load full content of a skill file (excluding YAML frontmatter).
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
skill_name: Name of the skill to load
|
|
99
|
+
skills: Dictionary of skill metadata
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
Full skill content as string, or error message
|
|
103
|
+
"""
|
|
104
|
+
if skill_name not in skills:
|
|
105
|
+
available = ", ".join(skills.keys())
|
|
106
|
+
return f"Error: Unknown skill '{skill_name}'. Available skills: {available}"
|
|
107
|
+
|
|
108
|
+
try:
|
|
109
|
+
file_path = Path(skills[skill_name]["path"])
|
|
110
|
+
content = file_path.read_text(encoding="utf-8")
|
|
111
|
+
|
|
112
|
+
# Remove YAML frontmatter
|
|
113
|
+
if content.startswith("---"):
|
|
114
|
+
parts = content.split("---", 2)
|
|
115
|
+
if len(parts) >= 3:
|
|
116
|
+
content = parts[2].strip()
|
|
117
|
+
|
|
118
|
+
return content
|
|
119
|
+
except Exception as e:
|
|
120
|
+
return f"Error loading skill '{skill_name}': {str(e)}"
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _build_skills_prompt_section(skills: Dict[str, Dict[str, str]]) -> str:
|
|
124
|
+
"""
|
|
125
|
+
Build the Available Skills section for system prompt injection.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
skills: Dictionary of skill metadata
|
|
129
|
+
|
|
130
|
+
Returns:
|
|
131
|
+
Formatted string for system prompt (~250 tokens)
|
|
132
|
+
"""
|
|
133
|
+
if not skills:
|
|
134
|
+
return ""
|
|
135
|
+
|
|
136
|
+
lines = ["## Available Skills"]
|
|
137
|
+
lines.append("")
|
|
138
|
+
lines.append("### Context 리소스 정보")
|
|
139
|
+
lines.append("Main Agent가 task 호출 시 제공하는 Context에는 다음 정보가 포함됩니다:")
|
|
140
|
+
lines.append("- **파일 크기**: `file_size: 200MB`, `total_size: 1.5GB` 등")
|
|
141
|
+
lines.append("- **시스템 메모리**: `available_memory: 16GB`, `gpu_memory: 8GB` 등")
|
|
142
|
+
lines.append("- **데이터 행 수**: `row_count: 10000000` 등")
|
|
143
|
+
lines.append("")
|
|
144
|
+
lines.append("### 스킬 로드 규칙 (MANDATORY)")
|
|
145
|
+
lines.append("**아래 조건에 해당하면 반드시 load_skill_tool()을 먼저 호출하세요:**")
|
|
146
|
+
lines.append("1. 파일 크기 >= 100MB → `load_skill_tool('data_loading')`")
|
|
147
|
+
lines.append("2. DataFrame 행 수 >= 100만 → `load_skill_tool('data_analysis')`")
|
|
148
|
+
lines.append("3. GPU/CUDA 사용 또는 모델 훈련 → `load_skill_tool('model_training')`")
|
|
149
|
+
lines.append("4. 모델 추론 최적화 필요 → `load_skill_tool('inference')`")
|
|
150
|
+
lines.append("5. PySpark/분산처리 → `load_skill_tool('pyspark')`")
|
|
151
|
+
lines.append("")
|
|
152
|
+
|
|
153
|
+
for name, metadata in sorted(skills.items()):
|
|
154
|
+
lines.append(f"- **{name}**: {metadata['description']}")
|
|
155
|
+
|
|
156
|
+
lines.append("")
|
|
157
|
+
lines.append("### 스킬 미사용 시점")
|
|
158
|
+
lines.append("- 단순 print(), 기본 연산")
|
|
159
|
+
lines.append("- 소형 파일 (< 100MB) 기본 처리")
|
|
160
|
+
lines.append("- DataFrame 행 수 < 100만")
|
|
161
|
+
lines.append("- Context에 리소스 정보가 명시되지 않은 경우")
|
|
162
|
+
|
|
163
|
+
return "\n".join(lines)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def create_load_skill_tool(skills: Dict[str, Dict[str, str]]):
    """
    Create the load_skill_tool for on-demand skill loading.

    The returned tool closes over *skills*, so it always serves the skill
    set that was loaded when this factory ran.

    Args:
        skills: Dictionary of skill metadata (name -> {name, description, path})

    Returns:
        LangChain tool for loading skills
    """
    # Baked into the input schema so the model sees the valid skill names.
    available_skills = ", ".join(sorted(skills.keys()))

    class LoadSkillInput(BaseModel):
        """Input schema for load_skill_tool"""
        # The Field description is surfaced to the model as argument help.
        skill_name: str = Field(
            description=f"Name of the skill to load. Available: {available_skills}"
        )

    @tool(args_schema=LoadSkillInput)
    def load_skill_tool(skill_name: str) -> str:
        """
        Load detailed optimization guide for a specific skill.

        Use this tool when you need specific optimization patterns for:
        - data_loading: Large file handling (chunking, dtype, Dask)
        - data_analysis: DataFrame operations (vectorization, groupby)
        - model_training: GPU/memory optimization (fp16, gradient checkpointing)
        - inference: Model serving optimization (batching, quantization, TensorRT)
        - pyspark: Distributed processing (partitioning, caching, broadcast join)

        Args:
            skill_name: Name of the skill to load

        Returns:
            Full optimization guide with code patterns and best practices
        """
        # NOTE: the docstring above is model-facing — LangChain's @tool uses
        # it as the tool description, so edits here change agent behavior.
        logger.info(f"Loading skill: {skill_name}")

        # Emit subagent tool call event for UI.  Imported lazily and wrapped
        # in try/except so a missing/broken event module never blocks loading.
        try:
            from agent_server.langchain.middleware.subagent_events import emit_subagent_tool_call
            # Pass explicit subagent name as fallback since thread-local context may not be available
            emit_subagent_tool_call(
                "load_skill_tool",
                {"skill_name": skill_name},
                subagent_name="python_developer"  # load_skill is only used by python_developer
            )
            logger.info(f"Emitted load_skill_tool event for skill: {skill_name}")
        except Exception as e:
            logger.warning(f"Failed to emit load_skill_tool event: {e}")
        # Unknown skills and read failures come back as "Error..." strings
        # rather than exceptions, so the model can recover.
        content = _get_skill_content(skill_name, skills)

        # Log content length for monitoring
        if not content.startswith("Error"):
            logger.info(f"Skill '{skill_name}' loaded: {len(content)} chars")

        return content

    return load_skill_tool
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
class SkillMiddleware:
    """
    Adds progressive skill loading to code generation agents.

    Responsibilities:
    1. Inject an "Available Skills" section into the system prompt (~250 tokens)
    2. Expose the `load_skill` tool for on-demand content loading
    3. Enable token-efficient progressive disclosure of optimization guides

    Usage:
        skill_middleware = SkillMiddleware()

        # Section to append to the system prompt
        skills_section = skill_middleware.get_prompt_section()

        # Tools to merge into the agent's toolset
        tools = skill_middleware.get_tools()
    """

    def __init__(self):
        """Load all skill metadata and prepare the prompt section and tool."""
        self.skills = _load_all_skills()
        self.prompt_section = _build_skills_prompt_section(self.skills)
        self.load_skill_tool = create_load_skill_tool(self.skills)

        logger.info(
            f"SkillMiddleware initialized with {len(self.skills)} skills: "
            f"{list(self.skills.keys())}"
        )

    def get_prompt_section(self) -> str:
        """Return the Available Skills section for the system prompt."""
        return self.prompt_section

    def get_tools(self) -> List[Any]:
        """Return the tools contributed by this middleware (just load_skill)."""
        return [self.load_skill_tool]

    def __call__(self, tools: List[Any]) -> List[Any]:
        """Return *tools* augmented with the load_skill tool (used at agent creation)."""
        return tools + [self.load_skill_tool]
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
# Singleton instance, created lazily on first access
_skill_middleware_instance: Optional[SkillMiddleware] = None


def get_skill_middleware() -> SkillMiddleware:
    """Return the shared SkillMiddleware, constructing it on first call."""
    global _skill_middleware_instance
    if _skill_middleware_instance is None:
        _skill_middleware_instance = SkillMiddleware()
    return _skill_middleware_instance
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Subagent Event Queue
|
|
3
|
+
|
|
4
|
+
Provides a thread-safe queue for subagent events that can be
|
|
5
|
+
consumed by the main streaming loop for UI display.
|
|
6
|
+
|
|
7
|
+
Events include:
|
|
8
|
+
- subagent_start: When a subagent is invoked
|
|
9
|
+
- subagent_tool_call: When a subagent calls a tool
|
|
10
|
+
- subagent_complete: When a subagent finishes
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import threading
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from queue import Empty, Queue
|
|
18
|
+
from typing import Any, Dict, List, Optional
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
# Thread-local storage for current subagent context
|
|
23
|
+
_subagent_context = threading.local()
|
|
24
|
+
|
|
25
|
+
# Global event queue (thread-safe)
|
|
26
|
+
_event_queue: Queue = Queue()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class SubagentEvent:
    """A single subagent lifecycle event destined for the UI."""

    # One of: subagent_start, subagent_tool_call, subagent_complete
    event_type: str
    subagent_name: str
    tool_name: Optional[str] = None
    tool_args: Optional[Dict[str, Any]] = None
    description: Optional[str] = None
    result_preview: Optional[str] = None
    timestamp: datetime = field(default_factory=datetime.now)

    def _description_preview(self) -> Optional[str]:
        """Truncate the description to 80 chars (plus ellipsis) for display."""
        if self.description and len(self.description) > 80:
            return self.description[:80] + "..."
        return self.description

    def to_debug_message(self) -> str:
        """Convert to debug message for UI display (legacy, for logging)."""
        if self.event_type == "subagent_start":
            return f"Subagent - {self.subagent_name} - 작업 시작: {self._description_preview()}"
        if self.event_type == "subagent_tool_call":
            return f"Subagent - {self.subagent_name} - Tool 실행: {self.tool_name}"
        if self.event_type == "subagent_complete":
            return f"Subagent - {self.subagent_name} - 완료"
        return f"Subagent - {self.subagent_name} - {self.event_type}"

    def to_status_dict(self) -> Dict[str, Any]:
        """Convert to status dict with icon for SSE streaming."""
        if self.event_type == "subagent_start":
            status = f"Subagent - {self.subagent_name} - 작업 시작: {self._description_preview()}"
            icon = "subagentStart"
        elif self.event_type == "subagent_tool_call":
            status = f"Subagent - {self.subagent_name} - Tool 실행: {self.tool_name}"
            icon = "tool"
        elif self.event_type == "subagent_complete":
            status = f"Subagent - {self.subagent_name} - 완료"
            icon = "subagentComplete"
        else:
            status = f"Subagent - {self.subagent_name} - {self.event_type}"
            icon = "info"
        return {"status": status, "icon": icon}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def set_current_subagent(name: str) -> None:
    """Record *name* as this thread's active subagent (for tool call tracking)."""
    _subagent_context.name = name
    logger.debug(f"Subagent context set: {name}")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def get_current_subagent() -> Optional[str]:
    """Return this thread's active subagent name, or None when unset."""
    return getattr(_subagent_context, "name", None)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def clear_current_subagent() -> None:
    """Drop this thread's subagent context."""
    _subagent_context.name = None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def emit_subagent_event(event: SubagentEvent) -> None:
    """Push *event* onto the shared queue and log it for visibility."""
    _event_queue.put(event)
    logger.info(f"Subagent event: {event.to_debug_message()}")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def emit_subagent_start(name: str, description: str) -> None:
    """Emit a subagent_start event for *name* with its task description."""
    event = SubagentEvent(
        event_type="subagent_start",
        subagent_name=name,
        description=description,
    )
    emit_subagent_event(event)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def emit_subagent_tool_call(
    tool_name: str,
    tool_args: Optional[Dict] = None,
    subagent_name: Optional[str] = None,
) -> None:
    """
    Emit a subagent tool call event.

    Args:
        tool_name: Name of the tool being called
        tool_args: Optional arguments passed to the tool
        subagent_name: Explicit subagent name (fallback to thread-local context)
    """
    # Prefer the explicit name; fall back to the thread-local context.
    resolved = subagent_name or get_current_subagent()
    if not resolved:
        # Still emit under a generic name so the call stays visible in the UI.
        logger.warning(f"No subagent context for tool call: {tool_name}")
        resolved = "subagent"

    emit_subagent_event(SubagentEvent(
        event_type="subagent_tool_call",
        subagent_name=resolved,
        tool_name=tool_name,
        tool_args=tool_args,
    ))
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def emit_subagent_complete(name: str, result_preview: Optional[str] = None) -> None:
    """Emit a subagent_complete event, optionally carrying a result preview."""
    emit_subagent_event(
        SubagentEvent(
            event_type="subagent_complete",
            subagent_name=name,
            result_preview=result_preview,
        )
    )
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def drain_subagent_events() -> List[SubagentEvent]:
    """
    Remove and return every pending subagent event from the queue.

    Returns:
        Events in FIFO order; empty list when nothing is queued.
    """
    drained: List[SubagentEvent] = []
    try:
        while True:
            drained.append(_event_queue.get_nowait())
    except Empty:
        pass  # queue exhausted
    return drained
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def get_pending_event_count() -> int:
    """Return the number of subagent events currently waiting in the queue."""
    return _event_queue.qsize()
|