hdsp-jupyter-extension 2.0.6__py3-none-any.whl → 2.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/core/embedding_service.py +67 -46
- agent_server/core/rag_manager.py +31 -17
- agent_server/core/reflection_engine.py +0 -1
- agent_server/core/retriever.py +13 -8
- agent_server/core/vllm_embedding_service.py +243 -0
- agent_server/knowledge/watchdog_service.py +1 -1
- agent_server/langchain/ARCHITECTURE.md +1193 -0
- agent_server/langchain/agent.py +82 -588
- agent_server/langchain/custom_middleware.py +663 -0
- agent_server/langchain/executors/__init__.py +2 -7
- agent_server/langchain/executors/notebook_searcher.py +46 -38
- agent_server/langchain/hitl_config.py +71 -0
- agent_server/langchain/llm_factory.py +166 -0
- agent_server/langchain/logging_utils.py +223 -0
- agent_server/langchain/prompts.py +150 -0
- agent_server/langchain/state.py +16 -6
- agent_server/langchain/tools/__init__.py +19 -0
- agent_server/langchain/tools/file_tools.py +354 -114
- agent_server/langchain/tools/file_utils.py +334 -0
- agent_server/langchain/tools/jupyter_tools.py +18 -18
- agent_server/langchain/tools/lsp_tools.py +264 -0
- agent_server/langchain/tools/resource_tools.py +161 -0
- agent_server/langchain/tools/search_tools.py +198 -216
- agent_server/langchain/tools/shell_tools.py +54 -0
- agent_server/main.py +11 -1
- agent_server/routers/health.py +1 -1
- agent_server/routers/langchain_agent.py +1040 -289
- agent_server/routers/rag.py +8 -3
- hdsp_agent_core/models/rag.py +15 -1
- hdsp_agent_core/prompts/auto_agent_prompts.py +3 -3
- hdsp_agent_core/services/rag_service.py +6 -1
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +3 -2
- hdsp_jupyter_extension-2.0.6.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.02d346171474a0fb2dc1.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js +470 -7
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js.map +1 -0
- hdsp_jupyter_extension-2.0.6.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.a223ea20056954479ae9.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js +3196 -441
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +1 -0
- hdsp_jupyter_extension-2.0.6.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.addf2fa038fa60304aa2.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js +9 -7
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +1 -0
- {hdsp_jupyter_extension-2.0.6.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/METADATA +2 -1
- {hdsp_jupyter_extension-2.0.6.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/RECORD +75 -69
- jupyter_ext/__init__.py +18 -0
- jupyter_ext/_version.py +1 -1
- jupyter_ext/handlers.py +1351 -58
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +3 -2
- jupyter_ext/labextension/static/{frontend_styles_index_js.02d346171474a0fb2dc1.js → frontend_styles_index_js.8740a527757068814573.js} +470 -7
- jupyter_ext/labextension/static/frontend_styles_index_js.8740a527757068814573.js.map +1 -0
- jupyter_ext/labextension/static/{lib_index_js.a223ea20056954479ae9.js → lib_index_js.e4ff4b5779b5e049f84c.js} +3196 -441
- jupyter_ext/labextension/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.addf2fa038fa60304aa2.js → remoteEntry.020cdb0b864cfaa4e41e.js} +9 -7
- jupyter_ext/labextension/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +1 -0
- jupyter_ext/resource_usage.py +180 -0
- jupyter_ext/tests/test_handlers.py +58 -0
- agent_server/langchain/executors/jupyter_executor.py +0 -429
- agent_server/langchain/middleware/__init__.py +0 -36
- agent_server/langchain/middleware/code_search_middleware.py +0 -278
- agent_server/langchain/middleware/error_handling_middleware.py +0 -338
- agent_server/langchain/middleware/jupyter_execution_middleware.py +0 -301
- agent_server/langchain/middleware/rag_middleware.py +0 -227
- agent_server/langchain/middleware/validation_middleware.py +0 -240
- hdsp_jupyter_extension-2.0.6.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.02d346171474a0fb2dc1.js.map +0 -1
- hdsp_jupyter_extension-2.0.6.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.a223ea20056954479ae9.js.map +0 -1
- hdsp_jupyter_extension-2.0.6.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.addf2fa038fa60304aa2.js.map +0 -1
- jupyter_ext/labextension/static/frontend_styles_index_js.02d346171474a0fb2dc1.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.a223ea20056954479ae9.js.map +0 -1
- jupyter_ext/labextension/static/remoteEntry.addf2fa038fa60304aa2.js.map +0 -1
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
- {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.6.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.6.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LSP Tools for LangChain Agent
|
|
3
|
+
|
|
4
|
+
Provides tools for LSP (Language Server Protocol) integration:
|
|
5
|
+
- diagnostics_tool: Get code diagnostics (errors, warnings)
|
|
6
|
+
- references_tool: Find symbol references
|
|
7
|
+
|
|
8
|
+
Crush 패턴 적용:
|
|
9
|
+
- 진단 결과 포맷팅 (severity 기반 정렬)
|
|
10
|
+
- 출력 제한 (최대 10개 + 요약)
|
|
11
|
+
- Grep-then-LSP 패턴 (references)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
from langchain_core.tools import tool
|
|
17
|
+
from pydantic import BaseModel, Field
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DiagnosticsInput(BaseModel):
    """Arguments accepted by ``diagnostics_tool``.

    Every field is optional: omitting ``path`` asks for project-wide
    diagnostics, and ``execution_result`` is only present on the second
    round-trip, once the client-side LSP bridge has produced a result.
    """

    # Target file; None means "the whole project".
    path: Optional[str] = Field(
        description="File path to get diagnostics for. If not provided, returns project-wide diagnostics.",
        default=None,
    )
    # Restrict output to one severity level.
    severity_filter: Optional[str] = Field(
        description="Filter by severity: 'error', 'warning', 'hint', or None for all",
        default=None,
    )
    # Filled in by the client after the LSP request has actually run.
    execution_result: Optional[Dict[str, Any]] = Field(
        description="LSP diagnostics result from client",
        default=None,
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ReferencesInput(BaseModel):
    """Arguments accepted by ``references_tool``.

    Only ``symbol`` is required; ``path``/``line``/``character`` narrow the
    lookup position, and ``execution_result`` carries the client's LSP
    response on the second round-trip.
    """

    # The function/class/variable name to look up.
    symbol: str = Field(description="Symbol name to find references for")
    # Optional location hints for the language server.
    path: Optional[str] = Field(
        description="File path where the symbol is located (optional)",
        default=None,
    )
    line: Optional[int] = Field(
        description="Line number (1-indexed, optional)", default=None
    )
    character: Optional[int] = Field(
        description="Character position (optional)", default=None
    )
    # Filled in by the client after the references query has run.
    execution_result: Optional[Dict[str, Any]] = Field(
        description="LSP references result from client",
        default=None,
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@tool(args_schema=DiagnosticsInput)
def diagnostics_tool(
    path: Optional[str] = None,
    severity_filter: Optional[str] = None,
    execution_result: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Get LSP diagnostics (errors, warnings) for a file or the entire project.

    Use this tool to:
    - Check for syntax errors before running code
    - Find type errors in Python/TypeScript files
    - Identify unused imports or variables
    - Verify code quality issues after editing

    The diagnostics are provided by language servers (pylsp, etc.)
    and are more accurate than simple linting.

    **Best Practice**: Always check diagnostics after editing code:
    1. edit_file_tool(...) - make changes
    2. diagnostics_tool(path="file.py") - verify no new errors

    Args:
        path: Optional file path. None = project-wide diagnostics
        severity_filter: Optional filter ('error', 'warning', 'hint')
        execution_result: Diagnostics payload produced by the client;
            when None, a pending request descriptor is returned instead.

    Returns:
        Formatted diagnostics with severity, location, and message
    """
    if execution_result is None:
        # First round-trip: no client data yet — queue the request for the
        # LSP bridge to execute.
        return {
            "tool": "diagnostics_tool",
            "parameters": {
                "path": path,
                "severity_filter": severity_filter,
            },
            "status": "pending_execution",
            "message": "Diagnostics request queued for LSP bridge execution",
        }

    # Second round-trip: format the client-provided result (Crush pattern).
    diagnostics = execution_result.get("diagnostics", [])
    lsp_available = execution_result.get("lsp_available", False)

    if not lsp_available:
        return {
            "tool": "diagnostics_tool",
            "success": True,
            "output": "LSP not available. Install jupyterlab-lsp for code diagnostics.\nUse search_workspace_tool for text-based code search instead.",
            "counts": {"errors": 0, "warnings": 0, "total": 0},
        }

    # Apply the severity filter before sorting so we only process entries
    # that can actually be displayed.
    if severity_filter:
        shown = [d for d in diagnostics if d.get("severity") == severity_filter]
    else:
        shown = list(diagnostics)

    # Errors first, then warnings/information/hints; ties by file and line.
    severity_order = {"error": 0, "warning": 1, "information": 2, "hint": 3}
    shown.sort(
        key=lambda d: (
            severity_order.get(d.get("severity", "hint"), 3),
            d.get("file", ""),
            d.get("line", 0),
        )
    )

    # Format output, capped at 10 entries (Crush formatDiagnostics pattern).
    formatted_lines = []
    for d in shown[:10]:
        severity = d.get("severity", "hint").upper()
        line = d.get("line", 0)
        col = d.get("character", 0)
        source = d.get("source", "")
        code = d.get("code", "")
        message = d.get("message", "")
        file = d.get("file", path or "")

        location = f"{file}:{line}:{col}" if file else f"L{line}:{col}"
        source_info = f"[{source}]" if source else ""
        code_info = f"[{code}]" if code else ""

        formatted_lines.append(
            f"{severity} {location} {source_info}{code_info} {message}"
        )

    # Counts always reflect the full, unfiltered diagnostics set.
    total = len(diagnostics)
    errors = sum(1 for d in diagnostics if d.get("severity") == "error")
    warnings = sum(1 for d in diagnostics if d.get("severity") == "warning")

    summary = f"\n--- Summary: {errors} errors, {warnings} warnings, {total} total"
    # BUGFIX: key the truncation note on what is actually displayed (the
    # filtered, sorted list) rather than the unfiltered total — previously
    # a filter matching fewer than 10 entries could still claim
    # "showing first 10".
    if len(shown) > 10:
        summary += " (showing first 10)"

    if formatted_lines:
        output = "\n".join(formatted_lines) + summary
    else:
        # lsp_available is always True here (early return above handles False).
        output = "No diagnostics found. LSP is available."

    return {
        "tool": "diagnostics_tool",
        "success": True,
        "output": output,
        "counts": {"errors": errors, "warnings": warnings, "total": total},
    }
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@tool(args_schema=ReferencesInput)
def references_tool(
    symbol: str,
    path: Optional[str] = None,
    line: Optional[int] = None,
    character: Optional[int] = None,
    execution_result: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Find all references to a symbol across the codebase.

    Use this tool to:
    - Check if a function/class is used before renaming/deleting
    - Understand how a variable is used throughout the code
    - Find all usages before refactoring

    If LSP is not available, falls back to search_workspace_tool.

    Args:
        symbol: Symbol name (function, class, variable)
        path: Optional file path where symbol is located
        line: Optional line number (1-indexed)
        character: Optional character position
        execution_result: Client-produced references payload; when None,
            a pending request descriptor is returned instead.

    Returns:
        List of locations where the symbol is referenced
    """
    if execution_result is None:
        # No client result yet — hand back a request for the bridge to run.
        return {
            "tool": "references_tool",
            "parameters": {
                "symbol": symbol,
                "path": path,
                "line": line,
                "character": character,
            },
            "status": "pending_execution",
            "message": "References search queued for execution",
        }

    locations = execution_result.get("locations", [])
    lsp_available = execution_result.get("lsp_available", False)
    used_grep = execution_result.get("used_grep", False)

    if not locations:
        # Distinguish "LSP missing" from a genuine empty result.
        if lsp_available:
            empty_output = f"No references found for '{symbol}'"
        else:
            empty_output = (
                f"LSP not available. Use search_workspace_tool with "
                f"pattern='{symbol}' for text-based search."
            )
        return {
            "tool": "references_tool",
            "success": True,
            "output": empty_output,
            "count": 0,
        }

    # Group hits per file so the output reads file-by-file (Crush pattern).
    by_file: Dict[str, List] = {}
    for loc in locations:
        by_file.setdefault(loc.get("file", "unknown"), []).append(loc)

    method_note = " (grep-based)" if used_grep else " (LSP)"
    lines_out = [f"Found {len(locations)} references to '{symbol}'{method_note}:\n"]

    for file, locs in sorted(by_file.items()):
        lines_out.append(f"\n📄 {file}")
        for loc in sorted(locs, key=lambda x: x.get("line", 0)):
            line_num = loc.get("line", 0)
            col = loc.get("character", 0)
            preview = (loc.get("preview", "") or "")[:60]
            lines_out.append(f" L{line_num}:{col} {preview}")

    return {
        "tool": "references_tool",
        "success": True,
        "output": "\n".join(lines_out),
        "count": len(locations),
        "by_file": {f: len(v) for f, v in by_file.items()},
    }
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# Export all LSP tools
# Consumed by the tools package to register LSP capabilities with the agent.
LSP_TOOLS = [
    diagnostics_tool,
    references_tool,
]
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Resource Check Tools for LangChain Agent
|
|
3
|
+
|
|
4
|
+
Provides a tool for checking resource availability before data processing.
|
|
5
|
+
This tool is executed on the client (Jupyter) side to accurately measure:
|
|
6
|
+
- System resources (RAM, CPU)
|
|
7
|
+
- File sizes for target files
|
|
8
|
+
- In-memory DataFrame shapes
|
|
9
|
+
|
|
10
|
+
Key features:
|
|
11
|
+
- On-demand resource checking (only when LLM needs it)
|
|
12
|
+
- Returns actionable recommendations (in-memory vs DASK/Chunking)
|
|
13
|
+
- Supports both file paths and DataFrame variable names
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
from typing import Any, Dict, List, Optional
|
|
18
|
+
|
|
19
|
+
from langchain_core.tools import tool
|
|
20
|
+
from pydantic import BaseModel, Field
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CheckResourceInput(BaseModel):
    """Arguments accepted by ``check_resource_tool``.

    Both lists are optional; ``execution_result`` is only present on the
    second round-trip, once the client has run the generated checks.
    """

    # IDIOM: use default_factory for mutable defaults instead of a shared
    # literal list (pydantic best practice; behavior is unchanged).
    files: List[str] = Field(
        default_factory=list,
        description="List of file paths to check sizes for (e.g., ['data.csv', 'train.parquet'])",
    )
    dataframes: List[str] = Field(
        default_factory=list,
        description="List of DataFrame variable names to check in memory (e.g., ['df', 'train_df'])",
    )
    # Filled in by the client after executing the generated commands/code.
    execution_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Execution result payload from the client",
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _build_file_size_command(files: List[str]) -> str:
|
|
43
|
+
"""
|
|
44
|
+
Build a shell command to get file sizes.
|
|
45
|
+
Uses stat for cross-platform compatibility.
|
|
46
|
+
"""
|
|
47
|
+
if not files:
|
|
48
|
+
return ""
|
|
49
|
+
|
|
50
|
+
# Use stat with format that works on both macOS and Linux
|
|
51
|
+
# macOS: stat -f "%z %N"
|
|
52
|
+
# Linux: stat -c "%s %n"
|
|
53
|
+
# We use a portable approach with ls -l
|
|
54
|
+
file_list = " ".join(f"'{f}'" for f in files)
|
|
55
|
+
return f"ls -l {file_list} 2>/dev/null | awk '{{print $5, $NF}}'"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _build_dataframe_check_code(dataframes: List[str]) -> str:
|
|
59
|
+
"""
|
|
60
|
+
Build Python code to check DataFrame shapes and memory usage.
|
|
61
|
+
Returns a JSON-serializable result.
|
|
62
|
+
"""
|
|
63
|
+
if not dataframes:
|
|
64
|
+
return ""
|
|
65
|
+
|
|
66
|
+
df_checks = []
|
|
67
|
+
for df_name in dataframes:
|
|
68
|
+
df_checks.append(f'''
|
|
69
|
+
try:
|
|
70
|
+
_df = {df_name}
|
|
71
|
+
_info = {{
|
|
72
|
+
"name": "{df_name}",
|
|
73
|
+
"exists": True,
|
|
74
|
+
"rows": len(_df) if hasattr(_df, '__len__') else None,
|
|
75
|
+
"cols": len(_df.columns) if hasattr(_df, 'columns') else None,
|
|
76
|
+
"memory_mb": round(_df.memory_usage(deep=True).sum() / 1024 / 1024, 2) if hasattr(_df, 'memory_usage') else None,
|
|
77
|
+
"type": type(_df).__name__
|
|
78
|
+
}}
|
|
79
|
+
except NameError:
|
|
80
|
+
_info = {{"name": "{df_name}", "exists": False}}
|
|
81
|
+
_results.append(_info)
|
|
82
|
+
''')
|
|
83
|
+
|
|
84
|
+
code = f'''
|
|
85
|
+
import json
|
|
86
|
+
_results = []
|
|
87
|
+
{chr(10).join(df_checks)}
|
|
88
|
+
print(json.dumps(_results))
|
|
89
|
+
'''
|
|
90
|
+
return code.strip()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@tool(args_schema=CheckResourceInput)
def check_resource_tool(
    files: List[str] = None,
    dataframes: List[str] = None,
    execution_result: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Check system resources, file sizes, and DataFrame shapes before data processing.

    IMPORTANT: Call this tool BEFORE writing any data analysis or ML code to ensure
    the generated code uses appropriate memory strategies (ex. in-memory vs DASK/Chunking).

    Args:
        files: List of file paths to check sizes for (e.g., ['data.csv', 'train.parquet'])
        dataframes: List of DataFrame variable names in memory (e.g., ['df', 'train_df'])
        execution_result: Client-produced payload; when None the response is
            a pending-execution descriptor carrying the generated checks.

    Returns:
        Dict with:
        - system: Current RAM/CPU availability (ram_available_mb, ram_total_mb, cpu_cores)
        - files: File sizes in MB for each requested file
        - dataframes: DataFrame shapes and memory usage for each requested variable
    """
    # Normalize None defaults to empty lists.
    files = [] if files is None else files
    dataframes = [] if dataframes is None else dataframes

    # Base response doubles as the "pending" descriptor: it ships the shell
    # command and kernel code the client must execute.
    response: Dict[str, Any] = {
        "tool": "check_resource_tool",
        "parameters": {
            "files": files,
            "dataframes": dataframes,
        },
        "file_size_command": _build_file_size_command(files),
        "dataframe_check_code": _build_dataframe_check_code(dataframes),
        "status": "pending_execution",
        "message": "Resource check queued for execution by client",
    }

    if execution_result is None:
        return response

    # Second round-trip: fold the client's measurements into the response.
    response["execution_result"] = execution_result
    response["status"] = "complete"
    response["message"] = "Resource check completed"

    if isinstance(execution_result, dict):
        response["success"] = execution_result.get("success", False)
        response["system"] = execution_result.get("system", {})
        response["files"] = execution_result.get("files", [])
        response["dataframes"] = execution_result.get("dataframes", [])
        if "error" in execution_result:
            response["error"] = execution_result["error"]

    return response
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# Export
# Public list of resource-inspection tools registered with the agent.
RESOURCE_TOOLS = [check_resource_tool]
|