PyPI - hdsp-jupyter-extension - Versions diffs - 2.0.5__py3-none-any.whl → 2.0.7__py3-none-any.whl - Mend

hdsp-jupyter-extension 2.0.5__py3-none-any.whl → 2.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

agent_server/langchain/tools/jupyter_tools.py CHANGED Viewed

@@ -15,28 +15,28 @@ from pydantic import BaseModel, Field
 class JupyterCellInput(BaseModel):
     """Input schema for jupyter_cell tool"""
     code: str = Field(description="Python code to execute in the notebook cell")
     description: Optional[str] = Field(
-        default=None,
-        description="Optional description of what this code does"
+        default=None, description="Optional description of what this code does"
     )
     execution_result: Optional[Dict[str, Any]] = Field(
-        default=None,
-        description="Optional execution result payload from the client"
+        default=None, description="Optional execution result payload from the client"
     )
 class MarkdownInput(BaseModel):
     """Input schema for markdown tool"""
     content: str = Field(description="Markdown content to add to the notebook")
 class FinalAnswerInput(BaseModel):
     """Input schema for final_answer tool"""
     answer: str = Field(description="Final answer/summary to present to the user")
     summary: Optional[str] = Field(
-        default=None,
-        description="Optional brief summary of what was accomplished"
+        default=None, description="Optional brief summary of what was accomplished"
     )
@@ -48,14 +48,14 @@ def jupyter_cell_tool(
 ) -> Dict[str, Any]:
     """
     Execute Python code in a new Jupyter notebook cell.
     This tool adds a new code cell at the end of the notebook and executes it.
     The execution is handled by JupyterExecutionMiddleware.
     Args:
         code: Python code to execute
         description: Optional description of the code's purpose
     Returns:
         Dict containing execution request (actual execution by middleware)
     """
@@ -76,7 +76,7 @@ def jupyter_cell_tool(
             "description": description,
         },
         "status": "pending_execution",
-        "message": "Code cell queued for execution by JupyterExecutionMiddleware"
+        "message": "Code cell queued for execution by JupyterExecutionMiddleware",
     }
     if execution_result is not None:
         response["execution_result"] = execution_result
@@ -89,13 +89,13 @@ def jupyter_cell_tool(
 def markdown_tool(content: str) -> Dict[str, Any]:
     """
     Add a markdown cell to the Jupyter notebook.
     This tool adds a new markdown cell at the end of the notebook.
     Useful for adding explanations, documentation, or section headers.
     Args:
         content: Markdown content to add
     Returns:
         Dict containing the markdown addition request
     """
@@ -105,7 +105,7 @@ def markdown_tool(content: str) -> Dict[str, Any]:
             "content": content,
         },
         "status": "completed",
-        "message": "Markdown cell added successfully. Continue with the next task."
+        "message": "Markdown cell added successfully. Continue with the next task.",
     }
@@ -113,14 +113,14 @@ def markdown_tool(content: str) -> Dict[str, Any]:
 def final_answer_tool(answer: str, summary: Optional[str] = None) -> Dict[str, Any]:
     """
     Complete the task and provide final answer to the user.
     Use this tool when you have successfully completed the user's request.
     Provide a clear summary of what was accomplished.
     Args:
         answer: Final answer/message to the user
         summary: Optional brief summary
     Returns:
         Dict marking task completion
     """
@@ -131,7 +131,7 @@ def final_answer_tool(answer: str, summary: Optional[str] = None) -> Dict[str, A
             "summary": summary,
         },
         "status": "complete",
-        "message": "Task completed successfully"
+        "message": "Task completed successfully",
     }

agent_server/langchain/tools/resource_tools.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""
+Resource Check Tools for LangChain Agent
+Provides a tool for checking resource availability before data processing.
+This tool is executed on the client (Jupyter) side to accurately measure:
+- System resources (RAM, CPU)
+- File sizes for target files
+- In-memory DataFrame shapes
+Key features:
+- On-demand resource checking (only when LLM needs it)
+- Returns actionable recommendations (in-memory vs DASK/Chunking)
+- Supports both file paths and DataFrame variable names
+"""
+import logging
+from typing import Any, Dict, List, Optional
+from langchain_core.tools import tool
+from pydantic import BaseModel, Field
+logger = logging.getLogger(__name__)
+class CheckResourceInput(BaseModel):
+    """Input schema for check_resource tool"""
+    # File paths whose sizes should be checked; an empty list when omitted.
+    files: List[str] = Field(
+        description="List of file paths to check sizes for (e.g., ['data.csv', 'train.parquet'])",
+        default=[],
+    )
+    # Variable names to inspect in memory; an empty list when omitted.
+    dataframes: List[str] = Field(
+        description="List of DataFrame variable names to check in memory (e.g., ['df', 'train_df'])",
+        default=[],
+    )
+    # Result payload sent back by the client; None when no result is supplied.
+    execution_result: Optional[Dict[str, Any]] = Field(
+        description="Execution result payload from the client",
+        default=None,
+    )
+def _build_file_size_command(files: List[str]) -> str:
+    """
+    Build a shell command that reports the size of each requested file.
+    The command runs ``ls -l`` on the given paths, discards its error output
+    (``2>/dev/null``) and pipes the listing through ``awk`` to print the
+    fifth column followed by the last whitespace-separated field of each line.
+    Args:
+        files: File paths to include in the command.
+    Returns:
+        The shell command string, or "" when ``files`` is empty.
+    """
+    if not files:
+        return ""
+    # Every path is wrapped in single quotes. An embedded single quote is
+    # written as '"'"' (close quote, double-quoted quote, reopen quote) so a
+    # path can neither terminate the quoting nor inject extra shell syntax.
+    # Paths without a single quote produce exactly the same text as before.
+    quoted_paths = ["'" + path.replace("'", "'\"'\"'") + "'" for path in files]
+    file_list = " ".join(quoted_paths)
+    # NOTE: awk's $NF is only the last whitespace-separated field, so a path
+    # containing spaces is reported by its final word only.
+    return f"ls -l {file_list} 2>/dev/null | awk '{{print $5, $NF}}'"
+def _build_dataframe_check_code(dataframes: List[str]) -> str:
+    """
+    Build Python code to check DataFrame shapes and memory usage.
+    The generated script looks up each requested name, collects its row and
+    column counts, memory usage in MB and type name, and prints the collected
+    entries as one JSON list. A name that is not defined when the script runs
+    is reported as ``{"name": ..., "exists": False}``.
+    Args:
+        dataframes: Variable names to inspect. Each must be an identifier or
+            a dotted chain of identifiers; any other string is never placed
+            into the script as code and is reported with ``"exists": False``.
+    Returns:
+        The generated source code, or "" when ``dataframes`` is empty.
+    """
+    import keyword  # local import: only needed for the name validation below
+    if not dataframes:
+        return ""
+    df_checks = []
+    for df_name in dataframes:
+        # The name is interpolated into source code below, so anything other
+        # than a (dotted) identifier could inject statements or make the whole
+        # script a SyntaxError, losing the results for every other name.
+        is_safe_name = all(
+            part.isidentifier() and not keyword.iskeyword(part)
+            for part in df_name.split(".")
+        )
+        if not is_safe_name:
+            # repr() yields a safely escaped literal for the rejected text.
+            df_checks.append(f'''
+_info = {{"name": {df_name!r}, "exists": False}}
+_results.append(_info)
+''')
+            continue
+        df_checks.append(f'''
+try:
+    _df = {df_name}
+    _info = {{
+        "name": "{df_name}",
+        "exists": True,
+        "rows": len(_df) if hasattr(_df, '__len__') else None,
+        "cols": len(_df.columns) if hasattr(_df, 'columns') else None,
+        "memory_mb": round(_df.memory_usage(deep=True).sum() / 1024 / 1024, 2) if hasattr(_df, 'memory_usage') else None,
+        "type": type(_df).__name__
+    }}
+except NameError:
+    _info = {{"name": "{df_name}", "exists": False}}
+_results.append(_info)
+''')
+    # chr(10) is a newline; it is spelled this way inside the f-string
+    # expression instead of a backslash escape.
+    code = f'''
+import json
+_results = []
+{chr(10).join(df_checks)}
+print(json.dumps(_results))
+'''
+    return code.strip()
+@tool(args_schema=CheckResourceInput)
+def check_resource_tool(
+    files: Optional[List[str]] = None,
+    dataframes: Optional[List[str]] = None,
+    execution_result: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+    """
+    Check system resources, file sizes, and DataFrame shapes before data processing.
+    IMPORTANT: Call this tool BEFORE writing any data analysis or ML code to ensure
+    the generated code uses appropriate memory strategies (ex. in-memory vs DASK/Chunking).
+    Args:
+        files: List of file paths to check sizes for (e.g., ['data.csv', 'train.parquet'])
+        dataframes: List of DataFrame variable names in memory (e.g., ['df', 'train_df'])
+    Returns:
+        Dict with:
+        - system: Current RAM/CPU availability (ram_available_mb, ram_total_mb, cpu_cores)
+        - files: File sizes in MB for each requested file
+        - dataframes: DataFrame shapes and memory usage for each requested variable
+    """
+    # NOTE(review): the docstring above is kept verbatim on purpose; the @tool
+    # decorator presumably exposes it as the tool description - confirm before
+    # rewording it.
+    # None means "not provided"; substitute a fresh empty list per call.
+    files = [] if files is None else files
+    dataframes = [] if dataframes is None else dataframes
+    # Request payload: echoes the parameters and carries the generated shell
+    # command and Python snippet, marked as still pending execution.
+    response: Dict[str, Any] = {
+        "tool": "check_resource_tool",
+        "parameters": {
+            "files": files,
+            "dataframes": dataframes,
+        },
+        "file_size_command": _build_file_size_command(files),
+        "dataframe_check_code": _build_dataframe_check_code(dataframes),
+        "status": "pending_execution",
+        "message": "Resource check queued for execution by client",
+    }
+    # Without a result payload there is nothing more to report.
+    if execution_result is None:
+        return response
+    response["execution_result"] = execution_result
+    response["status"] = "complete"
+    response["message"] = "Resource check completed"
+    # Copy the known sections out of the payload, with empty fallbacks.
+    if isinstance(execution_result, dict):
+        response["success"] = execution_result.get("success", False)
+        for key, fallback in (("system", {}), ("files", []), ("dataframes", [])):
+            response[key] = execution_result.get(key, fallback)
+        if "error" in execution_result:
+            response["error"] = execution_result["error"]
+    return response
+# Export: the tools defined in this module, collected in a single list.
+RESOURCE_TOOLS = [check_resource_tool]

hdsp-jupyter-extension 2.0.5__py3-none-any.whl → 2.0.7__py3-none-any.whl

hdsp-jupyter-extension 2.0.5__py3-none-any.whl → 2.0.7__py3-none-any.whl