hdsp-jupyter-extension 2.0.5__py3-none-any.whl → 2.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/core/reflection_engine.py +0 -1
- agent_server/knowledge/watchdog_service.py +1 -1
- agent_server/langchain/ARCHITECTURE.md +1193 -0
- agent_server/langchain/agent.py +74 -551
- agent_server/langchain/custom_middleware.py +636 -0
- agent_server/langchain/executors/__init__.py +2 -7
- agent_server/langchain/executors/notebook_searcher.py +46 -38
- agent_server/langchain/hitl_config.py +66 -0
- agent_server/langchain/llm_factory.py +166 -0
- agent_server/langchain/logging_utils.py +184 -0
- agent_server/langchain/prompts.py +119 -0
- agent_server/langchain/state.py +16 -6
- agent_server/langchain/tools/__init__.py +6 -0
- agent_server/langchain/tools/file_tools.py +91 -129
- agent_server/langchain/tools/jupyter_tools.py +18 -18
- agent_server/langchain/tools/resource_tools.py +161 -0
- agent_server/langchain/tools/search_tools.py +198 -216
- agent_server/langchain/tools/shell_tools.py +54 -0
- agent_server/main.py +4 -1
- agent_server/routers/health.py +1 -1
- agent_server/routers/langchain_agent.py +941 -305
- hdsp_agent_core/prompts/auto_agent_prompts.py +3 -3
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
- hdsp_jupyter_extension-2.0.5.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8cc4873c413ed56ff485.js → hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js +314 -8
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js.map +1 -0
- hdsp_jupyter_extension-2.0.5.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.a223ea20056954479ae9.js → hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.29cf4312af19e86f82af.js +1547 -330
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.29cf4312af19e86f82af.js.map +1 -0
- hdsp_jupyter_extension-2.0.5.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.37299706f55c6d46099d.js → hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.61343eb4cf0577e74b50.js +8 -8
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.61343eb4cf0577e74b50.js.map +1 -0
- hdsp_jupyter_extension-2.0.5.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js → hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js +209 -2
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +1 -0
- jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js → hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js +2 -209
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +1 -0
- hdsp_jupyter_extension-2.0.5.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js → hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js +3 -212
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +1 -0
- {hdsp_jupyter_extension-2.0.5.dist-info → hdsp_jupyter_extension-2.0.7.dist-info}/METADATA +2 -1
- {hdsp_jupyter_extension-2.0.5.dist-info → hdsp_jupyter_extension-2.0.7.dist-info}/RECORD +71 -68
- jupyter_ext/_version.py +1 -1
- jupyter_ext/handlers.py +1176 -58
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +2 -2
- jupyter_ext/labextension/static/{frontend_styles_index_js.8cc4873c413ed56ff485.js → frontend_styles_index_js.4770ec0fb2d173b6deb4.js} +314 -8
- jupyter_ext/labextension/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js.map +1 -0
- jupyter_ext/labextension/static/{lib_index_js.a223ea20056954479ae9.js → lib_index_js.29cf4312af19e86f82af.js} +1547 -330
- jupyter_ext/labextension/static/lib_index_js.29cf4312af19e86f82af.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.37299706f55c6d46099d.js → remoteEntry.61343eb4cf0577e74b50.js} +8 -8
- jupyter_ext/labextension/static/remoteEntry.61343eb4cf0577e74b50.js.map +1 -0
- jupyter_ext/labextension/static/{vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js → vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js} +209 -2
- jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +1 -0
- hdsp_jupyter_extension-2.0.5.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js → jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js +2 -209
- jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +1 -0
- jupyter_ext/labextension/static/{vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js → vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js} +3 -212
- jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +1 -0
- jupyter_ext/resource_usage.py +180 -0
- jupyter_ext/tests/test_handlers.py +58 -0
- agent_server/langchain/executors/jupyter_executor.py +0 -429
- agent_server/langchain/middleware/__init__.py +0 -36
- agent_server/langchain/middleware/code_search_middleware.py +0 -278
- agent_server/langchain/middleware/error_handling_middleware.py +0 -338
- agent_server/langchain/middleware/jupyter_execution_middleware.py +0 -301
- agent_server/langchain/middleware/rag_middleware.py +0 -227
- agent_server/langchain/middleware/validation_middleware.py +0 -240
- hdsp_jupyter_extension-2.0.5.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8cc4873c413ed56ff485.js.map +0 -1
- hdsp_jupyter_extension-2.0.5.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.a223ea20056954479ae9.js.map +0 -1
- hdsp_jupyter_extension-2.0.5.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.37299706f55c6d46099d.js.map +0 -1
- hdsp_jupyter_extension-2.0.5.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -1
- hdsp_jupyter_extension-2.0.5.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -1
- hdsp_jupyter_extension-2.0.5.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -1
- jupyter_ext/labextension/static/frontend_styles_index_js.8cc4873c413ed56ff485.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.a223ea20056954479ae9.js.map +0 -1
- jupyter_ext/labextension/static/remoteEntry.37299706f55c6d46099d.js.map +0 -1
- jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -1
- jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -1
- jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -1
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.5.data → hdsp_jupyter_extension-2.0.7.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.5.dist-info → hdsp_jupyter_extension-2.0.7.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.5.dist-info → hdsp_jupyter_extension-2.0.7.dist-info}/licenses/LICENSE +0 -0
|
@@ -15,28 +15,28 @@ from pydantic import BaseModel, Field
|
|
|
15
15
|
|
|
16
16
|
class JupyterCellInput(BaseModel):
|
|
17
17
|
"""Input schema for jupyter_cell tool"""
|
|
18
|
+
|
|
18
19
|
code: str = Field(description="Python code to execute in the notebook cell")
|
|
19
20
|
description: Optional[str] = Field(
|
|
20
|
-
default=None,
|
|
21
|
-
description="Optional description of what this code does"
|
|
21
|
+
default=None, description="Optional description of what this code does"
|
|
22
22
|
)
|
|
23
23
|
execution_result: Optional[Dict[str, Any]] = Field(
|
|
24
|
-
default=None,
|
|
25
|
-
description="Optional execution result payload from the client"
|
|
24
|
+
default=None, description="Optional execution result payload from the client"
|
|
26
25
|
)
|
|
27
26
|
|
|
28
27
|
|
|
29
28
|
class MarkdownInput(BaseModel):
|
|
30
29
|
"""Input schema for markdown tool"""
|
|
30
|
+
|
|
31
31
|
content: str = Field(description="Markdown content to add to the notebook")
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
class FinalAnswerInput(BaseModel):
|
|
35
35
|
"""Input schema for final_answer tool"""
|
|
36
|
+
|
|
36
37
|
answer: str = Field(description="Final answer/summary to present to the user")
|
|
37
38
|
summary: Optional[str] = Field(
|
|
38
|
-
default=None,
|
|
39
|
-
description="Optional brief summary of what was accomplished"
|
|
39
|
+
default=None, description="Optional brief summary of what was accomplished"
|
|
40
40
|
)
|
|
41
41
|
|
|
42
42
|
|
|
@@ -48,14 +48,14 @@ def jupyter_cell_tool(
|
|
|
48
48
|
) -> Dict[str, Any]:
|
|
49
49
|
"""
|
|
50
50
|
Execute Python code in a new Jupyter notebook cell.
|
|
51
|
-
|
|
51
|
+
|
|
52
52
|
This tool adds a new code cell at the end of the notebook and executes it.
|
|
53
53
|
The execution is handled by JupyterExecutionMiddleware.
|
|
54
|
-
|
|
54
|
+
|
|
55
55
|
Args:
|
|
56
56
|
code: Python code to execute
|
|
57
57
|
description: Optional description of the code's purpose
|
|
58
|
-
|
|
58
|
+
|
|
59
59
|
Returns:
|
|
60
60
|
Dict containing execution request (actual execution by middleware)
|
|
61
61
|
"""
|
|
@@ -76,7 +76,7 @@ def jupyter_cell_tool(
|
|
|
76
76
|
"description": description,
|
|
77
77
|
},
|
|
78
78
|
"status": "pending_execution",
|
|
79
|
-
"message": "Code cell queued for execution by JupyterExecutionMiddleware"
|
|
79
|
+
"message": "Code cell queued for execution by JupyterExecutionMiddleware",
|
|
80
80
|
}
|
|
81
81
|
if execution_result is not None:
|
|
82
82
|
response["execution_result"] = execution_result
|
|
@@ -89,13 +89,13 @@ def jupyter_cell_tool(
|
|
|
89
89
|
def markdown_tool(content: str) -> Dict[str, Any]:
|
|
90
90
|
"""
|
|
91
91
|
Add a markdown cell to the Jupyter notebook.
|
|
92
|
-
|
|
92
|
+
|
|
93
93
|
This tool adds a new markdown cell at the end of the notebook.
|
|
94
94
|
Useful for adding explanations, documentation, or section headers.
|
|
95
|
-
|
|
95
|
+
|
|
96
96
|
Args:
|
|
97
97
|
content: Markdown content to add
|
|
98
|
-
|
|
98
|
+
|
|
99
99
|
Returns:
|
|
100
100
|
Dict containing the markdown addition request
|
|
101
101
|
"""
|
|
@@ -105,7 +105,7 @@ def markdown_tool(content: str) -> Dict[str, Any]:
|
|
|
105
105
|
"content": content,
|
|
106
106
|
},
|
|
107
107
|
"status": "completed",
|
|
108
|
-
"message": "Markdown cell added successfully. Continue with the next task."
|
|
108
|
+
"message": "Markdown cell added successfully. Continue with the next task.",
|
|
109
109
|
}
|
|
110
110
|
|
|
111
111
|
|
|
@@ -113,14 +113,14 @@ def markdown_tool(content: str) -> Dict[str, Any]:
|
|
|
113
113
|
def final_answer_tool(answer: str, summary: Optional[str] = None) -> Dict[str, Any]:
|
|
114
114
|
"""
|
|
115
115
|
Complete the task and provide final answer to the user.
|
|
116
|
-
|
|
116
|
+
|
|
117
117
|
Use this tool when you have successfully completed the user's request.
|
|
118
118
|
Provide a clear summary of what was accomplished.
|
|
119
|
-
|
|
119
|
+
|
|
120
120
|
Args:
|
|
121
121
|
answer: Final answer/message to the user
|
|
122
122
|
summary: Optional brief summary
|
|
123
|
-
|
|
123
|
+
|
|
124
124
|
Returns:
|
|
125
125
|
Dict marking task completion
|
|
126
126
|
"""
|
|
@@ -131,7 +131,7 @@ def final_answer_tool(answer: str, summary: Optional[str] = None) -> Dict[str, A
|
|
|
131
131
|
"summary": summary,
|
|
132
132
|
},
|
|
133
133
|
"status": "complete",
|
|
134
|
-
"message": "Task completed successfully"
|
|
134
|
+
"message": "Task completed successfully",
|
|
135
135
|
}
|
|
136
136
|
|
|
137
137
|
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Resource Check Tools for LangChain Agent
|
|
3
|
+
|
|
4
|
+
Provides a tool for checking resource availability before data processing.
|
|
5
|
+
This tool is executed on the client (Jupyter) side to accurately measure:
|
|
6
|
+
- System resources (RAM, CPU)
|
|
7
|
+
- File sizes for target files
|
|
8
|
+
- In-memory DataFrame shapes
|
|
9
|
+
|
|
10
|
+
Key features:
|
|
11
|
+
- On-demand resource checking (only when LLM needs it)
|
|
12
|
+
- Returns actionable recommendations (in-memory vs DASK/Chunking)
|
|
13
|
+
- Supports both file paths and DataFrame variable names
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
from typing import Any, Dict, List, Optional
|
|
18
|
+
|
|
19
|
+
from langchain_core.tools import tool
|
|
20
|
+
from pydantic import BaseModel, Field
|
|
21
|
+
|
|
22
|
+
# Module-level logger named after this module (via __name__).
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CheckResourceInput(BaseModel):
    """Input schema for check_resource tool"""

    # Every field has a default, so the tool may be invoked with no arguments.

    # Paths whose sizes should be reported.
    files: List[str] = Field(
        description="List of file paths to check sizes for (e.g., ['data.csv', 'train.parquet'])",
        default=[],
    )

    # Names of variables to inspect in the notebook's memory.
    dataframes: List[str] = Field(
        description="List of DataFrame variable names to check in memory (e.g., ['df', 'train_df'])",
        default=[],
    )

    # Result payload sent back by the client; None until one is supplied.
    execution_result: Optional[Dict[str, Any]] = Field(
        description="Execution result payload from the client",
        default=None,
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _build_file_size_command(files: List[str]) -> str:
|
|
43
|
+
"""
|
|
44
|
+
Build a shell command to get file sizes.
|
|
45
|
+
Uses stat for cross-platform compatibility.
|
|
46
|
+
"""
|
|
47
|
+
if not files:
|
|
48
|
+
return ""
|
|
49
|
+
|
|
50
|
+
# Use stat with format that works on both macOS and Linux
|
|
51
|
+
# macOS: stat -f "%z %N"
|
|
52
|
+
# Linux: stat -c "%s %n"
|
|
53
|
+
# We use a portable approach with ls -l
|
|
54
|
+
file_list = " ".join(f"'{f}'" for f in files)
|
|
55
|
+
return f"ls -l {file_list} 2>/dev/null | awk '{{print $5, $NF}}'"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _build_dataframe_check_code(dataframes: List[str]) -> str:
|
|
59
|
+
"""
|
|
60
|
+
Build Python code to check DataFrame shapes and memory usage.
|
|
61
|
+
Returns a JSON-serializable result.
|
|
62
|
+
"""
|
|
63
|
+
if not dataframes:
|
|
64
|
+
return ""
|
|
65
|
+
|
|
66
|
+
df_checks = []
|
|
67
|
+
for df_name in dataframes:
|
|
68
|
+
df_checks.append(f'''
|
|
69
|
+
try:
|
|
70
|
+
_df = {df_name}
|
|
71
|
+
_info = {{
|
|
72
|
+
"name": "{df_name}",
|
|
73
|
+
"exists": True,
|
|
74
|
+
"rows": len(_df) if hasattr(_df, '__len__') else None,
|
|
75
|
+
"cols": len(_df.columns) if hasattr(_df, 'columns') else None,
|
|
76
|
+
"memory_mb": round(_df.memory_usage(deep=True).sum() / 1024 / 1024, 2) if hasattr(_df, 'memory_usage') else None,
|
|
77
|
+
"type": type(_df).__name__
|
|
78
|
+
}}
|
|
79
|
+
except NameError:
|
|
80
|
+
_info = {{"name": "{df_name}", "exists": False}}
|
|
81
|
+
_results.append(_info)
|
|
82
|
+
''')
|
|
83
|
+
|
|
84
|
+
code = f'''
|
|
85
|
+
import json
|
|
86
|
+
_results = []
|
|
87
|
+
{chr(10).join(df_checks)}
|
|
88
|
+
print(json.dumps(_results))
|
|
89
|
+
'''
|
|
90
|
+
return code.strip()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# The docstring below is left word-for-word as written: the @tool decorator
# may expose it as the tool's description.
@tool(args_schema=CheckResourceInput)
def check_resource_tool(
    files: Optional[List[str]] = None,
    dataframes: Optional[List[str]] = None,
    execution_result: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Check system resources, file sizes, and DataFrame shapes before data processing.

    IMPORTANT: Call this tool BEFORE writing any data analysis or ML code to ensure
    the generated code uses appropriate memory strategies (ex. in-memory vs DASK/Chunking).

    Args:
        files: List of file paths to check sizes for (e.g., ['data.csv', 'train.parquet'])
        dataframes: List of DataFrame variable names in memory (e.g., ['df', 'train_df'])

    Returns:
        Dict with:
            - system: Current RAM/CPU availability (ram_available_mb, ram_total_mb, cpu_cores)
            - files: File sizes in MB for each requested file
            - dataframes: DataFrame shapes and memory usage for each requested variable
    """
    # Omitted arguments mean "nothing to check" for that category.
    files = [] if files is None else files
    dataframes = [] if dataframes is None else dataframes

    # Without a client result this function only describes the work: it
    # returns the shell command and Python snippet for the client to run.
    response: Dict[str, Any] = {
        "tool": "check_resource_tool",
        "parameters": {
            "files": files,
            "dataframes": dataframes,
        },
        "file_size_command": _build_file_size_command(files),
        "dataframe_check_code": _build_dataframe_check_code(dataframes),
        "status": "pending_execution",
        "message": "Resource check queued for execution by client",
    }

    if execution_result is None:
        return response

    # A result payload was supplied: attach it and mark the check as done.
    response["execution_result"] = execution_result
    response["status"] = "complete"
    response["message"] = "Resource check completed"

    if isinstance(execution_result, dict):
        # Copy the known sections to the top level, with empty fallbacks
        # when the payload omits them.
        response["success"] = execution_result.get("success", False)
        response["system"] = execution_result.get("system", {})
        response["files"] = execution_result.get("files", [])
        response["dataframes"] = execution_result.get("dataframes", [])

        if "error" in execution_result:
            response["error"] = execution_result["error"]

    return response
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# Export
|
|
161
|
+
# List holding the tool defined in this module.
# NOTE(review): presumably imported by the tools package to register it — confirm.
RESOURCE_TOOLS = [check_resource_tool]
|