ds-agent-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ds-agent.js +451 -0
- package/ds_agent/__init__.py +8 -0
- package/package.json +28 -0
- package/requirements.txt +126 -0
- package/setup.py +35 -0
- package/src/__init__.py +7 -0
- package/src/_compress_tool_result.py +118 -0
- package/src/api/__init__.py +4 -0
- package/src/api/app.py +1626 -0
- package/src/cache/__init__.py +5 -0
- package/src/cache/cache_manager.py +561 -0
- package/src/cli.py +2886 -0
- package/src/dynamic_prompts.py +281 -0
- package/src/orchestrator.py +4799 -0
- package/src/progress_manager.py +139 -0
- package/src/reasoning/__init__.py +332 -0
- package/src/reasoning/business_summary.py +431 -0
- package/src/reasoning/data_understanding.py +356 -0
- package/src/reasoning/model_explanation.py +383 -0
- package/src/reasoning/reasoning_trace.py +239 -0
- package/src/registry/__init__.py +3 -0
- package/src/registry/tools_registry.py +3 -0
- package/src/session_memory.py +448 -0
- package/src/session_store.py +370 -0
- package/src/storage/__init__.py +19 -0
- package/src/storage/artifact_store.py +620 -0
- package/src/storage/helpers.py +116 -0
- package/src/storage/huggingface_storage.py +694 -0
- package/src/storage/r2_storage.py +0 -0
- package/src/storage/user_files_service.py +288 -0
- package/src/tools/__init__.py +335 -0
- package/src/tools/advanced_analysis.py +823 -0
- package/src/tools/advanced_feature_engineering.py +708 -0
- package/src/tools/advanced_insights.py +578 -0
- package/src/tools/advanced_preprocessing.py +549 -0
- package/src/tools/advanced_training.py +906 -0
- package/src/tools/agent_tool_mapping.py +326 -0
- package/src/tools/auto_pipeline.py +420 -0
- package/src/tools/autogluon_training.py +1480 -0
- package/src/tools/business_intelligence.py +860 -0
- package/src/tools/cloud_data_sources.py +581 -0
- package/src/tools/code_interpreter.py +390 -0
- package/src/tools/computer_vision.py +614 -0
- package/src/tools/data_cleaning.py +614 -0
- package/src/tools/data_profiling.py +593 -0
- package/src/tools/data_type_conversion.py +268 -0
- package/src/tools/data_wrangling.py +433 -0
- package/src/tools/eda_reports.py +284 -0
- package/src/tools/enhanced_feature_engineering.py +241 -0
- package/src/tools/feature_engineering.py +302 -0
- package/src/tools/matplotlib_visualizations.py +1327 -0
- package/src/tools/model_training.py +520 -0
- package/src/tools/nlp_text_analytics.py +761 -0
- package/src/tools/plotly_visualizations.py +497 -0
- package/src/tools/production_mlops.py +852 -0
- package/src/tools/time_series.py +507 -0
- package/src/tools/tools_registry.py +2133 -0
- package/src/tools/visualization_engine.py +559 -0
- package/src/utils/__init__.py +42 -0
- package/src/utils/error_recovery.py +313 -0
- package/src/utils/parallel_executor.py +402 -0
- package/src/utils/polars_helpers.py +248 -0
- package/src/utils/schema_extraction.py +132 -0
- package/src/utils/semantic_layer.py +392 -0
- package/src/utils/token_budget.py +411 -0
- package/src/utils/validation.py +377 -0
- package/src/workflow_state.py +154 -0
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Code Interpreter Tool
|
|
3
|
+
Allows the AI agent to write and execute custom Python code for tasks that don't have predefined tools.
|
|
4
|
+
This is what makes it a TRUE AI Agent, not just a function-calling bot.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import subprocess
|
|
10
|
+
import tempfile
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Dict, Any, Optional
|
|
13
|
+
import polars as pl
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def execute_python_code(
|
|
17
|
+
code: str,
|
|
18
|
+
working_directory: str = "./outputs/code",
|
|
19
|
+
timeout: int = 60,
|
|
20
|
+
allow_file_operations: bool = True,
|
|
21
|
+
output_file: Optional[str] = None
|
|
22
|
+
) -> Dict[str, Any]:
|
|
23
|
+
"""
|
|
24
|
+
Execute custom Python code written by the AI agent.
|
|
25
|
+
|
|
26
|
+
This is the KEY tool that transforms the agent from a function-calling bot
|
|
27
|
+
into a true AI agent capable of solving ANY data science problem.
|
|
28
|
+
|
|
29
|
+
Use cases:
|
|
30
|
+
- Custom visualizations not covered by existing tools
|
|
31
|
+
- Data transformations too specific for generic tools
|
|
32
|
+
- Domain-specific calculations
|
|
33
|
+
- Interactive dashboards
|
|
34
|
+
- Custom export formats
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
code: Python code to execute
|
|
38
|
+
working_directory: Where to run the code (default: ./outputs/code)
|
|
39
|
+
timeout: Maximum execution time in seconds
|
|
40
|
+
allow_file_operations: Whether code can read/write files
|
|
41
|
+
output_file: Optional file path to save output (e.g., HTML plot)
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
Dict with execution results, stdout, stderr, and any generated files
|
|
45
|
+
|
|
46
|
+
Example:
|
|
47
|
+
# Agent can write custom Plotly code for specific visualizations
|
|
48
|
+
code = '''
|
|
49
|
+
import plotly.express as px
|
|
50
|
+
import pandas as pd
|
|
51
|
+
|
|
52
|
+
df = pd.read_csv('./temp/sales_data.csv')
|
|
53
|
+
fig = px.line(df, x='month', y='sales', color='bike_model',
|
|
54
|
+
title='Extended Sales by Month for Each Bike Model')
|
|
55
|
+
|
|
56
|
+
# Add dropdown filter
|
|
57
|
+
fig.update_layout(
|
|
58
|
+
updatemenus=[{
|
|
59
|
+
'buttons': [{'label': model, 'method': 'update',
|
|
60
|
+
'args': [{'visible': [model == m for m in df['bike_model'].unique()]}]}
|
|
61
|
+
for model in df['bike_model'].unique()],
|
|
62
|
+
'direction': 'down',
|
|
63
|
+
'showactive': True
|
|
64
|
+
}]
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
fig.write_html('./outputs/code/bike_sales_interactive.html')
|
|
68
|
+
print("Chart saved to: ./outputs/code/bike_sales_interactive.html")
|
|
69
|
+
'''
|
|
70
|
+
|
|
71
|
+
result = execute_python_code(code)
|
|
72
|
+
"""
|
|
73
|
+
try:
|
|
74
|
+
# ⚠️ CRITICAL: Basic syntax validation BEFORE execution
|
|
75
|
+
try:
|
|
76
|
+
compile(code, '<string>', 'exec')
|
|
77
|
+
except SyntaxError as e:
|
|
78
|
+
return {
|
|
79
|
+
"success": False,
|
|
80
|
+
"error": f"Syntax error in generated code: {str(e)}",
|
|
81
|
+
"error_type": "SyntaxError",
|
|
82
|
+
"line": e.lineno,
|
|
83
|
+
"suggestion": "Fix syntax errors in the code. Common issues: missing quotes, parentheses, indentation"
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
# Create working directory with proper permissions
|
|
87
|
+
try:
|
|
88
|
+
os.makedirs(working_directory, exist_ok=True)
|
|
89
|
+
# Ensure directory is writable
|
|
90
|
+
test_file = os.path.join(working_directory, '.write_test')
|
|
91
|
+
with open(test_file, 'w') as f:
|
|
92
|
+
f.write('test')
|
|
93
|
+
os.remove(test_file)
|
|
94
|
+
except PermissionError:
|
|
95
|
+
return {
|
|
96
|
+
"success": False,
|
|
97
|
+
"error": f"No write permission for directory: {working_directory}",
|
|
98
|
+
"error_type": "PermissionError",
|
|
99
|
+
"suggestion": f"Check folder permissions or use a different directory"
|
|
100
|
+
}
|
|
101
|
+
except Exception as e:
|
|
102
|
+
return {
|
|
103
|
+
"success": False,
|
|
104
|
+
"error": f"Failed to create working directory: {str(e)}",
|
|
105
|
+
"error_type": type(e).__name__
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
# Security: Validate code doesn't contain dangerous operations
|
|
109
|
+
dangerous_patterns = {
|
|
110
|
+
'subprocess': 'Use specialized tools instead of shell commands',
|
|
111
|
+
'__import__': 'Dynamic imports not allowed for security',
|
|
112
|
+
'eval(': 'eval() is dangerous - rewrite without it',
|
|
113
|
+
'exec(': 'exec() is dangerous - rewrite without it',
|
|
114
|
+
'compile(': 'compile() not needed - write code directly',
|
|
115
|
+
'os.system': 'Shell commands not allowed - use Python libraries',
|
|
116
|
+
'os.popen': 'Shell commands not allowed - use Python libraries'
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
for pattern, reason in dangerous_patterns.items():
|
|
120
|
+
if pattern in code:
|
|
121
|
+
return {
|
|
122
|
+
"success": False,
|
|
123
|
+
"error": f"Code contains restricted operation: {pattern}",
|
|
124
|
+
"error_type": "SecurityError",
|
|
125
|
+
"reason": reason,
|
|
126
|
+
"suggestion": "Rewrite code using safe Python operations"
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
# Create temporary Python file with better error handling
|
|
130
|
+
temp_file = None
|
|
131
|
+
try:
|
|
132
|
+
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False,
|
|
133
|
+
dir=working_directory, encoding='utf-8') as f:
|
|
134
|
+
temp_file = f.name
|
|
135
|
+
|
|
136
|
+
# Add helper imports at the top + error handling wrapper
|
|
137
|
+
enhanced_code = """
|
|
138
|
+
# Auto-imported libraries for convenience
|
|
139
|
+
import pandas as pd
|
|
140
|
+
import polars as pl
|
|
141
|
+
import numpy as np
|
|
142
|
+
import matplotlib
|
|
143
|
+
matplotlib.use('Agg') # Non-interactive backend
|
|
144
|
+
import matplotlib.pyplot as plt
|
|
145
|
+
import seaborn as sns
|
|
146
|
+
import plotly.express as px
|
|
147
|
+
import plotly.graph_objects as go
|
|
148
|
+
from pathlib import Path
|
|
149
|
+
import json
|
|
150
|
+
import sys
|
|
151
|
+
import traceback
|
|
152
|
+
|
|
153
|
+
# Ensure output directory exists
|
|
154
|
+
import os
|
|
155
|
+
os.makedirs('./outputs/code', exist_ok=True)
|
|
156
|
+
os.makedirs('./outputs/data', exist_ok=True)
|
|
157
|
+
|
|
158
|
+
try:
|
|
159
|
+
# User's code starts here
|
|
160
|
+
""" + "\n".join(" " + line for line in code.split("\n")) + """
|
|
161
|
+
|
|
162
|
+
except Exception as e:
|
|
163
|
+
print(f"❌ Error in code execution: {str(e)}", file=sys.stderr)
|
|
164
|
+
traceback.print_exc()
|
|
165
|
+
sys.exit(1)
|
|
166
|
+
"""
|
|
167
|
+
|
|
168
|
+
f.write(enhanced_code)
|
|
169
|
+
|
|
170
|
+
except Exception as e:
|
|
171
|
+
return {
|
|
172
|
+
"success": False,
|
|
173
|
+
"error": f"Failed to write temporary file: {str(e)}",
|
|
174
|
+
"error_type": type(e).__name__,
|
|
175
|
+
"suggestion": "Check file write permissions"
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
# Track existing files BEFORE execution to detect new files
|
|
179
|
+
existing_files = set()
|
|
180
|
+
# 🔥 FIX: Also scan /tmp/data_science_agent/ since LLM often saves files there
|
|
181
|
+
scan_dirs = ['./outputs/code', './outputs/data', './outputs/plots', '/tmp/data_science_agent']
|
|
182
|
+
if allow_file_operations:
|
|
183
|
+
for output_dir in scan_dirs:
|
|
184
|
+
if os.path.exists(output_dir):
|
|
185
|
+
for file_path in Path(output_dir).resolve().glob('**/*'):
|
|
186
|
+
if file_path.is_file():
|
|
187
|
+
existing_files.add(file_path.resolve())
|
|
188
|
+
|
|
189
|
+
try:
|
|
190
|
+
# Execute the code with better error capture
|
|
191
|
+
# Use absolute path and normalize it for Windows
|
|
192
|
+
abs_temp_file = os.path.abspath(temp_file)
|
|
193
|
+
abs_cwd = os.path.abspath(Path.cwd())
|
|
194
|
+
|
|
195
|
+
result = subprocess.run(
|
|
196
|
+
[sys.executable, abs_temp_file],
|
|
197
|
+
capture_output=True,
|
|
198
|
+
text=True,
|
|
199
|
+
timeout=timeout,
|
|
200
|
+
cwd=abs_cwd # Use absolute path to avoid permission issues
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
stdout = result.stdout.strip()
|
|
204
|
+
stderr = result.stderr.strip()
|
|
205
|
+
returncode = result.returncode
|
|
206
|
+
|
|
207
|
+
# Check for errors with detailed diagnostics
|
|
208
|
+
if returncode != 0:
|
|
209
|
+
# Parse error message for common issues
|
|
210
|
+
error_hints = []
|
|
211
|
+
if "PermissionError" in stderr:
|
|
212
|
+
error_hints.append("💡 File permission issue - check if file is open in another program")
|
|
213
|
+
if "FileNotFoundError" in stderr:
|
|
214
|
+
error_hints.append("💡 File not found - check if path is correct (use relative paths like './outputs/data/file.csv')")
|
|
215
|
+
if "KeyError" in stderr:
|
|
216
|
+
error_hints.append("💡 Column not found - check column names in the CSV")
|
|
217
|
+
if "ModuleNotFoundError" in stderr:
|
|
218
|
+
error_hints.append("💡 Missing library - may need to install additional packages")
|
|
219
|
+
if "ValueError" in stderr:
|
|
220
|
+
error_hints.append("💡 Data type mismatch - check data types and conversions")
|
|
221
|
+
|
|
222
|
+
return {
|
|
223
|
+
"success": False,
|
|
224
|
+
"error": f"Code execution failed",
|
|
225
|
+
"stderr": stderr,
|
|
226
|
+
"stdout": stdout if stdout else None,
|
|
227
|
+
"error_type": "ExecutionError",
|
|
228
|
+
"exit_code": returncode,
|
|
229
|
+
"hints": error_hints if error_hints else ["Check the error message above for details"]
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
# Success! Find NEWLY generated files (not existing before execution)
|
|
233
|
+
generated_files = []
|
|
234
|
+
# 🔥 FIX: Also scan /tmp/data_science_agent/ for files created by LLM code
|
|
235
|
+
scan_dirs = ['./outputs/code', './outputs/data', './outputs/plots', '/tmp/data_science_agent']
|
|
236
|
+
if allow_file_operations:
|
|
237
|
+
cwd = Path.cwd()
|
|
238
|
+
for output_dir in scan_dirs:
|
|
239
|
+
if os.path.exists(output_dir):
|
|
240
|
+
abs_output_dir = Path(output_dir).resolve()
|
|
241
|
+
for file_path in abs_output_dir.glob('**/*'):
|
|
242
|
+
if file_path.is_file():
|
|
243
|
+
abs_file = file_path.resolve()
|
|
244
|
+
|
|
245
|
+
# Only include if it's NEW (didn't exist before) or MODIFIED
|
|
246
|
+
is_new = abs_file not in existing_files
|
|
247
|
+
|
|
248
|
+
# Check if file was modified in last 5 seconds (just created/updated)
|
|
249
|
+
import time
|
|
250
|
+
file_age = time.time() - file_path.stat().st_mtime
|
|
251
|
+
is_recent = file_age < 5
|
|
252
|
+
|
|
253
|
+
if (is_new or is_recent):
|
|
254
|
+
# Get relative path safely (handle Windows paths)
|
|
255
|
+
try:
|
|
256
|
+
rel_path = file_path.relative_to(cwd)
|
|
257
|
+
except ValueError:
|
|
258
|
+
# Fallback: just use the file name with output dir
|
|
259
|
+
rel_path = Path(output_dir) / file_path.name
|
|
260
|
+
|
|
261
|
+
# Only include if not temp file and has content
|
|
262
|
+
abs_temp = Path(temp_file).resolve() if temp_file else None
|
|
263
|
+
if file_path != abs_temp and file_path.stat().st_size > 0:
|
|
264
|
+
generated_files.append(str(rel_path).replace('\\', '/'))
|
|
265
|
+
|
|
266
|
+
# Sort by modification time (newest first)
|
|
267
|
+
if generated_files:
|
|
268
|
+
generated_files = sorted(
|
|
269
|
+
generated_files,
|
|
270
|
+
key=lambda x: Path(x).stat().st_mtime,
|
|
271
|
+
reverse=True
|
|
272
|
+
)[:10] # Limit to 10 most recent files
|
|
273
|
+
|
|
274
|
+
return {
|
|
275
|
+
"success": True,
|
|
276
|
+
"stdout": stdout if stdout else "✅ Code executed successfully (no output)",
|
|
277
|
+
"stderr": stderr if stderr else None,
|
|
278
|
+
"message": "✅ Code executed successfully",
|
|
279
|
+
"generated_files": generated_files,
|
|
280
|
+
"working_directory": working_directory,
|
|
281
|
+
"execution_summary": {
|
|
282
|
+
"lines_of_code": len(code.split('\n')),
|
|
283
|
+
"files_generated": len(generated_files)
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
finally:
|
|
288
|
+
# Clean up temp file
|
|
289
|
+
if temp_file and os.path.exists(temp_file):
|
|
290
|
+
try:
|
|
291
|
+
os.unlink(temp_file)
|
|
292
|
+
except Exception:
|
|
293
|
+
pass # Ignore cleanup errors
|
|
294
|
+
|
|
295
|
+
except subprocess.TimeoutExpired:
|
|
296
|
+
return {
|
|
297
|
+
"success": False,
|
|
298
|
+
"error": f"Code execution timed out after {timeout} seconds",
|
|
299
|
+
"error_type": "TimeoutError",
|
|
300
|
+
"suggestion": "Code is taking too long. Optimize it or increase timeout. Avoid large loops or heavy computations."
|
|
301
|
+
}
|
|
302
|
+
except Exception as e:
|
|
303
|
+
return {
|
|
304
|
+
"success": False,
|
|
305
|
+
"error": f"Unexpected error: {str(e)}",
|
|
306
|
+
"error_type": type(e).__name__,
|
|
307
|
+
"suggestion": "This is an unexpected error. Try simplifying the code."
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def execute_code_from_file(
    file_path: str,
    working_directory: str = "./outputs/code",
    timeout: int = 60
) -> Dict[str, Any]:
    """
    Load a Python script from disk and run it through execute_python_code.

    Handy when the source is too long to pass around as a string, or when an
    already existing script should be run.

    Args:
        file_path: Location of the Python file to execute (read as UTF-8)
        working_directory: Forwarded unchanged to execute_python_code
        timeout: Maximum execution time in seconds, forwarded unchanged

    Returns:
        The dict produced by execute_python_code, or a failure dict
        (success=False, error, error_type) when the file is missing or
        cannot be read.
    """
    try:
        # Pull the whole script into memory, then delegate the execution.
        with open(file_path, 'r', encoding='utf-8') as script:
            source = script.read()

        outcome = execute_python_code(
            code=source,
            working_directory=working_directory,
            timeout=timeout
        )
    except FileNotFoundError:
        outcome = {
            "success": False,
            "error": f"File not found: {file_path}",
            "error_type": "FileNotFoundError"
        }
    except Exception as exc:
        outcome = {
            "success": False,
            "error": f"Failed to read file: {str(exc)}",
            "error_type": type(exc).__name__
        }
    return outcome
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def generate_custom_visualization(
    data_file: str,
    visualization_description: str,
    output_path: str = "./outputs/code/custom_plot.html",
    timeout: int = 60
) -> Dict[str, Any]:
    """
    Placeholder for generating a visualization from a natural language description.

    The intended design is that the agent describes the plot it wants and
    plotting code is generated for it (possibly via an LLM). None of that
    exists yet: every call returns the same "not implemented" failure dict
    and none of the arguments are used.

    Args:
        data_file: Path to dataset (currently ignored)
        visualization_description: Natural language description of the
            desired plot (currently ignored)
        output_path: Where the visualization would be saved (currently ignored)
        timeout: Execution timeout (currently ignored)

    Returns:
        Dict with success=False, an error message, the error_type
        "NotImplementedError" and a suggestion to call execute_python_code
        with explicit code.

    Example:
        result = generate_custom_visualization(
            data_file="./temp/sales.csv",
            visualization_description="Line plot of sales by month for each bike model, with dropdown filter",
            output_path="./outputs/code/sales_plot.html"
        )
    """
    # Stub only: a full implementation would have an LLM turn the description
    # into Plotly code and execute it.
    not_implemented: Dict[str, Any] = dict(
        success=False,
        error="Not yet implemented - use execute_python_code with explicit code instead",
        error_type="NotImplementedError",
        suggestion="Write the Plotly code explicitly and use execute_python_code()",
    )
    return not_implemented
|