ds-agent-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/bin/ds-agent.js +451 -0
  2. package/ds_agent/__init__.py +8 -0
  3. package/package.json +28 -0
  4. package/requirements.txt +126 -0
  5. package/setup.py +35 -0
  6. package/src/__init__.py +7 -0
  7. package/src/_compress_tool_result.py +118 -0
  8. package/src/api/__init__.py +4 -0
  9. package/src/api/app.py +1626 -0
  10. package/src/cache/__init__.py +5 -0
  11. package/src/cache/cache_manager.py +561 -0
  12. package/src/cli.py +2886 -0
  13. package/src/dynamic_prompts.py +281 -0
  14. package/src/orchestrator.py +4799 -0
  15. package/src/progress_manager.py +139 -0
  16. package/src/reasoning/__init__.py +332 -0
  17. package/src/reasoning/business_summary.py +431 -0
  18. package/src/reasoning/data_understanding.py +356 -0
  19. package/src/reasoning/model_explanation.py +383 -0
  20. package/src/reasoning/reasoning_trace.py +239 -0
  21. package/src/registry/__init__.py +3 -0
  22. package/src/registry/tools_registry.py +3 -0
  23. package/src/session_memory.py +448 -0
  24. package/src/session_store.py +370 -0
  25. package/src/storage/__init__.py +19 -0
  26. package/src/storage/artifact_store.py +620 -0
  27. package/src/storage/helpers.py +116 -0
  28. package/src/storage/huggingface_storage.py +694 -0
  29. package/src/storage/r2_storage.py +0 -0
  30. package/src/storage/user_files_service.py +288 -0
  31. package/src/tools/__init__.py +335 -0
  32. package/src/tools/advanced_analysis.py +823 -0
  33. package/src/tools/advanced_feature_engineering.py +708 -0
  34. package/src/tools/advanced_insights.py +578 -0
  35. package/src/tools/advanced_preprocessing.py +549 -0
  36. package/src/tools/advanced_training.py +906 -0
  37. package/src/tools/agent_tool_mapping.py +326 -0
  38. package/src/tools/auto_pipeline.py +420 -0
  39. package/src/tools/autogluon_training.py +1480 -0
  40. package/src/tools/business_intelligence.py +860 -0
  41. package/src/tools/cloud_data_sources.py +581 -0
  42. package/src/tools/code_interpreter.py +390 -0
  43. package/src/tools/computer_vision.py +614 -0
  44. package/src/tools/data_cleaning.py +614 -0
  45. package/src/tools/data_profiling.py +593 -0
  46. package/src/tools/data_type_conversion.py +268 -0
  47. package/src/tools/data_wrangling.py +433 -0
  48. package/src/tools/eda_reports.py +284 -0
  49. package/src/tools/enhanced_feature_engineering.py +241 -0
  50. package/src/tools/feature_engineering.py +302 -0
  51. package/src/tools/matplotlib_visualizations.py +1327 -0
  52. package/src/tools/model_training.py +520 -0
  53. package/src/tools/nlp_text_analytics.py +761 -0
  54. package/src/tools/plotly_visualizations.py +497 -0
  55. package/src/tools/production_mlops.py +852 -0
  56. package/src/tools/time_series.py +507 -0
  57. package/src/tools/tools_registry.py +2133 -0
  58. package/src/tools/visualization_engine.py +559 -0
  59. package/src/utils/__init__.py +42 -0
  60. package/src/utils/error_recovery.py +313 -0
  61. package/src/utils/parallel_executor.py +402 -0
  62. package/src/utils/polars_helpers.py +248 -0
  63. package/src/utils/schema_extraction.py +132 -0
  64. package/src/utils/semantic_layer.py +392 -0
  65. package/src/utils/token_budget.py +411 -0
  66. package/src/utils/validation.py +377 -0
  67. package/src/workflow_state.py +154 -0
@@ -0,0 +1,313 @@
1
+ """
2
+ Error Recovery and Graceful Degradation System
3
+
4
+ Provides retry mechanisms, fallback strategies, and workflow checkpointing
5
+ to make the agent resilient to tool failures and API errors.
6
+ """
7
+
8
+ import functools
9
+ import time
10
+ import json
11
+ import traceback
12
+ from typing import Callable, Any, Dict, Optional, List, Tuple
13
+ from pathlib import Path
14
+ from datetime import datetime
15
+
16
+
17
class RetryStrategy:
    """Bundle of retry settings applied to a single tool.

    Attributes:
        max_retries: Number of retries permitted for the tool.
        base_delay: Delay, in seconds, used as the base wait between attempts.
        exponential_backoff: Whether the wait should grow between attempts.
        fallback_tools: Names of alternative tools to suggest on failure.
    """

    def __init__(self, max_retries: int = 3, base_delay: float = 1.0,
                 exponential_backoff: bool = True, fallback_tools: Optional[List[str]] = None):
        # A falsy argument (None or an empty list) is swapped for a fresh list.
        alternatives = fallback_tools if fallback_tools else []

        self.max_retries, self.base_delay = max_retries, base_delay
        self.exponential_backoff = exponential_backoff
        self.fallback_tools = alternatives
26
+
27
+
28
# Per-tool retry configuration, keyed by tool name.
# Tools that are not listed here fall back to a default chosen by the caller.
TOOL_RETRY_STRATEGIES = {
    # Profiling / data-quality inspection: up to two retries, 1s base delay.
    "profile_dataset": RetryStrategy(max_retries=2, base_delay=1.0),
    "detect_data_quality_issues": RetryStrategy(max_retries=2, base_delay=1.0),

    # Costly training jobs: never re-run; at most suggest an alternative tool.
    "train_baseline_models": RetryStrategy(
        max_retries=0,
        fallback_tools=["execute_python_code"],
    ),
    "hyperparameter_tuning": RetryStrategy(max_retries=0),
    "train_ensemble_models": RetryStrategy(max_retries=0),

    # Plot generation: a single retry.
    "generate_interactive_scatter": RetryStrategy(max_retries=1),
    "generate_plotly_dashboard": RetryStrategy(max_retries=1),

    # Arbitrary code execution: a single retry after a longer pause.
    "execute_python_code": RetryStrategy(max_retries=1, base_delay=2.0),

    # Feature preparation: a single retry, then point at a different tool.
    "encode_categorical": RetryStrategy(
        max_retries=1,
        fallback_tools=["force_numeric_conversion"],
    ),
    "clean_missing_values": RetryStrategy(
        max_retries=1,
        fallback_tools=["handle_outliers"],
    ),
}
50
+
51
+
52
def retry_with_fallback(tool_name: Optional[str] = None):
    """
    Decorator for automatic retry with exponential backoff and fallback strategies.

    Features:
    - Configurable retry attempts per tool (looked up in TOOL_RETRY_STRATEGIES;
      unknown tools get ``RetryStrategy(max_retries=1)``)
    - Exponential backoff between retries (``base_delay * 2 ** attempt``)
    - Fallback tool suggestions on persistent failure
    - Detailed error logging, including the traceback of the last failure

    A call counts as failed when the wrapped function raises, or when it
    returns a dict with ``"success": False`` or containing an ``"error"`` key.
    Dict errors whose message contains "does not exist" or "not found" are
    treated as validation errors and returned immediately without retrying.

    Args:
        tool_name: Name of tool (for strategy lookup). When omitted, the name is
            taken from the ``tool_name`` keyword argument of the call, or else
            from the first positional argument.

    Returns:
        A decorator. The wrapped function returns the original result on
        success, or a dict with keys ``success`` (False), ``error``,
        ``error_type``, ``traceback``, ``tool_name``, ``attempts`` and
        ``fallback_suggestions`` once all attempts have failed.

    Example:
        @retry_with_fallback(tool_name="train_baseline_models")
        def execute_tool(tool_name, arguments):
            # Tool execution logic
            pass
    """
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            # Get tool name from decorator argument, kwargs or args
            actual_tool_name = tool_name or kwargs.get('tool_name') or (args[0] if args else None)

            # Only strings can be registered tool names; skipping the lookup for
            # anything else avoids a TypeError when the first positional
            # argument is an unhashable object (e.g. ``self`` of a method).
            strategy = None
            if isinstance(actual_tool_name, str):
                strategy = TOOL_RETRY_STRATEGIES.get(actual_tool_name)
            if strategy is None:
                strategy = RetryStrategy(max_retries=1)  # Default strategy

            total_attempts = strategy.max_retries + 1
            last_error = None
            last_traceback = ""

            # Attempt execution with retries
            for attempt in range(total_attempts):
                try:
                    result = func(*args, **kwargs)

                    # The call returned - check if the result itself reports an error
                    if isinstance(result, dict):
                        if result.get("success") is False or "error" in result:
                            last_error = result.get("error", "Tool returned error")
                            # Validation errors will not be cured by retrying
                            if "does not exist" in str(last_error) or "not found" in str(last_error):
                                return result
                            # Route the failure through the common retry path below
                            raise Exception(last_error)

                    # Success!
                    if attempt > 0:
                        print(f"✅ Retry successful on attempt {attempt + 1}")
                    return result

                except Exception as e:
                    last_error = e
                    # The traceback must be captured while the exception is
                    # still being handled; once the except block exits,
                    # traceback.format_exc() only yields "NoneType: None".
                    last_traceback = traceback.format_exc()

                    if attempt < strategy.max_retries:
                        # Calculate delay with exponential backoff
                        delay = strategy.base_delay * (2 ** attempt) if strategy.exponential_backoff else strategy.base_delay
                        print(f"⚠️ {actual_tool_name} failed (attempt {attempt + 1}/{total_attempts}): {str(e)[:100]}")
                        print(f" Retrying in {delay:.1f}s...")
                        time.sleep(delay)
                    else:
                        # Max retries exhausted
                        print(f"❌ {actual_tool_name} failed after {total_attempts} attempts")

            # All retries failed - return error result with fallback info
            error_result = {
                "success": False,
                "error": str(last_error),
                "error_type": type(last_error).__name__,
                "traceback": last_traceback,
                "tool_name": actual_tool_name,
                "attempts": total_attempts,
                "fallback_suggestions": strategy.fallback_tools
            }

            print(f"💡 Suggested fallback tools: {strategy.fallback_tools}")

            return error_result

        return wrapper
    return decorator
134
+
135
+
136
class WorkflowCheckpointManager:
    """
    Manages workflow checkpoints for crash recovery.

    Each session gets one JSON file named ``<session_id>_checkpoint.json``
    inside the checkpoint directory. Saving again for the same session
    overwrites the previous checkpoint.
    """

    def __init__(self, checkpoint_dir: str = "./checkpoints"):
        """Create the manager and make sure the checkpoint directory exists."""
        self.checkpoint_dir = Path(checkpoint_dir)
        self.checkpoint_dir.mkdir(parents=True, exist_ok=True)

    def _checkpoint_path(self, session_id: str) -> Path:
        """Return the checkpoint file path used for ``session_id``."""
        return self.checkpoint_dir / f"{session_id}_checkpoint.json"

    def save_checkpoint(self, session_id: str, workflow_state: Any,
                        last_tool: str, iteration: int) -> str:
        """
        Save workflow checkpoint.

        The data is written to a temporary sibling file first and then swapped
        into place, so an interrupted write cannot leave a truncated
        checkpoint behind for the next load.

        Args:
            session_id: Session identifier
            workflow_state: Object exposing ``to_dict()``; objects without that
                method are stored as an empty dict
            last_tool: Last successfully executed tool
            iteration: Current iteration number

        Returns:
            Path to checkpoint file, or an empty string if saving failed
        """
        checkpoint_data = {
            "session_id": session_id,
            "timestamp": datetime.now().isoformat(),
            "iteration": iteration,
            "last_tool": last_tool,
            "workflow_state": workflow_state.to_dict() if hasattr(workflow_state, 'to_dict') else {},
            "can_resume": True
        }

        checkpoint_path = self._checkpoint_path(session_id)
        # The ".tmp" suffix keeps partial files out of the "*_checkpoint.json" glob.
        temp_path = checkpoint_path.with_name(checkpoint_path.name + ".tmp")

        try:
            with open(temp_path, 'w', encoding='utf-8') as f:
                json.dump(checkpoint_data, f, indent=2, default=str)
            # Path.replace overwrites an existing destination in one step.
            temp_path.replace(checkpoint_path)

            print(f"💾 Checkpoint saved: iteration {iteration}, last tool: {last_tool}")
            return str(checkpoint_path)

        except Exception as e:
            print(f"⚠️ Failed to save checkpoint: {e}")
            # Best-effort cleanup of a partially written temporary file.
            try:
                temp_path.unlink()
            except OSError:
                pass
            return ""

    def load_checkpoint(self, session_id: str) -> Optional[Dict[str, Any]]:
        """
        Load checkpoint for session.

        Args:
            session_id: Session identifier

        Returns:
            Checkpoint data, or None if the file is missing, unreadable, or
            lacks the ``iteration`` / ``last_tool`` fields
        """
        checkpoint_path = self._checkpoint_path(session_id)

        if not checkpoint_path.exists():
            return None

        try:
            with open(checkpoint_path, 'r', encoding='utf-8') as f:
                checkpoint = json.load(f)

            print(f"📂 Checkpoint loaded: iteration {checkpoint['iteration']}, last tool: {checkpoint['last_tool']}")
            return checkpoint

        except Exception as e:
            print(f"⚠️ Failed to load checkpoint: {e}")
            return None

    def can_resume(self, session_id: str) -> bool:
        """Check if session has resumable checkpoint."""
        checkpoint = self.load_checkpoint(session_id)
        return checkpoint is not None and bool(checkpoint.get("can_resume", False))

    def clear_checkpoint(self, session_id: str):
        """Clear checkpoint after successful completion."""
        checkpoint_path = self._checkpoint_path(session_id)

        if checkpoint_path.exists():
            try:
                checkpoint_path.unlink()
                print(f"🗑️ Checkpoint cleared for session {session_id}")
            except Exception as e:
                print(f"⚠️ Failed to clear checkpoint: {e}")

    def list_checkpoints(self) -> List[Tuple[str, datetime]]:
        """
        List all available checkpoints with timestamps.

        Files that cannot be read or parsed are skipped.

        Returns:
            ``(session_id, timestamp)`` pairs, most recent first
        """
        checkpoints = []

        for checkpoint_file in self.checkpoint_dir.glob("*_checkpoint.json"):
            try:
                with open(checkpoint_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                session_id = data['session_id']
                timestamp = datetime.fromisoformat(data['timestamp'])
                checkpoints.append((session_id, timestamp))
            except Exception:
                # Skip unreadable or malformed files, but let KeyboardInterrupt
                # and SystemExit through (a bare ``except:`` would swallow them).
                continue

        return sorted(checkpoints, key=lambda x: x[1], reverse=True)
242
+
243
+
244
class ErrorRecoveryManager:
    """
    Single entry point for error recovery concerns.

    Owns a WorkflowCheckpointManager and keeps an in-memory, per-session list
    of recorded errors that can be summarised or used to decide on aborting.
    """

    def __init__(self, checkpoint_dir: str = "./checkpoints"):
        # Maps session id -> chronological list of recorded error entries.
        self.error_history: Dict[str, List[Dict[str, Any]]] = {}
        self.checkpoint_manager = WorkflowCheckpointManager(checkpoint_dir)

    def log_error(self, session_id: str, tool_name: str, error: Exception,
                  context: Optional[Dict[str, Any]] = None):
        """Record one error for ``session_id`` together with optional context."""
        session_log = self.error_history.setdefault(session_id, [])
        session_log.append({
            "timestamp": datetime.now().isoformat(),
            "tool_name": tool_name,
            "error_type": type(error).__name__,
            "error_message": str(error),
            "context": context or {}
        })

    def get_error_patterns(self, session_id: str) -> Dict[str, Any]:
        """Summarise recorded errors for a session; empty dict if none were logged."""
        try:
            session_log = self.error_history[session_id]
        except KeyError:
            return {}

        def tally(field: str) -> Dict[Any, int]:
            # Frequency of each distinct value of ``field`` across the log.
            counts: Dict[Any, int] = {}
            for entry in session_log:
                value = entry[field]
                counts[value] = counts.get(value, 0) + 1
            return counts

        return {
            "total_errors": len(session_log),
            "errors_by_tool": tally('tool_name'),
            "errors_by_type": tally('error_type'),
            # Slicing an empty list already yields an empty list.
            "most_recent": session_log[-3:]
        }

    def should_abort(self, session_id: str, max_errors: int = 10) -> bool:
        """Return True once the session has logged at least ``max_errors`` errors."""
        if session_id in self.error_history:
            return len(self.error_history[session_id]) >= max_errors
        return False
303
+
304
+
305
# Module-level singleton slot; stays None until the first request
_recovery_manager = None


def get_recovery_manager() -> ErrorRecoveryManager:
    """Return the shared ErrorRecoveryManager, constructing it on first use."""
    global _recovery_manager
    if _recovery_manager is not None:
        return _recovery_manager

    _recovery_manager = ErrorRecoveryManager()
    return _recovery_manager