ds-agent-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ds-agent.js +451 -0
- package/ds_agent/__init__.py +8 -0
- package/package.json +28 -0
- package/requirements.txt +126 -0
- package/setup.py +35 -0
- package/src/__init__.py +7 -0
- package/src/_compress_tool_result.py +118 -0
- package/src/api/__init__.py +4 -0
- package/src/api/app.py +1626 -0
- package/src/cache/__init__.py +5 -0
- package/src/cache/cache_manager.py +561 -0
- package/src/cli.py +2886 -0
- package/src/dynamic_prompts.py +281 -0
- package/src/orchestrator.py +4799 -0
- package/src/progress_manager.py +139 -0
- package/src/reasoning/__init__.py +332 -0
- package/src/reasoning/business_summary.py +431 -0
- package/src/reasoning/data_understanding.py +356 -0
- package/src/reasoning/model_explanation.py +383 -0
- package/src/reasoning/reasoning_trace.py +239 -0
- package/src/registry/__init__.py +3 -0
- package/src/registry/tools_registry.py +3 -0
- package/src/session_memory.py +448 -0
- package/src/session_store.py +370 -0
- package/src/storage/__init__.py +19 -0
- package/src/storage/artifact_store.py +620 -0
- package/src/storage/helpers.py +116 -0
- package/src/storage/huggingface_storage.py +694 -0
- package/src/storage/r2_storage.py +0 -0
- package/src/storage/user_files_service.py +288 -0
- package/src/tools/__init__.py +335 -0
- package/src/tools/advanced_analysis.py +823 -0
- package/src/tools/advanced_feature_engineering.py +708 -0
- package/src/tools/advanced_insights.py +578 -0
- package/src/tools/advanced_preprocessing.py +549 -0
- package/src/tools/advanced_training.py +906 -0
- package/src/tools/agent_tool_mapping.py +326 -0
- package/src/tools/auto_pipeline.py +420 -0
- package/src/tools/autogluon_training.py +1480 -0
- package/src/tools/business_intelligence.py +860 -0
- package/src/tools/cloud_data_sources.py +581 -0
- package/src/tools/code_interpreter.py +390 -0
- package/src/tools/computer_vision.py +614 -0
- package/src/tools/data_cleaning.py +614 -0
- package/src/tools/data_profiling.py +593 -0
- package/src/tools/data_type_conversion.py +268 -0
- package/src/tools/data_wrangling.py +433 -0
- package/src/tools/eda_reports.py +284 -0
- package/src/tools/enhanced_feature_engineering.py +241 -0
- package/src/tools/feature_engineering.py +302 -0
- package/src/tools/matplotlib_visualizations.py +1327 -0
- package/src/tools/model_training.py +520 -0
- package/src/tools/nlp_text_analytics.py +761 -0
- package/src/tools/plotly_visualizations.py +497 -0
- package/src/tools/production_mlops.py +852 -0
- package/src/tools/time_series.py +507 -0
- package/src/tools/tools_registry.py +2133 -0
- package/src/tools/visualization_engine.py +559 -0
- package/src/utils/__init__.py +42 -0
- package/src/utils/error_recovery.py +313 -0
- package/src/utils/parallel_executor.py +402 -0
- package/src/utils/polars_helpers.py +248 -0
- package/src/utils/schema_extraction.py +132 -0
- package/src/utils/semantic_layer.py +392 -0
- package/src/utils/token_budget.py +411 -0
- package/src/utils/validation.py +377 -0
- package/src/workflow_state.py +154 -0
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Production-grade tool result compression for small context window models.
|
|
3
|
+
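Add this function to orchestrator.py, before the _parse_text_tool_calls method. The target module must import Dict and Any from typing, because the signature below uses both.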
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
def _compress_tool_result(self, tool_name: str, result: Dict[str, Any]) -> Dict[str, Any]:
    """
    Compress a tool result for small context models.

    Keeps only:
    - Status (success/failure)
    - Key metrics (a handful of the most important numbers)
    - File paths created
    - Next action hints

    The full result is not preserved here; per the original note it is
    expected to live in workflow_history and session memory (not verifiable
    from this function - confirm against the caller).

    Failed results (``success`` is falsy) are returned untouched so the
    complete error information stays available.

    The function tolerates incomplete tool output: a ``result`` payload that
    is not a dict is summarised generically, metrics that are ``None`` are
    passed through as ``None`` instead of crashing ``round()``, and column
    collections may be ``None``, a list, or a dict.

    Args:
        tool_name: Name of the tool executed
        result: Full tool result dict

    Returns:
        Compressed result dict with "success", "tool", "summary" and
        "next_steps" keys (or the original dict for failed results)
    """

    def _rounded(value, digits):
        # A metric may be present but None (or some other non-numeric
        # placeholder); hand it back unchanged rather than raising TypeError.
        try:
            return round(value, digits)
        except TypeError:
            return value

    def _test_score(model):
        # Ranking key for max(): a missing or None score counts as 0, which
        # matches the original default for a missing key.
        score = model.get("test_score")
        return 0 if score is None else score

    if not result.get("success", True):
        # Keep full error info (critical for debugging)
        return result

    compressed = {
        "success": True,
        "tool": tool_name,
    }

    payload = result.get("result", {})

    if not isinstance(payload, dict):
        # Every tool-specific rule below needs a dict payload. Anything else
        # (None, a plain string, a list, ...) gets the generic text summary.
        compressed["summary"] = {"result": str(payload)[:200] if payload else "completed"}
        compressed["next_steps"] = ["Continue workflow"]
        return compressed

    # Tool-specific compression rules
    if tool_name == "profile_dataset":
        compressed["summary"] = {
            "rows": payload.get("num_rows"),
            "cols": payload.get("num_columns"),
            "missing_pct": payload.get("missing_percentage"),
            "numeric_cols": len(payload.get("numeric_columns") or []),
            "categorical_cols": len(payload.get("categorical_columns") or []),
            "file_size_mb": _rounded(payload.get("memory_usage_mb", 0), 1),
            # First 5 columns only; list() yields the keys of a dict and the
            # items of a list, so both payload shapes are accepted.
            "key_columns": list(payload.get("columns") or [])[:5],
        }
        compressed["next_steps"] = ["clean_missing_values", "detect_data_quality_issues"]

    elif tool_name == "detect_data_quality_issues":
        compressed["summary"] = {
            "total_issues": payload.get("total_issues", 0),
            "critical_issues": payload.get("critical_issues", 0),
            "missing_data": payload.get("has_missing"),
            "outliers": payload.get("has_outliers"),
            "duplicates": payload.get("has_duplicates"),
        }
        compressed["next_steps"] = ["clean_missing_values", "handle_outliers"]

    elif tool_name in ("clean_missing_values", "handle_outliers", "encode_categorical"):
        compressed["summary"] = {
            "output_file": payload.get("output_file", payload.get("output_path")),
            "rows_processed": payload.get("rows_after", payload.get("num_rows")),
            "changes_made": bool(payload.get("changes", {}) or payload.get("imputed_columns")),
        }
        compressed["next_steps"] = ["Use this file for next step"]

    elif tool_name == "train_baseline_models":
        models = payload.get("models") or []
        # Only dict entries can be ranked; anything else is ignored for the
        # purpose of picking the best model but still counted below.
        candidates = [m for m in models if isinstance(m, dict)]
        summary = {}
        if candidates:
            best = max(candidates, key=_test_score)
            summary["best_model"] = best.get("model")
            summary["test_score"] = _rounded(best.get("test_score", 0), 4)
            summary["train_score"] = _rounded(best.get("train_score", 0), 4)
        # Always emit a summary so every branch returns the same top-level keys.
        summary["task_type"] = payload.get("task_type")
        summary["models_trained"] = len(models)
        compressed["summary"] = summary
        compressed["next_steps"] = ["hyperparameter_tuning", "generate_combined_eda_report"]

    elif tool_name in (
        "generate_plotly_dashboard",
        "generate_ydata_profiling_report",
        "generate_combined_eda_report",
    ):
        compressed["summary"] = {
            "report_path": payload.get("report_path", payload.get("output_path")),
            "report_type": tool_name,
            "success": True,
        }
        compressed["next_steps"] = ["Report ready for viewing"]

    elif tool_name == "hyperparameter_tuning":
        compressed["summary"] = {
            "best_params": payload.get("best_params", {}),
            "best_score": _rounded(payload.get("best_score", 0), 4),
            "model_type": payload.get("model_type"),
            "trials_completed": payload.get("n_trials"),
        }
        compressed["next_steps"] = ["perform_cross_validation", "generate_model_performance_plots"]

    else:
        # Generic compression: keep only commonly used key fields
        common_keys = ("output_path", "output_file", "status", "message", "success")
        key_fields = {key: payload[key] for key in common_keys if key in payload}
        compressed["summary"] = key_fields or {"result": "completed"}
        compressed["next_steps"] = ["Continue workflow"]

    return compressed
|