ds-agent-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/bin/ds-agent.js +451 -0
  2. package/ds_agent/__init__.py +8 -0
  3. package/package.json +28 -0
  4. package/requirements.txt +126 -0
  5. package/setup.py +35 -0
  6. package/src/__init__.py +7 -0
  7. package/src/_compress_tool_result.py +118 -0
  8. package/src/api/__init__.py +4 -0
  9. package/src/api/app.py +1626 -0
  10. package/src/cache/__init__.py +5 -0
  11. package/src/cache/cache_manager.py +561 -0
  12. package/src/cli.py +2886 -0
  13. package/src/dynamic_prompts.py +281 -0
  14. package/src/orchestrator.py +4799 -0
  15. package/src/progress_manager.py +139 -0
  16. package/src/reasoning/__init__.py +332 -0
  17. package/src/reasoning/business_summary.py +431 -0
  18. package/src/reasoning/data_understanding.py +356 -0
  19. package/src/reasoning/model_explanation.py +383 -0
  20. package/src/reasoning/reasoning_trace.py +239 -0
  21. package/src/registry/__init__.py +3 -0
  22. package/src/registry/tools_registry.py +3 -0
  23. package/src/session_memory.py +448 -0
  24. package/src/session_store.py +370 -0
  25. package/src/storage/__init__.py +19 -0
  26. package/src/storage/artifact_store.py +620 -0
  27. package/src/storage/helpers.py +116 -0
  28. package/src/storage/huggingface_storage.py +694 -0
  29. package/src/storage/r2_storage.py +0 -0
  30. package/src/storage/user_files_service.py +288 -0
  31. package/src/tools/__init__.py +335 -0
  32. package/src/tools/advanced_analysis.py +823 -0
  33. package/src/tools/advanced_feature_engineering.py +708 -0
  34. package/src/tools/advanced_insights.py +578 -0
  35. package/src/tools/advanced_preprocessing.py +549 -0
  36. package/src/tools/advanced_training.py +906 -0
  37. package/src/tools/agent_tool_mapping.py +326 -0
  38. package/src/tools/auto_pipeline.py +420 -0
  39. package/src/tools/autogluon_training.py +1480 -0
  40. package/src/tools/business_intelligence.py +860 -0
  41. package/src/tools/cloud_data_sources.py +581 -0
  42. package/src/tools/code_interpreter.py +390 -0
  43. package/src/tools/computer_vision.py +614 -0
  44. package/src/tools/data_cleaning.py +614 -0
  45. package/src/tools/data_profiling.py +593 -0
  46. package/src/tools/data_type_conversion.py +268 -0
  47. package/src/tools/data_wrangling.py +433 -0
  48. package/src/tools/eda_reports.py +284 -0
  49. package/src/tools/enhanced_feature_engineering.py +241 -0
  50. package/src/tools/feature_engineering.py +302 -0
  51. package/src/tools/matplotlib_visualizations.py +1327 -0
  52. package/src/tools/model_training.py +520 -0
  53. package/src/tools/nlp_text_analytics.py +761 -0
  54. package/src/tools/plotly_visualizations.py +497 -0
  55. package/src/tools/production_mlops.py +852 -0
  56. package/src/tools/time_series.py +507 -0
  57. package/src/tools/tools_registry.py +2133 -0
  58. package/src/tools/visualization_engine.py +559 -0
  59. package/src/utils/__init__.py +42 -0
  60. package/src/utils/error_recovery.py +313 -0
  61. package/src/utils/parallel_executor.py +402 -0
  62. package/src/utils/polars_helpers.py +248 -0
  63. package/src/utils/schema_extraction.py +132 -0
  64. package/src/utils/semantic_layer.py +392 -0
  65. package/src/utils/token_budget.py +411 -0
  66. package/src/utils/validation.py +377 -0
  67. package/src/workflow_state.py +154 -0
@@ -0,0 +1,370 @@
1
+ """
2
+ Session Storage Manager
3
+ Persists session memory to SQLite database for cross-session continuity.
4
+
5
+ Enables users to resume conversations even after restarting the agent.
6
+ """
7
+
8
+ import sqlite3
9
+ import json
10
+ from typing import Optional, List, Dict, Any
11
+ from pathlib import Path
12
+ from datetime import datetime, timedelta
13
+
14
+ from session_memory import SessionMemory
15
+
16
+
17
class SessionStore:
    """
    Persistent storage for session memory using SQLite.

    Features:
    - Save/load sessions by ID
    - Resume most recent session automatically
    - Cleanup old sessions
    - List all sessions

    Every operation opens its own short-lived connection and closes it in a
    ``finally`` block, so a failing statement never leaks a database handle.

    Storage location: ./cache_db/sessions.db
    """

    # DDL executed (idempotently) whenever the database is initialized.
    _SCHEMA_STATEMENTS = (
        """
        CREATE TABLE IF NOT EXISTS sessions (
            session_id TEXT PRIMARY KEY,
            created_at TEXT NOT NULL,
            last_active TEXT NOT NULL,
            context_json TEXT NOT NULL
        )
        """,
        # Index on last_active for faster "most recent" / cleanup queries.
        """
        CREATE INDEX IF NOT EXISTS idx_last_active
        ON sessions(last_active DESC)
        """,
    )

    _UPSERT_SQL = """
        INSERT OR REPLACE INTO sessions (session_id, created_at, last_active, context_json)
        VALUES (?, ?, ?, ?)
    """

    # Key types json.dumps accepts as-is; any other key is stringified.
    _JSON_KEY_TYPES = (str, int, float, bool, type(None))

    def __init__(self, db_path: str = "./cache_db/sessions.db"):
        """
        Initialize session store.

        Args:
            db_path: Path to SQLite database file
        """
        self.db_path = db_path

        # Create directory if it doesn't exist
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)

        # Initialize database schema
        self._init_database()

    # ------------------------------------------------------------------
    # Low-level helpers
    # ------------------------------------------------------------------

    def _create_schema(self) -> None:
        """Run the schema DDL on a fresh connection and commit it."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            for statement in self._SCHEMA_STATEMENTS:
                cursor.execute(statement)
            conn.commit()
        finally:
            conn.close()

    def _fetch(self, sql: str, params: tuple = (), *, one: bool = False):
        """Run a read-only query; return one row (or None) or all rows."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute(sql, params)
            return cursor.fetchone() if one else cursor.fetchall()
        finally:
            conn.close()

    def _modify(self, sql: str, params: tuple = ()) -> int:
        """Run a write statement, commit it, and return the affected row count."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute(sql, params)
            affected = cursor.rowcount
            conn.commit()
            return affected
        finally:
            conn.close()

    def _init_database(self):
        """
        Create sessions table if it doesn't exist.

        If the first attempt fails for any reason, the database file is
        deleted and schema creation is retried once. Failures of the retry
        are reported on stdout and swallowed (best-effort initialization).
        """
        try:
            self._create_schema()
            print(f"✅ Sessions database initialized at {self.db_path}")
        except Exception as e:
            print(f"⚠️ Failed to initialize sessions database: {e}")
            # Try to recreate the database if corrupted. The failed
            # connection is already closed here, so the file can be removed.
            try:
                Path(self.db_path).unlink(missing_ok=True)
                print(" Deleted corrupted database, reinitializing...")
                self._create_schema()
                print("✅ Sessions database reinitialized successfully")
            except Exception as retry_error:
                print(f"❌ Failed to reinitialize sessions database: {retry_error}")

    def _make_json_serializable(self, obj: Any) -> Any:
        """
        Convert objects to JSON-serializable format.

        Handles containers recursively and replaces values ``json.dumps``
        cannot encode:

        - dict: values are converted recursively; keys that are not
          str/int/float/bool/None are replaced by ``str(key)``
        - list/tuple/set/frozenset: converted to a list of converted items
        - datetime: ISO-8601 string
        - timedelta: duration in seconds (float)
        - any object whose class name contains "Figure": placeholder string
        - numpy arrays (when numpy is importable): placeholder with shape
        - objects exposing a callable ``item()`` (e.g. numpy scalars):
          the result of ``item()``, or ``str(obj)`` if that call fails
        - other objects with a ``__dict__``: placeholder string

        Anything else is returned unchanged.
        """
        try:
            import numpy as np
        except ImportError:
            np = None

        # Containers are rebuilt recursively.
        if isinstance(obj, dict):
            return {
                (key if isinstance(key, self._JSON_KEY_TYPES) else str(key)):
                    self._make_json_serializable(value)
                for key, value in obj.items()
            }
        if isinstance(obj, (list, tuple, set, frozenset)):
            return [self._make_json_serializable(item) for item in obj]

        # datetime has isoformat(); timedelta does not, so it is stored as seconds.
        if isinstance(obj, datetime):
            return obj.isoformat()
        if isinstance(obj, timedelta):
            return obj.total_seconds()

        class_name = obj.__class__.__name__

        # matplotlib / plotly Figure objects (matched by class name only)
        if 'Figure' in class_name:
            return f"<{class_name} object: {id(obj)}>"

        if np is not None and isinstance(obj, np.ndarray):
            return f"<NumPy array: shape={obj.shape}>"

        # numpy scalar types and anything else exposing item()
        if callable(getattr(obj, 'item', None)):
            try:
                return obj.item()
            except Exception:
                return str(obj)

        # Other non-serializable objects (dataframes, models, etc.)
        if hasattr(obj, '__dict__') and not isinstance(obj, (str, int, float, bool, type(None))):
            if class_name in ('DataFrame', 'Series', 'Model', 'Pipeline', 'Figure'):
                return f"<{class_name} object: {id(obj)}>"
            return f"<{class_name} object>"

        # Already serializable
        return obj

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def save(self, session: "SessionMemory") -> None:
        """
        Save session to database (insert or replace by session_id).

        Args:
            session: SessionMemory instance to save. Its ``session_id``,
                ``created_at``, ``last_active`` and ``to_dict()`` are read.

        Raises:
            sqlite3.OperationalError: if the write fails for a reason other
                than a missing table.
            Exception: if the retry after reinitializing the database fails.
        """
        # Serialize session to JSON - clean non-serializable objects first
        row = (
            session.session_id,
            session.created_at.isoformat(),
            session.last_active.isoformat(),
            json.dumps(self._make_json_serializable(session.to_dict())),
        )

        try:
            self._modify(self._UPSERT_SQL, row)
        except sqlite3.OperationalError as e:
            if "no such table" not in str(e):
                raise
            print("⚠️ Sessions table not found, reinitializing database...")
            self._init_database()
            # Retry save after reinitialization
            try:
                self._modify(self._UPSERT_SQL, row)
                print("✅ Session saved successfully after database reinitialization")
            except Exception as retry_error:
                print(f"❌ Failed to save session after reinitialization: {retry_error}")
                raise

    def load(self, session_id: str) -> Optional["SessionMemory"]:
        """
        Load session from database by ID.

        Args:
            session_id: Unique session identifier

        Returns:
            SessionMemory instance or None if not found
        """
        row = self._fetch(
            "SELECT context_json FROM sessions WHERE session_id = ?",
            (session_id,),
            one=True,
        )
        if not row:
            return None

        # Deserialize JSON to SessionMemory
        return SessionMemory.from_dict(json.loads(row[0]))

    def get_recent_session(self, max_age_hours: int = 24) -> Optional["SessionMemory"]:
        """
        Get most recent active session within time window.

        Useful for automatic session resumption when user returns.

        Args:
            max_age_hours: Maximum age in hours (default: 24)

        Returns:
            Most recent SessionMemory or None if no recent sessions

        Example:
            # Resume conversation from yesterday
            session = store.get_recent_session(max_age_hours=24)
            if session:
                print(f"Resuming session: {session.last_dataset}")
        """
        # NOTE(review): timestamps are compared as ISO strings against a naive
        # local datetime.now(); this assumes last_active was saved the same way.
        cutoff_time = (datetime.now() - timedelta(hours=max_age_hours)).isoformat()

        row = self._fetch(
            """
            SELECT context_json FROM sessions
            WHERE last_active > ?
            ORDER BY last_active DESC
            LIMIT 1
            """,
            (cutoff_time,),
            one=True,
        )
        if not row:
            return None

        return SessionMemory.from_dict(json.loads(row[0]))

    def list_sessions(self, limit: int = 10) -> List[Dict[str, str]]:
        """
        List recent sessions with basic info, most recently active first.

        Args:
            limit: Maximum number of sessions to return

        Returns:
            List of session info dicts with session_id, created_at, last_active

        Example:
            sessions = store.list_sessions(limit=5)
            for s in sessions:
                print(f"{s['session_id']}: {s['last_active']}")
        """
        rows = self._fetch(
            """
            SELECT session_id, created_at, last_active
            FROM sessions
            ORDER BY last_active DESC
            LIMIT ?
            """,
            (limit,),
        )
        return [
            {
                "session_id": session_id,
                "created_at": created_at,
                "last_active": last_active,
            }
            for session_id, created_at, last_active in rows
        ]

    def delete(self, session_id: str) -> bool:
        """
        Delete session from database.

        Args:
            session_id: Session to delete

        Returns:
            True if deleted, False if not found
        """
        return self._modify(
            "DELETE FROM sessions WHERE session_id = ?", (session_id,)
        ) > 0

    def cleanup_old_sessions(self, days: int = 7) -> int:
        """
        Delete sessions whose last activity is older than specified days.

        Args:
            days: Age threshold in days

        Returns:
            Number of sessions deleted

        Example:
            # Delete sessions older than 7 days
            deleted = store.cleanup_old_sessions(days=7)
            print(f"Cleaned up {deleted} old sessions")
        """
        cutoff_time = (datetime.now() - timedelta(days=days)).isoformat()
        return self._modify(
            "DELETE FROM sessions WHERE last_active < ?", (cutoff_time,)
        )

    def get_session_count(self) -> int:
        """
        Get total number of sessions in database.

        Returns:
            Session count
        """
        return self._fetch("SELECT COUNT(*) FROM sessions", one=True)[0]
@@ -0,0 +1,19 @@
1
+ """Storage abstraction for artifacts (models, plots, reports)."""
2
+
3
+ from .artifact_store import ArtifactStore, get_artifact_store, reset_artifact_store
4
+ from .helpers import (
5
+ save_model_with_store,
6
+ save_plot_with_store,
7
+ save_report_with_store,
8
+ save_data_with_store
9
+ )
10
+
11
# Names re-exported as the public API of the storage package.
__all__ = [
    # Artifact store class and its accessor / reset functions
    "ArtifactStore",
    "get_artifact_store",
    "reset_artifact_store",
    # Store-aware save helpers, one per artifact kind
    "save_model_with_store",
    "save_plot_with_store",
    "save_report_with_store",
    "save_data_with_store",
]