ds-agent-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ds-agent.js +451 -0
- package/ds_agent/__init__.py +8 -0
- package/package.json +28 -0
- package/requirements.txt +126 -0
- package/setup.py +35 -0
- package/src/__init__.py +7 -0
- package/src/_compress_tool_result.py +118 -0
- package/src/api/__init__.py +4 -0
- package/src/api/app.py +1626 -0
- package/src/cache/__init__.py +5 -0
- package/src/cache/cache_manager.py +561 -0
- package/src/cli.py +2886 -0
- package/src/dynamic_prompts.py +281 -0
- package/src/orchestrator.py +4799 -0
- package/src/progress_manager.py +139 -0
- package/src/reasoning/__init__.py +332 -0
- package/src/reasoning/business_summary.py +431 -0
- package/src/reasoning/data_understanding.py +356 -0
- package/src/reasoning/model_explanation.py +383 -0
- package/src/reasoning/reasoning_trace.py +239 -0
- package/src/registry/__init__.py +3 -0
- package/src/registry/tools_registry.py +3 -0
- package/src/session_memory.py +448 -0
- package/src/session_store.py +370 -0
- package/src/storage/__init__.py +19 -0
- package/src/storage/artifact_store.py +620 -0
- package/src/storage/helpers.py +116 -0
- package/src/storage/huggingface_storage.py +694 -0
- package/src/storage/r2_storage.py +0 -0
- package/src/storage/user_files_service.py +288 -0
- package/src/tools/__init__.py +335 -0
- package/src/tools/advanced_analysis.py +823 -0
- package/src/tools/advanced_feature_engineering.py +708 -0
- package/src/tools/advanced_insights.py +578 -0
- package/src/tools/advanced_preprocessing.py +549 -0
- package/src/tools/advanced_training.py +906 -0
- package/src/tools/agent_tool_mapping.py +326 -0
- package/src/tools/auto_pipeline.py +420 -0
- package/src/tools/autogluon_training.py +1480 -0
- package/src/tools/business_intelligence.py +860 -0
- package/src/tools/cloud_data_sources.py +581 -0
- package/src/tools/code_interpreter.py +390 -0
- package/src/tools/computer_vision.py +614 -0
- package/src/tools/data_cleaning.py +614 -0
- package/src/tools/data_profiling.py +593 -0
- package/src/tools/data_type_conversion.py +268 -0
- package/src/tools/data_wrangling.py +433 -0
- package/src/tools/eda_reports.py +284 -0
- package/src/tools/enhanced_feature_engineering.py +241 -0
- package/src/tools/feature_engineering.py +302 -0
- package/src/tools/matplotlib_visualizations.py +1327 -0
- package/src/tools/model_training.py +520 -0
- package/src/tools/nlp_text_analytics.py +761 -0
- package/src/tools/plotly_visualizations.py +497 -0
- package/src/tools/production_mlops.py +852 -0
- package/src/tools/time_series.py +507 -0
- package/src/tools/tools_registry.py +2133 -0
- package/src/tools/visualization_engine.py +559 -0
- package/src/utils/__init__.py +42 -0
- package/src/utils/error_recovery.py +313 -0
- package/src/utils/parallel_executor.py +402 -0
- package/src/utils/polars_helpers.py +248 -0
- package/src/utils/schema_extraction.py +132 -0
- package/src/utils/semantic_layer.py +392 -0
- package/src/utils/token_budget.py +411 -0
- package/src/utils/validation.py +377 -0
- package/src/workflow_state.py +154 -0
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Session Storage Manager
|
|
3
|
+
Persists session memory to SQLite database for cross-session continuity.
|
|
4
|
+
|
|
5
|
+
Enables users to resume conversations even after restarting the agent.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sqlite3
|
|
9
|
+
import json
|
|
10
|
+
from typing import Optional, List, Dict, Any
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from datetime import datetime, timedelta
|
|
13
|
+
|
|
14
|
+
from session_memory import SessionMemory
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SessionStore:
    """
    Persistent storage for session memory using SQLite.

    Features:
    - Save/load sessions by ID
    - Resume most recent session automatically
    - Cleanup old sessions
    - List all sessions

    Each operation opens its own short-lived connection and always closes it
    in a ``finally`` block, so a failing statement never leaks a connection
    (sqlite3's own ``with conn:`` only manages the transaction, not closing).

    Storage location: ./cache_db/sessions.db
    """

    # Schema and statements are defined once so the first-time initialization,
    # the corruption-recovery path and the save-retry path cannot drift apart.
    _CREATE_TABLE_SQL = """
        CREATE TABLE IF NOT EXISTS sessions (
            session_id TEXT PRIMARY KEY,
            created_at TEXT NOT NULL,
            last_active TEXT NOT NULL,
            context_json TEXT NOT NULL
        )
    """

    # Index on last_active for faster "most recent" / cleanup queries.
    _CREATE_INDEX_SQL = """
        CREATE INDEX IF NOT EXISTS idx_last_active
        ON sessions(last_active DESC)
    """

    _UPSERT_SQL = """
        INSERT OR REPLACE INTO sessions (session_id, created_at, last_active, context_json)
        VALUES (?, ?, ?, ?)
    """

    def __init__(self, db_path: str = "./cache_db/sessions.db"):
        """
        Initialize session store.

        Creates the parent directory of ``db_path`` if needed and makes sure
        the ``sessions`` table and its index exist.

        Args:
            db_path: Path to SQLite database file
        """
        self.db_path = db_path

        # Create directory if it doesn't exist
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)

        # Initialize database schema
        self._init_database()

    def _create_schema(self) -> None:
        """Create the sessions table and its last_active index if missing."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute(self._CREATE_TABLE_SQL)
            conn.execute(self._CREATE_INDEX_SQL)
            conn.commit()
        finally:
            conn.close()

    def _init_database(self):
        """
        Create sessions table if it doesn't exist.

        If SQLite itself reports an error, the database file is deleted and
        recreated once. Only ``sqlite3.Error`` triggers that destructive
        recovery: an unrelated failure (for example the console being unable
        to encode a status message) must never wipe stored sessions.
        A failed recovery is reported but not raised (best effort).
        """
        try:
            self._create_schema()
        except sqlite3.Error as e:
            print(f"⚠️ Failed to initialize sessions database: {e}")
            # Try to recreate the database if corrupted
            try:
                Path(self.db_path).unlink(missing_ok=True)
                print(f" Deleted corrupted database, reinitializing...")
                self._create_schema()
            except (sqlite3.Error, OSError) as retry_error:
                print(f"❌ Failed to reinitialize sessions database: {retry_error}")
            else:
                print(f"✅ Sessions database reinitialized successfully")
        else:
            print(f"✅ Sessions database initialized at {self.db_path}")

    def _make_json_serializable(self, obj: Any) -> Any:
        """
        Convert objects to JSON-serializable format.

        Conversion rules, applied recursively:
        - dict: values converted; keys that JSON cannot encode become ``str(key)``
        - list / tuple / set / frozenset: converted to a list
        - timedelta: total seconds as a float (timedelta has no ``isoformat``)
        - datetime: ISO-8601 string
        - any class whose name contains "Figure": ``<Name object: id>`` placeholder
        - numpy arrays: ``<NumPy array: shape=...>`` placeholder
        - objects exposing a callable ``item`` (e.g. numpy scalars): ``item()``,
          falling back to ``str(obj)`` if that call fails
        - JSON primitives: returned unchanged
        - other objects with a ``__dict__``: ``<Name object>`` placeholder
        - anything else (bytes, Decimal, date, ...): ``str(obj)``

        Args:
            obj: Arbitrary value taken from a session dictionary

        Returns:
            A structure made only of dict, list, str, int, float, bool and None
            (apart from whatever a third-party ``item()`` call returns)
        """
        # Resolve numpy once per top-level call instead of once per nested value.
        try:
            import numpy as np
        except ImportError:
            np = None

        primitives = (str, int, float, bool, type(None))

        def convert(value: Any) -> Any:
            # Handle dictionaries recursively
            if isinstance(value, dict):
                return {
                    (key if isinstance(key, primitives) else str(key)): convert(item)
                    for key, item in value.items()
                }

            # Handle sequences and sets recursively
            if isinstance(value, (list, tuple, set, frozenset)):
                return [convert(item) for item in value]

            # Handle durations and timestamps
            if isinstance(value, timedelta):
                return value.total_seconds()
            if isinstance(value, datetime):
                return value.isoformat()

            class_name = value.__class__.__name__

            # Handle matplotlib / plotly Figure objects
            if 'Figure' in class_name:
                return f"<{class_name} object: {id(value)}>"

            # Handle numpy arrays
            if np is not None and isinstance(value, np.ndarray):
                return f"<NumPy array: shape={value.shape}>"

            # Handle numpy scalar types (and anything else exposing item())
            item_method = getattr(value, 'item', None)
            if callable(item_method):
                try:
                    return item_method()
                except Exception:
                    return str(value)

            # Already serializable
            if isinstance(value, primitives):
                return value

            # Handle other non-serializable objects (dataframes, models, etc.)
            if hasattr(value, '__dict__'):
                if class_name in ('DataFrame', 'Series', 'Model', 'Pipeline', 'Figure'):
                    return f"<{class_name} object: {id(value)}>"
                return f"<{class_name} object>"

            # Slot-only / builtin values json cannot encode
            return str(value)

        return convert(obj)

    def _write_session(self, session: "SessionMemory", context_json: str) -> None:
        """Insert or replace one session row and commit."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute(self._UPSERT_SQL, (
                session.session_id,
                session.created_at.isoformat(),
                session.last_active.isoformat(),
                context_json,
            ))
            conn.commit()
        finally:
            conn.close()

    def save(self, session: "SessionMemory"):
        """
        Save session to database.

        If the sessions table is missing, the database is reinitialized and
        the save is retried once.

        Args:
            session: SessionMemory instance to save

        Raises:
            sqlite3.OperationalError: For database errors other than a missing
                table, or when the retry fails with such an error
            TypeError: If the cleaned session data still cannot be encoded
        """
        # Serialize session to JSON - clean non-serializable objects first
        context_json = json.dumps(self._make_json_serializable(session.to_dict()))

        try:
            self._write_session(session, context_json)
        except sqlite3.OperationalError as e:
            if "no such table" not in str(e):
                raise

            print(f"⚠️ Sessions table not found, reinitializing database...")
            self._init_database()

            # Retry save after reinitialization
            try:
                self._write_session(session, context_json)
            except Exception as retry_error:
                print(f"❌ Failed to save session after reinitialization: {retry_error}")
                raise
            print(f"✅ Session saved successfully after database reinitialization")

    def _fetch_session(self, query: str, params: tuple) -> Optional["SessionMemory"]:
        """Run a query selecting context_json and rebuild the first row, if any."""
        conn = sqlite3.connect(self.db_path)
        try:
            row = conn.execute(query, params).fetchone()
        finally:
            conn.close()

        if row is None:
            return None

        # Deserialize JSON to SessionMemory
        return SessionMemory.from_dict(json.loads(row[0]))

    def load(self, session_id: str) -> Optional["SessionMemory"]:
        """
        Load session from database by ID.

        Args:
            session_id: Unique session identifier

        Returns:
            SessionMemory instance or None if not found
        """
        return self._fetch_session(
            "SELECT context_json FROM sessions WHERE session_id = ?",
            (session_id,),
        )

    def get_recent_session(self, max_age_hours: int = 24) -> Optional["SessionMemory"]:
        """
        Get most recent active session within time window.

        Useful for automatic session resumption when user returns.

        Args:
            max_age_hours: Maximum age in hours (default: 24)

        Returns:
            Most recent SessionMemory or None if no recent sessions

        Example:
            # Resume conversation from yesterday
            session = store.get_recent_session(max_age_hours=24)
            if session:
                print(f"Resuming session: {session.last_dataset}")
        """
        # The cutoff is compared as an ISO-8601 string against stored text.
        # NOTE(review): assumes stored last_active values are naive local
        # timestamps like datetime.now() - confirm against SessionMemory.
        cutoff_time = (datetime.now() - timedelta(hours=max_age_hours)).isoformat()

        return self._fetch_session(
            """
            SELECT context_json FROM sessions
            WHERE last_active > ?
            ORDER BY last_active DESC
            LIMIT 1
            """,
            (cutoff_time,),
        )

    def list_sessions(self, limit: int = 10) -> List[Dict[str, str]]:
        """
        List recent sessions with basic info.

        Args:
            limit: Maximum number of sessions to return

        Returns:
            List of session info dicts with session_id, created_at,
            last_active, ordered by last_active descending

        Example:
            sessions = store.list_sessions(limit=5)
            for s in sessions:
                print(f"{s['session_id']}: {s['last_active']}")
        """
        conn = sqlite3.connect(self.db_path)
        try:
            rows = conn.execute(
                """
                SELECT session_id, created_at, last_active
                FROM sessions
                ORDER BY last_active DESC
                LIMIT ?
                """,
                (limit,),
            ).fetchall()
        finally:
            conn.close()

        return [
            {
                "session_id": session_id,
                "created_at": created_at,
                "last_active": last_active,
            }
            for session_id, created_at, last_active in rows
        ]

    def delete(self, session_id: str) -> bool:
        """
        Delete session from database.

        Args:
            session_id: Session to delete

        Returns:
            True if deleted, False if not found
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.execute(
                "DELETE FROM sessions WHERE session_id = ?", (session_id,)
            )
            rows_deleted = cursor.rowcount
            conn.commit()
        finally:
            conn.close()

        return rows_deleted > 0

    def cleanup_old_sessions(self, days: int = 7) -> int:
        """
        Delete sessions older than specified days.

        Args:
            days: Age threshold in days

        Returns:
            Number of sessions deleted

        Example:
            # Delete sessions older than 7 days
            deleted = store.cleanup_old_sessions(days=7)
            print(f"Cleaned up {deleted} old sessions")
        """
        cutoff_time = (datetime.now() - timedelta(days=days)).isoformat()

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.execute(
                "DELETE FROM sessions WHERE last_active < ?", (cutoff_time,)
            )
            rows_deleted = cursor.rowcount
            conn.commit()
        finally:
            conn.close()

        return rows_deleted

    def get_session_count(self) -> int:
        """
        Get total number of sessions in database.

        Returns:
            Session count
        """
        conn = sqlite3.connect(self.db_path)
        try:
            count = conn.execute("SELECT COUNT(*) FROM sessions").fetchone()[0]
        finally:
            conn.close()

        return count
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Storage abstraction for artifacts (models, plots, reports)."""
|
|
2
|
+
|
|
3
|
+
from .artifact_store import ArtifactStore, get_artifact_store, reset_artifact_store
|
|
4
|
+
from .helpers import (
|
|
5
|
+
save_model_with_store,
|
|
6
|
+
save_plot_with_store,
|
|
7
|
+
save_report_with_store,
|
|
8
|
+
save_data_with_store
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
# Names exported by `from <this package> import *`: the artifact store class
# and its accessor functions imported from .artifact_store, plus the save_*
# helper functions imported from .helpers.
__all__ = [
    "ArtifactStore",
    "get_artifact_store",
    "reset_artifact_store",
    "save_model_with_store",
    "save_plot_with_store",
    "save_report_with_store",
    "save_data_with_store"
]
|