ds-agent-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/bin/ds-agent.js +451 -0
  2. package/ds_agent/__init__.py +8 -0
  3. package/package.json +28 -0
  4. package/requirements.txt +126 -0
  5. package/setup.py +35 -0
  6. package/src/__init__.py +7 -0
  7. package/src/_compress_tool_result.py +118 -0
  8. package/src/api/__init__.py +4 -0
  9. package/src/api/app.py +1626 -0
  10. package/src/cache/__init__.py +5 -0
  11. package/src/cache/cache_manager.py +561 -0
  12. package/src/cli.py +2886 -0
  13. package/src/dynamic_prompts.py +281 -0
  14. package/src/orchestrator.py +4799 -0
  15. package/src/progress_manager.py +139 -0
  16. package/src/reasoning/__init__.py +332 -0
  17. package/src/reasoning/business_summary.py +431 -0
  18. package/src/reasoning/data_understanding.py +356 -0
  19. package/src/reasoning/model_explanation.py +383 -0
  20. package/src/reasoning/reasoning_trace.py +239 -0
  21. package/src/registry/__init__.py +3 -0
  22. package/src/registry/tools_registry.py +3 -0
  23. package/src/session_memory.py +448 -0
  24. package/src/session_store.py +370 -0
  25. package/src/storage/__init__.py +19 -0
  26. package/src/storage/artifact_store.py +620 -0
  27. package/src/storage/helpers.py +116 -0
  28. package/src/storage/huggingface_storage.py +694 -0
  29. package/src/storage/r2_storage.py +0 -0
  30. package/src/storage/user_files_service.py +288 -0
  31. package/src/tools/__init__.py +335 -0
  32. package/src/tools/advanced_analysis.py +823 -0
  33. package/src/tools/advanced_feature_engineering.py +708 -0
  34. package/src/tools/advanced_insights.py +578 -0
  35. package/src/tools/advanced_preprocessing.py +549 -0
  36. package/src/tools/advanced_training.py +906 -0
  37. package/src/tools/agent_tool_mapping.py +326 -0
  38. package/src/tools/auto_pipeline.py +420 -0
  39. package/src/tools/autogluon_training.py +1480 -0
  40. package/src/tools/business_intelligence.py +860 -0
  41. package/src/tools/cloud_data_sources.py +581 -0
  42. package/src/tools/code_interpreter.py +390 -0
  43. package/src/tools/computer_vision.py +614 -0
  44. package/src/tools/data_cleaning.py +614 -0
  45. package/src/tools/data_profiling.py +593 -0
  46. package/src/tools/data_type_conversion.py +268 -0
  47. package/src/tools/data_wrangling.py +433 -0
  48. package/src/tools/eda_reports.py +284 -0
  49. package/src/tools/enhanced_feature_engineering.py +241 -0
  50. package/src/tools/feature_engineering.py +302 -0
  51. package/src/tools/matplotlib_visualizations.py +1327 -0
  52. package/src/tools/model_training.py +520 -0
  53. package/src/tools/nlp_text_analytics.py +761 -0
  54. package/src/tools/plotly_visualizations.py +497 -0
  55. package/src/tools/production_mlops.py +852 -0
  56. package/src/tools/time_series.py +507 -0
  57. package/src/tools/tools_registry.py +2133 -0
  58. package/src/tools/visualization_engine.py +559 -0
  59. package/src/utils/__init__.py +42 -0
  60. package/src/utils/error_recovery.py +313 -0
  61. package/src/utils/parallel_executor.py +402 -0
  62. package/src/utils/polars_helpers.py +248 -0
  63. package/src/utils/schema_extraction.py +132 -0
  64. package/src/utils/semantic_layer.py +392 -0
  65. package/src/utils/token_budget.py +411 -0
  66. package/src/utils/validation.py +377 -0
  67. package/src/workflow_state.py +154 -0
@@ -0,0 +1,451 @@
1
+ #!/usr/bin/env node
2
+
3
+ "use strict";
4
+
5
+ const fs = require("fs");
6
+ const os = require("os");
7
+ const path = require("path");
8
+ const { spawnSync } = require("child_process");
9
+
10
+ const pkg = require("../package.json");
11
// Root of the installed npm package: the parent of the directory holding this script.
const projectRoot = path.resolve(__dirname, "..");

// Per-user working directory. DS_AGENT_HOME wins when set to a non-empty value,
// otherwise a ".ds-agent" folder inside the user's home directory is used.
const homeOverride = process.env.DS_AGENT_HOME;
const appHome = homeOverride ? homeOverride : path.join(os.homedir(), ".ds-agent");

// Locations inside appHome: the managed virtual environment and the install-state file.
const venvDir = path.join(appHome, "venv");
const stateFile = path.join(appHome, "install-state.json");

// Files shipped inside the package.
const requirementsFile = path.join(projectRoot, "requirements.txt");
const cliScript = path.join(projectRoot, "src", "cli.py");

// Compared against the value stored in the state file; a mismatch resets the recorded install state.
const INSTALL_SCHEMA_VERSION = 3;
18
+
19
// Every optional dependency group; used by commands that need the full stack.
const HEAVY_GROUPS = [
  "runtime",
  "data",
  "frame",
  "plot",
  "forecast",
  "nlp",
  "bi",
  "ml",
  "report",
  "automl",
];

// Commands that share the HEAVY_GROUPS list (same array instance for each).
const HEAVY_COMMANDS = ["analyze", "pipeline", "chat", "cache-stats", "clear-cache"];

// Maps a CLI sub-command name to the dependency groups it needs on top of "core".
// An empty list means the command runs with the core group only.
const COMMAND_GROUPS = {
  quickstart: [],
  ...Object.fromEntries(HEAVY_COMMANDS.map((commandName) => [commandName, HEAVY_GROUPS])),
  tune: ["data", "frame", "ml", "plot", "automl"],
  train: ["data", "frame", "ml", "plot"],
  clean: ["data", "frame"],
  profile: ["data", "frame"],
  eda: ["data", "frame", "report", "plot"],
  report: ["data", "report"],
  compare: ["data", "frame", "report", "plot"],
  bi: ["data", "frame", "bi", "plot"],
  plot: ["data", "frame", "plot"],
  forecast: ["data", "forecast", "plot"],
  nlp: ["data", "nlp"],
  sessions: [],
};
41
+
42
// Builds one dependency-group record: a version stamp plus its pip requirement strings.
const defineGroup = (version, ...packages) => ({ version, packages });

// Dependency groups installable into the managed virtual environment.
// `version` is the stamp recorded in the install state once a group is installed;
// a group whose recorded stamp differs from the value here is installed again.
const GROUP_DEFS = {
  core: defineGroup(
    2,
    "typer>=0.12.3,<0.16.0",
    "click>=8.1.7,<8.2.0",
    "rich==13.7.0",
    "python-dotenv==1.0.0",
    "pydantic>=2.11.7",
    "setuptools<81"
  ),
  data: defineGroup(2, "numpy>=1.26.0,<2.3.0", "pandas>=2.2.0,<3.0.0"),
  frame: defineGroup(1, "polars>=0.20.3", "pyarrow>=14.0.1", "duckdb>=0.10.0", "joblib==1.3.2"),
  report: defineGroup(1, "ydata-profiling>=4.17.0", "sweetviz>=2.3.0"),
  plot: defineGroup(1, "matplotlib>=3.8.2", "seaborn>=0.13.1", "plotly>=5.18.0"),
  forecast: defineGroup(
    1,
    "prophet>=1.1.5",
    "holidays>=0.40",
    "pmdarima>=2.0",
    "statsmodels>=0.14.1"
  ),
  nlp: defineGroup(
    1,
    "textblob>=0.17.1",
    "vaderSentiment>=3.3.2",
    "sentence-transformers>=2.2.2",
    "tiktoken>=0.5.2"
  ),
  bi: defineGroup(1, "scipy>=1.11.4", "statsmodels>=0.14.1", "dowhy>=0.11"),
  ml: defineGroup(
    1,
    "scikit-learn>=1.4.0",
    "xgboost>=2.0.3",
    "lightgbm>=4.6.0",
    "catboost>=1.2.8",
    "optuna>=3.5.0",
    "shap>=0.44.1",
    "imbalanced-learn>=0.12.0",
    "cleanlab>=2.6",
    "pandera>=0.18",
    "boruta>=0.3"
  ),
  runtime: defineGroup(
    1,
    "groq>=0.13.0",
    "mistralai>=0.0.7",
    "supabase==2.28.3",
    "boto3>=1.28.0",
    "huggingface_hub>=0.20.0",
    "fastapi>=0.109.0",
    "uvicorn>=0.25.0",
    "python-multipart>=0.0.6",
    "google-cloud-bigquery==3.14.1",
    "google-cloud-storage==2.14.0",
    "google-auth==2.25.2",
    "google-generativeai==0.3.2",
    "lime==0.2.0.1",
    "fairlearn==0.10.0",
    "evidently>=0.4.0",
    "dtreeviz>=2.2",
    "Pillow>=11.0.0"
  ),
  automl: defineGroup(1, "autogluon.tabular>=1.2", "autogluon.timeseries>=1.2", "holidays>=0.40"),
};

// Order in which requested groups are installed; also the full list used for "install all".
const GROUP_INSTALL_ORDER = [
  "core",
  "runtime",
  "data",
  "frame",
  "plot",
  "forecast",
  "nlp",
  "bi",
  "ml",
  "report",
  "automl",
];
139
+
140
/**
 * Writes one line to stderr.
 *
 * @param {*} msg - Value to print; it is interpolated into a string and a newline is appended.
 */
function log(msg) {
  const line = `${msg}\n`;
  process.stderr.write(line);
}
143
+
144
/**
 * Builds the environment used for pip invocations.
 *
 * @returns {Object} A fresh copy of process.env with PIP_PROGRESS_BAR forced to "off"
 *   and PIP_DISABLE_PIP_VERSION_CHECK forced to "1".
 */
function pipEnv() {
  const overrides = {
    PIP_PROGRESS_BAR: "off",
    PIP_DISABLE_PIP_VERSION_CHECK: "1",
  };
  return Object.assign({}, process.env, overrides);
}
151
+
152
/**
 * Runs a child process synchronously and fails loudly if it does not succeed.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {Object} [options] - Extra spawnSync options; they override the default
 *   of inheriting the parent's stdio.
 * @throws {Error} The spawn error when the process could not be started, or a
 *   "Command failed" error when the exit status is anything other than 0.
 */
function runCommand(command, args, options = {}) {
  const spawnOptions = Object.assign({ stdio: "inherit" }, options);
  const { error, status } = spawnSync(command, args, spawnOptions);

  if (error) {
    throw error;
  }

  if (status === 0) {
    return;
  }

  throw new Error(`Command failed: ${command} ${args.join(" ")}`);
}
166
+
167
/**
 * Runs a child process synchronously and returns its textual output.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {string|null} Trimmed stdout, or trimmed stderr when stdout is empty;
 *   null when the process could not be started or exited with a non-zero status.
 */
function runCapture(command, args) {
  const { error, status, stdout, stderr } = spawnSync(command, args, { encoding: "utf8" });

  const failed = Boolean(error) || status !== 0;
  if (failed) {
    return null;
  }

  const text = stdout || stderr || "";
  return text.trim();
}
174
+
175
/**
 * Creates a directory, including any missing parent directories.
 *
 * @param {string} dirPath - Directory to create.
 */
function ensureDir(dirPath) {
  const mkdirOptions = { recursive: true };
  fs.mkdirSync(dirPath, mkdirOptions);
}
178
+
179
/**
 * Resolves the Python executable inside the managed virtual environment.
 *
 * @returns {string} "<venvDir>/Scripts/python.exe" on win32, "<venvDir>/bin/python" elsewhere.
 */
function getVenvPython() {
  const onWindows = process.platform === "win32";
  const relativeParts = onWindows ? ["Scripts", "python.exe"] : ["bin", "python"];
  return path.join(venvDir, ...relativeParts);
}
185
+
186
/**
 * Finds the first usable Python interpreter.
 *
 * Candidates are probed in order with `--version`: the DS_AGENT_PYTHON value
 * (when set) first, then a fixed list of interpreter names. A candidate is
 * accepted when its reported version is Python 3.10 through 3.13.
 *
 * @returns {{command: string, version: string}|null} The accepted command and its
 *   "major.minor.patch" version, or null when no candidate qualifies.
 */
function detectPython() {
  const versionPattern = /Python\s+(\d+)\.(\d+)\.(\d+)/i;

  // Extracts {major, minor, patch} from `--version` output, or null if it does not match.
  const readVersion = (text) => {
    const hit = String(text || "").match(versionPattern);
    if (!hit) {
      return null;
    }
    const [, major, minor, patch] = hit;
    return { major: Number(major), minor: Number(minor), patch: Number(patch) };
  };

  // AutoGluon and other pinned dependencies currently support 3.10-3.13 best.
  const supported = (version) =>
    Boolean(version) && version.major === 3 && version.minor >= 10 && version.minor <= 13;

  const override = process.env.DS_AGENT_PYTHON;
  const candidates = [
    ...(override ? [override] : []),
    "python3.11",
    "python3.12",
    "python3.10",
    "python3.13",
    "python3",
    "python",
  ];

  for (const candidate of candidates) {
    const found = readVersion(runCapture(candidate, ["--version"]));
    if (!supported(found)) {
      continue;
    }
    return {
      command: candidate,
      version: `${found.major}.${found.minor}.${found.patch}`,
    };
  }

  return null;
}
229
+
230
/**
 * Reads an environment variable as a boolean flag.
 *
 * @param {string} name - Environment variable name.
 * @returns {boolean} true when the value, lower-cased and trimmed, is one of
 *   "1", "true", "yes" or "on"; false otherwise (including when unset).
 */
function boolEnv(name) {
  const truthyValues = ["1", "true", "yes", "on"];
  const normalized = String(process.env[name] || "").toLowerCase().trim();
  return truthyValues.includes(normalized);
}
234
+
235
/**
 * Picks the sub-command out of the CLI arguments.
 *
 * @param {string[]} args - Arguments given to the launcher.
 * @returns {string|null} The first argument that is non-empty, is not "--" and
 *   does not start with "-"; null when there is none.
 */
function detectCommand(args) {
  const isCommandLike = (arg) => Boolean(arg) && arg !== "--" && !arg.startsWith("-");
  const firstMatch = args.find(isCommandLike);
  return firstMatch === undefined ? null : firstMatch;
}
247
+
248
/**
 * Works out which dependency groups the current invocation needs.
 *
 * @param {string[]} args - Arguments given to the launcher.
 * @returns {{command: (string|null), groups: string[]}} The detected sub-command and
 *   the groups to install, listed in GROUP_INSTALL_ORDER order. When
 *   DS_AGENT_INSTALL_ALL is enabled every group is returned; otherwise "core"
 *   plus the groups mapped to the command.
 */
function requestedGroups(args) {
  const command = detectCommand(args);

  if (boolEnv("DS_AGENT_INSTALL_ALL")) {
    return { command, groups: [...GROUP_INSTALL_ORDER] };
  }

  // Only honour keys defined on COMMAND_GROUPS itself. A plain lookup would also
  // resolve inherited names such as "constructor" or "toString" to non-iterable
  // values and crash here instead of letting the Python CLI report the bad command.
  const isKnownCommand =
    command !== null && Object.prototype.hasOwnProperty.call(COMMAND_GROUPS, command);
  const mapped = isKnownCommand ? COMMAND_GROUPS[command] : [];

  // For unknown commands or typos, keep installs minimal (core only).
  // This prevents long installs before Click/Typer returns a command error.
  const groups = new Set(["core", ...mapped]);

  return {
    command,
    groups: GROUP_INSTALL_ORDER.filter((name) => groups.has(name)),
  };
}
269
+
270
/**
 * Loads the persisted install state.
 *
 * @returns {*} The parsed JSON content of the state file, or null when the file
 *   is missing, unreadable or not valid JSON.
 */
function readState() {
  let parsed = null;
  try {
    if (fs.existsSync(stateFile)) {
      const text = fs.readFileSync(stateFile, "utf8");
      parsed = JSON.parse(text);
    }
  } catch (_err) {
    // Best effort: any read or parse failure is treated as "no state".
    parsed = null;
  }
  return parsed;
}
281
+
282
/**
 * Persists the install state as pretty-printed (2-space) JSON in the state file.
 *
 * @param {Object} state - State object to serialise.
 */
function writeState(state) {
  const serialized = JSON.stringify(state, null, 2);
  fs.writeFileSync(stateFile, serialized, "utf8");
}
285
+
286
/**
 * Installs a list of pip requirements into the virtual environment.
 *
 * Does nothing when the list is missing or empty. Otherwise logs a progress line
 * and runs `python -m pip install` with the pip environment from pipEnv().
 *
 * @param {string} venvPython - Path of the virtual environment's Python executable.
 * @param {string[]} packages - pip requirement strings.
 * @param {string} label - Name shown in the progress message.
 * @throws {Error} When the pip process cannot be started or exits non-zero.
 */
function installPackageSet(venvPython, packages, label) {
  if (!packages) {
    return;
  }
  if (packages.length === 0) {
    return;
  }

  log(`[ds-agent] Installing ${label} dependencies...`);

  const pipFlags = [
    "--disable-pip-version-check",
    "--progress-bar",
    "off",
    "--upgrade-strategy",
    "only-if-needed",
    "--no-input",
  ];
  const pipArgs = ["-m", "pip", "install", ...pipFlags, ...packages];

  runCommand(venvPython, pipArgs, { env: pipEnv() });
}
309
+
310
/**
 * Installs one dependency group unless the state already records its current version.
 *
 * After a successful install the group's version and an ISO timestamp are stored
 * on `state`, and the state is written to disk.
 *
 * @param {string} venvPython - Path of the virtual environment's Python executable.
 * @param {Object} state - Install state; mutated in place.
 * @param {string} groupName - Key into GROUP_DEFS; unknown names are ignored.
 * @throws {Error} When the pip install fails.
 */
function ensureGroupInstalled(venvPython, state, groupName) {
  const groupDef = GROUP_DEFS[groupName];
  if (!groupDef) {
    return;
  }

  if ((state.installedGroups || {})[groupName] === groupDef.version) {
    return;
  }

  installPackageSet(venvPython, groupDef.packages, groupName);

  // A state file may lack the installedGroups map (the check above already
  // tolerates that); create it so recording the result cannot throw after the
  // install itself has succeeded.
  if (!state.installedGroups || typeof state.installedGroups !== "object") {
    state.installedGroups = {};
  }
  state.installedGroups[groupName] = groupDef.version;
  state.lastInstalledAt = new Date().toISOString();
  writeState(state);
}
325
+
326
/**
 * Prepares the Python side of the launcher and returns the interpreter to use.
 *
 * Steps, in order: create the app home directory, locate a compatible system
 * Python, create the virtual environment if its interpreter is missing, load or
 * reset the install state, install the pip toolchain and the "core" group when
 * core is not recorded, install the groups needed by the requested command, and
 * (when DS_AGENT_INSTALL_ALL is enabled) install everything in requirements.txt.
 *
 * @param {string[]} args - Arguments given to the launcher.
 * @returns {string} Path of the virtual environment's Python executable.
 * @throws {Error} When no compatible Python is found or any child command fails.
 */
function bootstrap(args) {
  ensureDir(appHome);

  const python = detectPython();
  if (!python) {
    throw new Error(
      "No compatible Python found. Install Python 3.10-3.13 or set DS_AGENT_PYTHON to a compatible executable."
    );
  }

  const venvPython = getVenvPython();
  const venvReady = fs.existsSync(venvPython);
  if (!venvReady) {
    log("[ds-agent] Creating Python virtual environment...");
    runCommand(python.command, ["-m", "venv", venvDir]);
  }

  // Values the stored state must match; any difference discards the recorded installs.
  const fingerprint = {
    installSchemaVersion: INSTALL_SCHEMA_VERSION,
    packageVersion: pkg.version,
    pythonCommand: python.command,
    pythonVersion: python.version,
  };

  let state = readState();
  const hardResetNeeded =
    !state || Object.keys(fingerprint).some((key) => state[key] !== fingerprint[key]);

  if (hardResetNeeded) {
    state = {
      ...fingerprint,
      requirementsMtime: fs.existsSync(requirementsFile) ? fs.statSync(requirementsFile).mtimeMs : null,
      installedGroups: {},
      updatedAt: new Date().toISOString(),
    };
    writeState(state);
  }

  // Flags shared by the two pip invocations made directly from this function.
  const quietPipFlags = ["--disable-pip-version-check", "--progress-bar", "off"];

  const coreRecorded = (state.installedGroups || {}).core;
  if (!coreRecorded) {
    log("[ds-agent] Preparing Python toolchain...");
    const toolchainArgs = [
      "-m",
      "pip",
      "install",
      "--upgrade",
      ...quietPipFlags,
      "pip",
      "setuptools<81",
      "wheel",
    ];
    runCommand(venvPython, toolchainArgs, { env: pipEnv() });
    ensureGroupInstalled(venvPython, state, "core");
  }

  const request = requestedGroups(args);
  request.groups.forEach((groupName) => {
    ensureGroupInstalled(venvPython, state, groupName);
  });

  if (boolEnv("DS_AGENT_INSTALL_ALL")) {
    log("[ds-agent] DS_AGENT_INSTALL_ALL is enabled, syncing full requirements.txt...");
    const syncArgs = [
      "-m",
      "pip",
      "install",
      ...quietPipFlags,
      "--upgrade-strategy",
      "only-if-needed",
      "--no-input",
      "-r",
      requirementsFile,
    ];
    runCommand(venvPython, syncArgs, { env: pipEnv() });
  }

  state.updatedAt = new Date().toISOString();
  writeState(state);

  return venvPython;
}
416
+
417
/**
 * Bootstraps the Python environment, runs src/cli.py with the launcher's
 * arguments, and exits this process with the child's result.
 *
 * The child inherits stdio. Its environment is process.env plus: PIP_PROGRESS_BAR
 * and TOKENIZERS_PARALLELISM defaults, progress-bar switches (forced to "1" unless
 * DS_AGENT_VERBOSE_PROGRESS is enabled), and PYTHONPATH with the package root
 * placed first.
 *
 * Exit code: the child's exit status. When the child was terminated by a signal
 * it has no exit status; in that case the exit code is 128 + the signal number
 * (or 1 if the signal is not recognised) so a killed child is not reported as success.
 *
 * @throws {Error} When bootstrap fails or the child process cannot be started.
 */
function runCli() {
  const args = process.argv.slice(2);
  const venvPython = bootstrap(args);

  const disableTqdm = !boolEnv("DS_AGENT_VERBOSE_PROGRESS");

  const env = {
    ...process.env,
    PIP_PROGRESS_BAR: process.env.PIP_PROGRESS_BAR || "off",
    TQDM_DISABLE: disableTqdm ? "1" : process.env.TQDM_DISABLE || "0",
    HF_HUB_DISABLE_PROGRESS_BARS: disableTqdm ? "1" : process.env.HF_HUB_DISABLE_PROGRESS_BARS || "0",
    TOKENIZERS_PARALLELISM: process.env.TOKENIZERS_PARALLELISM || "false",
    // Put the package root first so the shipped modules are importable.
    PYTHONPATH: process.env.PYTHONPATH
      ? `${projectRoot}${path.delimiter}${process.env.PYTHONPATH}`
      : projectRoot,
  };

  const result = spawnSync(venvPython, [cliScript, ...args], {
    stdio: "inherit",
    env,
  });

  if (result.error) {
    throw result.error;
  }

  if (typeof result.status !== "number") {
    // spawnSync reports a null status when the child died from a signal.
    const signalNumber = os.constants.signals[result.signal];
    process.exit(signalNumber ? 128 + signalNumber : 1);
  }

  process.exit(result.status);
}
445
+
446
// Entry point: run the CLI; on any thrown error print a prefixed message to
// stderr and exit with status 1.
try {
  runCli();
} catch (err) {
  const detail = err.message || String(err);
  log(`[ds-agent] ${detail}`);
  process.exit(1);
}
@@ -0,0 +1,8 @@
1
from pathlib import Path

__version__ = "0.1.0"

# Allow ds_agent.* imports to resolve modules currently stored under src/.
# The sibling "src" directory (next to this package's directory) is added to the
# package search path only when it is present on disk.
_src_dir = Path(__file__).resolve().parents[1] / "src"
if _src_dir.exists():
    __path__.append(str(_src_dir))
package/package.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "name": "ds-agent-cli",
3
+ "version": "0.1.0",
4
+ "description": "DS-Agent CLI wrapper for npm global install",
5
+ "license": "MIT",
6
+ "publishConfig": {
7
+ "access": "public"
8
+ },
9
+ "bin": {
10
+ "ds-agent": "bin/ds-agent.js"
11
+ },
12
+ "files": [
13
+ "bin/ds-agent.js",
14
+ "src/**/*.py",
15
+ "ds_agent/__init__.py",
16
+ "requirements.txt",
17
+ "setup.py"
18
+ ],
19
+ "engines": {
20
+ "node": ">=18"
21
+ },
22
+ "keywords": [
23
+ "data-science",
24
+ "cli",
25
+ "python",
26
+ "ml"
27
+ ]
28
+ }
@@ -0,0 +1,126 @@
1
+ # Core Dependencies
2
+ groq>=0.13.0 # Updated for httpx compatibility
3
+ mistralai>=0.0.7 # Mistral AI - 1B tokens/month (corrected version)
4
+ python-dotenv==1.0.0
5
+
6
+ # Data Processing
7
+ polars>=0.20.3
8
+ duckdb>=0.10.0
9
+ pyarrow>=14.0.1
10
+ pandas>=2.2.0 # Updated for Python 3.13 compatibility
11
+
12
+ # Machine Learning
13
+ scikit-learn>=1.4.0
14
+ xgboost>=2.0.3
15
+ lightgbm>=4.6.0
16
+ catboost>=1.2.8
17
+ optuna>=3.5.0
18
+
19
+ # AutoGluon AutoML (modular install - only tabular + timeseries)
20
+ autogluon.tabular>=1.2
21
+ autogluon.timeseries>=1.2
22
+ holidays>=0.40 # Holiday calendar for time series covariates
23
+
24
+ # Explainability
25
+ shap>=0.44.1
26
+
27
+ # Advanced ML Tools
28
+ imbalanced-learn>=0.12.0
29
+
30
+ # Statistical Analysis
31
+ scipy>=1.11.4
32
+ statsmodels>=0.14.1
33
+
34
+ # Visualization
35
+ matplotlib>=3.8.2
36
+ seaborn>=0.13.1
37
+ plotly>=5.18.0 # Interactive visualizations
38
+
39
+ # EDA Report Generation
40
+ ydata-profiling>=4.17.0 # Comprehensive automated EDA reports with Python 3.13 compatibility
41
+ sweetviz>=2.3.0 # Interactive EDA with comparison support
42
+
43
+ # User Interface
44
+ # gradio>=5.49.1
45
+
46
+ # REST API (Cloud Run)
47
+ fastapi>=0.109.0
48
+ uvicorn>=0.25.0
49
+ python-multipart>=0.0.6 # For file uploads
50
+
51
+ # Text Processing
52
+ textblob>=0.17.1
53
+ vaderSentiment>=3.3.2 # Rule-based sentiment analysis (fast, no GPU needed)
54
+
55
+ # Time Series Forecasting
56
+ prophet>=1.1.5
57
+ holidays>=0.38
58
+ pmdarima>=2.0 # Auto ARIMA (auto_arima) for optimal order selection
59
+
60
+ # MLOps & Explainability
61
+ lime==0.2.0.1
62
+ fairlearn==0.10.0
63
+ evidently>=0.4.0 # Production-grade data drift detection & monitoring
64
+ dtreeviz>=2.2 # Decision tree visualization
65
+
66
+ # NLP & Semantic Layer (REQUIRED for column understanding and agent routing)
67
+ sentence-transformers>=2.2.2 # For semantic column embeddings and agent routing
68
+ tiktoken>=0.5.2 # For accurate token counting in budget management
69
+
70
+ # Advanced NLP (Optional - Uncomment for advanced NLP tools)
71
+ # These are optional but recommended for full NLP capabilities
72
+ # spacy==3.7.2 # For named entity recognition (perform_named_entity_recognition)
73
+ # transformers==4.35.2 # For transformer-based sentiment & topic modeling
74
+ # bertopic==0.16.0 # For advanced topic modeling
75
+
76
+ # Computer Vision (Optional - Uncomment for CV tools)
77
+ # These are optional but recommended for full CV capabilities
78
+ # torch==2.1.0 # For CNN-based image feature extraction
79
+ # torchvision==0.16.0 # For pre-trained models (ResNet, EfficientNet, VGG)
80
+ Pillow>=11.0.0 # For basic image processing
81
+ #opencv-python==4.8.1 # For advanced image processing & color features
82
+
83
+ # Business Intelligence (Optional - Uncomment for advanced BI tools)
84
+ # These are optional but add specialized capabilities
85
+ # lifetimes==0.11.3 # For customer lifetime value modeling
86
+ # econml==0.15.0 # For advanced causal inference
87
+ dowhy>=0.11 # Formal causal inference with DAG-based reasoning
88
+
89
+ # Data Quality & Validation
90
+ cleanlab>=2.6 # Label error detection using confident learning
91
+ pandera>=0.18 # Schema-based DataFrame validation
92
+ boruta>=0.3 # All-relevant feature selection (BorutaPy)
93
+
94
+ # CLI & UI
95
+ typer>=0.12.3
96
+ rich==13.7.0
97
+ tqdm==4.66.1
98
+
99
+ # Utilities
100
+ pydantic>=2.11.7
101
+ joblib==1.3.2
102
+
103
+ # Google Cloud Integration
104
+ google-cloud-bigquery==3.14.1
105
+ google-cloud-storage==2.14.0 # For GCS artifact storage
106
+ google-auth==2.25.2
107
+ google-generativeai==0.3.2 # For Gemini LLM support
108
+
109
+ # Cloudflare R2 Storage (S3-compatible)
110
+ boto3>=1.28.0 # For R2 file storage
111
+
112
+ # HuggingFace Storage Integration
113
+ huggingface_hub>=0.20.0 # For storing user artifacts on HuggingFace
114
+
115
+ # Supabase Backend
116
+ supabase==2.28.3 # For user file metadata
117
+
118
+ # Testing
119
+ pytest==7.4.3
120
+ pytest-mock==3.12.0
121
+ pytest-cov==4.1.0
122
+
123
+ # Development
124
+ black==23.12.1
125
+ flake8==7.0.0
126
+ mypy==1.8.0
package/setup.py ADDED
@@ -0,0 +1,35 @@
1
from setuptools import find_packages, setup

# Distributions declared as install-time dependencies (unpinned).
INSTALL_REQUIRES = [
    "typer",
    "rich",
    "polars",
    "pandas",
    "matplotlib",
    "seaborn",
    "plotly",
    "groq",
    "python-dotenv",
    "gradio",
    "ydata-profiling",
    "sweetviz",
    "scikit-learn",
    "optuna",
    "xgboost",
    "prophet",
    "statsmodels",
    "imbalanced-learn",
]

# Installs a `ds-agent` console command bound to `main` in `ds_agent.cli`.
ENTRY_POINTS = {
    "console_scripts": [
        "ds-agent=ds_agent.cli:main",
    ]
}


setup(
    name="ds-agent",
    version="0.1.0",
    description="Data Science Agent CLI",
    packages=find_packages(),
    include_package_data=True,
    install_requires=INSTALL_REQUIRES,
    entry_points=ENTRY_POINTS,
)
@@ -0,0 +1,7 @@
1
+ """Data Science Copilot - AI-powered data science automation."""
2
+
3
__version__ = "0.1.0"

# Importing the package eagerly imports the orchestrator module so that
# DataScienceCopilot is available directly from the package namespace.
from .orchestrator import DataScienceCopilot

# Public names exported by `from <package> import *`.
__all__ = [
    "DataScienceCopilot",
]