ds-agent-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ds-agent.js +451 -0
- package/ds_agent/__init__.py +8 -0
- package/package.json +28 -0
- package/requirements.txt +126 -0
- package/setup.py +35 -0
- package/src/__init__.py +7 -0
- package/src/_compress_tool_result.py +118 -0
- package/src/api/__init__.py +4 -0
- package/src/api/app.py +1626 -0
- package/src/cache/__init__.py +5 -0
- package/src/cache/cache_manager.py +561 -0
- package/src/cli.py +2886 -0
- package/src/dynamic_prompts.py +281 -0
- package/src/orchestrator.py +4799 -0
- package/src/progress_manager.py +139 -0
- package/src/reasoning/__init__.py +332 -0
- package/src/reasoning/business_summary.py +431 -0
- package/src/reasoning/data_understanding.py +356 -0
- package/src/reasoning/model_explanation.py +383 -0
- package/src/reasoning/reasoning_trace.py +239 -0
- package/src/registry/__init__.py +3 -0
- package/src/registry/tools_registry.py +3 -0
- package/src/session_memory.py +448 -0
- package/src/session_store.py +370 -0
- package/src/storage/__init__.py +19 -0
- package/src/storage/artifact_store.py +620 -0
- package/src/storage/helpers.py +116 -0
- package/src/storage/huggingface_storage.py +694 -0
- package/src/storage/r2_storage.py +0 -0
- package/src/storage/user_files_service.py +288 -0
- package/src/tools/__init__.py +335 -0
- package/src/tools/advanced_analysis.py +823 -0
- package/src/tools/advanced_feature_engineering.py +708 -0
- package/src/tools/advanced_insights.py +578 -0
- package/src/tools/advanced_preprocessing.py +549 -0
- package/src/tools/advanced_training.py +906 -0
- package/src/tools/agent_tool_mapping.py +326 -0
- package/src/tools/auto_pipeline.py +420 -0
- package/src/tools/autogluon_training.py +1480 -0
- package/src/tools/business_intelligence.py +860 -0
- package/src/tools/cloud_data_sources.py +581 -0
- package/src/tools/code_interpreter.py +390 -0
- package/src/tools/computer_vision.py +614 -0
- package/src/tools/data_cleaning.py +614 -0
- package/src/tools/data_profiling.py +593 -0
- package/src/tools/data_type_conversion.py +268 -0
- package/src/tools/data_wrangling.py +433 -0
- package/src/tools/eda_reports.py +284 -0
- package/src/tools/enhanced_feature_engineering.py +241 -0
- package/src/tools/feature_engineering.py +302 -0
- package/src/tools/matplotlib_visualizations.py +1327 -0
- package/src/tools/model_training.py +520 -0
- package/src/tools/nlp_text_analytics.py +761 -0
- package/src/tools/plotly_visualizations.py +497 -0
- package/src/tools/production_mlops.py +852 -0
- package/src/tools/time_series.py +507 -0
- package/src/tools/tools_registry.py +2133 -0
- package/src/tools/visualization_engine.py +559 -0
- package/src/utils/__init__.py +42 -0
- package/src/utils/error_recovery.py +313 -0
- package/src/utils/parallel_executor.py +402 -0
- package/src/utils/polars_helpers.py +248 -0
- package/src/utils/schema_extraction.py +132 -0
- package/src/utils/semantic_layer.py +392 -0
- package/src/utils/token_budget.py +411 -0
- package/src/utils/validation.py +377 -0
- package/src/workflow_state.py +154 -0
package/bin/ds-agent.js
ADDED
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
"use strict";
|
|
4
|
+
|
|
5
|
+
const fs = require("fs");
|
|
6
|
+
const os = require("os");
|
|
7
|
+
const path = require("path");
|
|
8
|
+
const { spawnSync } = require("child_process");
|
|
9
|
+
|
|
10
|
+
const pkg = require("../package.json");
|
|
11
|
+
const projectRoot = path.resolve(__dirname, "..");
|
|
12
|
+
const appHome = process.env.DS_AGENT_HOME || path.join(os.homedir(), ".ds-agent");
|
|
13
|
+
const venvDir = path.join(appHome, "venv");
|
|
14
|
+
const stateFile = path.join(appHome, "install-state.json");
|
|
15
|
+
const requirementsFile = path.join(projectRoot, "requirements.txt");
|
|
16
|
+
const cliScript = path.join(projectRoot, "src", "cli.py");
|
|
17
|
+
const INSTALL_SCHEMA_VERSION = 3;
|
|
18
|
+
|
|
19
|
+
// Optional dependency groups required by the commands that map to the full
// set below (analyze, pipeline, chat and the cache commands).
const HEAVY_GROUPS = [
  "runtime",
  "data",
  "frame",
  "plot",
  "forecast",
  "nlp",
  "bi",
  "ml",
  "report",
  "automl",
];

// Maps each CLI sub-command to the optional dependency groups that must be
// installed before it runs. The "core" group is not listed here; it is added
// by requestedGroups(). Commands mapped to [] need nothing beyond "core".
const COMMAND_GROUPS = {
  // Core-only commands.
  quickstart: [],

  // Commands that need every optional group (all share the same array).
  analyze: HEAVY_GROUPS,
  pipeline: HEAVY_GROUPS,
  chat: HEAVY_GROUPS,
  "cache-stats": HEAVY_GROUPS,
  "clear-cache": HEAVY_GROUPS,

  // Modelling commands.
  tune: ["data", "frame", "ml", "plot", "automl"],
  train: ["data", "frame", "ml", "plot"],

  // Data preparation and inspection commands.
  clean: ["data", "frame"],
  profile: ["data", "frame"],
  eda: ["data", "frame", "report", "plot"],
  report: ["data", "report"],
  compare: ["data", "frame", "report", "plot"],

  // Specialised analysis commands.
  bi: ["data", "frame", "bi", "plot"],
  plot: ["data", "frame", "plot"],
  forecast: ["data", "forecast", "plot"],
  nlp: ["data", "nlp"],

  // Core-only command.
  sessions: [],
};
|
|
41
|
+
|
|
42
|
+
// Definition of every installable dependency group.
//   version  - compared against the version recorded in install-state.json by
//              ensureGroupInstalled(); a mismatch triggers a (re)install.
//   packages - pip requirement specifiers passed verbatim to `pip install`.
const GROUP_DEFS = {
  // CLI framework and configuration basics.
  core: {
    version: 2,
    packages: [
      "typer>=0.12.3,<0.16.0",
      "click>=8.1.7,<8.2.0",
      "rich==13.7.0",
      "python-dotenv==1.0.0",
      "pydantic>=2.11.7",
      "setuptools<81",
    ],
  },

  // Numeric and tabular foundations.
  data: {
    version: 2,
    packages: ["numpy>=1.26.0,<2.3.0", "pandas>=2.2.0,<3.0.0"],
  },

  // Columnar engines and serialization helpers.
  frame: {
    version: 1,
    packages: ["polars>=0.20.3", "pyarrow>=14.0.1", "duckdb>=0.10.0", "joblib==1.3.2"],
  },

  // Automated EDA report generators.
  report: {
    version: 1,
    packages: ["ydata-profiling>=4.17.0", "sweetviz>=2.3.0"],
  },

  // Plotting libraries.
  plot: {
    version: 1,
    packages: ["matplotlib>=3.8.2", "seaborn>=0.13.1", "plotly>=5.18.0"],
  },

  // Time-series forecasting.
  forecast: {
    version: 1,
    packages: ["prophet>=1.1.5", "holidays>=0.40", "pmdarima>=2.0", "statsmodels>=0.14.1"],
  },

  // Text analytics and token counting.
  nlp: {
    version: 1,
    packages: [
      "textblob>=0.17.1",
      "vaderSentiment>=3.3.2",
      "sentence-transformers>=2.2.2",
      "tiktoken>=0.5.2",
    ],
  },

  // Statistics and causal inference.
  bi: {
    version: 1,
    packages: ["scipy>=1.11.4", "statsmodels>=0.14.1", "dowhy>=0.11"],
  },

  // Model training, tuning, explainability and data validation.
  ml: {
    version: 1,
    packages: [
      "scikit-learn>=1.4.0",
      "xgboost>=2.0.3",
      "lightgbm>=4.6.0",
      "catboost>=1.2.8",
      "optuna>=3.5.0",
      "shap>=0.44.1",
      "imbalanced-learn>=0.12.0",
      "cleanlab>=2.6",
      "pandera>=0.18",
      "boruta>=0.3",
    ],
  },

  // LLM clients, storage backends, API server and monitoring libraries.
  runtime: {
    version: 1,
    packages: [
      "groq>=0.13.0",
      "mistralai>=0.0.7",
      "supabase==2.28.3",
      "boto3>=1.28.0",
      "huggingface_hub>=0.20.0",
      "fastapi>=0.109.0",
      "uvicorn>=0.25.0",
      "python-multipart>=0.0.6",
      "google-cloud-bigquery==3.14.1",
      "google-cloud-storage==2.14.0",
      "google-auth==2.25.2",
      "google-generativeai==0.3.2",
      "lime==0.2.0.1",
      "fairlearn==0.10.0",
      "evidently>=0.4.0",
      "dtreeviz>=2.2",
      "Pillow>=11.0.0",
    ],
  },

  // AutoGluon (tabular and time-series modules only).
  automl: {
    version: 1,
    packages: ["autogluon.tabular>=1.2", "autogluon.timeseries>=1.2", "holidays>=0.40"],
  },
};
|
|
137
|
+
|
|
138
|
+
const GROUP_INSTALL_ORDER = ["core", "runtime", "data", "frame", "plot", "forecast", "nlp", "bi", "ml", "report", "automl"];
|
|
139
|
+
|
|
140
|
+
/**
 * Writes one diagnostic line to stderr.
 *
 * @param {*} msg - Value to print; it is interpolated into a string and
 *   terminated with a newline.
 */
function log(msg) {
  const line = `${msg}\n`;
  process.stderr.write(line);
}
|
|
143
|
+
|
|
144
|
+
/**
 * Builds the environment used for pip invocations.
 *
 * @returns {Object} A copy of `process.env` with pip's progress bar turned
 *   off and its self-update check disabled. `process.env` is not modified.
 */
function pipEnv() {
  const pipOverrides = {
    PIP_PROGRESS_BAR: "off",
    PIP_DISABLE_PIP_VERSION_CHECK: "1",
  };
  return Object.assign({}, process.env, pipOverrides);
}
|
|
151
|
+
|
|
152
|
+
/**
 * Runs a child process synchronously and fails loudly when it does not
 * succeed.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {Object} [options] - Extra `spawnSync` options; they override the
 *   default of inheriting the parent's stdio.
 * @throws {Error} The spawn error reported by `spawnSync`, or a
 *   "Command failed" error when the exit status is anything other than 0.
 */
function runCommand(command, args, options = {}) {
  const spawnOptions = Object.assign({ stdio: "inherit" }, options);
  const { error, status } = spawnSync(command, args, spawnOptions);

  if (error) {
    throw error;
  }
  if (status === 0) {
    return;
  }
  throw new Error(`Command failed: ${command} ${args.join(" ")}`);
}
|
|
166
|
+
|
|
167
|
+
/**
 * Runs a child process synchronously and returns what it printed.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {string|null} Trimmed stdout, or trimmed stderr when stdout is
 *   empty; `null` when the process could not be spawned or exited with a
 *   status other than 0.
 */
function runCapture(command, args) {
  const { error, status, stdout, stderr } = spawnSync(command, args, { encoding: "utf8" });

  const failed = Boolean(error) || status !== 0;
  if (failed) {
    return null;
  }

  const text = stdout || stderr || "";
  return text.trim();
}
|
|
174
|
+
|
|
175
|
+
/**
 * Creates a directory, including any missing parent directories.
 *
 * @param {string} dirPath - Directory to create.
 */
function ensureDir(dirPath) {
  const mkdirOptions = { recursive: true };
  fs.mkdirSync(dirPath, mkdirOptions);
}
|
|
178
|
+
|
|
179
|
+
/**
 * Returns the path of the Python interpreter inside the managed virtual
 * environment (`venvDir`).
 *
 * @returns {string} `<venvDir>/Scripts/python.exe` on win32, otherwise
 *   `<venvDir>/bin/python`.
 */
function getVenvPython() {
  const onWindows = process.platform === "win32";
  return onWindows
    ? path.join(venvDir, "Scripts", "python.exe")
    : path.join(venvDir, "bin", "python");
}
|
|
185
|
+
|
|
186
|
+
/**
 * Finds the first usable Python interpreter.
 *
 * Candidates are probed in order with `<candidate> --version`: the
 * DS_AGENT_PYTHON environment variable (when set), then python3.11,
 * python3.12, python3.10, python3.13, python3 and python. A candidate is
 * accepted when its reported version is 3.10 through 3.13.
 *
 * @returns {{command: string, version: string}|null} The accepted command and
 *   its "major.minor.patch" version, or `null` when no candidate qualifies.
 */
function detectPython() {
  const versionPattern = /Python\s+(\d+)\.(\d+)\.(\d+)/i;
  const override = process.env.DS_AGENT_PYTHON;
  const candidates = [
    ...(override ? [override] : []),
    "python3.11",
    "python3.12",
    "python3.10",
    "python3.13",
    "python3",
    "python",
  ];

  for (const candidate of candidates) {
    const output = runCapture(candidate, ["--version"]);
    const match = String(output || "").match(versionPattern);
    if (!match) {
      continue;
    }

    const [major, minor, patch] = match.slice(1).map(Number);
    // AutoGluon and other pinned dependencies currently support 3.10-3.13 best.
    const supported = major === 3 && minor >= 10 && minor <= 13;
    if (supported) {
      return { command: candidate, version: `${major}.${minor}.${patch}` };
    }
  }

  return null;
}
|
|
229
|
+
|
|
230
|
+
/**
 * Interprets an environment variable as a boolean flag.
 *
 * @param {string} name - Environment variable name.
 * @returns {boolean} `true` when the value, lower-cased and trimmed, is one
 *   of "1", "true", "yes" or "on"; `false` otherwise (including when unset).
 */
function boolEnv(name) {
  const truthyValues = ["1", "true", "yes", "on"];
  const normalized = String(process.env[name] || "").toLowerCase().trim();
  return truthyValues.includes(normalized);
}
|
|
234
|
+
|
|
235
|
+
/**
 * Picks the sub-command out of the raw CLI arguments.
 *
 * @param {string[]} args - Arguments as passed on the command line.
 * @returns {string|null} The first argument that is non-empty, is not the
 *   "--" separator and does not start with "-"; `null` when there is none.
 */
function detectCommand(args) {
  const isCommandLike = (arg) => Boolean(arg) && arg !== "--" && !arg.startsWith("-");
  const found = args.find(isCommandLike);
  return found === undefined ? null : found;
}
|
|
247
|
+
|
|
248
|
+
/**
 * Works out which dependency groups the current invocation needs.
 *
 * @param {string[]} args - Raw CLI arguments.
 * @returns {{command: (string|null), groups: string[]}} The detected
 *   sub-command and the groups to install, ordered as in GROUP_INSTALL_ORDER.
 *   When DS_AGENT_INSTALL_ALL is enabled every group is returned; otherwise
 *   "core" plus the groups mapped to the command in COMMAND_GROUPS.
 */
function requestedGroups(args) {
  const command = detectCommand(args);

  if (boolEnv("DS_AGENT_INSTALL_ALL")) {
    return { command, groups: [...GROUP_INSTALL_ORDER] };
  }

  // Look the command up as an OWN property only. A plain `COMMAND_GROUPS[command]`
  // also resolves inherited keys such as "constructor" or "toString", which
  // yields a non-iterable value and crashes instead of falling back to core.
  const isKnownCommand = Object.prototype.hasOwnProperty.call(COMMAND_GROUPS, command);
  const mapped = isKnownCommand ? COMMAND_GROUPS[command] || [] : [];

  // For unknown commands or typos, keep installs minimal (core only).
  // This prevents long installs before Click/Typer returns a command error.
  const groups = new Set(["core", ...mapped]);

  return {
    command,
    groups: GROUP_INSTALL_ORDER.filter((name) => groups.has(name)),
  };
}
|
|
269
|
+
|
|
270
|
+
/**
 * Loads the persisted install state from `stateFile`.
 *
 * @returns {*} The parsed JSON content, or `null` when the file does not
 *   exist, cannot be read, or does not contain valid JSON.
 */
function readState() {
  let parsed = null;
  try {
    if (fs.existsSync(stateFile)) {
      parsed = JSON.parse(fs.readFileSync(stateFile, "utf8"));
    }
  } catch (_err) {
    // Best effort: an unreadable or corrupt state file is treated as absent.
    parsed = null;
  }
  return parsed;
}
|
|
281
|
+
|
|
282
|
+
/**
 * Persists the install state to `stateFile` as 2-space-indented JSON.
 *
 * @param {Object} state - State object to serialize.
 */
function writeState(state) {
  const serialized = JSON.stringify(state, null, 2);
  fs.writeFileSync(stateFile, serialized, "utf8");
}
|
|
285
|
+
|
|
286
|
+
/**
 * Installs a list of pip requirement specifiers into the virtual environment.
 *
 * Does nothing when `packages` is missing or empty. Otherwise logs a progress
 * line and runs `python -m pip install` through runCommand(), which throws if
 * pip fails.
 *
 * @param {string} venvPython - Path of the venv's Python interpreter.
 * @param {string[]} packages - Requirement specifiers to install.
 * @param {string} label - Name shown in the progress message.
 */
function installPackageSet(venvPython, packages, label) {
  const nothingToInstall = !packages || packages.length === 0;
  if (nothingToInstall) {
    return;
  }

  const pipFlags = [
    "--disable-pip-version-check",
    "--progress-bar",
    "off",
    "--upgrade-strategy",
    "only-if-needed",
    "--no-input",
  ];
  const pipArgs = ["-m", "pip", "install", ...pipFlags, ...packages];

  log(`[ds-agent] Installing ${label} dependencies...`);
  runCommand(venvPython, pipArgs, { env: pipEnv() });
}
|
|
309
|
+
|
|
310
|
+
/**
 * Installs one dependency group unless the recorded state says the current
 * version of that group is already installed.
 *
 * After a successful install the group's version and a timestamp are stored
 * on `state` (mutated in place) and the state is written to disk.
 *
 * @param {string} venvPython - Path of the venv's Python interpreter.
 * @param {Object} state - Install state; `installedGroups` maps group name to
 *   installed version and is created here when missing.
 * @param {string} groupName - Key into GROUP_DEFS; unknown names are ignored.
 * @throws {Error} Whatever installPackageSet() throws when pip fails; in that
 *   case the state is left unchanged.
 */
function ensureGroupInstalled(venvPython, state, groupName) {
  const groupDef = GROUP_DEFS[groupName];
  if (!groupDef) {
    return;
  }

  // A state loaded from disk may lack a usable installedGroups map. Normalise
  // it up front so the bookkeeping below cannot throw after pip has already
  // done its work.
  if (!state.installedGroups || typeof state.installedGroups !== "object") {
    state.installedGroups = {};
  }

  if (state.installedGroups[groupName] === groupDef.version) {
    return;
  }

  installPackageSet(venvPython, groupDef.packages, groupName);
  state.installedGroups[groupName] = groupDef.version;
  state.lastInstalledAt = new Date().toISOString();
  writeState(state);
}
|
|
325
|
+
|
|
326
|
+
/**
 * Prepares the Python environment needed to run the CLI.
 *
 * Steps: create the app home directory, locate a compatible system Python,
 * create the virtual environment when its interpreter is missing, load (or
 * reset) the install state, make sure pip tooling and the "core" group are
 * present, install the groups the requested command needs, and optionally
 * sync the full requirements.txt when DS_AGENT_INSTALL_ALL is enabled.
 *
 * @param {string[]} args - Raw CLI arguments; used to decide which
 *   dependency groups are required.
 * @returns {string} Path of the virtual environment's Python interpreter.
 * @throws {Error} When no compatible Python is found or any setup command
 *   fails.
 */
function bootstrap(args) {
  ensureDir(appHome);

  const python = detectPython();
  if (!python) {
    throw new Error(
      "No compatible Python found. Install Python 3.10-3.13 or set DS_AGENT_PYTHON to a compatible executable."
    );
  }

  const venvPython = getVenvPython();

  // Remember whether the venv had to be created: a fresh venv contains no
  // packages, so any previously recorded install state must not be trusted.
  const venvCreated = !fs.existsSync(venvPython);
  if (venvCreated) {
    log("[ds-agent] Creating Python virtual environment...");
    runCommand(python.command, ["-m", "venv", venvDir]);
  }

  let state = readState();
  const hardResetNeeded =
    venvCreated ||
    !state ||
    state.installSchemaVersion !== INSTALL_SCHEMA_VERSION ||
    state.packageVersion !== pkg.version ||
    state.pythonCommand !== python.command ||
    state.pythonVersion !== python.version ||
    // A state without a usable group map cannot be updated safely.
    !state.installedGroups ||
    typeof state.installedGroups !== "object";

  // NOTE(review): when only the detected Python changes, the existing venv is
  // kept and packages are reinstalled into it; it is not rebuilt with the new
  // interpreter. Confirm whether the venv should be recreated in that case.
  if (hardResetNeeded) {
    state = {
      installSchemaVersion: INSTALL_SCHEMA_VERSION,
      packageVersion: pkg.version,
      pythonCommand: python.command,
      pythonVersion: python.version,
      requirementsMtime: fs.existsSync(requirementsFile) ? fs.statSync(requirementsFile).mtimeMs : null,
      installedGroups: {},
      updatedAt: new Date().toISOString(),
    };
    writeState(state);
  }

  // First-time setup: upgrade the packaging toolchain before installing "core".
  if (!state.installedGroups.core) {
    log("[ds-agent] Preparing Python toolchain...");
    runCommand(
      venvPython,
      [
        "-m",
        "pip",
        "install",
        "--upgrade",
        "--disable-pip-version-check",
        "--progress-bar",
        "off",
        "pip",
        "setuptools<81",
        "wheel",
      ],
      { env: pipEnv() }
    );
    ensureGroupInstalled(venvPython, state, "core");
  }

  const request = requestedGroups(args);
  for (const groupName of request.groups) {
    ensureGroupInstalled(venvPython, state, groupName);
  }

  if (boolEnv("DS_AGENT_INSTALL_ALL")) {
    log("[ds-agent] DS_AGENT_INSTALL_ALL is enabled, syncing full requirements.txt...");
    runCommand(
      venvPython,
      [
        "-m",
        "pip",
        "install",
        "--disable-pip-version-check",
        "--progress-bar",
        "off",
        "--upgrade-strategy",
        "only-if-needed",
        "--no-input",
        "-r",
        requirementsFile,
      ],
      { env: pipEnv() }
    );
  }

  state.updatedAt = new Date().toISOString();
  writeState(state);

  return venvPython;
}
|
|
416
|
+
|
|
417
|
+
/**
 * Entry point: prepares the Python environment, runs the Python CLI script
 * with the user's arguments and exits with the child's exit code.
 *
 * The child inherits stdio. Progress bars from tqdm and the Hugging Face hub
 * are disabled unless DS_AGENT_VERBOSE_PROGRESS is enabled, and the package
 * root is prepended to PYTHONPATH.
 *
 * @throws {Error} When bootstrap fails or the child cannot be spawned.
 */
function runCli() {
  const args = process.argv.slice(2);
  const venvPython = bootstrap(args);

  const disableTqdm = !boolEnv("DS_AGENT_VERBOSE_PROGRESS");

  const env = {
    ...process.env,
    PIP_PROGRESS_BAR: process.env.PIP_PROGRESS_BAR || "off",
    TQDM_DISABLE: disableTqdm ? "1" : process.env.TQDM_DISABLE || "0",
    HF_HUB_DISABLE_PROGRESS_BARS: disableTqdm ? "1" : process.env.HF_HUB_DISABLE_PROGRESS_BARS || "0",
    TOKENIZERS_PARALLELISM: process.env.TOKENIZERS_PARALLELISM || "false",
    PYTHONPATH: process.env.PYTHONPATH
      ? `${projectRoot}${path.delimiter}${process.env.PYTHONPATH}`
      : projectRoot,
  };

  const result = spawnSync(venvPython, [cliScript, ...args], {
    stdio: "inherit",
    env,
  });

  if (result.error) {
    throw result.error;
  }

  // spawnSync reports `status: null` when the child was terminated by a
  // signal. Treating that as 0 would report success for a crashed run, so map
  // it to the conventional 128 + signal number (or 1 if the signal is unknown).
  let exitCode = result.status;
  if (exitCode === null || exitCode === undefined) {
    const signalNumber = os.constants.signals[result.signal];
    exitCode = signalNumber ? 128 + signalNumber : 1;
  }

  process.exit(exitCode);
}
|
|
445
|
+
|
|
446
|
+
// Script entry: run the CLI and turn any thrown error into a one-line
// "[ds-agent] ..." message on stderr followed by exit code 1.
try {
  runCli();
} catch (err) {
  const detail = err.message ? err.message : String(err);
  log(`[ds-agent] ${detail}`);
  process.exit(1);
}
|
package/package.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "ds-agent-cli",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "DS-Agent CLI wrapper for npm global install",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"publishConfig": {
|
|
7
|
+
"access": "public"
|
|
8
|
+
},
|
|
9
|
+
"bin": {
|
|
10
|
+
"ds-agent": "bin/ds-agent.js"
|
|
11
|
+
},
|
|
12
|
+
"files": [
|
|
13
|
+
"bin/ds-agent.js",
|
|
14
|
+
"src/**/*.py",
|
|
15
|
+
"ds_agent/__init__.py",
|
|
16
|
+
"requirements.txt",
|
|
17
|
+
"setup.py"
|
|
18
|
+
],
|
|
19
|
+
"engines": {
|
|
20
|
+
"node": ">=18"
|
|
21
|
+
},
|
|
22
|
+
"keywords": [
|
|
23
|
+
"data-science",
|
|
24
|
+
"cli",
|
|
25
|
+
"python",
|
|
26
|
+
"ml"
|
|
27
|
+
]
|
|
28
|
+
}
|
package/requirements.txt
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# Core Dependencies
|
|
2
|
+
groq>=0.13.0 # Updated for httpx compatibility
|
|
3
|
+
mistralai>=0.0.7 # Mistral AI - 1B tokens/month (corrected version)
|
|
4
|
+
python-dotenv==1.0.0
|
|
5
|
+
|
|
6
|
+
# Data Processing
|
|
7
|
+
polars>=0.20.3
|
|
8
|
+
duckdb>=0.10.0
|
|
9
|
+
pyarrow>=14.0.1
|
|
10
|
+
pandas>=2.2.0 # Updated for Python 3.13 compatibility
|
|
11
|
+
|
|
12
|
+
# Machine Learning
|
|
13
|
+
scikit-learn>=1.4.0
|
|
14
|
+
xgboost>=2.0.3
|
|
15
|
+
lightgbm>=4.6.0
|
|
16
|
+
catboost>=1.2.8
|
|
17
|
+
optuna>=3.5.0
|
|
18
|
+
|
|
19
|
+
# AutoGluon AutoML (modular install - only tabular + timeseries)
|
|
20
|
+
autogluon.tabular>=1.2
|
|
21
|
+
autogluon.timeseries>=1.2
|
|
22
|
+
holidays>=0.40 # Holiday calendar for time series covariates
|
|
23
|
+
|
|
24
|
+
# Explainability
|
|
25
|
+
shap>=0.44.1
|
|
26
|
+
|
|
27
|
+
# Advanced ML Tools
|
|
28
|
+
imbalanced-learn>=0.12.0
|
|
29
|
+
|
|
30
|
+
# Statistical Analysis
|
|
31
|
+
scipy>=1.11.4
|
|
32
|
+
statsmodels>=0.14.1
|
|
33
|
+
|
|
34
|
+
# Visualization
|
|
35
|
+
matplotlib>=3.8.2
|
|
36
|
+
seaborn>=0.13.1
|
|
37
|
+
plotly>=5.18.0 # Interactive visualizations
|
|
38
|
+
|
|
39
|
+
# EDA Report Generation
|
|
40
|
+
ydata-profiling>=4.17.0 # Comprehensive automated EDA reports with Python 3.13 compatibility
|
|
41
|
+
sweetviz>=2.3.0 # Interactive EDA with comparison support
|
|
42
|
+
|
|
43
|
+
# User Interface
|
|
44
|
+
# gradio>=5.49.1
|
|
45
|
+
|
|
46
|
+
# REST API (Cloud Run)
|
|
47
|
+
fastapi>=0.109.0
|
|
48
|
+
uvicorn>=0.25.0
|
|
49
|
+
python-multipart>=0.0.6 # For file uploads
|
|
50
|
+
|
|
51
|
+
# Text Processing
|
|
52
|
+
textblob>=0.17.1
|
|
53
|
+
vaderSentiment>=3.3.2 # Rule-based sentiment analysis (fast, no GPU needed)
|
|
54
|
+
|
|
55
|
+
# Time Series Forecasting
|
|
56
|
+
prophet>=1.1.5
|
|
57
|
+
holidays>=0.38
|
|
58
|
+
pmdarima>=2.0 # Auto ARIMA (auto_arima) for optimal order selection
|
|
59
|
+
|
|
60
|
+
# MLOps & Explainability
|
|
61
|
+
lime==0.2.0.1
|
|
62
|
+
fairlearn==0.10.0
|
|
63
|
+
evidently>=0.4.0 # Production-grade data drift detection & monitoring
|
|
64
|
+
dtreeviz>=2.2 # Decision tree visualization
|
|
65
|
+
|
|
66
|
+
# NLP & Semantic Layer (REQUIRED for column understanding and agent routing)
|
|
67
|
+
sentence-transformers>=2.2.2 # For semantic column embeddings and agent routing
|
|
68
|
+
tiktoken>=0.5.2 # For accurate token counting in budget management
|
|
69
|
+
|
|
70
|
+
# Advanced NLP (Optional - Uncomment for advanced NLP tools)
|
|
71
|
+
# These are optional but recommended for full NLP capabilities
|
|
72
|
+
# spacy==3.7.2 # For named entity recognition (perform_named_entity_recognition)
|
|
73
|
+
# transformers==4.35.2 # For transformer-based sentiment & topic modeling
|
|
74
|
+
# bertopic==0.16.0 # For advanced topic modeling
|
|
75
|
+
|
|
76
|
+
# Computer Vision (Optional - Uncomment for CV tools)
|
|
77
|
+
# These are optional but recommended for full CV capabilities
|
|
78
|
+
# torch==2.1.0 # For CNN-based image feature extraction
|
|
79
|
+
# torchvision==0.16.0 # For pre-trained models (ResNet, EfficientNet, VGG)
|
|
80
|
+
Pillow>=11.0.0 # For basic image processing
|
|
81
|
+
#opencv-python==4.8.1 # For advanced image processing & color features
|
|
82
|
+
|
|
83
|
+
# Business Intelligence (Optional - Uncomment for advanced BI tools)
|
|
84
|
+
# These are optional but add specialized capabilities
|
|
85
|
+
# lifetimes==0.11.3 # For customer lifetime value modeling
|
|
86
|
+
# econml==0.15.0 # For advanced causal inference
|
|
87
|
+
dowhy>=0.11 # Formal causal inference with DAG-based reasoning
|
|
88
|
+
|
|
89
|
+
# Data Quality & Validation
|
|
90
|
+
cleanlab>=2.6 # Label error detection using confident learning
|
|
91
|
+
pandera>=0.18 # Schema-based DataFrame validation
|
|
92
|
+
boruta>=0.3 # All-relevant feature selection (BorutaPy)
|
|
93
|
+
|
|
94
|
+
# CLI & UI
|
|
95
|
+
typer>=0.12.3
|
|
96
|
+
rich==13.7.0
|
|
97
|
+
tqdm==4.66.1
|
|
98
|
+
|
|
99
|
+
# Utilities
|
|
100
|
+
pydantic>=2.11.7
|
|
101
|
+
joblib==1.3.2
|
|
102
|
+
|
|
103
|
+
# Google Cloud Integration
|
|
104
|
+
google-cloud-bigquery==3.14.1
|
|
105
|
+
google-cloud-storage==2.14.0 # For GCS artifact storage
|
|
106
|
+
google-auth==2.25.2
|
|
107
|
+
google-generativeai==0.3.2 # For Gemini LLM support
|
|
108
|
+
|
|
109
|
+
# Cloudflare R2 Storage (S3-compatible)
|
|
110
|
+
boto3>=1.28.0 # For R2 file storage
|
|
111
|
+
|
|
112
|
+
# HuggingFace Storage Integration
|
|
113
|
+
huggingface_hub>=0.20.0 # For storing user artifacts on HuggingFace
|
|
114
|
+
|
|
115
|
+
# Supabase Backend
|
|
116
|
+
supabase==2.28.3 # For user file metadata
|
|
117
|
+
|
|
118
|
+
# Testing
|
|
119
|
+
pytest==7.4.3
|
|
120
|
+
pytest-mock==3.12.0
|
|
121
|
+
pytest-cov==4.1.0
|
|
122
|
+
|
|
123
|
+
# Development
|
|
124
|
+
black==23.12.1
|
|
125
|
+
flake8==7.0.0
|
|
126
|
+
mypy==1.8.0
|
package/setup.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from setuptools import find_packages, setup
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# Unpinned runtime dependencies declared for the Python distribution.
# NOTE(review): "gradio" is listed here but is commented out in
# requirements.txt -- confirm it is still meant to be a hard requirement.
INSTALL_REQUIRES = [
    "typer",
    "rich",
    "polars",
    "pandas",
    "matplotlib",
    "seaborn",
    "plotly",
    "groq",
    "python-dotenv",
    "gradio",
    "ydata-profiling",
    "sweetviz",
    "scikit-learn",
    "optuna",
    "xgboost",
    "prophet",
    "statsmodels",
    "imbalanced-learn",
]

# NOTE(review): the console script targets ``ds_agent.cli:main``; verify that
# the ``ds_agent`` package actually exposes a ``cli`` module with ``main``.
CONSOLE_SCRIPTS = ["ds-agent=ds_agent.cli:main"]

setup(
    name="ds-agent",
    version="0.1.0",
    description="Data Science Agent CLI",
    packages=find_packages(),
    include_package_data=True,
    install_requires=INSTALL_REQUIRES,
    entry_points={"console_scripts": CONSOLE_SCRIPTS},
)
|