@vespermcp/mcp-server 1.2.9 → 1.2.11

This diff compares the contents of two publicly available versions of the package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -63,7 +63,13 @@ export class DataCleaner {
63
63
  reject(new Error(result.error));
64
64
  }
65
65
  else {
66
- resolve(result);
66
+ resolve({
67
+ success: true,
68
+ rows_affected: Number(result.rows_affected ?? 0),
69
+ columns_affected: Number(result.columns_affected ?? 0),
70
+ output_path: result.output_path,
71
+ logs: Array.isArray(result.logs) ? result.logs : [],
72
+ });
67
73
  }
68
74
  }
69
75
  catch (e) {
@@ -14,7 +14,7 @@ export class PipelineExecutor {
14
14
  /**
15
15
  * Run the full Auto-Cleaning Pipeline on a dataset file
16
16
  */
17
- async runPipeline(datasetId, filePath, outputFormat = "csv", onProgress) {
17
+ async runPipeline(datasetId, filePath, outputFormat = "parquet", onProgress) {
18
18
  // ... (logging setup)
19
19
  const log = (m) => {
20
20
  console.error(`[Pipeline] ${m}`);
@@ -26,9 +26,10 @@ export class PipelineExecutor {
26
26
  // 2. Generate Plan
27
27
  log(`Generating cleaning plan...`);
28
28
  const plan = await this.planner.generatePlan(datasetId, qualityReport);
29
+ const rules = plan.operations;
29
30
  // If no cleaning needed, we still might need format conversion
30
31
  const needsConversion = !filePath.toLowerCase().endsWith(`.${outputFormat}`);
31
- if (plan.operations.length === 0 && !needsConversion) {
32
+ if (rules.length === 0 && !needsConversion) {
32
33
  log(`No cleaning or conversion needed.`);
33
34
  return {
34
35
  initial_quality: qualityReport,
@@ -42,9 +43,9 @@ export class PipelineExecutor {
42
43
  };
43
44
  }
44
45
  // 3. Execute Plan (includes conversion if requested)
45
- log(`Executing ${plan.operations.length} operations (Format: ${outputFormat})...`);
46
- plan.operations.forEach(op => console.error(` - ${op.type}: ${op.reason}`));
47
- const cleaningResult = await this.cleaner.clean(filePath, plan.operations, outputFormat);
46
+ log(`Executing ${rules.length} operations (Format: ${outputFormat})...`);
47
+ rules.forEach(op => console.error(` - ${op.type}: ${op.reason}`));
48
+ const cleaningResult = await this.cleaner.clean(filePath, rules, outputFormat);
48
49
  if (cleaningResult.success) {
49
50
  log(`Cleaning complete. Output: ${cleaningResult.output_path}`);
50
51
  }
@@ -110,6 +110,10 @@ export class CleaningPlanner {
110
110
  }
111
111
  return plan;
112
112
  }
113
+ async generateRules(datasetId, report, ruleSet, targetInfo) {
114
+ const plan = await this.generatePlan(datasetId, report, ruleSet, targetInfo);
115
+ return plan.operations;
116
+ }
113
117
  shouldFixType(col) {
114
118
  if (col.inferred_type && col.inferred_type.includes("Numeric") && (col.type.includes("String") || col.type.includes("Utf8"))) {
115
119
  return true;
package/build/index.js CHANGED
@@ -469,11 +469,20 @@ async function handlePrepareJob(jobId, query, requirements) {
469
469
  */
470
470
  async function handleCleanJob(jobId, datasetId, ops) {
471
471
  const update = (updates) => jobManager.updateJob(jobId, updates);
472
- let filePath = path.join(dataRoot, "data", "raw", `${datasetId.replace(/\//g, "_")}.csv`);
472
+ const safeId = datasetId.replace(/\//g, "_");
473
+ const parquetPath = path.join(dataRoot, "data", "raw", `${safeId}.parquet`);
474
+ const csvPath = path.join(dataRoot, "data", "raw", `${safeId}.csv`);
475
+ let filePath = parquetPath;
476
+ if (!fs.existsSync(filePath)) {
477
+ filePath = csvPath;
478
+ }
473
479
  if (datasetId === "demo" || !fs.existsSync(filePath)) {
474
- const demoPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
475
- if (fs.existsSync(demoPath))
476
- filePath = demoPath;
480
+ const demoParquetPath = path.join(dataRoot, "e2e_demo_output", "raw_data.parquet");
481
+ const demoCsvPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
482
+ if (fs.existsSync(demoParquetPath))
483
+ filePath = demoParquetPath;
484
+ else if (fs.existsSync(demoCsvPath))
485
+ filePath = demoCsvPath;
477
486
  else
478
487
  throw new Error(`Data file not found for ${datasetId}`);
479
488
  }
@@ -714,7 +723,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
714
723
  },
715
724
  {
716
725
  name: "export_dataset",
717
- description: "Export a dataset to a local directory. Use format='feather' (default) for 5-10× faster writes than CSV. Add fast=true to skip quality/cleaning steps.",
726
+ description: "Export a dataset to a local directory. Use format='parquet' (default) for efficient analytics and broad interoperability. Add fast=true to skip quality/cleaning steps.",
718
727
  inputSchema: {
719
728
  type: "object",
720
729
  properties: {
@@ -729,7 +738,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
729
738
  format: {
730
739
  type: "string",
731
740
  enum: ["feather", "parquet", "csv", "jsonl", "arrow"],
732
- description: "Output format. feather (fastest), parquet (best compression), csv (human-readable). Default: feather.",
741
+ description: "Output format. parquet (default, analytics-friendly), feather (fast local IO), csv (human-readable).",
733
742
  },
734
743
  compression: {
735
744
  type: "string",
@@ -800,7 +809,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
800
809
  output_format: {
801
810
  type: "string",
802
811
  enum: ["feather", "parquet", "csv", "jsonl", "arrow"],
803
- description: "Output format (default: feather).",
812
+ description: "Output format (default: parquet).",
804
813
  },
805
814
  compression: {
806
815
  type: "string",
@@ -1144,12 +1153,19 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1144
1153
  }
1145
1154
  case "analyze_quality": {
1146
1155
  const datasetId = String(request.params.arguments?.dataset_id);
1147
- let filePath = path.join(dataRoot, "data", "raw", `${datasetId.replace(/\//g, "_")}.csv`);
1156
+ const safeId = datasetId.replace(/\//g, "_");
1157
+ const parquetPath = path.join(dataRoot, "data", "raw", `${safeId}.parquet`);
1158
+ const csvPath = path.join(dataRoot, "data", "raw", `${safeId}.csv`);
1159
+ let filePath = fs.existsSync(parquetPath) ? parquetPath : csvPath;
1148
1160
  // Demo Fallback for easy testing
1149
1161
  if (datasetId === "demo" || !fs.existsSync(filePath)) {
1150
- const demoPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
1151
- if (fs.existsSync(demoPath)) {
1152
- filePath = demoPath;
1162
+ const demoParquetPath = path.join(dataRoot, "e2e_demo_output", "raw_data.parquet");
1163
+ const demoCsvPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
1164
+ if (fs.existsSync(demoParquetPath)) {
1165
+ filePath = demoParquetPath;
1166
+ }
1167
+ else if (fs.existsSync(demoCsvPath)) {
1168
+ filePath = demoCsvPath;
1153
1169
  }
1154
1170
  else if (datasetId !== "demo") {
1155
1171
  return {
@@ -1165,11 +1181,18 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1165
1181
  }
1166
1182
  case "preview_cleaning": {
1167
1183
  const datasetId = String(request.params.arguments?.dataset_id);
1168
- let filePath = path.join(dataRoot, "data", "raw", `${datasetId.replace(/\//g, "_")}.csv`);
1184
+ const safeId = datasetId.replace(/\//g, "_");
1185
+ const parquetPath = path.join(dataRoot, "data", "raw", `${safeId}.parquet`);
1186
+ const csvPath = path.join(dataRoot, "data", "raw", `${safeId}.csv`);
1187
+ let filePath = fs.existsSync(parquetPath) ? parquetPath : csvPath;
1169
1188
  if (datasetId === "demo" || !fs.existsSync(filePath)) {
1170
- const demoPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
1171
- if (fs.existsSync(demoPath)) {
1172
- filePath = demoPath;
1189
+ const demoParquetPath = path.join(dataRoot, "e2e_demo_output", "raw_data.parquet");
1190
+ const demoCsvPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
1191
+ if (fs.existsSync(demoParquetPath)) {
1192
+ filePath = demoParquetPath;
1193
+ }
1194
+ else if (fs.existsSync(demoCsvPath)) {
1195
+ filePath = demoCsvPath;
1173
1196
  }
1174
1197
  else {
1175
1198
  throw new McpError(ErrorCode.InvalidParams, `Local data file not found for ${datasetId}. Please run prepare_dataset first.`);
@@ -1291,7 +1314,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1291
1314
  // If NOT fast mode, run quality/cleaning pipeline first (only for csv/parquet compat)
1292
1315
  if (!fastMode) {
1293
1316
  const currentExt = path.extname(sourcePath).substring(1).toLowerCase();
1294
- const pipelineFmt = (requestedFormat === "csv" || requestedFormat === "parquet") ? requestedFormat : "csv";
1317
+ const pipelineFmt = (requestedFormat === "csv" || requestedFormat === "parquet") ? requestedFormat : "parquet";
1295
1318
  if (currentExt !== pipelineFmt) {
1296
1319
  console.error(`[Export] Running quality/cleaning pipeline (use fast=true to skip)...`);
1297
1320
  try {
@@ -152,7 +152,7 @@ export class DataIngestor {
152
152
  /**
153
153
  * Generates a safe local filename for a dataset ID
154
154
  */
155
- getTargetPath(datasetId, extension = "csv") {
155
+ getTargetPath(datasetId, extension = "parquet") {
156
156
  const safeId = datasetId.replace(/\//g, "_").replace(/:/g, "_");
157
157
  return path.join(this.rawDataDir, `${safeId}.${extension}`);
158
158
  }
@@ -155,12 +155,7 @@ def _get_from_env(name: str) -> Optional[str]:
155
155
 
156
156
 
157
157
  def get_key(name: str) -> Optional[str]:
158
- # 1) env vars (highest priority)
159
- env_val = _get_from_env(name)
160
- if env_val:
161
- return env_val
162
-
163
- # 2) keyring (secure)
158
+ # 1) keyring (secure)
164
159
  if HAS_KEYRING:
165
160
  try:
166
161
  val = keyring.get_password(SERVICE_NAME, name)
@@ -169,14 +164,21 @@ def get_key(name: str) -> Optional[str]:
169
164
  except Exception:
170
165
  pass
171
166
 
172
- # 3) encrypted fallback config.toml
167
+ # 2) encrypted fallback config.toml
173
168
  fallback = _read_fallback_toml()
174
169
  enc = fallback.get(name)
175
- if not enc:
176
- return None
177
- secret = _get_or_create_local_secret()
178
- method = fallback.get("__method__", "fernet" if HAS_FERNET else "xor")
179
- return _decrypt_value(enc, method, secret)
170
+ if enc:
171
+ secret = _get_or_create_local_secret()
172
+ method = fallback.get("__method__", "fernet" if HAS_FERNET else "xor")
173
+ dec = _decrypt_value(enc, method, secret)
174
+ if dec:
175
+ return dec
176
+
177
+ # 3) env vars (fallback only)
178
+ env_val = _get_from_env(name)
179
+ if env_val:
180
+ return env_val
181
+ return None
180
182
 
181
183
 
182
184
  def set_key(name: str, value: str) -> Dict[str, str]:
@@ -20,14 +20,13 @@ def _ensure_auth() -> Dict[str, Any]:
20
20
  }
21
21
 
22
22
  # Priority:
23
- # 1) Existing env vars
24
- # 2) secure local store (keyring or ~/.vesper/config.toml)
23
+ # 1) secure local store (keyring or ~/.vesper/config.toml)
24
+ # 2) existing env vars
25
25
  # 3) ~/.kaggle/kaggle.json handled by KaggleApi.authenticate()
26
- if not os.getenv("KAGGLE_USERNAME") or not os.getenv("KAGGLE_KEY"):
27
- keys = get_all()
28
- if keys.get("kaggle_username") and keys.get("kaggle_key"):
29
- os.environ["KAGGLE_USERNAME"] = keys["kaggle_username"]
30
- os.environ["KAGGLE_KEY"] = keys["kaggle_key"]
26
+ keys = get_all()
27
+ if keys.get("kaggle_username") and keys.get("kaggle_key"):
28
+ os.environ["KAGGLE_USERNAME"] = keys["kaggle_username"]
29
+ os.environ["KAGGLE_KEY"] = keys["kaggle_key"]
31
30
 
32
31
  api = KaggleApi()
33
32
  try:
@@ -62,23 +62,19 @@ class AssetDownloader:
62
62
 
63
63
  @staticmethod
64
64
  def _hydrate_kaggle_credentials() -> None:
65
- username = os.getenv("KAGGLE_USERNAME")
66
- key = os.getenv("KAGGLE_KEY")
67
-
68
- if not username or not key:
69
- try:
70
- from config import get_all # type: ignore
71
- keys = get_all() or {}
72
- except Exception:
73
- keys = {}
65
+ try:
66
+ from config import get_all # type: ignore
67
+ keys = get_all() or {}
68
+ except Exception:
69
+ keys = {}
74
70
 
75
- username = username or keys.get("kaggle_username")
76
- key = key or keys.get("kaggle_key")
71
+ username = keys.get("kaggle_username") or os.getenv("KAGGLE_USERNAME")
72
+ key = keys.get("kaggle_key") or os.getenv("KAGGLE_KEY")
77
73
 
78
- if username:
79
- os.environ["KAGGLE_USERNAME"] = str(username)
80
- if key:
81
- os.environ["KAGGLE_KEY"] = str(key)
74
+ if username:
75
+ os.environ["KAGGLE_USERNAME"] = str(username)
76
+ if key:
77
+ os.environ["KAGGLE_KEY"] = str(key)
82
78
 
83
79
  username = os.getenv("KAGGLE_USERNAME")
84
80
  key = os.getenv("KAGGLE_KEY")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vespermcp/mcp-server",
3
- "version": "1.2.9",
3
+ "version": "1.2.11",
4
4
  "description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
5
5
  "type": "module",
6
6
  "main": "build/index.js",
@@ -91,5 +91,6 @@
91
91
  "tsx": "^4.21.0",
92
92
  "typescript": "^5.9.3",
93
93
  "vitest": "^4.0.17"
94
- }
94
+ },
95
+ "packageManager": "pnpm@10.18.1+sha512.77a884a165cbba2d8d1c19e3b4880eee6d2fcabd0d879121e282196b80042351d5eb3ca0935fa599da1dc51265cc68816ad2bddd2a2de5ea9fdf92adbec7cd34"
95
96
  }
@@ -155,12 +155,7 @@ def _get_from_env(name: str) -> Optional[str]:
155
155
 
156
156
 
157
157
  def get_key(name: str) -> Optional[str]:
158
- # 1) env vars (highest priority)
159
- env_val = _get_from_env(name)
160
- if env_val:
161
- return env_val
162
-
163
- # 2) keyring (secure)
158
+ # 1) keyring (secure)
164
159
  if HAS_KEYRING:
165
160
  try:
166
161
  val = keyring.get_password(SERVICE_NAME, name)
@@ -169,14 +164,21 @@ def get_key(name: str) -> Optional[str]:
169
164
  except Exception:
170
165
  pass
171
166
 
172
- # 3) encrypted fallback config.toml
167
+ # 2) encrypted fallback config.toml
173
168
  fallback = _read_fallback_toml()
174
169
  enc = fallback.get(name)
175
- if not enc:
176
- return None
177
- secret = _get_or_create_local_secret()
178
- method = fallback.get("__method__", "fernet" if HAS_FERNET else "xor")
179
- return _decrypt_value(enc, method, secret)
170
+ if enc:
171
+ secret = _get_or_create_local_secret()
172
+ method = fallback.get("__method__", "fernet" if HAS_FERNET else "xor")
173
+ dec = _decrypt_value(enc, method, secret)
174
+ if dec:
175
+ return dec
176
+
177
+ # 3) env vars (fallback only)
178
+ env_val = _get_from_env(name)
179
+ if env_val:
180
+ return env_val
181
+ return None
180
182
 
181
183
 
182
184
  def set_key(name: str, value: str) -> Dict[str, str]:
@@ -20,14 +20,13 @@ def _ensure_auth() -> Dict[str, Any]:
20
20
  }
21
21
 
22
22
  # Priority:
23
- # 1) Existing env vars
24
- # 2) secure local store (keyring or ~/.vesper/config.toml)
23
+ # 1) secure local store (keyring or ~/.vesper/config.toml)
24
+ # 2) existing env vars
25
25
  # 3) ~/.kaggle/kaggle.json handled by KaggleApi.authenticate()
26
- if not os.getenv("KAGGLE_USERNAME") or not os.getenv("KAGGLE_KEY"):
27
- keys = get_all()
28
- if keys.get("kaggle_username") and keys.get("kaggle_key"):
29
- os.environ["KAGGLE_USERNAME"] = keys["kaggle_username"]
30
- os.environ["KAGGLE_KEY"] = keys["kaggle_key"]
26
+ keys = get_all()
27
+ if keys.get("kaggle_username") and keys.get("kaggle_key"):
28
+ os.environ["KAGGLE_USERNAME"] = keys["kaggle_username"]
29
+ os.environ["KAGGLE_KEY"] = keys["kaggle_key"]
31
30
 
32
31
  api = KaggleApi()
33
32
  try:
@@ -62,23 +62,19 @@ class AssetDownloader:
62
62
 
63
63
  @staticmethod
64
64
  def _hydrate_kaggle_credentials() -> None:
65
- username = os.getenv("KAGGLE_USERNAME")
66
- key = os.getenv("KAGGLE_KEY")
67
-
68
- if not username or not key:
69
- try:
70
- from config import get_all # type: ignore
71
- keys = get_all() or {}
72
- except Exception:
73
- keys = {}
65
+ try:
66
+ from config import get_all # type: ignore
67
+ keys = get_all() or {}
68
+ except Exception:
69
+ keys = {}
74
70
 
75
- username = username or keys.get("kaggle_username")
76
- key = key or keys.get("kaggle_key")
71
+ username = keys.get("kaggle_username") or os.getenv("KAGGLE_USERNAME")
72
+ key = keys.get("kaggle_key") or os.getenv("KAGGLE_KEY")
77
73
 
78
- if username:
79
- os.environ["KAGGLE_USERNAME"] = str(username)
80
- if key:
81
- os.environ["KAGGLE_KEY"] = str(key)
74
+ if username:
75
+ os.environ["KAGGLE_USERNAME"] = str(username)
76
+ if key:
77
+ os.environ["KAGGLE_KEY"] = str(key)
82
78
 
83
79
  username = os.getenv("KAGGLE_USERNAME")
84
80
  key = os.getenv("KAGGLE_KEY")