@vespermcp/mcp-server 1.2.10 → 1.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -63,7 +63,13 @@ export class DataCleaner {
63
63
  reject(new Error(result.error));
64
64
  }
65
65
  else {
66
- resolve(result);
66
+ resolve({
67
+ success: true,
68
+ rows_affected: Number(result.rows_affected ?? 0),
69
+ columns_affected: Number(result.columns_affected ?? 0),
70
+ output_path: result.output_path,
71
+ logs: Array.isArray(result.logs) ? result.logs : [],
72
+ });
67
73
  }
68
74
  }
69
75
  catch (e) {
@@ -14,7 +14,7 @@ export class PipelineExecutor {
14
14
  /**
15
15
  * Run the full Auto-Cleaning Pipeline on a dataset file
16
16
  */
17
- async runPipeline(datasetId, filePath, outputFormat = "csv", onProgress) {
17
+ async runPipeline(datasetId, filePath, outputFormat = "parquet", onProgress) {
18
18
  // ... (logging setup)
19
19
  const log = (m) => {
20
20
  console.error(`[Pipeline] ${m}`);
@@ -26,9 +26,10 @@ export class PipelineExecutor {
26
26
  // 2. Generate Plan
27
27
  log(`Generating cleaning plan...`);
28
28
  const plan = await this.planner.generatePlan(datasetId, qualityReport);
29
+ const rules = plan.operations;
29
30
  // If no cleaning needed, we still might need format conversion
30
31
  const needsConversion = !filePath.toLowerCase().endsWith(`.${outputFormat}`);
31
- if (plan.operations.length === 0 && !needsConversion) {
32
+ if (rules.length === 0 && !needsConversion) {
32
33
  log(`No cleaning or conversion needed.`);
33
34
  return {
34
35
  initial_quality: qualityReport,
@@ -42,9 +43,9 @@ export class PipelineExecutor {
42
43
  };
43
44
  }
44
45
  // 3. Execute Plan (includes conversion if requested)
45
- log(`Executing ${plan.operations.length} operations (Format: ${outputFormat})...`);
46
- plan.operations.forEach(op => console.error(` - ${op.type}: ${op.reason}`));
47
- const cleaningResult = await this.cleaner.clean(filePath, plan.operations, outputFormat);
46
+ log(`Executing ${rules.length} operations (Format: ${outputFormat})...`);
47
+ rules.forEach(op => console.error(` - ${op.type}: ${op.reason}`));
48
+ const cleaningResult = await this.cleaner.clean(filePath, rules, outputFormat);
48
49
  if (cleaningResult.success) {
49
50
  log(`Cleaning complete. Output: ${cleaningResult.output_path}`);
50
51
  }
@@ -110,6 +110,10 @@ export class CleaningPlanner {
110
110
  }
111
111
  return plan;
112
112
  }
113
+ async generateRules(datasetId, report, ruleSet, targetInfo) {
114
+ const plan = await this.generatePlan(datasetId, report, ruleSet, targetInfo);
115
+ return plan.operations;
116
+ }
113
117
  shouldFixType(col) {
114
118
  if (col.inferred_type && col.inferred_type.includes("Numeric") && (col.type.includes("String") || col.type.includes("Utf8"))) {
115
119
  return true;
package/build/index.js CHANGED
@@ -469,11 +469,20 @@ async function handlePrepareJob(jobId, query, requirements) {
469
469
  */
470
470
  async function handleCleanJob(jobId, datasetId, ops) {
471
471
  const update = (updates) => jobManager.updateJob(jobId, updates);
472
- let filePath = path.join(dataRoot, "data", "raw", `${datasetId.replace(/\//g, "_")}.csv`);
472
+ const safeId = datasetId.replace(/\//g, "_");
473
+ const parquetPath = path.join(dataRoot, "data", "raw", `${safeId}.parquet`);
474
+ const csvPath = path.join(dataRoot, "data", "raw", `${safeId}.csv`);
475
+ let filePath = parquetPath;
476
+ if (!fs.existsSync(filePath)) {
477
+ filePath = csvPath;
478
+ }
473
479
  if (datasetId === "demo" || !fs.existsSync(filePath)) {
474
- const demoPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
475
- if (fs.existsSync(demoPath))
476
- filePath = demoPath;
480
+ const demoParquetPath = path.join(dataRoot, "e2e_demo_output", "raw_data.parquet");
481
+ const demoCsvPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
482
+ if (fs.existsSync(demoParquetPath))
483
+ filePath = demoParquetPath;
484
+ else if (fs.existsSync(demoCsvPath))
485
+ filePath = demoCsvPath;
477
486
  else
478
487
  throw new Error(`Data file not found for ${datasetId}`);
479
488
  }
@@ -714,7 +723,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
714
723
  },
715
724
  {
716
725
  name: "export_dataset",
717
- description: "Export a dataset to a local directory. Use format='feather' (default) for 5-10× faster writes than CSV. Add fast=true to skip quality/cleaning steps.",
726
+ description: "Export a dataset to a local directory. Use format='parquet' (default) for efficient analytics and broad interoperability. Add fast=true to skip quality/cleaning steps.",
718
727
  inputSchema: {
719
728
  type: "object",
720
729
  properties: {
@@ -729,7 +738,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
729
738
  format: {
730
739
  type: "string",
731
740
  enum: ["feather", "parquet", "csv", "jsonl", "arrow"],
732
- description: "Output format. feather (fastest), parquet (best compression), csv (human-readable). Default: feather.",
741
+ description: "Output format. parquet (default, analytics-friendly), feather (fast local IO), csv (human-readable).",
733
742
  },
734
743
  compression: {
735
744
  type: "string",
@@ -800,7 +809,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
800
809
  output_format: {
801
810
  type: "string",
802
811
  enum: ["feather", "parquet", "csv", "jsonl", "arrow"],
803
- description: "Output format (default: feather).",
812
+ description: "Output format (default: parquet).",
804
813
  },
805
814
  compression: {
806
815
  type: "string",
@@ -1144,12 +1153,19 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1144
1153
  }
1145
1154
  case "analyze_quality": {
1146
1155
  const datasetId = String(request.params.arguments?.dataset_id);
1147
- let filePath = path.join(dataRoot, "data", "raw", `${datasetId.replace(/\//g, "_")}.csv`);
1156
+ const safeId = datasetId.replace(/\//g, "_");
1157
+ const parquetPath = path.join(dataRoot, "data", "raw", `${safeId}.parquet`);
1158
+ const csvPath = path.join(dataRoot, "data", "raw", `${safeId}.csv`);
1159
+ let filePath = fs.existsSync(parquetPath) ? parquetPath : csvPath;
1148
1160
  // Demo Fallback for easy testing
1149
1161
  if (datasetId === "demo" || !fs.existsSync(filePath)) {
1150
- const demoPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
1151
- if (fs.existsSync(demoPath)) {
1152
- filePath = demoPath;
1162
+ const demoParquetPath = path.join(dataRoot, "e2e_demo_output", "raw_data.parquet");
1163
+ const demoCsvPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
1164
+ if (fs.existsSync(demoParquetPath)) {
1165
+ filePath = demoParquetPath;
1166
+ }
1167
+ else if (fs.existsSync(demoCsvPath)) {
1168
+ filePath = demoCsvPath;
1153
1169
  }
1154
1170
  else if (datasetId !== "demo") {
1155
1171
  return {
@@ -1165,11 +1181,18 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1165
1181
  }
1166
1182
  case "preview_cleaning": {
1167
1183
  const datasetId = String(request.params.arguments?.dataset_id);
1168
- let filePath = path.join(dataRoot, "data", "raw", `${datasetId.replace(/\//g, "_")}.csv`);
1184
+ const safeId = datasetId.replace(/\//g, "_");
1185
+ const parquetPath = path.join(dataRoot, "data", "raw", `${safeId}.parquet`);
1186
+ const csvPath = path.join(dataRoot, "data", "raw", `${safeId}.csv`);
1187
+ let filePath = fs.existsSync(parquetPath) ? parquetPath : csvPath;
1169
1188
  if (datasetId === "demo" || !fs.existsSync(filePath)) {
1170
- const demoPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
1171
- if (fs.existsSync(demoPath)) {
1172
- filePath = demoPath;
1189
+ const demoParquetPath = path.join(dataRoot, "e2e_demo_output", "raw_data.parquet");
1190
+ const demoCsvPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
1191
+ if (fs.existsSync(demoParquetPath)) {
1192
+ filePath = demoParquetPath;
1193
+ }
1194
+ else if (fs.existsSync(demoCsvPath)) {
1195
+ filePath = demoCsvPath;
1173
1196
  }
1174
1197
  else {
1175
1198
  throw new McpError(ErrorCode.InvalidParams, `Local data file not found for ${datasetId}. Please run prepare_dataset first.`);
@@ -1291,7 +1314,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1291
1314
  // If NOT fast mode, run quality/cleaning pipeline first (only for csv/parquet compat)
1292
1315
  if (!fastMode) {
1293
1316
  const currentExt = path.extname(sourcePath).substring(1).toLowerCase();
1294
- const pipelineFmt = (requestedFormat === "csv" || requestedFormat === "parquet") ? requestedFormat : "csv";
1317
+ const pipelineFmt = (requestedFormat === "csv" || requestedFormat === "parquet") ? requestedFormat : "parquet";
1295
1318
  if (currentExt !== pipelineFmt) {
1296
1319
  console.error(`[Export] Running quality/cleaning pipeline (use fast=true to skip)...`);
1297
1320
  try {
@@ -152,7 +152,7 @@ export class DataIngestor {
152
152
  /**
153
153
  * Generates a safe local filename for a dataset ID
154
154
  */
155
- getTargetPath(datasetId, extension = "csv") {
155
+ getTargetPath(datasetId, extension = "parquet") {
156
156
  const safeId = datasetId.replace(/\//g, "_").replace(/:/g, "_");
157
157
  return path.join(this.rawDataDir, `${safeId}.${extension}`);
158
158
  }
package/package.json CHANGED
@@ -1,12 +1,13 @@
1
1
  {
2
2
  "name": "@vespermcp/mcp-server",
3
- "version": "1.2.10",
3
+ "version": "1.2.12",
4
4
  "description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
5
5
  "type": "module",
6
6
  "main": "build/index.js",
7
7
  "bin": {
8
8
  "vespermcp": "./build/index.js",
9
- "@vespermcp/mcp-server": "./build/index.js"
9
+ "@vespermcp/mcp-server": "./build/index.js",
10
+ "vesper-wizard": "src/scripts/wizard.js"
10
11
  },
11
12
  "files": [
12
13
  "build/**/*",
@@ -72,6 +73,7 @@
72
73
  "ajv": "^8.17.1",
73
74
  "ajv-formats": "^3.0.1",
74
75
  "better-sqlite3": "^12.6.0",
76
+ "inquirer": "^13.3.0",
75
77
  "lodash": "^4.17.21",
76
78
  "uuid": "^13.0.0",
77
79
  "zod": "^4.3.5",
@@ -91,5 +93,6 @@
91
93
  "tsx": "^4.21.0",
92
94
  "typescript": "^5.9.3",
93
95
  "vitest": "^4.0.17"
94
- }
96
+ },
97
+ "packageManager": "pnpm@10.18.1+sha512.77a884a165cbba2d8d1c19e3b4880eee6d2fcabd0d879121e282196b80042351d5eb3ca0935fa599da1dc51265cc68816ad2bddd2a2de5ea9fdf92adbec7cd34"
95
98
  }
@@ -0,0 +1,77 @@
1
+ #!/usr/bin/env node
2
+
3
+ // Vesper Wizard CLI: Interactive setup for fast configuration
4
+ const inquirer = require('inquirer');
5
+ const fs = require('fs');
6
+ const path = require('path');
7
+
8
+ async function main() {
9
+ console.log('\n🧙 Welcome to the Vesper Wizard!\n');
10
+
11
+ // Step 1: Project basics
12
+ const { projectName } = await inquirer.prompt([
13
+ {
14
+ type: 'input',
15
+ name: 'projectName',
16
+ message: 'Project name:',
17
+ default: path.basename(process.cwd()),
18
+ },
19
+ ]);
20
+
21
+ // Step 2: Data directory
22
+ const { dataDir } = await inquirer.prompt([
23
+ {
24
+ type: 'input',
25
+ name: 'dataDir',
26
+ message: 'Path to your data directory:',
27
+ default: './datasets',
28
+ },
29
+ ]);
30
+
31
+ // Step 3: Default export format
32
+ const { exportFormat } = await inquirer.prompt([
33
+ {
34
+ type: 'list',
35
+ name: 'exportFormat',
36
+ message: 'Default export format:',
37
+ choices: ['parquet', 'csv', 'feather'],
38
+ default: 'parquet',
39
+ },
40
+ ]);
41
+
42
+ // Step 4: Add tokens/credentials
43
+ const { addTokens } = await inquirer.prompt([
44
+ {
45
+ type: 'confirm',
46
+ name: 'addTokens',
47
+ message: 'Would you like to add API tokens or credentials now?',
48
+ default: true,
49
+ },
50
+ ]);
51
+ let tokens = {};
52
+ if (addTokens) {
53
+ const { kaggleToken } = await inquirer.prompt([
54
+ {
55
+ type: 'input',
56
+ name: 'kaggleToken',
57
+ message: 'Kaggle API token (leave blank to skip):',
58
+ },
59
+ ]);
60
+ if (kaggleToken) tokens.kaggle = kaggleToken;
61
+ // Add more tokens as needed
62
+ }
63
+
64
+ // Step 5: Write config file
65
+ const config = {
66
+ project: projectName,
67
+ dataDir,
68
+ exportFormat,
69
+ tokens,
70
+ };
71
+ const configPath = path.join(process.cwd(), 'vesper-mcp-config.json');
72
+ fs.writeFileSync(configPath, JSON.stringify(config, null, 2));
73
+ console.log(`\n✅ Configuration saved to ${configPath}`);
74
+ console.log('\n🎉 Vesper is ready to use!\n');
75
+ }
76
+
77
+ main();