@vespermcp/mcp-server 1.2.10 → 1.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -63,7 +63,13 @@ export class DataCleaner {
|
|
|
63
63
|
reject(new Error(result.error));
|
|
64
64
|
}
|
|
65
65
|
else {
|
|
66
|
-
resolve(
|
|
66
|
+
resolve({
|
|
67
|
+
success: true,
|
|
68
|
+
rows_affected: Number(result.rows_affected ?? 0),
|
|
69
|
+
columns_affected: Number(result.columns_affected ?? 0),
|
|
70
|
+
output_path: result.output_path,
|
|
71
|
+
logs: Array.isArray(result.logs) ? result.logs : [],
|
|
72
|
+
});
|
|
67
73
|
}
|
|
68
74
|
}
|
|
69
75
|
catch (e) {
|
|
@@ -14,7 +14,7 @@ export class PipelineExecutor {
|
|
|
14
14
|
/**
|
|
15
15
|
* Run the full Auto-Cleaning Pipeline on a dataset file
|
|
16
16
|
*/
|
|
17
|
-
async runPipeline(datasetId, filePath, outputFormat = "
|
|
17
|
+
async runPipeline(datasetId, filePath, outputFormat = "parquet", onProgress) {
|
|
18
18
|
// ... (logging setup)
|
|
19
19
|
const log = (m) => {
|
|
20
20
|
console.error(`[Pipeline] ${m}`);
|
|
@@ -26,9 +26,10 @@ export class PipelineExecutor {
|
|
|
26
26
|
// 2. Generate Plan
|
|
27
27
|
log(`Generating cleaning plan...`);
|
|
28
28
|
const plan = await this.planner.generatePlan(datasetId, qualityReport);
|
|
29
|
+
const rules = plan.operations;
|
|
29
30
|
// If no cleaning needed, we still might need format conversion
|
|
30
31
|
const needsConversion = !filePath.toLowerCase().endsWith(`.${outputFormat}`);
|
|
31
|
-
if (
|
|
32
|
+
if (rules.length === 0 && !needsConversion) {
|
|
32
33
|
log(`No cleaning or conversion needed.`);
|
|
33
34
|
return {
|
|
34
35
|
initial_quality: qualityReport,
|
|
@@ -42,9 +43,9 @@ export class PipelineExecutor {
|
|
|
42
43
|
};
|
|
43
44
|
}
|
|
44
45
|
// 3. Execute Plan (includes conversion if requested)
|
|
45
|
-
log(`Executing ${
|
|
46
|
-
|
|
47
|
-
const cleaningResult = await this.cleaner.clean(filePath,
|
|
46
|
+
log(`Executing ${rules.length} operations (Format: ${outputFormat})...`);
|
|
47
|
+
rules.forEach(op => console.error(` - ${op.type}: ${op.reason}`));
|
|
48
|
+
const cleaningResult = await this.cleaner.clean(filePath, rules, outputFormat);
|
|
48
49
|
if (cleaningResult.success) {
|
|
49
50
|
log(`Cleaning complete. Output: ${cleaningResult.output_path}`);
|
|
50
51
|
}
|
|
@@ -110,6 +110,10 @@ export class CleaningPlanner {
|
|
|
110
110
|
}
|
|
111
111
|
return plan;
|
|
112
112
|
}
|
|
113
|
+
async generateRules(datasetId, report, ruleSet, targetInfo) {
|
|
114
|
+
const plan = await this.generatePlan(datasetId, report, ruleSet, targetInfo);
|
|
115
|
+
return plan.operations;
|
|
116
|
+
}
|
|
113
117
|
shouldFixType(col) {
|
|
114
118
|
if (col.inferred_type && col.inferred_type.includes("Numeric") && (col.type.includes("String") || col.type.includes("Utf8"))) {
|
|
115
119
|
return true;
|
package/build/index.js
CHANGED
|
@@ -469,11 +469,20 @@ async function handlePrepareJob(jobId, query, requirements) {
|
|
|
469
469
|
*/
|
|
470
470
|
async function handleCleanJob(jobId, datasetId, ops) {
|
|
471
471
|
const update = (updates) => jobManager.updateJob(jobId, updates);
|
|
472
|
-
|
|
472
|
+
const safeId = datasetId.replace(/\//g, "_");
|
|
473
|
+
const parquetPath = path.join(dataRoot, "data", "raw", `${safeId}.parquet`);
|
|
474
|
+
const csvPath = path.join(dataRoot, "data", "raw", `${safeId}.csv`);
|
|
475
|
+
let filePath = parquetPath;
|
|
476
|
+
if (!fs.existsSync(filePath)) {
|
|
477
|
+
filePath = csvPath;
|
|
478
|
+
}
|
|
473
479
|
if (datasetId === "demo" || !fs.existsSync(filePath)) {
|
|
474
|
-
const
|
|
475
|
-
|
|
476
|
-
|
|
480
|
+
const demoParquetPath = path.join(dataRoot, "e2e_demo_output", "raw_data.parquet");
|
|
481
|
+
const demoCsvPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
|
|
482
|
+
if (fs.existsSync(demoParquetPath))
|
|
483
|
+
filePath = demoParquetPath;
|
|
484
|
+
else if (fs.existsSync(demoCsvPath))
|
|
485
|
+
filePath = demoCsvPath;
|
|
477
486
|
else
|
|
478
487
|
throw new Error(`Data file not found for ${datasetId}`);
|
|
479
488
|
}
|
|
@@ -714,7 +723,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
714
723
|
},
|
|
715
724
|
{
|
|
716
725
|
name: "export_dataset",
|
|
717
|
-
description: "Export a dataset to a local directory. Use format='
|
|
726
|
+
description: "Export a dataset to a local directory. Use format='parquet' (default) for efficient analytics and broad interoperability. Add fast=true to skip quality/cleaning steps.",
|
|
718
727
|
inputSchema: {
|
|
719
728
|
type: "object",
|
|
720
729
|
properties: {
|
|
@@ -729,7 +738,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
729
738
|
format: {
|
|
730
739
|
type: "string",
|
|
731
740
|
enum: ["feather", "parquet", "csv", "jsonl", "arrow"],
|
|
732
|
-
description: "Output format.
|
|
741
|
+
description: "Output format. parquet (default, analytics-friendly), feather (fast local IO), csv (human-readable).",
|
|
733
742
|
},
|
|
734
743
|
compression: {
|
|
735
744
|
type: "string",
|
|
@@ -800,7 +809,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
800
809
|
output_format: {
|
|
801
810
|
type: "string",
|
|
802
811
|
enum: ["feather", "parquet", "csv", "jsonl", "arrow"],
|
|
803
|
-
description: "Output format (default:
|
|
812
|
+
description: "Output format (default: parquet).",
|
|
804
813
|
},
|
|
805
814
|
compression: {
|
|
806
815
|
type: "string",
|
|
@@ -1144,12 +1153,19 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1144
1153
|
}
|
|
1145
1154
|
case "analyze_quality": {
|
|
1146
1155
|
const datasetId = String(request.params.arguments?.dataset_id);
|
|
1147
|
-
|
|
1156
|
+
const safeId = datasetId.replace(/\//g, "_");
|
|
1157
|
+
const parquetPath = path.join(dataRoot, "data", "raw", `${safeId}.parquet`);
|
|
1158
|
+
const csvPath = path.join(dataRoot, "data", "raw", `${safeId}.csv`);
|
|
1159
|
+
let filePath = fs.existsSync(parquetPath) ? parquetPath : csvPath;
|
|
1148
1160
|
// Demo Fallback for easy testing
|
|
1149
1161
|
if (datasetId === "demo" || !fs.existsSync(filePath)) {
|
|
1150
|
-
const
|
|
1151
|
-
|
|
1152
|
-
|
|
1162
|
+
const demoParquetPath = path.join(dataRoot, "e2e_demo_output", "raw_data.parquet");
|
|
1163
|
+
const demoCsvPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
|
|
1164
|
+
if (fs.existsSync(demoParquetPath)) {
|
|
1165
|
+
filePath = demoParquetPath;
|
|
1166
|
+
}
|
|
1167
|
+
else if (fs.existsSync(demoCsvPath)) {
|
|
1168
|
+
filePath = demoCsvPath;
|
|
1153
1169
|
}
|
|
1154
1170
|
else if (datasetId !== "demo") {
|
|
1155
1171
|
return {
|
|
@@ -1165,11 +1181,18 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1165
1181
|
}
|
|
1166
1182
|
case "preview_cleaning": {
|
|
1167
1183
|
const datasetId = String(request.params.arguments?.dataset_id);
|
|
1168
|
-
|
|
1184
|
+
const safeId = datasetId.replace(/\//g, "_");
|
|
1185
|
+
const parquetPath = path.join(dataRoot, "data", "raw", `${safeId}.parquet`);
|
|
1186
|
+
const csvPath = path.join(dataRoot, "data", "raw", `${safeId}.csv`);
|
|
1187
|
+
let filePath = fs.existsSync(parquetPath) ? parquetPath : csvPath;
|
|
1169
1188
|
if (datasetId === "demo" || !fs.existsSync(filePath)) {
|
|
1170
|
-
const
|
|
1171
|
-
|
|
1172
|
-
|
|
1189
|
+
const demoParquetPath = path.join(dataRoot, "e2e_demo_output", "raw_data.parquet");
|
|
1190
|
+
const demoCsvPath = path.join(dataRoot, "e2e_demo_output", "raw_data.csv");
|
|
1191
|
+
if (fs.existsSync(demoParquetPath)) {
|
|
1192
|
+
filePath = demoParquetPath;
|
|
1193
|
+
}
|
|
1194
|
+
else if (fs.existsSync(demoCsvPath)) {
|
|
1195
|
+
filePath = demoCsvPath;
|
|
1173
1196
|
}
|
|
1174
1197
|
else {
|
|
1175
1198
|
throw new McpError(ErrorCode.InvalidParams, `Local data file not found for ${datasetId}. Please run prepare_dataset first.`);
|
|
@@ -1291,7 +1314,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1291
1314
|
// If NOT fast mode, run quality/cleaning pipeline first (only for csv/parquet compat)
|
|
1292
1315
|
if (!fastMode) {
|
|
1293
1316
|
const currentExt = path.extname(sourcePath).substring(1).toLowerCase();
|
|
1294
|
-
const pipelineFmt = (requestedFormat === "csv" || requestedFormat === "parquet") ? requestedFormat : "
|
|
1317
|
+
const pipelineFmt = (requestedFormat === "csv" || requestedFormat === "parquet") ? requestedFormat : "parquet";
|
|
1295
1318
|
if (currentExt !== pipelineFmt) {
|
|
1296
1319
|
console.error(`[Export] Running quality/cleaning pipeline (use fast=true to skip)...`);
|
|
1297
1320
|
try {
|
|
@@ -152,7 +152,7 @@ export class DataIngestor {
|
|
|
152
152
|
/**
|
|
153
153
|
* Generates a safe local filename for a dataset ID
|
|
154
154
|
*/
|
|
155
|
-
getTargetPath(datasetId, extension = "
|
|
155
|
+
getTargetPath(datasetId, extension = "parquet") {
|
|
156
156
|
const safeId = datasetId.replace(/\//g, "_").replace(/:/g, "_");
|
|
157
157
|
return path.join(this.rawDataDir, `${safeId}.${extension}`);
|
|
158
158
|
}
|
package/package.json
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vespermcp/mcp-server",
|
|
3
|
-
"version": "1.2.
|
|
3
|
+
"version": "1.2.12",
|
|
4
4
|
"description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "build/index.js",
|
|
7
7
|
"bin": {
|
|
8
8
|
"vespermcp": "./build/index.js",
|
|
9
|
-
"@vespermcp/mcp-server": "./build/index.js"
|
|
9
|
+
"@vespermcp/mcp-server": "./build/index.js",
|
|
10
|
+
"vesper-wizard": "src/scripts/wizard.js"
|
|
10
11
|
},
|
|
11
12
|
"files": [
|
|
12
13
|
"build/**/*",
|
|
@@ -72,6 +73,7 @@
|
|
|
72
73
|
"ajv": "^8.17.1",
|
|
73
74
|
"ajv-formats": "^3.0.1",
|
|
74
75
|
"better-sqlite3": "^12.6.0",
|
|
76
|
+
"inquirer": "^13.3.0",
|
|
75
77
|
"lodash": "^4.17.21",
|
|
76
78
|
"uuid": "^13.0.0",
|
|
77
79
|
"zod": "^4.3.5",
|
|
@@ -91,5 +93,6 @@
|
|
|
91
93
|
"tsx": "^4.21.0",
|
|
92
94
|
"typescript": "^5.9.3",
|
|
93
95
|
"vitest": "^4.0.17"
|
|
94
|
-
}
|
|
96
|
+
},
|
|
97
|
+
"packageManager": "pnpm@10.18.1+sha512.77a884a165cbba2d8d1c19e3b4880eee6d2fcabd0d879121e282196b80042351d5eb3ca0935fa599da1dc51265cc68816ad2bddd2a2de5ea9fdf92adbec7cd34"
|
|
95
98
|
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// Vesper Wizard CLI: Interactive setup for fast configuration
|
|
4
|
+
// The package is declared as `"type": "module"`, so this file is loaded as an
// ES module: `require` is not defined here and ESM `import` must be used.
import fs from 'node:fs';
import path from 'node:path';

import inquirer from 'inquirer';

/** Name of the config file written into the current working directory. */
const CONFIG_FILE_NAME = 'vesper-mcp-config.json';

/**
 * Run the interactive Vesper setup wizard.
 *
 * Asks, in order, for the project name, the data directory, the default
 * export format and (optionally) a Kaggle API token, then writes the answers
 * as pretty-printed JSON to `vesper-mcp-config.json` in the current working
 * directory.
 *
 * @returns {Promise<void>} Resolves once the config file has been written.
 * @throws {Error} If a prompt is aborted or the config file cannot be written.
 */
async function main() {
  console.log('\n🧙 Welcome to the Vesper Wizard!\n');

  // Steps 1-3: project basics, data directory, default export format.
  const { projectName, dataDir, exportFormat } = await inquirer.prompt([
    {
      type: 'input',
      name: 'projectName',
      message: 'Project name:',
      default: path.basename(process.cwd()),
    },
    {
      type: 'input',
      name: 'dataDir',
      message: 'Path to your data directory:',
      default: './datasets',
    },
    {
      type: 'list',
      name: 'exportFormat',
      message: 'Default export format:',
      choices: ['parquet', 'csv', 'feather'],
      default: 'parquet',
    },
  ]);

  // Step 4: optional tokens/credentials.
  const { addTokens } = await inquirer.prompt([
    {
      type: 'confirm',
      name: 'addTokens',
      message: 'Would you like to add API tokens or credentials now?',
      default: true,
    },
  ]);

  const tokens = {};
  if (addTokens) {
    const { kaggleToken } = await inquirer.prompt([
      {
        type: 'input',
        name: 'kaggleToken',
        message: 'Kaggle API token (leave blank to skip):',
      },
    ]);
    // Treat whitespace-only input as "skipped" so no blank credential is stored.
    const trimmedToken = (kaggleToken ?? '').trim();
    if (trimmedToken !== '') {
      tokens.kaggle = trimmedToken;
    }
    // Add more tokens as needed
  }

  // Step 5: write the config file.
  const config = {
    project: projectName,
    dataDir,
    exportFormat,
    tokens,
  };
  const configPath = path.join(process.cwd(), CONFIG_FILE_NAME);
  // The file may contain API tokens, so create it readable by the owner only.
  // (`mode` applies only when the file is newly created.)
  fs.writeFileSync(configPath, JSON.stringify(config, null, 2), { mode: 0o600 });

  console.log(`\n✅ Configuration saved to ${configPath}`);
  console.log('\n🚀 Vesper is ready to use!\n');
}

// Never leave the promise floating: report the failure and exit non-zero
// instead of surfacing an unhandled rejection.
main().catch((err) => {
  console.error(`\nVesper Wizard failed: ${err?.message ?? err}`);
  process.exitCode = 1;
});
|