@vespermcp/mcp-server 1.2.28 → 1.2.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +48 -0
- package/build/python/cleaner.py +2 -0
- package/package.json +1 -1
- package/src/python/cleaner.py +2 -0
package/build/index.js
CHANGED
|
@@ -339,6 +339,36 @@ function ensureLocalPipelineSource(sourcePath, datasetId, targetDir) {
|
|
|
339
339
|
}
|
|
340
340
|
return stagedPath;
|
|
341
341
|
}
|
|
342
|
+
function cleanupIntermediateArtifacts(artifactPaths, finalOutputPath) {
|
|
343
|
+
const finalResolved = path.resolve(finalOutputPath);
|
|
344
|
+
const finalLineage = `${finalResolved}.lineage.json`;
|
|
345
|
+
for (const candidate of artifactPaths) {
|
|
346
|
+
if (!candidate)
|
|
347
|
+
continue;
|
|
348
|
+
const resolved = path.resolve(candidate);
|
|
349
|
+
if (resolved === finalResolved || resolved === finalLineage)
|
|
350
|
+
continue;
|
|
351
|
+
try {
|
|
352
|
+
if (fs.existsSync(resolved) && fs.statSync(resolved).isFile()) {
|
|
353
|
+
fs.unlinkSync(resolved);
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
catch {
|
|
357
|
+
// Best-effort cleanup.
|
|
358
|
+
}
|
|
359
|
+
const sidecar = `${resolved}.lineage.json`;
|
|
360
|
+
if (sidecar === finalLineage)
|
|
361
|
+
continue;
|
|
362
|
+
try {
|
|
363
|
+
if (fs.existsSync(sidecar) && fs.statSync(sidecar).isFile()) {
|
|
364
|
+
fs.unlinkSync(sidecar);
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
catch {
|
|
368
|
+
// Best-effort cleanup.
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
}
|
|
342
372
|
function resolveDatasetLocalPath(datasetIdOrPath, preferredDirs = []) {
|
|
343
373
|
if (fs.existsSync(datasetIdOrPath)) {
|
|
344
374
|
return ensureExportableLocalPath(datasetIdOrPath);
|
|
@@ -2898,6 +2928,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
2898
2928
|
case "export_dataset": {
|
|
2899
2929
|
const datasetId = String(request.params.arguments?.dataset_id);
|
|
2900
2930
|
const isDirectLocalInput = isDirectLocalDatasetReference(datasetId);
|
|
2931
|
+
const intermediateArtifacts = new Set();
|
|
2901
2932
|
const requestedTargetDir = request.params.arguments?.target_dir
|
|
2902
2933
|
? String(request.params.arguments?.target_dir).trim()
|
|
2903
2934
|
: request.params.arguments?.output_dir
|
|
@@ -2967,9 +2998,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
2967
2998
|
else if (currentExt !== pipelineFmt) {
|
|
2968
2999
|
console.error(`[Export] Running quality/cleaning pipeline (use fast=true to skip)...`);
|
|
2969
3000
|
try {
|
|
3001
|
+
const beforeStagingPath = sourcePath;
|
|
2970
3002
|
sourcePath = ensureLocalPipelineSource(sourcePath, datasetId, targetDir);
|
|
3003
|
+
if (path.resolve(beforeStagingPath) !== path.resolve(sourcePath)) {
|
|
3004
|
+
intermediateArtifacts.add(sourcePath);
|
|
3005
|
+
}
|
|
2971
3006
|
const pipelineResult = await pipelineExecutor.runPipeline(datasetId, sourcePath, pipelineFmt);
|
|
2972
3007
|
if (pipelineResult.final_output_path) {
|
|
3008
|
+
if (path.resolve(pipelineResult.final_output_path) !== path.resolve(sourcePath)) {
|
|
3009
|
+
intermediateArtifacts.add(pipelineResult.final_output_path);
|
|
3010
|
+
}
|
|
2973
3011
|
sourcePath = pipelineResult.final_output_path;
|
|
2974
3012
|
try {
|
|
2975
3013
|
// Update registry to point to pipeline's final output
|
|
@@ -3058,6 +3096,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
3058
3096
|
msg += ` Python: \`pd.read_parquet('${result.output_path}').head()\`\n`;
|
|
3059
3097
|
msg += ` DuckDB: \`SELECT * FROM '${result.output_path}' LIMIT 50;\`\n`;
|
|
3060
3098
|
}
|
|
3099
|
+
cleanupIntermediateArtifacts(intermediateArtifacts, result.output_path);
|
|
3061
3100
|
return { content: [{ type: "text", text: msg }] };
|
|
3062
3101
|
}
|
|
3063
3102
|
catch (error) {
|
|
@@ -3933,6 +3972,7 @@ async function runExportCli(args) {
|
|
|
3933
3972
|
const fastMode = args.includes("--fast");
|
|
3934
3973
|
const preview = args.includes("--preview");
|
|
3935
3974
|
const isDirectLocalInput = isDirectLocalDatasetReference(datasetId);
|
|
3975
|
+
const intermediateArtifacts = new Set();
|
|
3936
3976
|
const resolvedTargetDir = path.resolve(targetDir || process.cwd());
|
|
3937
3977
|
let sourcePath = resolveDatasetLocalPath(datasetId, [resolvedTargetDir, process.cwd()]);
|
|
3938
3978
|
if (!sourcePath) {
|
|
@@ -3954,9 +3994,16 @@ async function runExportCli(args) {
|
|
|
3954
3994
|
const pipelineCompatibleInput = currentExt === "csv" || currentExt === "parquet";
|
|
3955
3995
|
if (pipelineCompatibleInput && currentExt !== pipelineFmt) {
|
|
3956
3996
|
try {
|
|
3997
|
+
const beforeStagingPath = sourcePath;
|
|
3957
3998
|
sourcePath = ensureLocalPipelineSource(sourcePath, datasetId, resolvedTargetDir);
|
|
3999
|
+
if (path.resolve(beforeStagingPath) !== path.resolve(sourcePath)) {
|
|
4000
|
+
intermediateArtifacts.add(sourcePath);
|
|
4001
|
+
}
|
|
3958
4002
|
const pipelineResult = await pipelineExecutor.runPipeline(datasetId, sourcePath, pipelineFmt);
|
|
3959
4003
|
if (pipelineResult.final_output_path) {
|
|
4004
|
+
if (path.resolve(pipelineResult.final_output_path) !== path.resolve(sourcePath)) {
|
|
4005
|
+
intermediateArtifacts.add(pipelineResult.final_output_path);
|
|
4006
|
+
}
|
|
3960
4007
|
sourcePath = pipelineResult.final_output_path;
|
|
3961
4008
|
if (!isDirectLocalInput && shouldTrackExportPath(sourcePath)) {
|
|
3962
4009
|
upsertRegistry(datasetId, sourcePath, "completed");
|
|
@@ -3987,6 +4034,7 @@ async function runExportCli(args) {
|
|
|
3987
4034
|
console.error(`[Export] Resolved output directory: ${outDir}`);
|
|
3988
4035
|
console.error(`[Export] Output file: ${outputFile}`);
|
|
3989
4036
|
const result = await dataExporter.export(sourcePath, outputFile, requestedFormat, exportOpts);
|
|
4037
|
+
cleanupIntermediateArtifacts(intermediateArtifacts, result.output_path);
|
|
3990
4038
|
console.log(`Export complete: ${result.output_path}`);
|
|
3991
4039
|
console.log(`Format: ${result.format}${result.compression ? ` (${result.compression})` : ""}`);
|
|
3992
4040
|
if (result.rows !== undefined)
|
package/build/python/cleaner.py
CHANGED
|
@@ -182,6 +182,8 @@ def main():
|
|
|
182
182
|
output_format = "parquet"
|
|
183
183
|
|
|
184
184
|
base_name = file_path.rsplit(".", 1)[0]
|
|
185
|
+
if base_name.endswith("_cleaned"):
|
|
186
|
+
base_name = base_name[:-8]
|
|
185
187
|
if output_format == "csv":
|
|
186
188
|
output_path = f"{base_name}_cleaned.csv"
|
|
187
189
|
# Stringify ANY column that might not be CSV-safe (List, Struct, Object, etc.)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vespermcp/mcp-server",
|
|
3
|
-
"version": "1.2.28",
|
|
3
|
+
"version": "1.2.29",
|
|
4
4
|
"description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "build/index.js",
|
package/src/python/cleaner.py
CHANGED
|
@@ -182,6 +182,8 @@ def main():
|
|
|
182
182
|
output_format = "parquet"
|
|
183
183
|
|
|
184
184
|
base_name = file_path.rsplit(".", 1)[0]
|
|
185
|
+
if base_name.endswith("_cleaned"):
|
|
186
|
+
base_name = base_name[:-8]
|
|
185
187
|
if output_format == "csv":
|
|
186
188
|
output_path = f"{base_name}_cleaned.csv"
|
|
187
189
|
# Stringify ANY column that might not be CSV-safe (List, Struct, Object, etc.)
|