@vespermcp/mcp-server 1.2.28 → 1.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -339,6 +339,36 @@ function ensureLocalPipelineSource(sourcePath, datasetId, targetDir) {
339
339
  }
340
340
  return stagedPath;
341
341
  }
342
/**
 * Best-effort removal of intermediate staging/pipeline artifacts after a
 * successful export, while protecting the final output file and its
 * `.lineage.json` sidecar from deletion.
 *
 * @param {Iterable<string|null|undefined>} artifactPaths - Candidate paths to
 *   remove; falsy entries are skipped. Each entry's `.lineage.json` sidecar is
 *   also removed when present.
 * @param {string} finalOutputPath - The export's final output file; neither it
 *   nor its lineage sidecar is ever deleted.
 * @returns {void} All filesystem errors are swallowed (best-effort cleanup).
 */
function cleanupIntermediateArtifacts(artifactPaths, finalOutputPath) {
  const finalResolved = path.resolve(finalOutputPath);
  const finalLineage = `${finalResolved}.lineage.json`;
  // Deletes `target` only if it is an existing regular file. A single
  // statSync inside the try avoids the existsSync/statSync race; ENOENT and
  // permission errors fall into the catch, matching best-effort semantics.
  const removeFileQuietly = (target) => {
    try {
      if (fs.statSync(target).isFile()) {
        fs.unlinkSync(target);
      }
    } catch {
      // Best-effort cleanup.
    }
  };
  for (const candidate of artifactPaths) {
    if (!candidate) {
      continue;
    }
    const resolved = path.resolve(candidate);
    // Never touch the final output or its lineage sidecar.
    if (resolved === finalResolved || resolved === finalLineage) {
      continue;
    }
    removeFileQuietly(resolved);
    const sidecar = `${resolved}.lineage.json`;
    if (sidecar !== finalLineage) {
      removeFileQuietly(sidecar);
    }
  }
}
342
372
  function resolveDatasetLocalPath(datasetIdOrPath, preferredDirs = []) {
343
373
  if (fs.existsSync(datasetIdOrPath)) {
344
374
  return ensureExportableLocalPath(datasetIdOrPath);
@@ -2898,6 +2928,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
2898
2928
  case "export_dataset": {
2899
2929
  const datasetId = String(request.params.arguments?.dataset_id);
2900
2930
  const isDirectLocalInput = isDirectLocalDatasetReference(datasetId);
2931
+ const intermediateArtifacts = new Set();
2901
2932
  const requestedTargetDir = request.params.arguments?.target_dir
2902
2933
  ? String(request.params.arguments?.target_dir).trim()
2903
2934
  : request.params.arguments?.output_dir
@@ -2967,9 +2998,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
2967
2998
  else if (currentExt !== pipelineFmt) {
2968
2999
  console.error(`[Export] Running quality/cleaning pipeline (use fast=true to skip)...`);
2969
3000
  try {
3001
+ const beforeStagingPath = sourcePath;
2970
3002
  sourcePath = ensureLocalPipelineSource(sourcePath, datasetId, targetDir);
3003
+ if (path.resolve(beforeStagingPath) !== path.resolve(sourcePath)) {
3004
+ intermediateArtifacts.add(sourcePath);
3005
+ }
2971
3006
  const pipelineResult = await pipelineExecutor.runPipeline(datasetId, sourcePath, pipelineFmt);
2972
3007
  if (pipelineResult.final_output_path) {
3008
+ if (path.resolve(pipelineResult.final_output_path) !== path.resolve(sourcePath)) {
3009
+ intermediateArtifacts.add(pipelineResult.final_output_path);
3010
+ }
2973
3011
  sourcePath = pipelineResult.final_output_path;
2974
3012
  try {
2975
3013
  // Update registry to point to pipeline's final output
@@ -3058,6 +3096,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
3058
3096
  msg += ` Python: \`pd.read_parquet('${result.output_path}').head()\`\n`;
3059
3097
  msg += ` DuckDB: \`SELECT * FROM '${result.output_path}' LIMIT 50;\`\n`;
3060
3098
  }
3099
+ cleanupIntermediateArtifacts(intermediateArtifacts, result.output_path);
3061
3100
  return { content: [{ type: "text", text: msg }] };
3062
3101
  }
3063
3102
  catch (error) {
@@ -3933,6 +3972,7 @@ async function runExportCli(args) {
3933
3972
  const fastMode = args.includes("--fast");
3934
3973
  const preview = args.includes("--preview");
3935
3974
  const isDirectLocalInput = isDirectLocalDatasetReference(datasetId);
3975
+ const intermediateArtifacts = new Set();
3936
3976
  const resolvedTargetDir = path.resolve(targetDir || process.cwd());
3937
3977
  let sourcePath = resolveDatasetLocalPath(datasetId, [resolvedTargetDir, process.cwd()]);
3938
3978
  if (!sourcePath) {
@@ -3954,9 +3994,16 @@ async function runExportCli(args) {
3954
3994
  const pipelineCompatibleInput = currentExt === "csv" || currentExt === "parquet";
3955
3995
  if (pipelineCompatibleInput && currentExt !== pipelineFmt) {
3956
3996
  try {
3997
+ const beforeStagingPath = sourcePath;
3957
3998
  sourcePath = ensureLocalPipelineSource(sourcePath, datasetId, resolvedTargetDir);
3999
+ if (path.resolve(beforeStagingPath) !== path.resolve(sourcePath)) {
4000
+ intermediateArtifacts.add(sourcePath);
4001
+ }
3958
4002
  const pipelineResult = await pipelineExecutor.runPipeline(datasetId, sourcePath, pipelineFmt);
3959
4003
  if (pipelineResult.final_output_path) {
4004
+ if (path.resolve(pipelineResult.final_output_path) !== path.resolve(sourcePath)) {
4005
+ intermediateArtifacts.add(pipelineResult.final_output_path);
4006
+ }
3960
4007
  sourcePath = pipelineResult.final_output_path;
3961
4008
  if (!isDirectLocalInput && shouldTrackExportPath(sourcePath)) {
3962
4009
  upsertRegistry(datasetId, sourcePath, "completed");
@@ -3987,6 +4034,7 @@ async function runExportCli(args) {
3987
4034
  console.error(`[Export] Resolved output directory: ${outDir}`);
3988
4035
  console.error(`[Export] Output file: ${outputFile}`);
3989
4036
  const result = await dataExporter.export(sourcePath, outputFile, requestedFormat, exportOpts);
4037
+ cleanupIntermediateArtifacts(intermediateArtifacts, result.output_path);
3990
4038
  console.log(`Export complete: ${result.output_path}`);
3991
4039
  console.log(`Format: ${result.format}${result.compression ? ` (${result.compression})` : ""}`);
3992
4040
  if (result.rows !== undefined)
@@ -182,6 +182,8 @@ def main():
182
182
  output_format = "parquet"
183
183
 
184
184
  base_name = file_path.rsplit(".", 1)[0]
185
+ if base_name.endswith("_cleaned"):
186
+ base_name = base_name[:-8]
185
187
  if output_format == "csv":
186
188
  output_path = f"{base_name}_cleaned.csv"
187
189
  # Stringify ANY column that might not be CSV-safe (List, Struct, Object, etc.)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vespermcp/mcp-server",
3
- "version": "1.2.28",
3
+ "version": "1.2.29",
4
4
  "description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
5
5
  "type": "module",
6
6
  "main": "build/index.js",
@@ -182,6 +182,8 @@ def main():
182
182
  output_format = "parquet"
183
183
 
184
184
  base_name = file_path.rsplit(".", 1)[0]
185
+ if base_name.endswith("_cleaned"):
186
+ base_name = base_name[:-8]
185
187
  if output_format == "csv":
186
188
  output_path = f"{base_name}_cleaned.csv"
187
189
  # Stringify ANY column that might not be CSV-safe (List, Struct, Object, etc.)