@vespermcp/mcp-server 1.2.6 → 1.2.8

This diff shows the changes between publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
package/build/index.js CHANGED
@@ -1019,9 +1019,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1019
1019
  if (source === "kaggle") {
1020
1020
  requiredModules.push({ module: "kaggle", packageName: "kaggle" });
1021
1021
  }
1022
- if (outputFormat === "webdataset") {
1023
- requiredModules.push({ module: "webdataset", packageName: "webdataset" });
1024
- }
1025
1022
  try {
1026
1023
  await ensurePythonModules(requiredModules);
1027
1024
  }
@@ -1588,7 +1585,7 @@ async function main() {
1588
1585
  const isFuse = args.includes("fuse");
1589
1586
  const isDiscover = args.includes("discover");
1590
1587
  const isDownload = args.includes("download");
1591
- const isConfig = args.includes("config");
1588
+ const isConfig = args.includes("config") || args.includes("configure");
1592
1589
  const isSetup = args.includes("--setup") || args.includes("setup");
1593
1590
  const isSilent = args.includes("--silent");
1594
1591
  if (process.stdin.isTTY && !isSilent) {
@@ -1630,7 +1627,9 @@ async function runConfigCli(args) {
1630
1627
  const isKaggle = args.includes("kaggle");
1631
1628
  if (!(isKeys || isKaggle) || args.includes("--help")) {
1632
1629
  console.log("Usage: vespermcp config keys");
1630
+ console.log(" vespermcp configure keys");
1633
1631
  console.log(" vespermcp config kaggle --username <name> --key <api_key>");
1632
+ console.log(" vespermcp configure kaggle --username <name> --key <api_key>");
1634
1633
  console.log("Core Vesper tools work with zero API keys.");
1635
1634
  return;
1636
1635
  }
@@ -127,12 +127,6 @@ class AssetDownloader:
127
127
  if not urls:
128
128
  raise ValueError("urls are required for source=url")
129
129
 
130
- if output_format == "webdataset" and wds is None:
131
- raise RuntimeError(
132
- "webdataset package is required for webdataset output. "
133
- "Install with: pip install webdataset"
134
- )
135
-
136
130
  # --- Now safe to create directories ---
137
131
  dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
138
132
  images_dir = dataset_dir / "images"
@@ -380,24 +374,59 @@ class AssetDownloader:
380
374
  raise ValueError(f"Unsupported image value type: {type(value)}")
381
375
 
382
376
  async def _write_webdataset(self, dataset_dir: Path, images_dir: Path, metadata_file: Path) -> None:
383
- if wds is None:
384
- raise RuntimeError("webdataset package is required for webdataset output. Install with: pip install webdataset")
377
+ """Write a webdataset-compatible tar archive.
378
+
379
+ Uses Python's built-in tarfile module instead of wds.ShardWriter to
380
+ avoid the gopen() handler issue on Windows (backslash paths).
381
+ The resulting .tar files are fully compatible with webdataset readers.
382
+ """
383
+ import io
384
+ import tarfile as _tarfile
385
+
386
+ max_per_shard = 5000
387
+ shard_idx = 0
388
+ count_in_shard = 0
389
+ current_tar: _tarfile.TarFile | None = None
390
+
391
+ def _open_shard() -> _tarfile.TarFile:
392
+ nonlocal shard_idx
393
+ shard_path = dataset_dir / f"shard-{shard_idx:06d}.tar"
394
+ shard_idx += 1
395
+ return _tarfile.open(str(shard_path), "w")
385
396
 
386
- shard_pattern = str(dataset_dir / "shard-%06d.tar")
387
- with metadata_file.open("r", encoding="utf-8") as mf, wds.ShardWriter(shard_pattern, maxcount=5000) as sink:
388
- for line in mf:
389
- row = json.loads(line)
390
- image_path = Path(row["image_path"])
391
- if not image_path.exists():
392
- continue
393
- key = image_path.stem
394
- ext = image_path.suffix.lstrip(".") or "jpg"
395
- sample = {
396
- "__key__": key,
397
- ext: image_path.read_bytes(),
398
- "json": json.dumps(row, ensure_ascii=False).encode("utf-8"),
399
- }
400
- sink.write(sample)
397
+ try:
398
+ current_tar = _open_shard()
399
+
400
+ with metadata_file.open("r", encoding="utf-8") as mf:
401
+ for line in mf:
402
+ row = json.loads(line)
403
+ image_path = Path(row["image_path"])
404
+ if not image_path.exists():
405
+ continue
406
+
407
+ key = image_path.stem
408
+ ext = image_path.suffix.lstrip(".") or "jpg"
409
+
410
+ # Add image file
411
+ img_data = image_path.read_bytes()
412
+ img_info = _tarfile.TarInfo(name=f"{key}.{ext}")
413
+ img_info.size = len(img_data)
414
+ current_tar.addfile(img_info, io.BytesIO(img_data))
415
+
416
+ # Add JSON metadata sidecar
417
+ json_data = json.dumps(row, ensure_ascii=False).encode("utf-8")
418
+ json_info = _tarfile.TarInfo(name=f"{key}.json")
419
+ json_info.size = len(json_data)
420
+ current_tar.addfile(json_info, io.BytesIO(json_data))
421
+
422
+ count_in_shard += 1
423
+ if count_in_shard >= max_per_shard:
424
+ current_tar.close()
425
+ current_tar = _open_shard()
426
+ count_in_shard = 0
427
+ finally:
428
+ if current_tar is not None:
429
+ current_tar.close()
401
430
 
402
431
  async def _write_parquet(self, dataset_dir: Path, metadata_file: Path) -> None:
403
432
  try:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vespermcp/mcp-server",
3
- "version": "1.2.6",
3
+ "version": "1.2.8",
4
4
  "description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
5
5
  "type": "module",
6
6
  "main": "build/index.js",
@@ -127,12 +127,6 @@ class AssetDownloader:
127
127
  if not urls:
128
128
  raise ValueError("urls are required for source=url")
129
129
 
130
- if output_format == "webdataset" and wds is None:
131
- raise RuntimeError(
132
- "webdataset package is required for webdataset output. "
133
- "Install with: pip install webdataset"
134
- )
135
-
136
130
  # --- Now safe to create directories ---
137
131
  dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
138
132
  images_dir = dataset_dir / "images"
@@ -380,24 +374,59 @@ class AssetDownloader:
380
374
  raise ValueError(f"Unsupported image value type: {type(value)}")
381
375
 
382
376
  async def _write_webdataset(self, dataset_dir: Path, images_dir: Path, metadata_file: Path) -> None:
383
- if wds is None:
384
- raise RuntimeError("webdataset package is required for webdataset output. Install with: pip install webdataset")
377
+ """Write a webdataset-compatible tar archive.
378
+
379
+ Uses Python's built-in tarfile module instead of wds.ShardWriter to
380
+ avoid the gopen() handler issue on Windows (backslash paths).
381
+ The resulting .tar files are fully compatible with webdataset readers.
382
+ """
383
+ import io
384
+ import tarfile as _tarfile
385
+
386
+ max_per_shard = 5000
387
+ shard_idx = 0
388
+ count_in_shard = 0
389
+ current_tar: _tarfile.TarFile | None = None
390
+
391
+ def _open_shard() -> _tarfile.TarFile:
392
+ nonlocal shard_idx
393
+ shard_path = dataset_dir / f"shard-{shard_idx:06d}.tar"
394
+ shard_idx += 1
395
+ return _tarfile.open(str(shard_path), "w")
385
396
 
386
- shard_pattern = str(dataset_dir / "shard-%06d.tar")
387
- with metadata_file.open("r", encoding="utf-8") as mf, wds.ShardWriter(shard_pattern, maxcount=5000) as sink:
388
- for line in mf:
389
- row = json.loads(line)
390
- image_path = Path(row["image_path"])
391
- if not image_path.exists():
392
- continue
393
- key = image_path.stem
394
- ext = image_path.suffix.lstrip(".") or "jpg"
395
- sample = {
396
- "__key__": key,
397
- ext: image_path.read_bytes(),
398
- "json": json.dumps(row, ensure_ascii=False).encode("utf-8"),
399
- }
400
- sink.write(sample)
397
+ try:
398
+ current_tar = _open_shard()
399
+
400
+ with metadata_file.open("r", encoding="utf-8") as mf:
401
+ for line in mf:
402
+ row = json.loads(line)
403
+ image_path = Path(row["image_path"])
404
+ if not image_path.exists():
405
+ continue
406
+
407
+ key = image_path.stem
408
+ ext = image_path.suffix.lstrip(".") or "jpg"
409
+
410
+ # Add image file
411
+ img_data = image_path.read_bytes()
412
+ img_info = _tarfile.TarInfo(name=f"{key}.{ext}")
413
+ img_info.size = len(img_data)
414
+ current_tar.addfile(img_info, io.BytesIO(img_data))
415
+
416
+ # Add JSON metadata sidecar
417
+ json_data = json.dumps(row, ensure_ascii=False).encode("utf-8")
418
+ json_info = _tarfile.TarInfo(name=f"{key}.json")
419
+ json_info.size = len(json_data)
420
+ current_tar.addfile(json_info, io.BytesIO(json_data))
421
+
422
+ count_in_shard += 1
423
+ if count_in_shard >= max_per_shard:
424
+ current_tar.close()
425
+ current_tar = _open_shard()
426
+ count_in_shard = 0
427
+ finally:
428
+ if current_tar is not None:
429
+ current_tar.close()
401
430
 
402
431
  async def _write_parquet(self, dataset_dir: Path, metadata_file: Path) -> None:
403
432
  try: