@vespermcp/mcp-server 1.2.6 → 1.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js
CHANGED
|
@@ -1019,9 +1019,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1019
1019
|
if (source === "kaggle") {
|
|
1020
1020
|
requiredModules.push({ module: "kaggle", packageName: "kaggle" });
|
|
1021
1021
|
}
|
|
1022
|
-
if (outputFormat === "webdataset") {
|
|
1023
|
-
requiredModules.push({ module: "webdataset", packageName: "webdataset" });
|
|
1024
|
-
}
|
|
1025
1022
|
try {
|
|
1026
1023
|
await ensurePythonModules(requiredModules);
|
|
1027
1024
|
}
|
|
Binary file
|
|
@@ -127,12 +127,6 @@ class AssetDownloader:
|
|
|
127
127
|
if not urls:
|
|
128
128
|
raise ValueError("urls are required for source=url")
|
|
129
129
|
|
|
130
|
-
if output_format == "webdataset" and wds is None:
|
|
131
|
-
raise RuntimeError(
|
|
132
|
-
"webdataset package is required for webdataset output. "
|
|
133
|
-
"Install with: pip install webdataset"
|
|
134
|
-
)
|
|
135
|
-
|
|
136
130
|
# --- Now safe to create directories ---
|
|
137
131
|
dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
|
|
138
132
|
images_dir = dataset_dir / "images"
|
|
@@ -380,24 +374,59 @@ class AssetDownloader:
|
|
|
380
374
|
raise ValueError(f"Unsupported image value type: {type(value)}")
|
|
381
375
|
|
|
382
376
|
async def _write_webdataset(self, dataset_dir: Path, images_dir: Path, metadata_file: Path) -> None:
|
|
383
|
-
|
|
384
|
-
|
|
377
|
+
"""Write a webdataset-compatible tar archive.
|
|
378
|
+
|
|
379
|
+
Uses Python's built-in tarfile module instead of wds.ShardWriter to
|
|
380
|
+
avoid the gopen() handler issue on Windows (backslash paths).
|
|
381
|
+
The resulting .tar files are fully compatible with webdataset readers.
|
|
382
|
+
"""
|
|
383
|
+
import io
|
|
384
|
+
import tarfile as _tarfile
|
|
385
|
+
|
|
386
|
+
max_per_shard = 5000
|
|
387
|
+
shard_idx = 0
|
|
388
|
+
count_in_shard = 0
|
|
389
|
+
current_tar: _tarfile.TarFile | None = None
|
|
390
|
+
|
|
391
|
+
def _open_shard() -> _tarfile.TarFile:
|
|
392
|
+
nonlocal shard_idx
|
|
393
|
+
shard_path = dataset_dir / f"shard-{shard_idx:06d}.tar"
|
|
394
|
+
shard_idx += 1
|
|
395
|
+
return _tarfile.open(str(shard_path), "w")
|
|
385
396
|
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
ext
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
397
|
+
try:
|
|
398
|
+
current_tar = _open_shard()
|
|
399
|
+
|
|
400
|
+
with metadata_file.open("r", encoding="utf-8") as mf:
|
|
401
|
+
for line in mf:
|
|
402
|
+
row = json.loads(line)
|
|
403
|
+
image_path = Path(row["image_path"])
|
|
404
|
+
if not image_path.exists():
|
|
405
|
+
continue
|
|
406
|
+
|
|
407
|
+
key = image_path.stem
|
|
408
|
+
ext = image_path.suffix.lstrip(".") or "jpg"
|
|
409
|
+
|
|
410
|
+
# Add image file
|
|
411
|
+
img_data = image_path.read_bytes()
|
|
412
|
+
img_info = _tarfile.TarInfo(name=f"{key}.{ext}")
|
|
413
|
+
img_info.size = len(img_data)
|
|
414
|
+
current_tar.addfile(img_info, io.BytesIO(img_data))
|
|
415
|
+
|
|
416
|
+
# Add JSON metadata sidecar
|
|
417
|
+
json_data = json.dumps(row, ensure_ascii=False).encode("utf-8")
|
|
418
|
+
json_info = _tarfile.TarInfo(name=f"{key}.json")
|
|
419
|
+
json_info.size = len(json_data)
|
|
420
|
+
current_tar.addfile(json_info, io.BytesIO(json_data))
|
|
421
|
+
|
|
422
|
+
count_in_shard += 1
|
|
423
|
+
if count_in_shard >= max_per_shard:
|
|
424
|
+
current_tar.close()
|
|
425
|
+
current_tar = _open_shard()
|
|
426
|
+
count_in_shard = 0
|
|
427
|
+
finally:
|
|
428
|
+
if current_tar is not None:
|
|
429
|
+
current_tar.close()
|
|
401
430
|
|
|
402
431
|
async def _write_parquet(self, dataset_dir: Path, metadata_file: Path) -> None:
|
|
403
432
|
try:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vespermcp/mcp-server",
|
|
3
|
-
"version": "1.2.6",
|
|
3
|
+
"version": "1.2.7",
|
|
4
4
|
"description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "build/index.js",
|
|
@@ -127,12 +127,6 @@ class AssetDownloader:
|
|
|
127
127
|
if not urls:
|
|
128
128
|
raise ValueError("urls are required for source=url")
|
|
129
129
|
|
|
130
|
-
if output_format == "webdataset" and wds is None:
|
|
131
|
-
raise RuntimeError(
|
|
132
|
-
"webdataset package is required for webdataset output. "
|
|
133
|
-
"Install with: pip install webdataset"
|
|
134
|
-
)
|
|
135
|
-
|
|
136
130
|
# --- Now safe to create directories ---
|
|
137
131
|
dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
|
|
138
132
|
images_dir = dataset_dir / "images"
|
|
@@ -380,24 +374,59 @@ class AssetDownloader:
|
|
|
380
374
|
raise ValueError(f"Unsupported image value type: {type(value)}")
|
|
381
375
|
|
|
382
376
|
async def _write_webdataset(self, dataset_dir: Path, images_dir: Path, metadata_file: Path) -> None:
|
|
383
|
-
|
|
384
|
-
|
|
377
|
+
"""Write a webdataset-compatible tar archive.
|
|
378
|
+
|
|
379
|
+
Uses Python's built-in tarfile module instead of wds.ShardWriter to
|
|
380
|
+
avoid the gopen() handler issue on Windows (backslash paths).
|
|
381
|
+
The resulting .tar files are fully compatible with webdataset readers.
|
|
382
|
+
"""
|
|
383
|
+
import io
|
|
384
|
+
import tarfile as _tarfile
|
|
385
|
+
|
|
386
|
+
max_per_shard = 5000
|
|
387
|
+
shard_idx = 0
|
|
388
|
+
count_in_shard = 0
|
|
389
|
+
current_tar: _tarfile.TarFile | None = None
|
|
390
|
+
|
|
391
|
+
def _open_shard() -> _tarfile.TarFile:
|
|
392
|
+
nonlocal shard_idx
|
|
393
|
+
shard_path = dataset_dir / f"shard-{shard_idx:06d}.tar"
|
|
394
|
+
shard_idx += 1
|
|
395
|
+
return _tarfile.open(str(shard_path), "w")
|
|
385
396
|
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
ext
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
397
|
+
try:
|
|
398
|
+
current_tar = _open_shard()
|
|
399
|
+
|
|
400
|
+
with metadata_file.open("r", encoding="utf-8") as mf:
|
|
401
|
+
for line in mf:
|
|
402
|
+
row = json.loads(line)
|
|
403
|
+
image_path = Path(row["image_path"])
|
|
404
|
+
if not image_path.exists():
|
|
405
|
+
continue
|
|
406
|
+
|
|
407
|
+
key = image_path.stem
|
|
408
|
+
ext = image_path.suffix.lstrip(".") or "jpg"
|
|
409
|
+
|
|
410
|
+
# Add image file
|
|
411
|
+
img_data = image_path.read_bytes()
|
|
412
|
+
img_info = _tarfile.TarInfo(name=f"{key}.{ext}")
|
|
413
|
+
img_info.size = len(img_data)
|
|
414
|
+
current_tar.addfile(img_info, io.BytesIO(img_data))
|
|
415
|
+
|
|
416
|
+
# Add JSON metadata sidecar
|
|
417
|
+
json_data = json.dumps(row, ensure_ascii=False).encode("utf-8")
|
|
418
|
+
json_info = _tarfile.TarInfo(name=f"{key}.json")
|
|
419
|
+
json_info.size = len(json_data)
|
|
420
|
+
current_tar.addfile(json_info, io.BytesIO(json_data))
|
|
421
|
+
|
|
422
|
+
count_in_shard += 1
|
|
423
|
+
if count_in_shard >= max_per_shard:
|
|
424
|
+
current_tar.close()
|
|
425
|
+
current_tar = _open_shard()
|
|
426
|
+
count_in_shard = 0
|
|
427
|
+
finally:
|
|
428
|
+
if current_tar is not None:
|
|
429
|
+
current_tar.close()
|
|
401
430
|
|
|
402
431
|
async def _write_parquet(self, dataset_dir: Path, metadata_file: Path) -> None:
|
|
403
432
|
try:
|