@vespermcp/mcp-server 1.2.6 → 1.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js
CHANGED
|
@@ -1019,9 +1019,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1019
1019
|
if (source === "kaggle") {
|
|
1020
1020
|
requiredModules.push({ module: "kaggle", packageName: "kaggle" });
|
|
1021
1021
|
}
|
|
1022
|
-
if (outputFormat === "webdataset") {
|
|
1023
|
-
requiredModules.push({ module: "webdataset", packageName: "webdataset" });
|
|
1024
|
-
}
|
|
1025
1022
|
try {
|
|
1026
1023
|
await ensurePythonModules(requiredModules);
|
|
1027
1024
|
}
|
|
@@ -1588,7 +1585,7 @@ async function main() {
|
|
|
1588
1585
|
const isFuse = args.includes("fuse");
|
|
1589
1586
|
const isDiscover = args.includes("discover");
|
|
1590
1587
|
const isDownload = args.includes("download");
|
|
1591
|
-
const isConfig = args.includes("config");
|
|
1588
|
+
const isConfig = args.includes("config") || args.includes("configure");
|
|
1592
1589
|
const isSetup = args.includes("--setup") || args.includes("setup");
|
|
1593
1590
|
const isSilent = args.includes("--silent");
|
|
1594
1591
|
if (process.stdin.isTTY && !isSilent) {
|
|
@@ -1630,7 +1627,9 @@ async function runConfigCli(args) {
|
|
|
1630
1627
|
const isKaggle = args.includes("kaggle");
|
|
1631
1628
|
if (!(isKeys || isKaggle) || args.includes("--help")) {
|
|
1632
1629
|
console.log("Usage: vespermcp config keys");
|
|
1630
|
+
console.log(" vespermcp configure keys");
|
|
1633
1631
|
console.log(" vespermcp config kaggle --username <name> --key <api_key>");
|
|
1632
|
+
console.log(" vespermcp configure kaggle --username <name> --key <api_key>");
|
|
1634
1633
|
console.log("Core Vesper tools work with zero API keys.");
|
|
1635
1634
|
return;
|
|
1636
1635
|
}
|
|
Binary file
|
|
@@ -127,12 +127,6 @@ class AssetDownloader:
|
|
|
127
127
|
if not urls:
|
|
128
128
|
raise ValueError("urls are required for source=url")
|
|
129
129
|
|
|
130
|
-
if output_format == "webdataset" and wds is None:
|
|
131
|
-
raise RuntimeError(
|
|
132
|
-
"webdataset package is required for webdataset output. "
|
|
133
|
-
"Install with: pip install webdataset"
|
|
134
|
-
)
|
|
135
|
-
|
|
136
130
|
# --- Now safe to create directories ---
|
|
137
131
|
dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
|
|
138
132
|
images_dir = dataset_dir / "images"
|
|
@@ -380,24 +374,59 @@ class AssetDownloader:
|
|
|
380
374
|
raise ValueError(f"Unsupported image value type: {type(value)}")
|
|
381
375
|
|
|
382
376
|
async def _write_webdataset(self, dataset_dir: Path, images_dir: Path, metadata_file: Path) -> None:
|
|
383
|
-
|
|
384
|
-
|
|
377
|
+
"""Write a webdataset-compatible tar archive.
|
|
378
|
+
|
|
379
|
+
Uses Python's built-in tarfile module instead of wds.ShardWriter to
|
|
380
|
+
avoid the gopen() handler issue on Windows (backslash paths).
|
|
381
|
+
The resulting .tar files are fully compatible with webdataset readers.
|
|
382
|
+
"""
|
|
383
|
+
import io
|
|
384
|
+
import tarfile as _tarfile
|
|
385
|
+
|
|
386
|
+
max_per_shard = 5000
|
|
387
|
+
shard_idx = 0
|
|
388
|
+
count_in_shard = 0
|
|
389
|
+
current_tar: _tarfile.TarFile | None = None
|
|
390
|
+
|
|
391
|
+
def _open_shard() -> _tarfile.TarFile:
|
|
392
|
+
nonlocal shard_idx
|
|
393
|
+
shard_path = dataset_dir / f"shard-{shard_idx:06d}.tar"
|
|
394
|
+
shard_idx += 1
|
|
395
|
+
return _tarfile.open(str(shard_path), "w")
|
|
385
396
|
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
ext
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
397
|
+
try:
|
|
398
|
+
current_tar = _open_shard()
|
|
399
|
+
|
|
400
|
+
with metadata_file.open("r", encoding="utf-8") as mf:
|
|
401
|
+
for line in mf:
|
|
402
|
+
row = json.loads(line)
|
|
403
|
+
image_path = Path(row["image_path"])
|
|
404
|
+
if not image_path.exists():
|
|
405
|
+
continue
|
|
406
|
+
|
|
407
|
+
key = image_path.stem
|
|
408
|
+
ext = image_path.suffix.lstrip(".") or "jpg"
|
|
409
|
+
|
|
410
|
+
# Add image file
|
|
411
|
+
img_data = image_path.read_bytes()
|
|
412
|
+
img_info = _tarfile.TarInfo(name=f"{key}.{ext}")
|
|
413
|
+
img_info.size = len(img_data)
|
|
414
|
+
current_tar.addfile(img_info, io.BytesIO(img_data))
|
|
415
|
+
|
|
416
|
+
# Add JSON metadata sidecar
|
|
417
|
+
json_data = json.dumps(row, ensure_ascii=False).encode("utf-8")
|
|
418
|
+
json_info = _tarfile.TarInfo(name=f"{key}.json")
|
|
419
|
+
json_info.size = len(json_data)
|
|
420
|
+
current_tar.addfile(json_info, io.BytesIO(json_data))
|
|
421
|
+
|
|
422
|
+
count_in_shard += 1
|
|
423
|
+
if count_in_shard >= max_per_shard:
|
|
424
|
+
current_tar.close()
|
|
425
|
+
current_tar = _open_shard()
|
|
426
|
+
count_in_shard = 0
|
|
427
|
+
finally:
|
|
428
|
+
if current_tar is not None:
|
|
429
|
+
current_tar.close()
|
|
401
430
|
|
|
402
431
|
async def _write_parquet(self, dataset_dir: Path, metadata_file: Path) -> None:
|
|
403
432
|
try:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vespermcp/mcp-server",
|
|
3
|
-
"version": "1.2.6",
|
|
3
|
+
"version": "1.2.8",
|
|
4
4
|
"description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "build/index.js",
|
|
@@ -127,12 +127,6 @@ class AssetDownloader:
|
|
|
127
127
|
if not urls:
|
|
128
128
|
raise ValueError("urls are required for source=url")
|
|
129
129
|
|
|
130
|
-
if output_format == "webdataset" and wds is None:
|
|
131
|
-
raise RuntimeError(
|
|
132
|
-
"webdataset package is required for webdataset output. "
|
|
133
|
-
"Install with: pip install webdataset"
|
|
134
|
-
)
|
|
135
|
-
|
|
136
130
|
# --- Now safe to create directories ---
|
|
137
131
|
dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
|
|
138
132
|
images_dir = dataset_dir / "images"
|
|
@@ -380,24 +374,59 @@ class AssetDownloader:
|
|
|
380
374
|
raise ValueError(f"Unsupported image value type: {type(value)}")
|
|
381
375
|
|
|
382
376
|
async def _write_webdataset(self, dataset_dir: Path, images_dir: Path, metadata_file: Path) -> None:
|
|
383
|
-
|
|
384
|
-
|
|
377
|
+
"""Write a webdataset-compatible tar archive.
|
|
378
|
+
|
|
379
|
+
Uses Python's built-in tarfile module instead of wds.ShardWriter to
|
|
380
|
+
avoid the gopen() handler issue on Windows (backslash paths).
|
|
381
|
+
The resulting .tar files are fully compatible with webdataset readers.
|
|
382
|
+
"""
|
|
383
|
+
import io
|
|
384
|
+
import tarfile as _tarfile
|
|
385
|
+
|
|
386
|
+
max_per_shard = 5000
|
|
387
|
+
shard_idx = 0
|
|
388
|
+
count_in_shard = 0
|
|
389
|
+
current_tar: _tarfile.TarFile | None = None
|
|
390
|
+
|
|
391
|
+
def _open_shard() -> _tarfile.TarFile:
|
|
392
|
+
nonlocal shard_idx
|
|
393
|
+
shard_path = dataset_dir / f"shard-{shard_idx:06d}.tar"
|
|
394
|
+
shard_idx += 1
|
|
395
|
+
return _tarfile.open(str(shard_path), "w")
|
|
385
396
|
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
ext
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
397
|
+
try:
|
|
398
|
+
current_tar = _open_shard()
|
|
399
|
+
|
|
400
|
+
with metadata_file.open("r", encoding="utf-8") as mf:
|
|
401
|
+
for line in mf:
|
|
402
|
+
row = json.loads(line)
|
|
403
|
+
image_path = Path(row["image_path"])
|
|
404
|
+
if not image_path.exists():
|
|
405
|
+
continue
|
|
406
|
+
|
|
407
|
+
key = image_path.stem
|
|
408
|
+
ext = image_path.suffix.lstrip(".") or "jpg"
|
|
409
|
+
|
|
410
|
+
# Add image file
|
|
411
|
+
img_data = image_path.read_bytes()
|
|
412
|
+
img_info = _tarfile.TarInfo(name=f"{key}.{ext}")
|
|
413
|
+
img_info.size = len(img_data)
|
|
414
|
+
current_tar.addfile(img_info, io.BytesIO(img_data))
|
|
415
|
+
|
|
416
|
+
# Add JSON metadata sidecar
|
|
417
|
+
json_data = json.dumps(row, ensure_ascii=False).encode("utf-8")
|
|
418
|
+
json_info = _tarfile.TarInfo(name=f"{key}.json")
|
|
419
|
+
json_info.size = len(json_data)
|
|
420
|
+
current_tar.addfile(json_info, io.BytesIO(json_data))
|
|
421
|
+
|
|
422
|
+
count_in_shard += 1
|
|
423
|
+
if count_in_shard >= max_per_shard:
|
|
424
|
+
current_tar.close()
|
|
425
|
+
current_tar = _open_shard()
|
|
426
|
+
count_in_shard = 0
|
|
427
|
+
finally:
|
|
428
|
+
if current_tar is not None:
|
|
429
|
+
current_tar.close()
|
|
401
430
|
|
|
402
431
|
async def _write_parquet(self, dataset_dir: Path, metadata_file: Path) -> None:
|
|
403
432
|
try:
|