npm - @vespermcp/mcp-server - Versions diffs - 1.2.3 → 1.2.5 - Mend

@vespermcp/mcp-server 1.2.3 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/build/index.js +129 -20
package/build/python/asset_downloader_engine.py +73 -0
package/build/python/kaggle_engine.py +22 -2
package/build/python/vesper/__init__.py +1 -0
package/build/python/vesper/__pycache__/__init__.cpython-312.pyc +0 -0
package/build/python/vesper/core/__init__.py +1 -0
package/build/python/vesper/core/__pycache__/__init__.cpython-312.pyc +0 -0
package/build/python/vesper/core/__pycache__/asset_downloader.cpython-312.pyc +0 -0
package/build/python/vesper/core/__pycache__/download_recipe.cpython-312.pyc +0 -0
package/build/python/vesper/core/asset_downloader.py +388 -0
package/build/python/vesper/core/download_recipe.py +104 -0
package/package.json +2 -2
package/src/python/asset_downloader_engine.py +73 -0
package/src/python/kaggle_engine.py +22 -2
package/src/python/vesper/__init__.py +1 -0
package/src/python/vesper/core/__init__.py +1 -0
package/src/python/vesper/core/asset_downloader.py +388 -0
package/src/python/vesper/core/download_recipe.py +104 -0

package/build/index.js CHANGED Viewed

@@ -167,6 +167,25 @@ function syncPythonScripts(appRoot, dataRoot) {
     const pythonDest = path.join(dataRoot, "python");
     if (!fs.existsSync(pythonDest))
         fs.mkdirSync(pythonDest, { recursive: true });
+    const collectPyFiles = (dir) => {
+        if (!fs.existsSync(dir))
+            return [];
+        const out = [];
+        const stack = [dir];
+        while (stack.length > 0) {
+            const cur = stack.pop();
+            for (const entry of fs.readdirSync(cur, { withFileTypes: true })) {
+                const full = path.join(cur, entry.name);
+                if (entry.isDirectory()) {
+                    stack.push(full);
+                }
+                else if (entry.isFile() && full.endsWith(".py")) {
+                    out.push(full);
+                }
+            }
+        }
+        return out;
+    };
     // Sources to check for Python scripts
     const sources = [
         path.join(appRoot, "src", "python"),
@@ -175,25 +194,21 @@ function syncPythonScripts(appRoot, dataRoot) {
     ];
     let syncedCount = 0;
     for (const src of sources) {
-        if (fs.existsSync(src)) {
-            const files = fs.readdirSync(src);
-            for (const file of files) {
-                if (file.endsWith(".py")) {
-                    const srcPath = path.join(src, file);
-                    const destPath = path.join(pythonDest, file);
-                    // Only copy if file doesn't exist or is different size (basic sync)
-                    const srcStat = fs.statSync(srcPath);
-                    let shouldCopy = true;
-                    if (fs.existsSync(destPath)) {
-                        const destStat = fs.statSync(destPath);
-                        if (srcStat.size === destStat.size)
-                            shouldCopy = false;
-                    }
-                    if (shouldCopy) {
-                        fs.copyFileSync(srcPath, destPath);
-                        syncedCount++;
-                    }
-                }
+        const files = collectPyFiles(src);
+        for (const srcPath of files) {
+            const rel = path.relative(src, srcPath);
+            const destPath = path.join(pythonDest, rel);
+            const srcStat = fs.statSync(srcPath);
+            let shouldCopy = true;
+            if (fs.existsSync(destPath)) {
+                const destStat = fs.statSync(destPath);
+                if (srcStat.size === destStat.size)
+                    shouldCopy = false;
+            }
+            if (shouldCopy) {
+                fs.mkdirSync(path.dirname(destPath), { recursive: true });
+                fs.copyFileSync(srcPath, destPath);
+                syncedCount++;
             }
         }
     }
@@ -471,6 +486,25 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
                     required: ["source", "dataset_id"],
                 },
             },
+            {
+                name: "vesper_download_assets",
+                description: "Download real image/media assets using smart source-aware recipes (HuggingFace, Kaggle, direct URL).",
+                inputSchema: {
+                    type: "object",
+                    properties: {
+                        dataset_id: { type: "string", description: "Unique dataset identifier." },
+                        source: { type: "string", enum: ["huggingface", "kaggle", "url"], description: "Asset source type." },
+                        repo_id: { type: "string", description: "Repo ID for HuggingFace (e.g. cifar100)." },
+                        kaggle_ref: { type: "string", description: "Kaggle dataset ref (owner/dataset)." },
+                        urls: { type: "array", items: { type: "string" }, description: "Direct asset URLs." },
+                        output_format: { type: "string", enum: ["webdataset", "imagefolder", "parquet"], description: "Output asset format." },
+                        max_items: { type: "number", description: "Optional cap on number of assets to fetch." },
+                        workers: { type: "number", description: "Parallel worker count (default 8)." },
+                        image_column: { type: "string", description: "Optional explicit image column for HuggingFace datasets." },
+                    },
+                    required: ["dataset_id", "source"],
+                },
+            },
             {
                 name: "configure_kaggle",
                 description: "Optionally store Kaggle API credentials for Kaggle discover/download. Core Vesper works without this.",
@@ -571,6 +605,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
                     properties: {
                         query: { type: "string" },
                         requirements: { type: "string" },
+                        download_images: { type: "boolean", description: "When true, enables post-prepare smart asset downloading for image/media datasets." },
                         cleaning_options: { type: "object" },
                         split_config: { type: "object" },
                     },
@@ -813,6 +848,24 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
                     const hf = new HuggingFaceScraper();
                     results = await hf.scrape(Math.max(1, limit), true, query);
                 }
+                const recipeScript = path.join(dataRoot, "python", "asset_downloader_engine.py");
+                for (const ds of results.slice(0, limit)) {
+                    const info = {
+                        dataset_id: ds.id,
+                        id: ds.id,
+                        source: ds.source,
+                        repo_id: ds.id,
+                        total_images: ds.total_examples || 0,
+                        image_column: undefined,
+                        recipes_dir: path.join(dataRoot, "recipes"),
+                    };
+                    try {
+                        await runPythonJson(recipeScript, ["build_recipe", JSON.stringify(info)]);
+                    }
+                    catch {
+                        // best-effort recipe generation; ignore discovery-time recipe failures
+                    }
+                }
                 const formattedOutput = formatSearchResults(results.slice(0, limit));
                 return {
                     content: [{ type: "text", text: formattedOutput }]
@@ -857,6 +910,61 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
                 };
             }
         }
+        case "vesper_download_assets": {
+            hydrateExternalKeys();
+            const datasetId = String(request.params.arguments?.dataset_id || "").trim();
+            const source = String(request.params.arguments?.source || "").trim().toLowerCase();
+            const repoId = request.params.arguments?.repo_id ? String(request.params.arguments.repo_id) : undefined;
+            const kaggleRef = request.params.arguments?.kaggle_ref ? String(request.params.arguments.kaggle_ref) : undefined;
+            const urls = Array.isArray(request.params.arguments?.urls)
+                ? (request.params.arguments?.urls).map(v => String(v))
+                : undefined;
+            const outputFormat = String(request.params.arguments?.output_format || "webdataset");
+            const maxItems = request.params.arguments?.max_items ? Number(request.params.arguments.max_items) : undefined;
+            const workers = request.params.arguments?.workers ? Number(request.params.arguments.workers) : 8;
+            const imageColumn = request.params.arguments?.image_column ? String(request.params.arguments.image_column) : undefined;
+            if (!datasetId || !source) {
+                throw new McpError(ErrorCode.InvalidParams, "dataset_id and source are required");
+            }
+            if (source === "kaggle" && !dataIngestor.hasKaggleCredentials()) {
+                return {
+                    content: [{ type: "text", text: "Kaggle support requires API key. Run 'vespermcp config keys' (30 seconds)." }],
+                    isError: true,
+                };
+            }
+            const scriptPath = path.join(dataRoot, "python", "asset_downloader_engine.py");
+            const payload = {
+                dataset_id: datasetId,
+                source,
+                repo_id: repoId,
+                kaggle_ref: kaggleRef,
+                urls,
+                output_format: outputFormat,
+                max_items: maxItems,
+                workers,
+                image_column: imageColumn,
+                output_root: path.join(dataRoot, "data", "assets"),
+                recipes_dir: path.join(dataRoot, "recipes"),
+            };
+            try {
+                const result = await runPythonJson(scriptPath, ["download", JSON.stringify(payload)]);
+                if (!result?.ok) {
+                    return {
+                        content: [{ type: "text", text: `ERROR: asset download failed: ${result?.error || "Unknown error"}` }],
+                        isError: true,
+                    };
+                }
+                return {
+                    content: [{ type: "text", text: JSON.stringify(result.result, null, 2) }],
+                };
+            }
+            catch (error) {
+                return {
+                    content: [{ type: "text", text: `ERROR: asset downloader execution failed: ${error.message}` }],
+                    isError: true,
+                };
+            }
+        }
         case "configure_kaggle": {
             const username = String(request.params.arguments?.username || "").trim();
             const key = String(request.params.arguments?.key || "").trim();
@@ -1033,7 +1141,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
         case "prepare_dataset": {
             const query = String(request.params.arguments?.query);
             const requirements = request.params.arguments?.requirements ? String(request.params.arguments?.requirements) : undefined;
-            const job = jobManager.createJob("prepare", 0, { query, requirements });
+            const downloadImages = request.params.arguments?.download_images === true;
+            const job = jobManager.createJob("prepare", 0, { query, requirements, downloadImages });
             return {
                 content: [{ type: "text", text: `Preparation job started. ID: ${job.id}. Vesper is finding and preparing the best dataset for you.` }]
             };

package/build/python/asset_downloader_engine.py ADDED Viewed

@@ -0,0 +1,73 @@
+import argparse
+import asyncio
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Any, Dict
+CURRENT_DIR = Path(__file__).resolve().parent
+if str(CURRENT_DIR) not in sys.path:
+    sys.path.insert(0, str(CURRENT_DIR))
+from vesper.core.asset_downloader import AssetDownloader
+from vesper.core.download_recipe import build_download_recipe, save_recipe, get_download_recipe
+def _print(payload: Dict[str, Any]) -> None:
+    print(json.dumps(payload, ensure_ascii=False))
+async def _run_download(args: argparse.Namespace) -> Dict[str, Any]:
+    payload = json.loads(args.payload)
+    output_root = payload.get("output_root") or str(Path.home() / ".vesper" / "data" / "assets")
+    workers = int(payload.get("workers") or 8)
+    recipes_dir = payload.get("recipes_dir")
+    downloader = AssetDownloader(output_root=output_root, workers=workers, recipes_dir=recipes_dir)
+    result = await downloader.download_assets(
+        dataset_id=str(payload.get("dataset_id")),
+        source=payload.get("source"),
+        repo_id=payload.get("repo_id"),
+        kaggle_ref=payload.get("kaggle_ref"),
+        urls=payload.get("urls"),
+        output_format=payload.get("output_format", "webdataset"),
+        max_items=payload.get("max_items"),
+        image_column=payload.get("image_column"),
+    )
+    return {"ok": True, "result": result}
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Vesper Smart Asset Downloader Engine")
+    parser.add_argument("action", choices=["download", "build_recipe", "get_recipe"])
+    parser.add_argument("payload", help="JSON payload")
+    args = parser.parse_args()
+    try:
+        if args.action == "download":
+            response = asyncio.run(_run_download(args))
+            _print(response)
+            return
+        payload = json.loads(args.payload)
+        if args.action == "build_recipe":
+            recipe = build_download_recipe(payload)
+            saved = save_recipe(recipe, payload.get("recipes_dir"))
+            _print({"ok": True, "recipe": recipe, "saved_to": saved})
+            return
+        if args.action == "get_recipe":
+            dataset_id = str(payload.get("dataset_id"))
+            recipe = get_download_recipe(dataset_id, payload.get("recipes_dir"))
+            _print({"ok": True, "recipe": recipe})
+            return
+        _print({"ok": False, "error": f"Unknown action: {args.action}"})
+    except Exception as e:
+        _print({"ok": False, "error": str(e)})
+if __name__ == "__main__":
+    main()

package/build/python/kaggle_engine.py CHANGED Viewed

@@ -109,8 +109,28 @@ def discover(query: str, limit: int = 20) -> Dict[str, Any]:
     api: KaggleApi = auth["api"]
     try:
-        datasets = api.dataset_list(search=query, page_size=max(1, min(limit, 100)))
-        items = [_dataset_to_dict(ds) for ds in datasets[:limit]]
+        desired = max(1, min(limit, 100))
+        try:
+            datasets = api.dataset_list(search=query, page_size=desired)
+            items = [_dataset_to_dict(ds) for ds in datasets[:limit]]
+            return {"ok": True, "results": items, "count": len(items)}
+        except TypeError:
+            pass
+        collected = []
+        page = 1
+        while len(collected) < limit:
+            page_items = api.dataset_list(search=query, page=page)
+            if not page_items:
+                break
+            collected.extend(page_items)
+            if len(page_items) < 20:
+                break
+            page += 1
+        items = [_dataset_to_dict(ds) for ds in collected[:limit]]
         return {"ok": True, "results": items, "count": len(items)}
     except Exception as e:
         return {"ok": False, "error": f"Kaggle discover failed: {str(e)}"}

package/build/python/vesper/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Vesper Python runtime package."""

package/build/python/vesper/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file

package/build/python/vesper/core/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Core data engines for Vesper."""

package/build/python/vesper/core/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file

package/build/python/vesper/core/__pycache__/asset_downloader.cpython-312.pyc ADDED Viewed

Binary file

package/build/python/vesper/core/__pycache__/download_recipe.cpython-312.pyc ADDED Viewed

Binary file