@vespermcp/mcp-server 1.2.5 → 1.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js
CHANGED
|
@@ -131,8 +131,83 @@ function extractRequestedRows(query, requirements) {
|
|
|
131
131
|
return Math.max(...allNums);
|
|
132
132
|
return undefined;
|
|
133
133
|
}
|
|
134
|
+
// In-memory set of Python module names that ensurePythonModules() has already
// found importable or has just installed, so later calls skip re-checking them.
const verifiedPythonModules = new Set();
|
|
135
|
+
/**
 * Pick the executable name used to launch Python on the current platform.
 *
 * @returns {string} "py" when process.platform is "win32", otherwise "python".
 */
function getPythonCommand() {
    if (process.platform === "win32") {
        return "py";
    }
    // NOTE(review): some POSIX systems only provide `python3` on PATH — confirm
    // that `python` is the intended command there.
    return "python";
}
|
|
138
|
+
/**
 * Run the platform's Python command with the given arguments and capture its output.
 *
 * The child inherits the current environment plus PIP_DISABLE_PIP_VERSION_CHECK=1
 * and PYTHONUTF8=1.
 *
 * @param {string[]} args - Arguments passed to the Python command.
 * @param {number} [timeoutMs=300000] - Milliseconds after which the child is killed
 *   and the returned promise rejects.
 * @returns {Promise<{code: number, stdout: string, stderr: string}>} Resolves once the
 *   child closes. A null exit code (child ended by a signal) is reported as 1.
 * @throws {Error} Rejects if the command times out or the process cannot be spawned.
 */
function runPythonProcess(args, timeoutMs = 300000) {
    const pyCmd = getPythonCommand();
    return new Promise((resolve, reject) => {
        const proc = spawn(pyCmd, args, {
            env: {
                ...process.env,
                PIP_DISABLE_PIP_VERSION_CHECK: "1",
                PYTHONUTF8: "1",
            },
            // Nothing ever writes to the child's stdin, so do not leave a pipe open:
            // a child that tries to read input now sees EOF instead of blocking
            // until the timeout fires.
            stdio: ["ignore", "pipe", "pipe"],
        });
        // Decode through the stream's own decoder so a multi-byte UTF-8 character
        // split across two chunks is reassembled instead of being corrupted.
        proc.stdout.setEncoding("utf8");
        proc.stderr.setEncoding("utf8");
        let stdout = "";
        let stderr = "";
        let settled = false;
        // Timeout, "close" and "error" can all fire; only the first one settles.
        const settle = (fn, value) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            fn(value);
        };
        proc.stdout.on("data", (chunk) => {
            stdout += chunk;
        });
        proc.stderr.on("data", (chunk) => {
            stderr += chunk;
        });
        const timer = setTimeout(() => {
            try {
                proc.kill();
            }
            catch {
                // Best effort: the process may already be gone.
            }
            settle(reject, new Error(`Python command timed out after ${timeoutMs}ms: ${args.join(" ")}`));
        }, timeoutMs);
        proc.on("close", (code) => {
            settle(resolve, { code: code ?? 1, stdout, stderr });
        });
        proc.on("error", (error) => {
            settle(reject, error);
        });
    });
}
|
|
171
|
+
/**
 * Make sure every listed Python module is importable, installing missing ones with pip.
 *
 * Each module not yet in `verifiedPythonModules` is probed with
 * `importlib.util.find_spec`. Missing packages are installed in one
 * `pip install` call; if that fails, the install is retried with `--user`.
 * Modules that were found or installed are added to `verifiedPythonModules`.
 *
 * @param {Array<{module: string, packageName: string}>} modulePackagePairs -
 *   Import name and pip package name for each requirement. A nullish value is
 *   treated as an empty list.
 * @returns {Promise<void>}
 * @throws {Error} If pip fails both with and without `--user`; the message carries
 *   the output of both attempts. Errors from `runPythonProcess` propagate as-is.
 */
async function ensurePythonModules(modulePackagePairs) {
    const describeFailure = (result) => (result.stderr || result.stdout || "Unknown pip install error").trim();
    const missing = [];
    for (const pair of modulePackagePairs ?? []) {
        if (verifiedPythonModules.has(pair.module)) {
            continue;
        }
        // JSON.stringify yields a quoted, escaped literal that is embedded in the Python snippet.
        const check = await runPythonProcess([
            "-c",
            `import importlib.util,sys; sys.exit(0 if importlib.util.find_spec(${JSON.stringify(pair.module)}) else 1)`
        ], 20000);
        if (check.code === 0) {
            verifiedPythonModules.add(pair.module);
        }
        else {
            missing.push(pair);
        }
    }
    if (missing.length === 0) {
        return;
    }
    const packages = [...new Set(missing.map(m => m.packageName))];
    console.error(`[Vesper] Installing missing Python packages: ${packages.join(", ")}`);
    const installArgs = ["-m", "pip", "install", "--disable-pip-version-check", ...packages];
    let install = await runPythonProcess(installArgs, 600000);
    // Output of the first attempt, kept so it is not lost if the --user retry also fails.
    let initialFailure = null;
    if (install.code !== 0) {
        initialFailure = describeFailure(install);
        console.error(`[Vesper] pip install failed (trying --user fallback): ${(install.stderr || "").slice(0, 300)}`);
        const userInstallArgs = ["-m", "pip", "install", "--disable-pip-version-check", "--user", ...packages];
        install = await runPythonProcess(userInstallArgs, 600000);
    }
    if (install.code !== 0) {
        // Report both attempts: the --user retry can fail for a reason unrelated
        // to the original problem, which would otherwise hide the root cause.
        const fallbackFailure = describeFailure(install);
        const details = initialFailure === null
            ? fallbackFailure
            : `${fallbackFailure} (initial attempt without --user: ${initialFailure})`;
        throw new Error(`Failed to install required Python packages (${packages.join(", ")}). ${details}`);
    }
    console.error(`[Vesper] Successfully installed: ${packages.join(", ")}`);
    for (const pair of missing) {
        verifiedPythonModules.add(pair.module);
    }
}
|
|
134
209
|
function runPythonJson(scriptPath, args) {
|
|
135
|
-
const pyCmd =
|
|
210
|
+
const pyCmd = getPythonCommand();
|
|
136
211
|
return new Promise((resolve, reject) => {
|
|
137
212
|
const proc = spawn(pyCmd, [scriptPath, ...args]);
|
|
138
213
|
let stdout = "";
|
|
@@ -932,6 +1007,30 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
932
1007
|
isError: true,
|
|
933
1008
|
};
|
|
934
1009
|
}
|
|
1010
|
+
const requiredModules = [
|
|
1011
|
+
{ module: "aiohttp", packageName: "aiohttp" },
|
|
1012
|
+
];
|
|
1013
|
+
if (source === "url") {
|
|
1014
|
+
requiredModules.push({ module: "aiofiles", packageName: "aiofiles" });
|
|
1015
|
+
}
|
|
1016
|
+
if (source === "huggingface") {
|
|
1017
|
+
requiredModules.push({ module: "datasets", packageName: "datasets" });
|
|
1018
|
+
}
|
|
1019
|
+
if (source === "kaggle") {
|
|
1020
|
+
requiredModules.push({ module: "kaggle", packageName: "kaggle" });
|
|
1021
|
+
}
|
|
1022
|
+
if (outputFormat === "webdataset") {
|
|
1023
|
+
requiredModules.push({ module: "webdataset", packageName: "webdataset" });
|
|
1024
|
+
}
|
|
1025
|
+
try {
|
|
1026
|
+
await ensurePythonModules(requiredModules);
|
|
1027
|
+
}
|
|
1028
|
+
catch (error) {
|
|
1029
|
+
return {
|
|
1030
|
+
content: [{ type: "text", text: `ERROR: Python dependency setup failed: ${error.message}` }],
|
|
1031
|
+
isError: true,
|
|
1032
|
+
};
|
|
1033
|
+
}
|
|
935
1034
|
const scriptPath = path.join(dataRoot, "python", "asset_downloader_engine.py");
|
|
936
1035
|
const payload = {
|
|
937
1036
|
dataset_id: datasetId,
|
|
Binary file
|
|
@@ -101,27 +101,60 @@ class AssetDownloader:
|
|
|
101
101
|
if source not in {"huggingface", "kaggle", "url"}:
|
|
102
102
|
raise ValueError("source must be one of: huggingface, kaggle, url")
|
|
103
103
|
|
|
104
|
-
|
|
105
|
-
images_dir = dataset_dir / "images"
|
|
106
|
-
dataset_dir.mkdir(parents=True, exist_ok=True)
|
|
107
|
-
images_dir.mkdir(parents=True, exist_ok=True)
|
|
108
|
-
|
|
109
|
-
errors_file = dataset_dir / "errors.jsonl"
|
|
110
|
-
metadata_file = dataset_dir / "metadata.jsonl"
|
|
111
|
-
|
|
104
|
+
# --- Validate imports and args BEFORE creating any directories ---
|
|
112
105
|
if source == "huggingface":
|
|
113
106
|
if not repo_id:
|
|
114
107
|
raise ValueError("repo_id is required for source=huggingface")
|
|
115
|
-
|
|
108
|
+
try:
|
|
109
|
+
from datasets import load_dataset as _ld # noqa: F401
|
|
110
|
+
except Exception as e:
|
|
111
|
+
raise RuntimeError(
|
|
112
|
+
f"datasets package is required for HuggingFace downloads. "
|
|
113
|
+
f"Install with: pip install datasets. Details: {e}"
|
|
114
|
+
)
|
|
116
115
|
elif source == "kaggle":
|
|
117
116
|
ref = kaggle_ref or repo_id
|
|
118
117
|
if not ref:
|
|
119
118
|
raise ValueError("kaggle_ref is required for source=kaggle")
|
|
120
|
-
|
|
119
|
+
try:
|
|
120
|
+
from kaggle.api.kaggle_api_extended import KaggleApi as _Ka # noqa: F401
|
|
121
|
+
except Exception as e:
|
|
122
|
+
raise RuntimeError(
|
|
123
|
+
f"kaggle package is required for Kaggle downloads. "
|
|
124
|
+
f"Install with: pip install kaggle. Details: {e}"
|
|
125
|
+
)
|
|
121
126
|
else:
|
|
122
127
|
if not urls:
|
|
123
128
|
raise ValueError("urls are required for source=url")
|
|
124
|
-
|
|
129
|
+
|
|
130
|
+
if output_format == "webdataset" and wds is None:
|
|
131
|
+
raise RuntimeError(
|
|
132
|
+
"webdataset package is required for webdataset output. "
|
|
133
|
+
"Install with: pip install webdataset"
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
# --- Now safe to create directories ---
|
|
137
|
+
dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
|
|
138
|
+
images_dir = dataset_dir / "images"
|
|
139
|
+
dataset_dir.mkdir(parents=True, exist_ok=True)
|
|
140
|
+
images_dir.mkdir(parents=True, exist_ok=True)
|
|
141
|
+
|
|
142
|
+
errors_file = dataset_dir / "errors.jsonl"
|
|
143
|
+
metadata_file = dataset_dir / "metadata.jsonl"
|
|
144
|
+
|
|
145
|
+
try:
|
|
146
|
+
if source == "huggingface":
|
|
147
|
+
summary = await self._download_huggingface(repo_id, dataset_id, images_dir, metadata_file, errors_file, max_items, image_column)
|
|
148
|
+
elif source == "kaggle":
|
|
149
|
+
ref = kaggle_ref or repo_id
|
|
150
|
+
summary = await self._download_kaggle(ref, dataset_id, images_dir, metadata_file, errors_file, max_items)
|
|
151
|
+
else:
|
|
152
|
+
summary = await self._download_urls(urls, dataset_id, images_dir, metadata_file, errors_file, max_items)
|
|
153
|
+
except Exception:
|
|
154
|
+
# Clean up empty directories on failure so we don't leave ghost artifacts
|
|
155
|
+
if images_dir.exists() and not any(images_dir.iterdir()):
|
|
156
|
+
shutil.rmtree(dataset_dir, ignore_errors=True)
|
|
157
|
+
raise
|
|
125
158
|
|
|
126
159
|
if output_format == "webdataset":
|
|
127
160
|
await self._write_webdataset(dataset_dir, images_dir, metadata_file)
|
|
@@ -150,10 +183,7 @@ class AssetDownloader:
|
|
|
150
183
|
max_items: Optional[int],
|
|
151
184
|
image_column: Optional[str],
|
|
152
185
|
) -> Dict[str, int]:
|
|
153
|
-
|
|
154
|
-
from datasets import load_dataset
|
|
155
|
-
except Exception as e:
|
|
156
|
-
raise RuntimeError(f"datasets package is required. Install with: pip install datasets. Details: {e}")
|
|
186
|
+
from datasets import load_dataset # validated in download_assets()
|
|
157
187
|
|
|
158
188
|
await self._emit("start", {"source": "huggingface", "repo_id": repo_id})
|
|
159
189
|
|
|
@@ -212,10 +242,7 @@ class AssetDownloader:
|
|
|
212
242
|
errors_file: Path,
|
|
213
243
|
max_items: Optional[int],
|
|
214
244
|
) -> Dict[str, int]:
|
|
215
|
-
|
|
216
|
-
from kaggle.api.kaggle_api_extended import KaggleApi
|
|
217
|
-
except Exception as e:
|
|
218
|
-
raise RuntimeError(f"kaggle package is required: {e}")
|
|
245
|
+
from kaggle.api.kaggle_api_extended import KaggleApi # validated in download_assets()
|
|
219
246
|
|
|
220
247
|
await self._emit("start", {"source": "kaggle", "dataset": kaggle_ref})
|
|
221
248
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vespermcp/mcp-server",
|
|
3
|
-
"version": "1.2.
|
|
3
|
+
"version": "1.2.6",
|
|
4
4
|
"description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "build/index.js",
|
package/scripts/postinstall.cjs
CHANGED
|
@@ -101,27 +101,60 @@ class AssetDownloader:
|
|
|
101
101
|
if source not in {"huggingface", "kaggle", "url"}:
|
|
102
102
|
raise ValueError("source must be one of: huggingface, kaggle, url")
|
|
103
103
|
|
|
104
|
-
|
|
105
|
-
images_dir = dataset_dir / "images"
|
|
106
|
-
dataset_dir.mkdir(parents=True, exist_ok=True)
|
|
107
|
-
images_dir.mkdir(parents=True, exist_ok=True)
|
|
108
|
-
|
|
109
|
-
errors_file = dataset_dir / "errors.jsonl"
|
|
110
|
-
metadata_file = dataset_dir / "metadata.jsonl"
|
|
111
|
-
|
|
104
|
+
# --- Validate imports and args BEFORE creating any directories ---
|
|
112
105
|
if source == "huggingface":
|
|
113
106
|
if not repo_id:
|
|
114
107
|
raise ValueError("repo_id is required for source=huggingface")
|
|
115
|
-
|
|
108
|
+
try:
|
|
109
|
+
from datasets import load_dataset as _ld # noqa: F401
|
|
110
|
+
except Exception as e:
|
|
111
|
+
raise RuntimeError(
|
|
112
|
+
f"datasets package is required for HuggingFace downloads. "
|
|
113
|
+
f"Install with: pip install datasets. Details: {e}"
|
|
114
|
+
)
|
|
116
115
|
elif source == "kaggle":
|
|
117
116
|
ref = kaggle_ref or repo_id
|
|
118
117
|
if not ref:
|
|
119
118
|
raise ValueError("kaggle_ref is required for source=kaggle")
|
|
120
|
-
|
|
119
|
+
try:
|
|
120
|
+
from kaggle.api.kaggle_api_extended import KaggleApi as _Ka # noqa: F401
|
|
121
|
+
except Exception as e:
|
|
122
|
+
raise RuntimeError(
|
|
123
|
+
f"kaggle package is required for Kaggle downloads. "
|
|
124
|
+
f"Install with: pip install kaggle. Details: {e}"
|
|
125
|
+
)
|
|
121
126
|
else:
|
|
122
127
|
if not urls:
|
|
123
128
|
raise ValueError("urls are required for source=url")
|
|
124
|
-
|
|
129
|
+
|
|
130
|
+
if output_format == "webdataset" and wds is None:
|
|
131
|
+
raise RuntimeError(
|
|
132
|
+
"webdataset package is required for webdataset output. "
|
|
133
|
+
"Install with: pip install webdataset"
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
# --- Now safe to create directories ---
|
|
137
|
+
dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
|
|
138
|
+
images_dir = dataset_dir / "images"
|
|
139
|
+
dataset_dir.mkdir(parents=True, exist_ok=True)
|
|
140
|
+
images_dir.mkdir(parents=True, exist_ok=True)
|
|
141
|
+
|
|
142
|
+
errors_file = dataset_dir / "errors.jsonl"
|
|
143
|
+
metadata_file = dataset_dir / "metadata.jsonl"
|
|
144
|
+
|
|
145
|
+
try:
|
|
146
|
+
if source == "huggingface":
|
|
147
|
+
summary = await self._download_huggingface(repo_id, dataset_id, images_dir, metadata_file, errors_file, max_items, image_column)
|
|
148
|
+
elif source == "kaggle":
|
|
149
|
+
ref = kaggle_ref or repo_id
|
|
150
|
+
summary = await self._download_kaggle(ref, dataset_id, images_dir, metadata_file, errors_file, max_items)
|
|
151
|
+
else:
|
|
152
|
+
summary = await self._download_urls(urls, dataset_id, images_dir, metadata_file, errors_file, max_items)
|
|
153
|
+
except Exception:
|
|
154
|
+
# Clean up empty directories on failure so we don't leave ghost artifacts
|
|
155
|
+
if images_dir.exists() and not any(images_dir.iterdir()):
|
|
156
|
+
shutil.rmtree(dataset_dir, ignore_errors=True)
|
|
157
|
+
raise
|
|
125
158
|
|
|
126
159
|
if output_format == "webdataset":
|
|
127
160
|
await self._write_webdataset(dataset_dir, images_dir, metadata_file)
|
|
@@ -150,10 +183,7 @@ class AssetDownloader:
|
|
|
150
183
|
max_items: Optional[int],
|
|
151
184
|
image_column: Optional[str],
|
|
152
185
|
) -> Dict[str, int]:
|
|
153
|
-
|
|
154
|
-
from datasets import load_dataset
|
|
155
|
-
except Exception as e:
|
|
156
|
-
raise RuntimeError(f"datasets package is required. Install with: pip install datasets. Details: {e}")
|
|
186
|
+
from datasets import load_dataset # validated in download_assets()
|
|
157
187
|
|
|
158
188
|
await self._emit("start", {"source": "huggingface", "repo_id": repo_id})
|
|
159
189
|
|
|
@@ -212,10 +242,7 @@ class AssetDownloader:
|
|
|
212
242
|
errors_file: Path,
|
|
213
243
|
max_items: Optional[int],
|
|
214
244
|
) -> Dict[str, int]:
|
|
215
|
-
|
|
216
|
-
from kaggle.api.kaggle_api_extended import KaggleApi
|
|
217
|
-
except Exception as e:
|
|
218
|
-
raise RuntimeError(f"kaggle package is required: {e}")
|
|
245
|
+
from kaggle.api.kaggle_api_extended import KaggleApi # validated in download_assets()
|
|
219
246
|
|
|
220
247
|
await self._emit("start", {"source": "kaggle", "dataset": kaggle_ref})
|
|
221
248
|
|