@vespermcp/mcp-server 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -3
- package/build/index.js +50 -27
- package/build/ingestion/ingestor.js +18 -0
- package/build/ingestion/kaggle-downloader.js +25 -2
- package/build/quality/image-analyzer.js +45 -3
- package/build/quality/media-analyzer.js +46 -3
- package/build/tools/formatter.js +8 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -55,9 +55,16 @@ pip install opencv-python pillow numpy librosa soundfile
|
|
|
55
55
|
|
|
56
56
|
## ⚙️ MCP Configuration
|
|
57
57
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
58
|
+
### For Cursor
|
|
59
|
+
1. Go to **Settings** > **Features** > **MCP**
|
|
60
|
+
2. Click **Add New MCP Server**
|
|
61
|
+
3. Enter:
|
|
62
|
+
- **Name**: `vesper`
|
|
63
|
+
- **Type**: `command`
|
|
64
|
+
- **Command**: `vesper`
|
|
65
|
+
|
|
66
|
+
### For Claude Desktop
|
|
67
|
+
Vesper attempts to auto-configure itself! Restart Claude and check. If not:
|
|
61
68
|
|
|
62
69
|
```json
|
|
63
70
|
{
|
package/build/index.js
CHANGED
|
@@ -57,6 +57,13 @@ jobManager.on("jobUpdated", (job) => {
|
|
|
57
57
|
data: `${emoji} [Job ${job.id.substring(0, 8)}] ${progress} ${job.status_text}`
|
|
58
58
|
});
|
|
59
59
|
});
|
|
60
|
+
// IMPORTANT: Execute jobs when the manager emits them
|
|
61
|
+
// This connects the queue logic to the execution context
|
|
62
|
+
jobManager.on("processJob", async (job, execute) => {
|
|
63
|
+
// The JobManager controls concurrency, so if we receive this event,
|
|
64
|
+
// we should execute the job immediately.
|
|
65
|
+
await execute();
|
|
66
|
+
});
|
|
60
67
|
// Create the server
|
|
61
68
|
const server = new Server({
|
|
62
69
|
name: "vesper",
|
|
@@ -379,35 +386,51 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
379
386
|
const job = jobManager.createJob("prepare", 0, { query });
|
|
380
387
|
// Orchestrated Background Task
|
|
381
388
|
jobManager.runJob(job.id, async (update) => {
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
status_text: `Matched: ${topDataset.name} (${topDataset.source})`
|
|
391
|
-
});
|
|
392
|
-
const source = topDataset.source;
|
|
393
|
-
const filePath = await dataIngestor.ensureData(topDataset.id, source, (msg, prog) => {
|
|
389
|
+
try {
|
|
390
|
+
update({ progress: 10, status_text: "Searching for best dataset matching query..." });
|
|
391
|
+
const results = await searchEngine.search(query, { limit: 1 });
|
|
392
|
+
if (results.length === 0) {
|
|
393
|
+
throw new Error("No datasets found matching the query. Try refining your search terms.");
|
|
394
|
+
}
|
|
395
|
+
const topDataset = results[0];
|
|
396
|
+
// Phase 6: Real Ingestion
|
|
394
397
|
update({
|
|
395
|
-
|
|
396
|
-
|
|
398
|
+
progress: 20,
|
|
399
|
+
status_text: `Matched: ${topDataset.name} (${topDataset.source})`
|
|
397
400
|
});
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
401
|
+
const source = topDataset.source;
|
|
402
|
+
// Pre-check credentials for Kaggle to fail fast with helpful message
|
|
403
|
+
if (source === "kaggle" && !dataIngestor.hasKaggleCredentials()) {
|
|
404
|
+
const errorMsg = dataIngestor.getKaggleCredentialError();
|
|
405
|
+
throw new Error(errorMsg);
|
|
406
|
+
}
|
|
407
|
+
const filePath = await dataIngestor.ensureData(topDataset.id, source, (msg, prog) => {
|
|
408
|
+
update({
|
|
409
|
+
status_text: msg,
|
|
410
|
+
progress: prog !== undefined ? 20 + Math.floor(prog * 0.3) : undefined // 20% -> 50%
|
|
411
|
+
});
|
|
412
|
+
});
|
|
413
|
+
update({ progress: 55, status_text: "Analyzing dataset quality..." });
|
|
414
|
+
const quality = await qualityAnalyzer.analyze(filePath);
|
|
415
|
+
const pipelineResult = await pipelineExecutor.runPipeline(topDataset.id, filePath, "csv", (msg) => {
|
|
416
|
+
update({ status_text: msg });
|
|
417
|
+
});
|
|
418
|
+
update({ progress: 90, status_text: "Installing dataset into codebase..." });
|
|
419
|
+
const installPath = await installService.install(topDataset.id, pipelineResult.final_output_path);
|
|
420
|
+
update({ progress: 100, status_text: "Preparation complete!" });
|
|
421
|
+
const message = `✅ Preparation complete for ${topDataset.name}.\n` +
|
|
422
|
+
`📦 Dataset installed to: ${installPath}\n` +
|
|
423
|
+
`🚀 You can now use this dataset for training your models.`;
|
|
424
|
+
return message;
|
|
425
|
+
}
|
|
426
|
+
catch (error) {
|
|
427
|
+
// Re-throw with enhanced error message for credential issues
|
|
428
|
+
const errorMessage = error.message || String(error);
|
|
429
|
+
if (errorMessage.includes("Kaggle credentials") || errorMessage.includes("KAGGLE")) {
|
|
430
|
+
throw new Error(errorMessage);
|
|
431
|
+
}
|
|
432
|
+
throw error;
|
|
433
|
+
}
|
|
411
434
|
});
|
|
412
435
|
return {
|
|
413
436
|
content: [{ type: "text", text: `Autonomous preparation job started. ID: ${job.id}. Use check_job_status to monitor progress.` }]
|
|
package/build/ingestion/ingestor.js
CHANGED
|
@@ -18,6 +18,18 @@ export class DataIngestor {
|
|
|
18
18
|
this.hfDownloader = new HFDownloader();
|
|
19
19
|
this.kaggleDownloader = new KaggleDownloader();
|
|
20
20
|
}
|
|
21
|
+
/**
|
|
22
|
+
* Check if Kaggle credentials are available
|
|
23
|
+
*/
|
|
24
|
+
hasKaggleCredentials() {
|
|
25
|
+
return this.kaggleDownloader.hasCredentials();
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Get helpful error message if Kaggle credentials are missing
|
|
29
|
+
*/
|
|
30
|
+
getKaggleCredentialError() {
|
|
31
|
+
return this.kaggleDownloader.getCredentialError();
|
|
32
|
+
}
|
|
21
33
|
/**
|
|
22
34
|
* Ensures a dataset is available locally
|
|
23
35
|
*/
|
|
@@ -55,6 +67,12 @@ export class DataIngestor {
|
|
|
55
67
|
}
|
|
56
68
|
}
|
|
57
69
|
else if (source === "kaggle") {
|
|
70
|
+
// Check credentials early and provide helpful error
|
|
71
|
+
if (!this.kaggleDownloader.hasCredentials()) {
|
|
72
|
+
const errorMsg = this.kaggleDownloader.getCredentialError();
|
|
73
|
+
this.failDownload(datasetId, errorMsg);
|
|
74
|
+
throw new Error(errorMsg);
|
|
75
|
+
}
|
|
58
76
|
const safeId = datasetId.replace("kaggle:", "").replace(/\//g, "_");
|
|
59
77
|
const targetDir = path.join(this.rawDataDir, `kaggle_${safeId}`);
|
|
60
78
|
this.store.registerDownload(datasetId, targetDir, "downloading");
|
|
package/build/ingestion/kaggle-downloader.js
CHANGED
|
@@ -11,13 +11,36 @@ export class KaggleDownloader {
|
|
|
11
11
|
this.key = key || process.env.KAGGLE_KEY || "";
|
|
12
12
|
this.downloader = new RobustDownloader();
|
|
13
13
|
}
|
|
14
|
+
/**
|
|
15
|
+
* Check if Kaggle credentials are available
|
|
16
|
+
*/
|
|
17
|
+
hasCredentials() {
|
|
18
|
+
return !!(this.username && this.key);
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Get a helpful error message if credentials are missing
|
|
22
|
+
*/
|
|
23
|
+
getCredentialError() {
|
|
24
|
+
if (!this.username && !this.key) {
|
|
25
|
+
return "Kaggle credentials missing. Please set KAGGLE_USERNAME and KAGGLE_KEY environment variables.\n" +
|
|
26
|
+
"💡 Tip: Get your API token from https://www.kaggle.com/settings → API → Create New Token\n" +
|
|
27
|
+
"💡 Alternative: Download the dataset manually and use analyze_quality() on local files.";
|
|
28
|
+
}
|
|
29
|
+
if (!this.username) {
|
|
30
|
+
return "KAGGLE_USERNAME is missing. Please set it in your MCP config or environment variables.";
|
|
31
|
+
}
|
|
32
|
+
if (!this.key) {
|
|
33
|
+
return "KAGGLE_KEY is missing. Please set it in your MCP config or environment variables.";
|
|
34
|
+
}
|
|
35
|
+
return "";
|
|
36
|
+
}
|
|
14
37
|
/**
|
|
15
38
|
* Downloads and extracts a Kaggle dataset
|
|
16
39
|
* returns the path to the primary data file
|
|
17
40
|
*/
|
|
18
41
|
async download(repoId, targetDir, onProgress) {
|
|
19
|
-
if (!this.
|
|
20
|
-
throw new Error(
|
|
42
|
+
if (!this.hasCredentials()) {
|
|
43
|
+
throw new Error(this.getCredentialError());
|
|
21
44
|
}
|
|
22
45
|
const auth = Buffer.from(`${this.username}:${this.key}`).toString('base64');
|
|
23
46
|
const url = `https://www.kaggle.com/api/v1/datasets/download/${repoId}`;
|
|
package/build/quality/image-analyzer.js
CHANGED
|
@@ -23,22 +23,64 @@ export class ImageAnalyzer {
|
|
|
23
23
|
process.stderr.on("data", (data) => {
|
|
24
24
|
stderr += data.toString();
|
|
25
25
|
});
|
|
26
|
+
process.on("error", (err) => {
|
|
27
|
+
if (err.code === "ENOENT") {
|
|
28
|
+
// Python not found - return a graceful failure report
|
|
29
|
+
resolve({
|
|
30
|
+
total_images: 1,
|
|
31
|
+
ok_images: 0,
|
|
32
|
+
failed_images: 1,
|
|
33
|
+
details: [{
|
|
34
|
+
status: "error",
|
|
35
|
+
error: "Python not installed or not in PATH. Please install Python to use image analysis features."
|
|
36
|
+
}]
|
|
37
|
+
});
|
|
38
|
+
}
|
|
39
|
+
else {
|
|
40
|
+
reject(new Error(`Failed to start image analysis process: ${err.message}`));
|
|
41
|
+
}
|
|
42
|
+
});
|
|
26
43
|
process.on("close", (code) => {
|
|
27
44
|
if (code !== 0) {
|
|
28
|
-
|
|
45
|
+
// Handle case where script fails
|
|
46
|
+
resolve({
|
|
47
|
+
total_images: 1,
|
|
48
|
+
ok_images: 0,
|
|
49
|
+
failed_images: 1,
|
|
50
|
+
details: [{
|
|
51
|
+
status: "error",
|
|
52
|
+
error: `Image Analyzer process failed (code ${code}): ${stderr || "Unknown error"}`
|
|
53
|
+
}]
|
|
54
|
+
});
|
|
29
55
|
return;
|
|
30
56
|
}
|
|
31
57
|
try {
|
|
32
58
|
const result = JSON.parse(stdout);
|
|
33
59
|
if (result.error) {
|
|
34
|
-
|
|
60
|
+
resolve({
|
|
61
|
+
total_images: 1,
|
|
62
|
+
ok_images: 0,
|
|
63
|
+
failed_images: 1,
|
|
64
|
+
details: [{
|
|
65
|
+
status: "error",
|
|
66
|
+
error: result.error
|
|
67
|
+
}]
|
|
68
|
+
});
|
|
35
69
|
}
|
|
36
70
|
else {
|
|
37
71
|
resolve(result);
|
|
38
72
|
}
|
|
39
73
|
}
|
|
40
74
|
catch (e) {
|
|
41
|
-
|
|
75
|
+
resolve({
|
|
76
|
+
total_images: 1,
|
|
77
|
+
ok_images: 0,
|
|
78
|
+
failed_images: 1,
|
|
79
|
+
details: [{
|
|
80
|
+
status: "error",
|
|
81
|
+
error: `Failed to parse image analyzer output: ${stdout}`
|
|
82
|
+
}]
|
|
83
|
+
});
|
|
42
84
|
}
|
|
43
85
|
});
|
|
44
86
|
});
|
|
package/build/quality/media-analyzer.js
CHANGED
|
@@ -23,22 +23,65 @@ export class MediaAnalyzer {
|
|
|
23
23
|
process.stderr.on("data", (data) => {
|
|
24
24
|
stderr += data.toString();
|
|
25
25
|
});
|
|
26
|
+
process.on("error", (err) => {
|
|
27
|
+
if (err.code === "ENOENT") {
|
|
28
|
+
// Python not found - return a graceful failure report
|
|
29
|
+
resolve({
|
|
30
|
+
total_files: 1,
|
|
31
|
+
ok_files: 0,
|
|
32
|
+
failed_files: 1,
|
|
33
|
+
details: [{
|
|
34
|
+
status: "error",
|
|
35
|
+
error: "Python not installed or not in PATH. Please install Python to use media analysis features."
|
|
36
|
+
}]
|
|
37
|
+
});
|
|
38
|
+
}
|
|
39
|
+
else {
|
|
40
|
+
reject(new Error(`Failed to start media analysis process: ${err.message}`));
|
|
41
|
+
}
|
|
42
|
+
});
|
|
26
43
|
process.on("close", (code) => {
|
|
27
44
|
if (code !== 0) {
|
|
28
|
-
|
|
45
|
+
// Handle case where python exists but script fails
|
|
46
|
+
resolve({
|
|
47
|
+
total_files: 1,
|
|
48
|
+
ok_files: 0,
|
|
49
|
+
failed_files: 1,
|
|
50
|
+
details: [{
|
|
51
|
+
status: "error",
|
|
52
|
+
error: `Media Analyzer process failed (code ${code}): ${stderr || "Unknown error"}`
|
|
53
|
+
}]
|
|
54
|
+
});
|
|
29
55
|
return;
|
|
30
56
|
}
|
|
31
57
|
try {
|
|
32
58
|
const result = JSON.parse(stdout);
|
|
33
59
|
if (result.error) {
|
|
34
|
-
|
|
60
|
+
// Return error as part of report instead of rejecting
|
|
61
|
+
resolve({
|
|
62
|
+
total_files: 1,
|
|
63
|
+
ok_files: 0,
|
|
64
|
+
failed_files: 1,
|
|
65
|
+
details: [{
|
|
66
|
+
status: "error",
|
|
67
|
+
error: result.error
|
|
68
|
+
}]
|
|
69
|
+
});
|
|
35
70
|
}
|
|
36
71
|
else {
|
|
37
72
|
resolve(result);
|
|
38
73
|
}
|
|
39
74
|
}
|
|
40
75
|
catch (e) {
|
|
41
|
-
|
|
76
|
+
resolve({
|
|
77
|
+
total_files: 1,
|
|
78
|
+
ok_files: 0,
|
|
79
|
+
failed_files: 1,
|
|
80
|
+
details: [{
|
|
81
|
+
status: "error",
|
|
82
|
+
error: `Failed to parse media analyzer output: ${stdout}`
|
|
83
|
+
}]
|
|
84
|
+
});
|
|
42
85
|
}
|
|
43
86
|
});
|
|
44
87
|
});
|
package/build/tools/formatter.js
CHANGED
|
@@ -21,10 +21,16 @@ export function formatJobStatus(job) {
|
|
|
21
21
|
output += `Progress: ${bar} ${job.progress}%\n`;
|
|
22
22
|
output += `Activity: ${job.status_text}\n`;
|
|
23
23
|
if (job.result_url) {
|
|
24
|
-
output +=
|
|
24
|
+
output += `\n✅ Result: ${job.result_url}\n`;
|
|
25
25
|
}
|
|
26
26
|
if (job.error) {
|
|
27
|
-
output +=
|
|
27
|
+
output += `\n❌ ERROR:\n`;
|
|
28
|
+
// Format multi-line errors nicely
|
|
29
|
+
const errorLines = job.error.split('\n');
|
|
30
|
+
errorLines.forEach(line => {
|
|
31
|
+
output += ` ${line}\n`;
|
|
32
|
+
});
|
|
33
|
+
output += `\n`;
|
|
28
34
|
}
|
|
29
35
|
output += `Updated: ${new Date(job.updated_at).toLocaleTimeString()}\n`;
|
|
30
36
|
output += "═".repeat(25) + "\n";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vespermcp/mcp-server",
|
|
3
|
-
"version": "1.0.3",
|
|
3
|
+
"version": "1.0.5",
|
|
4
4
|
"description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "build/index.js",
|