@vespermcp/mcp-server 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +6 -4
  2. package/build/cleaning/cleaner.js +27 -2
  3. package/build/cleaning/executor.js +7 -6
  4. package/build/cleaning/planner.js +16 -4
  5. package/build/config/config-manager.js +199 -0
  6. package/build/export/exporter.js +26 -2
  7. package/build/index.js +272 -72
  8. package/build/ingestion/ingestor.js +17 -16
  9. package/build/ingestion/kaggle-downloader.js +25 -2
  10. package/build/install/install-service.js +1 -1
  11. package/build/jobs/manager.js +17 -10
  12. package/build/metadata/monitoring-service.js +2 -2
  13. package/build/metadata/scraper.js +8 -8
  14. package/build/metadata/store.js +17 -2
  15. package/build/monitoring/observability.js +2 -2
  16. package/build/preparation/target-detector.js +75 -0
  17. package/build/python/cleaner.py +226 -0
  18. package/build/python/export_engine.py +131 -0
  19. package/build/python/framework_adapters.py +100 -0
  20. package/build/python/github_adapter.py +106 -0
  21. package/build/python/image_engine.py +86 -0
  22. package/build/python/media_engine.py +133 -0
  23. package/build/python/nasa_adapter.py +82 -0
  24. package/build/python/quality_engine.py +243 -0
  25. package/build/python/splitter_engine.py +283 -0
  26. package/build/python/target_engine.py +154 -0
  27. package/build/python/test_framework_adapters.py +61 -0
  28. package/build/python/uci_adapter.py +94 -0
  29. package/build/python/worldbank_adapter.py +99 -0
  30. package/build/quality/analyzer.js +40 -4
  31. package/build/quality/image-analyzer.js +73 -5
  32. package/build/quality/media-analyzer.js +74 -5
  33. package/build/scripts/cleanup-kaggle.js +41 -0
  34. package/build/scripts/repro-bug.js +37 -0
  35. package/build/scripts/repro-export-bug.js +56 -0
  36. package/build/scripts/test-mcp-v5.js +12 -11
  37. package/build/scripts/test-production-sync.js +36 -0
  38. package/build/scripts/test-target-detector.js +29 -0
  39. package/build/scripts/test-write.js +14 -0
  40. package/build/scripts/verify-integration.js +57 -0
  41. package/build/scripts/verify-priority.js +33 -0
  42. package/build/search/engine.js +13 -2
  43. package/build/search/jit-orchestrator.js +6 -40
  44. package/build/search/vector-store.js +18 -0
  45. package/build/splitting/splitter.js +27 -2
  46. package/build/tools/formatter.js +23 -8
  47. package/build/utils/downloader.js +2 -2
  48. package/build/utils/selector.js +69 -0
  49. package/package.json +8 -4
  50. package/src/python/cleaner.py +33 -3
  51. package/src/python/export_engine.py +19 -0
  52. package/src/python/target_engine.py +154 -0
package/README.md CHANGED
@@ -31,15 +31,17 @@ Vesper is a Model Context Protocol (MCP) server that helps you find, analyze, an
31
31
 
32
32
  ## 📦 Installation
33
33
 
34
- ### Install Globally (Recommended)
34
+ ### Option A: Install via NPX (Easiest)
35
+ Run the setup wizard to automatically configure Vesper for Cursor, Claude Desktop, and VS Code:
35
36
 
36
37
  ```bash
37
- npm install -g @vespermcp/mcp-server
38
+ npx @vespermcp/mcp-server@latest --setup
38
39
  ```
39
40
 
40
- ### Option B: Install via Git
41
+ ### Option B: Install Globally
41
42
  ```bash
42
- npm install -g git+https://github.com/vespermcp/mcp-server.git
43
+ npm install -g @vespermcp/mcp-server
44
+ vesper --setup
43
45
  ```
44
46
 
45
47
  The postinstall script will automatically:
package/build/cleaning/cleaner.js CHANGED
@@ -1,10 +1,35 @@
1
1
  import { spawn } from "child_process";
2
2
  import path from "path";
3
+ import fs from "fs";
3
4
  export class DataCleaner {
4
5
  pythonPath = "python";
5
6
  scriptPath;
6
- constructor(projectRoot = process.cwd()) {
7
- this.scriptPath = path.join(projectRoot, "src", "python", "cleaner.py");
7
+ constructor(buildDir = process.cwd()) {
8
+ const homeDir = process.env.HOME || process.env.USERPROFILE || buildDir;
9
+ const dataRoot = path.join(homeDir, ".vesper");
10
+ const scriptPath0 = path.resolve(dataRoot, "python", "cleaner.py");
11
+ const scriptPath1 = path.resolve(buildDir, "python", "cleaner.py");
12
+ const scriptPath2 = path.resolve(buildDir, "..", "src", "python", "cleaner.py");
13
+ const scriptPath3 = path.resolve(buildDir, "..", "python", "cleaner.py");
14
+ if (fs.existsSync(scriptPath0)) {
15
+ this.scriptPath = scriptPath0;
16
+ }
17
+ else if (fs.existsSync(scriptPath1)) {
18
+ this.scriptPath = scriptPath1;
19
+ }
20
+ else if (fs.existsSync(scriptPath2)) {
21
+ this.scriptPath = scriptPath2;
22
+ }
23
+ else if (fs.existsSync(scriptPath3)) {
24
+ this.scriptPath = scriptPath3;
25
+ }
26
+ else {
27
+ this.scriptPath = scriptPath0; // Final fallback
28
+ }
29
+ // Detect Python command (Windows may use 'py' instead of 'python')
30
+ if (process.platform === "win32") {
31
+ this.pythonPath = "py";
32
+ }
8
33
  }
9
34
  /**
10
35
  * Execute a list of cleaning operations on a file
package/build/cleaning/executor.js CHANGED
@@ -5,10 +5,11 @@ export class PipelineExecutor {
5
5
  analyzer;
6
6
  planner;
7
7
  cleaner;
8
- constructor(projectRoot = process.cwd()) {
9
- this.analyzer = new QualityAnalyzer(undefined, projectRoot);
10
- this.planner = new CleaningPlanner();
11
- this.cleaner = new DataCleaner(projectRoot);
8
+ constructor(dataRoot = process.cwd(), buildDir) {
9
+ // Use buildDir for analyzer and cleaner (where Python scripts are), dataRoot for data operations
10
+ this.analyzer = new QualityAnalyzer(undefined, buildDir || dataRoot);
11
+ this.planner = new CleaningPlanner(undefined, buildDir || dataRoot);
12
+ this.cleaner = new DataCleaner(buildDir || dataRoot);
12
13
  }
13
14
  /**
14
15
  * Run the full Auto-Cleaning Pipeline on a dataset file
@@ -16,7 +17,7 @@ export class PipelineExecutor {
16
17
  async runPipeline(datasetId, filePath, outputFormat = "csv", onProgress) {
17
18
  // ... (logging setup)
18
19
  const log = (m) => {
19
- console.log(`[Pipeline] ${m}`);
20
+ console.error(`[Pipeline] ${m}`);
20
21
  if (onProgress)
21
22
  onProgress(m);
22
23
  };
@@ -42,7 +43,7 @@ export class PipelineExecutor {
42
43
  }
43
44
  // 3. Execute Plan (includes conversion if requested)
44
45
  log(`Executing ${plan.operations.length} operations (Format: ${outputFormat})...`);
45
- plan.operations.forEach(op => console.log(` - ${op.type}: ${op.reason}`));
46
+ plan.operations.forEach(op => console.error(` - ${op.type}: ${op.reason}`));
46
47
  const cleaningResult = await this.cleaner.clean(filePath, plan.operations, outputFormat);
47
48
  if (cleaningResult.success) {
48
49
  log(`Cleaning complete. Output: ${cleaningResult.output_path}`);
package/build/cleaning/planner.js CHANGED
@@ -1,16 +1,19 @@
1
+ import { TargetDetector } from "../preparation/target-detector.js";
1
2
  export class CleaningPlanner {
2
3
  cache;
3
- constructor(cache) {
4
+ targetDetector;
5
+ constructor(cache, buildDir = process.cwd()) {
4
6
  this.cache = cache;
7
+ this.targetDetector = new TargetDetector(buildDir);
5
8
  }
6
9
  /**
7
10
  * Generate a cleaning plan based on the quality report and optional custom rules
8
11
  */
9
- async generatePlan(datasetId, report, ruleSet) {
12
+ async generatePlan(datasetId, report, ruleSet, targetInfo) {
10
13
  if (this.cache) {
11
14
  const cached = await this.cache.getPlan(datasetId, { report, ruleSet });
12
15
  if (cached) {
13
- console.log(`[CleaningPlanner] Cache hit for ${datasetId}`);
16
+ console.error(`[CleaningPlanner] Cache hit for ${datasetId}`);
14
17
  return cached;
15
18
  }
16
19
  }
@@ -66,7 +69,16 @@ export class CleaningPlanner {
66
69
  });
67
70
  }
68
71
  }
69
- // 3. Apply Custom Rules
72
+ // 3. Target Unification
73
+ if (targetInfo && targetInfo.target !== "target" && targetInfo.confidence > 0.7) {
74
+ ops.push({
75
+ type: "RenameTarget",
76
+ params: { old_name: targetInfo.target, new_name: "target" },
77
+ reason: `Detected target '${targetInfo.target}' with high confidence (${targetInfo.confidence.toFixed(2)})`
78
+ });
79
+ estimatedColsSaved++; // Logic fix: effectively "saving" a column by standardizing it
80
+ }
81
+ // 4. Apply Custom Rules
70
82
  if (ruleSet) {
71
83
  for (const rule of ruleSet.rules) {
72
84
  const targets = rule.condition.column === "*"
package/build/config/config-manager.js ADDED
@@ -0,0 +1,199 @@
1
+ import fs from "fs";
2
+ import path from "path";
3
+ import os from "os";
4
+ export class ConfigManager {
5
+ getHomeDir() {
6
+ return os.homedir();
7
+ }
8
+ getConfigPaths() {
9
+ const home = this.getHomeDir();
10
+ const isWin = process.platform === "win32";
11
+ const appData = process.env.APPDATA || "";
12
+ return {
13
+ "claude-desktop": {
14
+ name: "Claude Desktop",
15
+ path: isWin
16
+ ? path.join(appData, "Claude", "claude_desktop_config.json")
17
+ : path.join(home, "Library", "Application Support", "Claude", "claude_desktop_config.json"),
18
+ key: "mcpServers"
19
+ },
20
+ "cursor": {
21
+ name: "Cursor",
22
+ path: isWin
23
+ ? path.join(appData, "Cursor", "User", "globalStorage", "rohit-gohri.cursor-mcp", "mcp.json")
24
+ : path.join(home, "Library", "Application Support", "Cursor", "User", "globalStorage", "rohit-gohri.cursor-mcp", "mcp.json"),
25
+ key: "mcpServers"
26
+ },
27
+ "vscode-cline": {
28
+ name: "VS Code (Cline)",
29
+ path: isWin
30
+ ? path.join(appData, "Code", "User", "globalStorage", "saoudrizwan.claude-dev", "settings", "cline_mcp_settings.json")
31
+ : path.join(home, "Library", "Application Support", "Code", "User", "globalStorage", "saoudrizwan.claude-dev", "settings", "cline_mcp_settings.json"),
32
+ key: "mcpServers"
33
+ },
34
+ "vscode-roo-code": {
35
+ name: "VS Code (Roo Code)",
36
+ path: isWin
37
+ ? path.join(appData, "Code", "User", "globalStorage", "RooVeterans.roo-cline", "settings", "cline_mcp_settings.json")
38
+ : path.join(home, "Library", "Application Support", "Code", "User", "globalStorage", "RooVeterans.roo-cline", "settings", "cline_mcp_settings.json"),
39
+ key: "mcpServers"
40
+ },
41
+ "cursor-project": {
42
+ name: "Cursor (Project-specific)",
43
+ path: path.join(process.cwd(), ".cursor", "mcp.json"),
44
+ key: "mcpServers"
45
+ },
46
+ "vscode-copilot": {
47
+ name: "VS Code (Copilot)",
48
+ path: path.join(home, ".copilot", "mcp-config.json"),
49
+ key: "mcpServers"
50
+ },
51
+ "vscode-global": {
52
+ name: "VS Code (Standard MCP)",
53
+ path: isWin
54
+ ? path.join(appData, "Code", "User", "mcp.json")
55
+ : path.join(home, "Library", "Application Support", "Code", "User", "mcp.json"),
56
+ key: "mcpServers"
57
+ },
58
+ "vscode-project": {
59
+ name: "VS Code (Project-specific)",
60
+ path: path.join(process.cwd(), ".vscode", "mcp.json"),
61
+ key: "mcpServers"
62
+ },
63
+ "vscode-insiders": {
64
+ name: "VS Code Insiders",
65
+ path: isWin
66
+ ? path.join(appData, "Code - Insiders", "User", "mcp.json")
67
+ : path.join(home, "Library", "Application Support", "Code - Insiders", "User", "mcp.json"),
68
+ key: "mcpServers"
69
+ },
70
+ "vscode-settings": {
71
+ name: "VS Code (Settings.json)",
72
+ path: isWin
73
+ ? path.join(appData, "Code", "User", "settings.json")
74
+ : path.join(home, "Library", "Application Support", "Code", "User", "settings.json"),
75
+ key: "github.copilot.chat.mcp.servers"
76
+ },
77
+ "vscode-root": {
78
+ name: "VS Code (Workspace Root)",
79
+ path: path.join(process.cwd(), "mcp.json"),
80
+ key: "mcpServers"
81
+ }
82
+ };
83
+ }
84
+ detectIDEs() {
85
+ const paths = this.getConfigPaths();
86
+ const detected = [];
87
+ const termProgram = (process.env.TERM_PROGRAM || "").toLowerCase();
88
+ const appData = process.env.APPDATA || "";
89
+ const home = this.getHomeDir();
90
+ for (const [key, cp] of Object.entries(paths)) {
91
+ const isClaude = key === "claude-desktop";
92
+ const isCursor = key === "cursor";
93
+ const isVSCode = key.startsWith("vscode");
94
+ const isCopilot = key === "vscode-copilot";
95
+ const isStandard = key === "vscode-global";
96
+ const isSettings = key === "vscode-settings";
97
+ const isRoot = key === "vscode-root";
98
+ const hasClaudeFolder = fs.existsSync(path.join(appData, "Claude"));
99
+ const hasCursorFolder = fs.existsSync(path.join(appData, "Cursor")) || fs.existsSync(path.join(home, ".cursor"));
100
+ const hasCodeFolder = fs.existsSync(path.join(appData, "Code")) || fs.existsSync(path.join(home, ".vscode"));
101
+ const hasInsidersFolder = fs.existsSync(path.join(appData, "Code - Insiders"));
102
+ const hasCopilotFolder = fs.existsSync(path.join(home, ".copilot"));
103
+ if (process.env.VESPER_DEBUG) {
104
+ console.log(`[Debug] Checking ${key}:`);
105
+ console.log(` - Config Path: ${cp.path} (${fs.existsSync(cp.path) ? "EXISTS" : "MISSING"})`);
106
+ if (isClaude)
107
+ console.log(` - Claude Folder: ${path.join(appData, "Claude")} (${hasClaudeFolder ? "EXISTS" : "MISSING"})`);
108
+ if (isCursor)
109
+ console.log(` - Cursor Folder: ${path.join(appData, "Cursor")} (${hasCursorFolder ? "EXISTS" : "MISSING"})`);
110
+ if (isVSCode)
111
+ console.log(` - Code Folder: ${path.join(appData, "Code")} (${hasCodeFolder ? "EXISTS" : "MISSING"})`);
112
+ if (isCopilot)
113
+ console.log(` - Copilot Folder: ${path.join(this.getHomeDir(), ".copilot")} (${hasCopilotFolder ? "EXISTS" : "MISSING"})`);
114
+ }
115
+ const isCurrentIDE = (isCursor && (termProgram.includes("cursor") || (termProgram === "vscode" && hasCursorFolder))) ||
116
+ (isClaude && termProgram.includes("claude")) ||
117
+ (isVSCode && termProgram === "vscode" && (hasCodeFolder || hasInsidersFolder)) ||
118
+ (isCopilot && hasCopilotFolder) ||
119
+ (isStandard && termProgram === "vscode") ||
120
+ (isSettings && termProgram === "vscode");
121
+ // Also check if the config file actually exists OR the root folder exists
122
+ const shouldDetectByFolder = (isClaude && hasClaudeFolder) ||
123
+ (isCursor && hasCursorFolder) ||
124
+ (isVSCode && hasCodeFolder) ||
125
+ (key === "vscode-insiders" && hasInsidersFolder) ||
126
+ (isCopilot && hasCopilotFolder) ||
127
+ (isStandard && hasCodeFolder) ||
128
+ (isSettings && hasCodeFolder) ||
129
+ (isRoot && fs.existsSync(process.cwd()));
130
+ if (shouldDetectByFolder || fs.existsSync(cp.path) || isCurrentIDE || key === "cursor-project" || key === "vscode-project" || key === "vscode-root") {
131
+ let displayName = cp.name;
132
+ if (isCurrentIDE) {
133
+ displayName += " (Current Terminal)";
134
+ }
135
+ detected.push({ ...cp, name: displayName });
136
+ }
137
+ }
138
+ return detected.filter((v, i, a) => a.findIndex(t => t.path === v.path) === i);
139
+ }
140
+ async installTo(configPath) {
141
+ try {
142
+ console.log(`[Vesper Setup] Installing to ${configPath.name} at ${configPath.path}...`);
143
+ let config = {};
144
+ if (fs.existsSync(configPath.path)) {
145
+ try {
146
+ const content = fs.readFileSync(configPath.path, "utf-8").trim();
147
+ config = content ? JSON.parse(content) : {};
148
+ }
149
+ catch (e) {
150
+ console.warn(`[Vesper Setup] Could not parse ${configPath.path}, starting fresh`);
151
+ config = {};
152
+ }
153
+ }
154
+ else {
155
+ console.log(`[Vesper Setup] Creating directory: ${path.dirname(configPath.path)}`);
156
+ fs.mkdirSync(path.dirname(configPath.path), { recursive: true });
157
+ }
158
+ // Handle nested keys (e.g., "github.copilot.chat.mcp.servers")
159
+ const keys = configPath.key.split('.');
160
+ let current = config;
161
+ for (let i = 0; i < keys.length - 1; i++) {
162
+ if (!current[keys[i]])
163
+ current[keys[i]] = {};
164
+ current = current[keys[i]];
165
+ }
166
+ const lastKey = keys[keys.length - 1];
167
+ const mcpServers = current[lastKey] || {};
168
+ // Use the portable npx command as requested
169
+ // We use both 'vesper' (clean) and 'vespermcp' (legacy/alias) for compatibility
170
+ const isWin = process.platform === "win32";
171
+ const serverConfig = {
172
+ command: isWin ? "npx.cmd" : "npx",
173
+ args: ["-y", "@vespermcp/mcp-server@latest"],
174
+ env: {
175
+ "HF_TOKEN": "YOUR_HUGGINGFACE_TOKEN_HERE",
176
+ // Empty by default to avoid clutter, user can add if needed
177
+ }
178
+ };
179
+ mcpServers["vesper"] = serverConfig;
180
+ // Also add alias for easy upgrading
181
+ mcpServers["@vespermcp/mcp-server"] = serverConfig;
182
+ current[lastKey] = mcpServers;
183
+ const content = JSON.stringify(config, null, 2);
184
+ if (process.env.VESPER_DEBUG) {
185
+ console.log(`[Vesper Setup] Writing to ${configPath.path}:`);
186
+ console.log(content);
187
+ }
188
+ fs.writeFileSync(configPath.path, content, "utf8");
189
+ // Verify write
190
+ const finalSize = fs.statSync(configPath.path).size;
191
+ console.log(`[Vesper Setup] Successfully wrote ${finalSize} bytes to ${configPath.name}`);
192
+ return true;
193
+ }
194
+ catch (error) {
195
+ console.error(`Failed to install to ${configPath.name}:`, error);
196
+ return false;
197
+ }
198
+ }
199
+ }
package/build/export/exporter.js CHANGED
@@ -4,8 +4,32 @@ import fs from "fs";
4
4
  export class DataExporter {
5
5
  pythonPath = "python";
6
6
  scriptPath;
7
- constructor(projectRoot = process.cwd()) {
8
- this.scriptPath = path.join(projectRoot, "src", "python", "export_engine.py");
7
+ constructor(buildDir = process.cwd()) {
8
+ const homeDir = process.env.HOME || process.env.USERPROFILE || buildDir;
9
+ const dataRoot = path.join(homeDir, ".vesper");
10
+ const scriptPath0 = path.resolve(dataRoot, "python", "export_engine.py");
11
+ const scriptPath1 = path.resolve(buildDir, "python", "export_engine.py");
12
+ const scriptPath2 = path.resolve(buildDir, "..", "src", "python", "export_engine.py");
13
+ const scriptPath3 = path.resolve(buildDir, "..", "python", "export_engine.py");
14
+ if (fs.existsSync(scriptPath0)) {
15
+ this.scriptPath = scriptPath0;
16
+ }
17
+ else if (fs.existsSync(scriptPath1)) {
18
+ this.scriptPath = scriptPath1;
19
+ }
20
+ else if (fs.existsSync(scriptPath2)) {
21
+ this.scriptPath = scriptPath2;
22
+ }
23
+ else if (fs.existsSync(scriptPath3)) {
24
+ this.scriptPath = scriptPath3;
25
+ }
26
+ else {
27
+ this.scriptPath = scriptPath0;
28
+ }
29
+ // Detect Python command
30
+ if (process.platform === "win32") {
31
+ this.pythonPath = "py";
32
+ }
9
33
  }
10
34
  /**
11
35
  * Exports a dataset file to a specified format