@vespermcp/mcp-server 1.2.22 → 1.2.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,242 @@
1
/**
 * Current wall-clock time as an ISO-8601 string (the format produced by
 * `Date.prototype.toISOString`, i.e. UTC with millisecond precision).
 *
 * @returns {string} Timestamp such as `2024-01-01T12:00:00.000Z`.
 */
function nowIso() {
    const current = new Date();
    return current.toISOString();
}
4
/**
 * Restricts a number to the closed interval [0, 1].
 *
 * Anything that is not a finite number (NaN, +/-Infinity, strings, undefined,
 * ...) is mapped to 0 because `Number.isFinite` does not coerce its argument.
 *
 * @param {number} x - Value to clamp.
 * @returns {number} `x` limited to [0, 1], or 0 for non-finite input.
 */
function clamp01(x) {
    return Number.isFinite(x) ? Math.min(1, Math.max(0, x)) : 0;
}
9
/**
 * Converts a score expressed on a 0-100 scale into the 0-1 range.
 *
 * Falsy input (undefined, null, 0, "") is treated as 0; the scaled value is
 * then passed through `clamp01`.
 *
 * @param {number|string|null|undefined} score0to100 - Score on a 0-100 scale.
 * @returns {number} Result of `clamp01` applied to the score divided by 100.
 */
function scoreTo01(score0to100) {
    const numericScore = Number(score0to100 || 0);
    const scaled = numericScore / 100;
    return clamp01(scaled);
}
12
/**
 * Fans a text query out to the requested discovery sources (arXiv, GitHub,
 * Semantic Scholar, Hacker News) and converts every hit into one common
 * document shape.
 *
 * Each source client is read from `deps` and must expose
 * `discoverWithTelemetry(query, limit[, options])` resolving to an object with
 * a `results` array plus optional `cacheHit` / `latencyMs` telemetry fields.
 */
export class WebCoreEngine {
    /** Source clients: arxivSource, githubSource, semanticScholarSource, hackerNewsSource. */
    deps;

    /**
     * @param {object} deps - Holder of the per-source clients listed above.
     */
    constructor(deps) {
        this.deps = deps;
    }

    /**
     * Queries every requested source in order and merges the converted documents.
     *
     * A failing source never rejects the whole call: its error message is
     * recorded in `telemetry.per_source` and the remaining sources still run.
     *
     * @param {object} input
     * @param {string} input.query - Search text; required (trimmed).
     * @param {number} [input.limit=10] - Maximum documents returned, clamped to
     *   1..50. A value that cannot be converted to a number falls back to 10.
     * @param {string[]} [input.sources] - Any of "arxiv", "github",
     *   "semantic_scholar", "hackernews". Unknown names and duplicates are
     *   dropped; a missing, empty or non-array value means ["arxiv", "github"].
     * @param {boolean} [input.arxiv_full_text] - Forwarded to arXiv as `full_text`.
     * @param {boolean} [input.github_include_readme] - Forwarded to GitHub as `include_readme`.
     * @returns {Promise<{query: string, sources: string[], results: object[], collected_at: string, telemetry: {per_source: object[]}}>}
     * @throws {Error} "query is required" when the query is missing or blank.
     */
    async find(input) {
        const query = String(input?.query || "").trim();
        if (!query)
            throw new Error("query is required");

        // Number("abc") is NaN, and NaN survives Math.min/Math.max, which would
        // make slice(0, NaN) return nothing. Fall back to the default instead.
        const rawLimit = Number(input.limit || 10);
        const limit = Number.isNaN(rawLimit) ? 10 : Math.max(1, Math.min(50, rawLimit));

        const supported = ["arxiv", "github", "semantic_scholar", "hackernews"];
        const wanted = Array.isArray(input.sources) && input.sources.length > 0
            ? input.sources
            : ["arxiv", "github"];
        // De-duplicate so a repeated name does not query the same source twice.
        const requested = [...new Set(wanted)].filter((s) => supported.includes(s));

        const arxivFullText = input.arxiv_full_text === true;
        const githubIncludeReadme = input.github_include_readme === true;
        const collectedAt = nowIso();
        const results = [];
        const telemetry = { per_source: [] };
        // Split the budget evenly; the merged list is cut back to `limit` at the end.
        const perSource = Math.max(1, Math.ceil(limit / Math.max(1, requested.length)));

        // One entry per source: how to query it, how to convert a hit, and which
        // source-specific telemetry fields to copy from the response.
        const adapters = {
            arxiv: {
                discover: () => this.deps.arxivSource.discoverWithTelemetry(query, perSource, { full_text: arxivFullText }),
                toDoc: (m) => this.fromArxiv(m, collectedAt),
                extraTelemetry: (out) => ({ pdf_extract_ms_total: out.pdf_extract_ms_total }),
            },
            github: {
                discover: () => this.deps.githubSource.discoverWithTelemetry(query, perSource, { include_readme: githubIncludeReadme }),
                toDoc: (m) => this.fromGithub(m, collectedAt),
                extraTelemetry: (out) => ({ readme_fetch_ms_total: out.readme_fetch_ms_total }),
            },
            semantic_scholar: {
                discover: () => this.deps.semanticScholarSource.discoverWithTelemetry(query, perSource),
                toDoc: (m) => this.fromSemanticScholar(m, collectedAt),
                extraTelemetry: () => ({}),
            },
            hackernews: {
                discover: () => this.deps.hackerNewsSource.discoverWithTelemetry(query, perSource),
                toDoc: (m) => this.fromHackerNews(m, collectedAt),
                extraTelemetry: () => ({}),
            },
        };

        for (const source of requested) {
            const adapter = adapters[source];
            const t0 = Date.now();
            try {
                const out = await adapter.discover();
                const docs = out.results.map((m) => adapter.toDoc(m)).filter((d) => d !== null);
                results.push(...docs);
                telemetry.per_source.push({
                    source,
                    cache_hit: out.cacheHit,
                    // Prefer the latency reported by the source; otherwise measure locally.
                    latency_ms: out.latencyMs || (Date.now() - t0),
                    result_count: docs.length,
                    ...adapter.extraTelemetry(out),
                });
            }
            catch (e) {
                telemetry.per_source.push({
                    source,
                    cache_hit: false,
                    latency_ms: Date.now() - t0,
                    result_count: 0,
                    error: e?.message || String(e),
                });
            }
        }

        return {
            query,
            sources: requested,
            results: results.slice(0, limit),
            collected_at: collectedAt,
            telemetry,
        };
    }

    /**
     * Converts an arXiv hit into the common document shape.
     *
     * @param {object} meta - Raw hit (id, name, description, authors, tags, ...).
     * @param {string} collectedAt - Timestamp stamped onto the document.
     * @returns {object|null} Document, or null when no URL or no content can be derived.
     */
    fromArxiv(meta, collectedAt) {
        const arxivId = meta.id;
        // Only synthesize URLs from a real id; otherwise ".../abs/undefined"
        // would slip past the emptiness check below.
        const absUrl = meta.metadata_url || (arxivId ? `https://arxiv.org/abs/${arxivId}` : "");
        const pdfUrl = meta.download_url || (arxivId ? `https://arxiv.org/pdf/${arxivId}` : undefined);
        const title = meta.name;
        const abstract = meta.description;
        const authors = Array.isArray(meta.authors) ? meta.authors : undefined;
        const content = meta.webcore_content || abstract;
        if (!absUrl || !content)
            return null;
        return {
            source_type: "arxiv",
            source_url: absUrl,
            content,
            metadata_json: {
                arxiv_id: arxivId,
                title,
                authors,
                categories: meta.tags,
                updated_at: meta.last_updated,
                pdf_url: pdfUrl,
                abstract,
                pdf_text_present: !!meta.webcore_content,
                pdf_text_preview_length: meta.webcore_content ? meta.webcore_content.length : 0,
            },
            quality_score: scoreTo01(meta.quality_score),
            collected_at: collectedAt,
            content_type: meta.webcore_content ? "paper_fulltext" : "paper_abstract",
        };
    }

    /**
     * Converts a GitHub repository hit into the common document shape.
     *
     * @param {object} meta - Raw hit; `meta.id` is used as the repository full name.
     * @param {string} collectedAt - Timestamp stamped onto the document.
     * @returns {object|null} Document, or null when no URL or no content can be derived.
     */
    fromGithub(meta, collectedAt) {
        const fullName = meta.id;
        // Same guard as the other converters: never build a URL from a missing id.
        const url = meta.metadata_url || meta.download_url || (fullName ? `https://github.com/${fullName}` : "");
        const content = meta.webcore_content || meta.description || meta.name;
        if (!url || !content)
            return null;
        return {
            source_type: "github",
            source_url: url,
            content,
            metadata_json: {
                full_name: fullName,
                name: meta.name,
                description: meta.description,
                stars: meta.stars,
                forks_proxy_downloads: meta.downloads,
                topics: meta.tags,
                license_id: meta.license?.id,
                license_name: meta.license?.name,
                updated_at: meta.last_updated,
                language: (meta.languages || [])[0],
                readme_present: !!meta.webcore_content,
                readme_preview_length: meta.webcore_content ? meta.webcore_content.length : 0,
            },
            quality_score: scoreTo01(meta.quality_score),
            collected_at: collectedAt,
            content_type: meta.webcore_content ? "text" : "repo",
        };
    }

    /**
     * Converts a Semantic Scholar hit into the common document shape.
     *
     * @param {object} meta - Raw hit; `meta.description` is used as the content.
     * @param {string} collectedAt - Timestamp stamped onto the document.
     * @returns {object|null} Document, or null when no URL or no content can be derived.
     */
    fromSemanticScholar(meta, collectedAt) {
        const paperId = meta.id;
        const url = meta.metadata_url || meta.download_url || (paperId ? `https://www.semanticscholar.org/paper/${paperId}` : "");
        const content = meta.description;
        if (!url || !content)
            return null;
        return {
            source_type: "semantic_scholar",
            source_url: url,
            content,
            metadata_json: {
                paper_id: paperId,
                title: meta.name,
                authors: meta.authors,
                abstract: meta.description,
                tags: meta.tags,
                citation_count: meta.stars,
                updated_at: meta.last_updated,
            },
            quality_score: scoreTo01(meta.quality_score),
            collected_at: collectedAt,
            content_type: "paper_abstract",
        };
    }

    /**
     * Converts a Hacker News hit into the common document shape.
     *
     * @param {object} meta - Raw hit; `meta.description` is used as the content.
     * @param {string} collectedAt - Timestamp stamped onto the document.
     * @returns {object|null} Document, or null when no URL or no content can be derived.
     */
    fromHackerNews(meta, collectedAt) {
        const itemId = meta.id;
        const url = meta.metadata_url || meta.download_url || (itemId ? `https://news.ycombinator.com/item?id=${itemId}` : "");
        const content = meta.description;
        const authors = meta.authors;
        if (!url || !content)
            return null;
        return {
            source_type: "hackernews",
            source_url: url,
            content,
            metadata_json: {
                item_id: itemId,
                title: meta.name,
                authors,
                points: meta.stars,
                comments_proxy: meta.downloads,
                tags: meta.tags,
                updated_at: meta.last_updated,
            },
            quality_score: scoreTo01(meta.quality_score),
            collected_at: collectedAt,
            content_type: "thread",
        };
    }
}
package/package.json CHANGED
@@ -1,10 +1,11 @@
1
1
  {
2
2
  "name": "@vespermcp/mcp-server",
3
- "version": "1.2.22",
3
+ "version": "1.2.24",
4
4
  "description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
5
5
  "type": "module",
6
6
  "main": "build/index.js",
7
7
  "bin": {
8
+ "vespermcp": "build/index.js",
8
9
  "vesper-wizard": "wizard.cjs"
9
10
  },
10
11
  "files": [
@@ -66,10 +67,13 @@
66
67
  "npm": ">=8.0.0"
67
68
  },
68
69
  "dependencies": {
70
+ "@aws-sdk/client-s3": "^3.1017.0",
71
+ "@aws-sdk/credential-providers": "^3.1017.0",
69
72
  "@huggingface/hub": "^2.7.1",
70
73
  "@modelcontextprotocol/sdk": "^1.25.2",
71
74
  "@polar-sh/nextjs": "^0.9.4",
72
75
  "@supabase/supabase-js": "^2.98.0",
76
+ "@vercel/analytics": "^2.0.0",
73
77
  "@xenova/transformers": "^2.17.2",
74
78
  "adm-zip": "^0.5.16",
75
79
  "ajv": "^8.17.1",
@@ -77,6 +81,7 @@
77
81
  "better-sqlite3": "^12.6.0",
78
82
  "inquirer": "^13.3.0",
79
83
  "lodash": "^4.17.21",
84
+ "pdf-parse": "^2.4.5",
80
85
  "uuid": "^13.0.0",
81
86
  "zod": "^4.3.5",
82
87
  "zod-to-json-schema": "^3.25.1"
@@ -18,6 +18,7 @@ const readline = require('readline');
18
18
  const HOME = os.homedir();
19
19
  const VESPER_DIR = path.join(HOME, '.vesper');
20
20
  const CONFIG_TOML = path.join(VESPER_DIR, 'config.toml');
21
+ const CONFIG_JSON = path.join(VESPER_DIR, 'config.json');
21
22
  const DATA_DIR = path.join(VESPER_DIR, 'data');
22
23
  const IS_WIN = process.platform === 'win32';
23
24
  const APPDATA = process.env.APPDATA || path.join(HOME, 'AppData', 'Roaming');
@@ -49,6 +50,32 @@ function writeToml(filePath, data) {
49
50
  fs.writeFileSync(filePath, lines.join('\n') + '\n', 'utf8');
50
51
  }
51
52
 
53
/**
 * Loads the persisted wizard state from CONFIG_JSON.
 *
 * Always returns a plain object so callers can read and assign properties
 * without further checks. An absent, empty, unreadable or malformed file
 * yields `{}`, and so does valid JSON that is not an object (`null`, arrays,
 * numbers, strings), which would otherwise make callers throw when they
 * dereference the result.
 *
 * @returns {object} Parsed state object, or `{}` when none is available.
 */
function readWizardState() {
  if (!fs.existsSync(CONFIG_JSON)) return {};
  try {
    const content = fs.readFileSync(CONFIG_JSON, 'utf8').trim();
    if (!content) return {};
    const parsed = JSON.parse(content);
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    // Best-effort: an unreadable or corrupt state file is treated as "no state".
    return {};
  }
}
62
+
63
/**
 * Persists the wizard state to CONFIG_JSON as pretty-printed JSON
 * (two-space indentation), after calling `ensureDir` on the file's
 * parent directory.
 *
 * @param {object} state - State object to serialize.
 */
function writeWizardState(state) {
  const parentDir = path.dirname(CONFIG_JSON);
  ensureDir(parentDir);
  const serialized = JSON.stringify(state, null, 2);
  fs.writeFileSync(CONFIG_JSON, serialized, 'utf8');
}
67
+
68
/**
 * Reports whether the persisted wizard state has `onboardingCompleted`
 * set to exactly `true` (truthy non-boolean values do not count).
 *
 * @returns {boolean}
 */
function hasCompletedOnboarding() {
  const { onboardingCompleted } = readWizardState();
  return onboardingCompleted === true;
}
72
+
73
/**
 * Sets `onboardingCompleted = true` on the object returned by
 * `readWizardState()` and hands that same object to `writeWizardState`,
 * so any other keys already in the state are kept.
 */
function markOnboardingCompleted() {
  const updated = Object.assign(readWizardState(), { onboardingCompleted: true });
  writeWizardState(updated);
}
78
+
52
79
  function dim(text) { return `\x1b[2m${text}\x1b[0m`; }
53
80
  function bold(text) { return `\x1b[1m${text}\x1b[0m`; }
54
81
  function green(text) { return `\x1b[32m${text}\x1b[0m`; }
@@ -155,6 +182,10 @@ function openBrowser(url) {
155
182
  } catch { /* browser open is best-effort */ }
156
183
  }
157
184
 
185
/**
 * Reports whether both `process.stdin` and `process.stdout` exist and have a
 * truthy `isTTY` flag.
 *
 * @returns {boolean} True only when stdin and stdout are both TTYs.
 */
function isInteractiveTerminal() {
  const input = process.stdin;
  if (!input || !input.isTTY) return false;
  const output = process.stdout;
  return Boolean(output && output.isTTY);
}
188
+
158
189
  function askYesNo(question) {
159
190
  return new Promise((resolve) => {
160
191
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
@@ -282,8 +313,16 @@ async function deviceAuthFlow() {
282
313
  console.log(` │ │`);
283
314
  console.log(` └───────────────────────────────────────────────┘\n`);
284
315
 
285
- openBrowser(loginUrl);
286
- console.log(` ${dim('Browser opened automatically.')}`);
316
+ const shouldAutoOpenBrowser = process.env.VESPER_WIZARD_AUTO_OPEN !== '0';
317
+ if (shouldAutoOpenBrowser) {
318
+ openBrowser(loginUrl);
319
+ if (!hasCompletedOnboarding()) {
320
+ markOnboardingCompleted();
321
+ }
322
+ console.log(` ${dim('Browser opened automatically for device auth.')}`);
323
+ } else {
324
+ console.log(` ${dim('Browser auto-open disabled via VESPER_WIZARD_AUTO_OPEN=0. Open the URL manually to continue.')}`);
325
+ }
287
326
  console.log(` ${dim('Waiting for you to sign in...')}\n`);
288
327
 
289
328
  // Step 3: Poll until confirmed or expired
@@ -386,7 +425,7 @@ function getAllAgentConfigs() {
386
425
 
387
426
  function installMcpToAgent(agent) {
388
427
  const npxCmd = IS_WIN ? 'npx.cmd' : 'npx';
389
- const serverEntry = { command: npxCmd, args: ['-y', '@vespermcp/mcp-server@latest'] };
428
+ const serverEntry = { command: npxCmd, args: ['-y', '-p', '@vespermcp/mcp-server@latest', 'vespermcp'] };
390
429
 
391
430
  try {
392
431
  if (agent.format === 'toml') {
@@ -437,6 +476,13 @@ async function checkServerHealth() {
437
476
 
438
477
  // ── Main Wizard ──────────────────────────────────────────────
439
478
  async function main() {
479
+ if (!isInteractiveTerminal()) {
480
+ console.error(red('vesper-wizard is interactive and cannot run in MCP stdio mode.'));
481
+ console.error(dim('Use this command for MCP server runtime instead:'));
482
+ console.error(cyan('npx -y -p @vespermcp/mcp-server@latest vespermcp'));
483
+ process.exit(2);
484
+ }
485
+
440
486
  printBanner();
441
487
 
442
488
  console.log(` ${green('→')} Setting up Vesper on ${bold(os.hostname())}\n`);
@@ -487,17 +533,22 @@ async function main() {
487
533
  console.log(` ${green('✓')}`);
488
534
  console.log(` ${dim('Mode:')} ${dim(vaultData.auth_mode === 'cloud' ? 'cloud (linked to Vesper account)' : 'single local Vesper key (no external keys required)')}`);
489
535
 
490
- // ─── Step 4: Install @vespermcp/mcp-server ─────────────────
491
- console.log(`\n ${dim('[')}${cyan('4/6')}${dim(']')} Installing Vesper MCP server...`);
536
+ // ─── Step 4: Verify @vespermcp/mcp-server command ───────────
537
+ console.log(`\n ${dim('[')}${cyan('4/6')}${dim(']')} Verifying Vesper MCP server command...`);
492
538
  try {
493
539
  const npmCmd = IS_WIN ? 'npx.cmd' : 'npx';
494
- spawnSync(npmCmd, ['-y', '@vespermcp/mcp-server@latest', '--setup', '--silent'], {
495
- stdio: 'inherit',
496
- timeout: 120000,
540
+ const verify = spawnSync(npmCmd, ['-y', '-p', '@vespermcp/mcp-server@latest', 'vespermcp', '--version'], {
541
+ stdio: 'pipe',
542
+ timeout: 30000,
543
+ encoding: 'utf8',
497
544
  });
498
- console.log(` ${green('✓')} @vespermcp/mcp-server installed`);
545
+ if (verify.status === 0) {
546
+ console.log(` ${green('✓')} @vespermcp/mcp-server command is available`);
547
+ } else {
548
+ console.log(` ${yellow('⚠')} Could not verify vespermcp binary. MCP clients will still install it on first run.`);
549
+ }
499
550
  } catch {
500
- console.log(` ${yellow('⚠')} Could not auto-install run manually: npx -y @vespermcp/mcp-server@latest --setup`);
551
+ console.log(` ${yellow('⚠')} Could not verify vespermcp binary. MCP clients will still install it on first run.`);
501
552
  }
502
553
 
503
554
  // ─── Step 5: Auto-configure all detected IDEs ──────────────
package/scripts/wizard.js CHANGED
@@ -18,6 +18,7 @@ const readline = require('readline');
18
18
  const HOME = os.homedir();
19
19
  const VESPER_DIR = path.join(HOME, '.vesper');
20
20
  const CONFIG_TOML = path.join(VESPER_DIR, 'config.toml');
21
+ const CONFIG_JSON = path.join(VESPER_DIR, 'config.json');
21
22
  const DATA_DIR = path.join(VESPER_DIR, 'data');
22
23
  const IS_WIN = process.platform === 'win32';
23
24
  const APPDATA = process.env.APPDATA || path.join(HOME, 'AppData', 'Roaming');
@@ -49,6 +50,32 @@ function writeToml(filePath, data) {
49
50
  fs.writeFileSync(filePath, lines.join('\n') + '\n', 'utf8');
50
51
  }
51
52
 
53
/**
 * Loads the persisted wizard state from CONFIG_JSON.
 *
 * Always returns a plain object so callers can read and assign properties
 * without further checks. An absent, empty, unreadable or malformed file
 * yields `{}`, and so does valid JSON that is not an object (`null`, arrays,
 * numbers, strings), which would otherwise make callers throw when they
 * dereference the result.
 *
 * @returns {object} Parsed state object, or `{}` when none is available.
 */
function readWizardState() {
  if (!fs.existsSync(CONFIG_JSON)) return {};
  try {
    const content = fs.readFileSync(CONFIG_JSON, 'utf8').trim();
    if (!content) return {};
    const parsed = JSON.parse(content);
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    // Best-effort: an unreadable or corrupt state file is treated as "no state".
    return {};
  }
}
62
+
63
/**
 * Persists the wizard state to CONFIG_JSON as pretty-printed JSON
 * (two-space indentation), after calling `ensureDir` on the file's
 * parent directory.
 *
 * @param {object} state - State object to serialize.
 */
function writeWizardState(state) {
  const parentDir = path.dirname(CONFIG_JSON);
  ensureDir(parentDir);
  const serialized = JSON.stringify(state, null, 2);
  fs.writeFileSync(CONFIG_JSON, serialized, 'utf8');
}
67
+
68
/**
 * Reports whether the persisted wizard state has `onboardingCompleted`
 * set to exactly `true` (truthy non-boolean values do not count).
 *
 * @returns {boolean}
 */
function hasCompletedOnboarding() {
  const { onboardingCompleted } = readWizardState();
  return onboardingCompleted === true;
}
72
+
73
/**
 * Sets `onboardingCompleted = true` on the object returned by
 * `readWizardState()` and hands that same object to `writeWizardState`,
 * so any other keys already in the state are kept.
 */
function markOnboardingCompleted() {
  const updated = Object.assign(readWizardState(), { onboardingCompleted: true });
  writeWizardState(updated);
}
78
+
52
79
  function dim(text) { return `\x1b[2m${text}\x1b[0m`; }
53
80
  function bold(text) { return `\x1b[1m${text}\x1b[0m`; }
54
81
  function green(text) { return `\x1b[32m${text}\x1b[0m`; }
@@ -282,8 +309,13 @@ async function deviceAuthFlow() {
282
309
  console.log(` │ │`);
283
310
  console.log(` └───────────────────────────────────────────────┘\n`);
284
311
 
285
- openBrowser(loginUrl);
286
- console.log(` ${dim('Browser opened automatically.')}`);
312
+ if (!hasCompletedOnboarding()) {
313
+ openBrowser(loginUrl);
314
+ markOnboardingCompleted();
315
+ console.log(` ${dim('Browser opened automatically (first-time onboarding).')}`);
316
+ } else {
317
+ console.log(` ${dim('Browser auto-open skipped (onboarding already completed).')}`);
318
+ }
287
319
  console.log(` ${dim('Waiting for you to sign in...')}\n`);
288
320
 
289
321
  // Step 3: Poll until confirmed or expired
package/wizard.cjs CHANGED
@@ -1,3 +1,3 @@
1
- #!/usr/bin/env node
1
+ #!/usr/bin/env node
2
2
 
3
3
  require('./scripts/wizard.cjs');