@vespermcp/mcp-server 1.2.24 → 1.2.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -101,6 +101,18 @@ export class GithubSource {
101
101
  qualityWarnings.push("Low star count; may be low-signal");
102
102
  if (description.length < 80)
103
103
  qualityWarnings.push("Short description; relevance may be weaker");
104
+ const lowSignalPatterns = [
105
+ /\bawesome\b/i,
106
+ /\bresources?\b/i,
107
+ /\bcurated\b/i,
108
+ /\blist\b/i,
109
+ /\bcollection\b/i,
110
+ ];
111
+ const lowSignalText = `${fullName} ${description}`;
112
+ const looksResourceList = lowSignalPatterns.some((rx) => rx.test(lowSignalText));
113
+ if (looksResourceList) {
114
+ qualityWarnings.push("Repository appears to be a resource/list collection; relevance may be indirect.");
115
+ }
104
116
  const abstractLength = input.readmeText ? input.readmeText.length : description.length;
105
117
  const authorsPresent = !!owner;
106
118
  const datePresent = !!updatedAt;
@@ -111,6 +123,19 @@ export class GithubSource {
111
123
  datePresent,
112
124
  contentDepth,
113
125
  });
126
+ let adjustedQuality01 = quality01;
127
+ // Calibrate GitHub quality so resource-list repos don't dominate.
128
+ // Keep penalty moderate; readme-rich/long-form repos still score well.
129
+ if (looksResourceList) {
130
+ adjustedQuality01 -= 0.14;
131
+ }
132
+ if (!input.readmeText && description.length < 140) {
133
+ adjustedQuality01 -= 0.08;
134
+ }
135
+ if (stars < 50) {
136
+ adjustedQuality01 -= 0.04;
137
+ }
138
+ adjustedQuality01 = Math.max(0.3, Math.min(1.0, adjustedQuality01));
114
139
  return {
115
140
  id: fullName,
116
141
  source: "github",
@@ -133,7 +158,7 @@ export class GithubSource {
133
158
  usage_restrictions: [],
134
159
  warnings: [],
135
160
  },
136
- quality_score: Math.round(quality01 * 100),
161
+ quality_score: Math.round(adjustedQuality01 * 100),
137
162
  quality_warnings: qualityWarnings,
138
163
  download_url: String(repo.html_url || `https://github.com/${fullName}`),
139
164
  format: "GIT",
@@ -43,14 +43,39 @@ function tokenize(content) {
43
43
  .filter((w) => w.length >= 3);
44
44
  return new Set(words);
45
45
  }
46
+ function titleTokens(doc) {
47
+ const mj = doc.metadata_json || {};
48
+ const raw = typeof mj.title === "string" ? mj.title : "";
49
+ return tokenize(raw);
50
+ }
46
51
  function isSuspiciousPair(a, b) {
47
52
  // semantic fallback should be selective; do cheap prefilter first
48
53
  const aLen = a.content.length;
49
54
  const bLen = b.content.length;
50
55
  const maxLen = Math.max(aLen, bLen, 1);
51
56
  const lenRatio = Math.abs(aLen - bLen) / maxLen;
52
- if (lenRatio > 0.45)
57
+ // Loosened again to allow abstract-vs-summary style comparisons.
58
+ if (lenRatio > 0.8)
53
59
  return false;
60
+ // Fast path: same normalized title-like prefix often indicates same research object.
61
+ const aPrefix = a.content.slice(0, 140).toLowerCase().replace(/[^a-z0-9\s]/g, " ").trim();
62
+ const bPrefix = b.content.slice(0, 140).toLowerCase().replace(/[^a-z0-9\s]/g, " ").trim();
63
+ if (aPrefix && bPrefix && (aPrefix.includes(bPrefix) || bPrefix.includes(aPrefix))) {
64
+ return true;
65
+ }
66
+ // Cross-source papers often have close titles even if abstracts differ.
67
+ const aTitle = titleTokens(a);
68
+ const bTitle = titleTokens(b);
69
+ if (aTitle.size > 0 && bTitle.size > 0) {
70
+ let tInter = 0;
71
+ for (const t of aTitle)
72
+ if (bTitle.has(t))
73
+ tInter++;
74
+ const tUnion = aTitle.size + bTitle.size - tInter;
75
+ const tJaccard = tUnion > 0 ? tInter / tUnion : 0;
76
+ if (tJaccard >= 0.25)
77
+ return true;
78
+ }
54
79
  const aTokens = tokenize(a.content);
55
80
  const bTokens = tokenize(b.content);
56
81
  if (aTokens.size === 0 || bTokens.size === 0)
@@ -61,7 +86,8 @@ function isSuspiciousPair(a, b) {
61
86
  inter++;
62
87
  const union = aTokens.size + bTokens.size - inter;
63
88
  const jaccard = union > 0 ? inter / union : 0;
64
- return jaccard >= 0.18;
89
+ // Loosened from 0.18 -> 0.08 to let semantic stage inspect more borderline matches.
90
+ return jaccard >= 0.08;
65
91
  }
66
92
  function normalizeStars(doc) {
67
93
  const mj = doc.metadata_json || {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vespermcp/mcp-server",
3
- "version": "1.2.24",
3
+ "version": "1.2.25",
4
4
  "description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
5
5
  "type": "module",
6
6
  "main": "build/index.js",