@vespermcp/mcp-server 1.2.24 → 1.2.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -101,6 +101,18 @@ export class GithubSource {
|
|
|
101
101
|
qualityWarnings.push("Low star count; may be low-signal");
|
|
102
102
|
if (description.length < 80)
|
|
103
103
|
qualityWarnings.push("Short description; relevance may be weaker");
|
|
104
|
+
const lowSignalPatterns = [
|
|
105
|
+
/\bawesome\b/i,
|
|
106
|
+
/\bresources?\b/i,
|
|
107
|
+
/\bcurated\b/i,
|
|
108
|
+
/\blist\b/i,
|
|
109
|
+
/\bcollection\b/i,
|
|
110
|
+
];
|
|
111
|
+
const lowSignalText = `${fullName} ${description}`;
|
|
112
|
+
const looksResourceList = lowSignalPatterns.some((rx) => rx.test(lowSignalText));
|
|
113
|
+
if (looksResourceList) {
|
|
114
|
+
qualityWarnings.push("Repository appears to be a resource/list collection; relevance may be indirect.");
|
|
115
|
+
}
|
|
104
116
|
const abstractLength = input.readmeText ? input.readmeText.length : description.length;
|
|
105
117
|
const authorsPresent = !!owner;
|
|
106
118
|
const datePresent = !!updatedAt;
|
|
@@ -111,6 +123,19 @@ export class GithubSource {
|
|
|
111
123
|
datePresent,
|
|
112
124
|
contentDepth,
|
|
113
125
|
});
|
|
126
|
+
let adjustedQuality01 = quality01;
|
|
127
|
+
// Calibrate GitHub quality so resource-list repos don't dominate.
|
|
128
|
+
// Keep penalty moderate; readme-rich/long-form repos still score well.
|
|
129
|
+
if (looksResourceList) {
|
|
130
|
+
adjustedQuality01 -= 0.14;
|
|
131
|
+
}
|
|
132
|
+
if (!input.readmeText && description.length < 140) {
|
|
133
|
+
adjustedQuality01 -= 0.08;
|
|
134
|
+
}
|
|
135
|
+
if (stars < 50) {
|
|
136
|
+
adjustedQuality01 -= 0.04;
|
|
137
|
+
}
|
|
138
|
+
adjustedQuality01 = Math.max(0.3, Math.min(1.0, adjustedQuality01));
|
|
114
139
|
return {
|
|
115
140
|
id: fullName,
|
|
116
141
|
source: "github",
|
|
@@ -133,7 +158,7 @@ export class GithubSource {
|
|
|
133
158
|
usage_restrictions: [],
|
|
134
159
|
warnings: [],
|
|
135
160
|
},
|
|
136
|
-
quality_score: Math.round(quality01 * 100),
|
|
161
|
+
quality_score: Math.round(adjustedQuality01 * 100),
|
|
137
162
|
quality_warnings: qualityWarnings,
|
|
138
163
|
download_url: String(repo.html_url || `https://github.com/${fullName}`),
|
|
139
164
|
format: "GIT",
|
|
@@ -43,14 +43,39 @@ function tokenize(content) {
|
|
|
43
43
|
.filter((w) => w.length >= 3);
|
|
44
44
|
return new Set(words);
|
|
45
45
|
}
|
|
46
|
+
function titleTokens(doc) {
|
|
47
|
+
const mj = doc.metadata_json || {};
|
|
48
|
+
const raw = typeof mj.title === "string" ? mj.title : "";
|
|
49
|
+
return tokenize(raw);
|
|
50
|
+
}
|
|
46
51
|
function isSuspiciousPair(a, b) {
|
|
47
52
|
// semantic fallback should be selective; do cheap prefilter first
|
|
48
53
|
const aLen = a.content.length;
|
|
49
54
|
const bLen = b.content.length;
|
|
50
55
|
const maxLen = Math.max(aLen, bLen, 1);
|
|
51
56
|
const lenRatio = Math.abs(aLen - bLen) / maxLen;
|
|
52
|
-
|
|
57
|
+
// Loosened again to allow abstract-vs-summary style comparisons.
|
|
58
|
+
if (lenRatio > 0.8)
|
|
53
59
|
return false;
|
|
60
|
+
// Fast path: same normalized title-like prefix often indicates same research object.
|
|
61
|
+
const aPrefix = a.content.slice(0, 140).toLowerCase().replace(/[^a-z0-9\s]/g, " ").trim();
|
|
62
|
+
const bPrefix = b.content.slice(0, 140).toLowerCase().replace(/[^a-z0-9\s]/g, " ").trim();
|
|
63
|
+
if (aPrefix && bPrefix && (aPrefix.includes(bPrefix) || bPrefix.includes(aPrefix))) {
|
|
64
|
+
return true;
|
|
65
|
+
}
|
|
66
|
+
// Cross-source papers often have close titles even if abstracts differ.
|
|
67
|
+
const aTitle = titleTokens(a);
|
|
68
|
+
const bTitle = titleTokens(b);
|
|
69
|
+
if (aTitle.size > 0 && bTitle.size > 0) {
|
|
70
|
+
let tInter = 0;
|
|
71
|
+
for (const t of aTitle)
|
|
72
|
+
if (bTitle.has(t))
|
|
73
|
+
tInter++;
|
|
74
|
+
const tUnion = aTitle.size + bTitle.size - tInter;
|
|
75
|
+
const tJaccard = tUnion > 0 ? tInter / tUnion : 0;
|
|
76
|
+
if (tJaccard >= 0.25)
|
|
77
|
+
return true;
|
|
78
|
+
}
|
|
54
79
|
const aTokens = tokenize(a.content);
|
|
55
80
|
const bTokens = tokenize(b.content);
|
|
56
81
|
if (aTokens.size === 0 || bTokens.size === 0)
|
|
@@ -61,7 +86,8 @@ function isSuspiciousPair(a, b) {
|
|
|
61
86
|
inter++;
|
|
62
87
|
const union = aTokens.size + bTokens.size - inter;
|
|
63
88
|
const jaccard = union > 0 ? inter / union : 0;
|
|
64
|
-
return jaccard >= 0.12;
|
|
89
|
+
// Loosened from 0.12 -> 0.08 to let semantic stage inspect more borderline matches.
|
|
90
|
+
return jaccard >= 0.08;
|
|
65
91
|
}
|
|
66
92
|
function normalizeStars(doc) {
|
|
67
93
|
const mj = doc.metadata_json || {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vespermcp/mcp-server",
|
|
3
|
-
"version": "1.2.24",
|
|
3
|
+
"version": "1.2.25",
|
|
4
4
|
"description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "build/index.js",
|