npm - @vespermcp/mcp-server - Versions diffs - 1.2.24 → 1.2.26 - Mend

@vespermcp/mcp-server 1.2.24 → 1.2.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/build/metadata/github-source.js +26 -1
package/build/web/fusion-engine.js +59 -2
package/package.json +1 -1

package/build/metadata/github-source.js CHANGED

@@ -101,6 +101,18 @@ export class GithubSource {
             qualityWarnings.push("Low star count; may be low-signal");
         if (description.length < 80)
             qualityWarnings.push("Short description; relevance may be weaker");
+        const lowSignalPatterns = [
+            /\bawesome\b/i,
+            /\bresources?\b/i,
+            /\bcurated\b/i,
+            /\blist\b/i,
+            /\bcollection\b/i,
+        ];
+        const lowSignalText = `${fullName} ${description}`;
+        const looksResourceList = lowSignalPatterns.some((rx) => rx.test(lowSignalText));
+        if (looksResourceList) {
+            qualityWarnings.push("Repository appears to be a resource/list collection; relevance may be indirect.");
+        }
         const abstractLength = input.readmeText ? input.readmeText.length : description.length;
         const authorsPresent = !!owner;
         const datePresent = !!updatedAt;
@@ -111,6 +123,19 @@ export class GithubSource {
             datePresent,
             contentDepth,
         });
+        let adjustedQuality01 = quality01;
+        // Calibrate GitHub quality so resource-list repos don't dominate.
+        // Keep penalty moderate; readme-rich/long-form repos still score well.
+        if (looksResourceList) {
+            adjustedQuality01 -= 0.14;
+        }
+        if (!input.readmeText && description.length < 140) {
+            adjustedQuality01 -= 0.08;
+        }
+        if (stars < 50) {
+            adjustedQuality01 -= 0.04;
+        }
+        adjustedQuality01 = Math.max(0.3, Math.min(1.0, adjustedQuality01));
         return {
             id: fullName,
             source: "github",
@@ -133,7 +158,7 @@ export class GithubSource {
                 usage_restrictions: [],
                 warnings: [],
             },
-            quality_score: Math.round(quality01 * 100),
+            quality_score: Math.round(adjustedQuality01 * 100),
             quality_warnings: qualityWarnings,
             download_url: String(repo.html_url || `https://github.com/${fullName}`),
             format: "GIT",

package/build/web/fusion-engine.js CHANGED

@@ -43,14 +43,70 @@ function tokenize(content) {
         .filter((w) => w.length >= 3);
     return new Set(words);
 }
+function titleTokens(doc) { // Token set of doc.metadata_json.title, produced by tokenize().
+    const mj = doc.metadata_json || {}; // fall back to {} so a doc without metadata_json does not throw
+    const raw = typeof mj.title === "string" ? mj.title : ""; // a non-string or missing title is treated as empty text
+    return tokenize(raw); // tokenize() is defined above; its visible tail keeps words of length >= 3 and returns a Set
+}
+function semanticHintTokens(doc) { // Token set built from the doc's descriptive metadata fields plus its source URL.
+    const mj = doc.metadata_json || {}; // fall back to {} so a doc without metadata_json does not throw
+    const fields = []; // text fragments; joined and tokenized together at the end
+    if (typeof mj.title === "string") // scalar fields contribute only when they are strings
+        fields.push(mj.title);
+    if (typeof mj.name === "string")
+        fields.push(mj.name);
+    if (typeof mj.description === "string")
+        fields.push(mj.description);
+    if (typeof mj.abstract === "string")
+        fields.push(mj.abstract);
+    if (Array.isArray(mj.tags)) // list fields contribute only when they are arrays
+        fields.push(mj.tags.join(" ")); // elements are not type-checked; join() stringifies each one
+    if (Array.isArray(mj.topics))
+        fields.push(mj.topics.join(" "));
+    fields.push(doc.source_url || ""); // always appended, so the URL text is tokenized along with the metadata
+    return tokenize(fields.join(" ")); // one combined token set; which field a token came from is not tracked
+}
 function isSuspiciousPair(a, b) {
     // semantic fallback should be selective; do cheap prefilter first
+    // Metadata/topic overlap can indicate same object even with very different body lengths.
+    const aHints = semanticHintTokens(a);
+    const bHints = semanticHintTokens(b);
+    if (aHints.size > 0 && bHints.size > 0) {
+        let hInter = 0;
+        for (const t of aHints)
+            if (bHints.has(t))
+                hInter++;
+        const hUnion = aHints.size + bHints.size - hInter;
+        const hJaccard = hUnion > 0 ? hInter / hUnion : 0;
+        if (hJaccard >= 0.2)
+            return true;
+    }
     const aLen = a.content.length;
     const bLen = b.content.length;
     const maxLen = Math.max(aLen, bLen, 1);
     const lenRatio = Math.abs(aLen - bLen) / maxLen;
-    if (lenRatio > 0.45)
+    // Loosened again to allow abstract-vs-summary style comparisons.
+    if (lenRatio > 0.9)
         return false;
+    // Fast path: same normalized title-like prefix often indicates same research object.
+    const aPrefix = a.content.slice(0, 140).toLowerCase().replace(/[^a-z0-9\s]/g, " ").trim();
+    const bPrefix = b.content.slice(0, 140).toLowerCase().replace(/[^a-z0-9\s]/g, " ").trim();
+    if (aPrefix && bPrefix && (aPrefix.includes(bPrefix) || bPrefix.includes(aPrefix))) {
+        return true;
+    }
+    // Cross-source papers often have close titles even if abstracts differ.
+    const aTitle = titleTokens(a);
+    const bTitle = titleTokens(b);
+    if (aTitle.size > 0 && bTitle.size > 0) {
+        let tInter = 0;
+        for (const t of aTitle)
+            if (bTitle.has(t))
+                tInter++;
+        const tUnion = aTitle.size + bTitle.size - tInter;
+        const tJaccard = tUnion > 0 ? tInter / tUnion : 0;
+        if (tJaccard >= 0.25)
+            return true;
+    }
     const aTokens = tokenize(a.content);
     const bTokens = tokenize(b.content);
     if (aTokens.size === 0 || bTokens.size === 0)
@@ -61,7 +117,8 @@ function isSuspiciousPair(a, b) {
             inter++;
     const union = aTokens.size + bTokens.size - inter;
     const jaccard = union > 0 ? inter / union : 0;
-    return jaccard >= 0.18;
+    // Loosened from 0.12 -> 0.08 to let semantic stage inspect more borderline matches.
+    return jaccard >= 0.08;
 }
 function normalizeStars(doc) {
     const mj = doc.metadata_json || {};

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@vespermcp/mcp-server",
-  "version": "1.2.24",
+  "version": "1.2.26",
   "description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
   "type": "module",
   "main": "build/index.js",