soup-chop 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +224 -48
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -650,6 +650,18 @@ var DOCS_PATH_HINTS = ["/docs", "/documentation", "/guide", "/guides", "/api", "
|
|
|
650
650
|
// Case-insensitive, whole-word match for documentation-related terms (e.g. "docs", "guide", "api", "get started").
var DOC_LABEL_PATTERN = /\b(docs?|documentation|guide|guides|api|reference|learn|tutorial|get started)\b/i;
|
|
651
651
|
// Case-insensitive match for a hostname that is, or is a subdomain of, github.com, npmjs.com or unpkg.com.
var EXCLUDED_DOC_HOST_PATTERN = /(^|\.)(github\.com|npmjs\.com|www\.npmjs\.com|unpkg\.com)$/i;
|
|
652
652
|
// Case-insensitive match for strings ending in one of the listed image, document, archive, font, video or audio extensions.
var EXCLUDED_FILE_EXTENSION_PATTERN = /\.(?:png|jpe?g|gif|svg|webp|ico|pdf|zip|gz|tgz|woff2?|ttf|eot|mp4|webm|mov|mp3|wav)$/i;
|
|
653
|
+
/**
 * Map a docs discovery method to its priority rank (lower rank = preferred).
 *
 * @param {string} discoveryMethod - "explicit", "readme_link", "homepage" or "derived_pages".
 * @returns {number} 0-3 for the known methods, 4 for any other value.
 */
function discoveryMethodRank(discoveryMethod) {
  switch (discoveryMethod) {
    case "explicit":
      return 0;
    case "readme_link":
      return 1;
    case "homepage":
      return 2;
    case "derived_pages":
      return 3;
    default:
      // Always return a number: an `undefined` rank would turn rank
      // subtraction in sort comparators into NaN. Unknown methods rank last,
      // the same fallback compareDiscoveryMethod uses.
      return 4;
  }
}
|
|
653
665
|
var turndown = new TurndownService({
|
|
654
666
|
bulletListMarker: "-",
|
|
655
667
|
codeBlockStyle: "fenced",
|
|
@@ -706,24 +718,40 @@ function extractMarkdownLinks(markdown) {
|
|
|
706
718
|
}
|
|
707
719
|
return links;
|
|
708
720
|
}
|
|
709
|
-
function
|
|
710
|
-
|
|
721
|
+
/**
 * Bundle a URL, the way it was discovered, and its score into one candidate record.
 *
 * @param {string} url - Candidate documentation URL.
 * @param {string} discoveryMethod - How the URL was found (e.g. "readme_link").
 * @param {number} score - Relevance score assigned by the caller.
 * @returns {{url: string, discoveryMethod: string, score: number}} The candidate record.
 */
function createDocsCandidate(url, discoveryMethod, score) {
  const candidate = { url: url, discoveryMethod: discoveryMethod, score: score };
  return candidate;
}
|
|
728
|
+
/**
 * Collapse candidates that share a URL and return the survivors in priority order.
 *
 * For a repeated URL the kept candidate is the one with the better (lower)
 * discovery-method rank; when ranks are equal, the higher score wins.
 * The result is sorted by score (descending), then by discovery-method rank
 * (ascending), then by URL using localeCompare.
 *
 * @param {Array<{url: string, discoveryMethod: string, score: number}>} candidates
 * @returns {Array<{url: string, discoveryMethod: string, score: number}>} Unique, sorted candidates.
 */
function dedupeDocsCandidates(candidates) {
  const bestByUrl = /* @__PURE__ */ new Map();

  // Decide whether `challenger` should take the slot currently held by `incumbent`.
  const shouldReplace = (incumbent, challenger) => {
    if (incumbent === void 0) {
      return true;
    }
    const challengerRank = discoveryMethodRank(challenger.discoveryMethod);
    const incumbentRank = discoveryMethodRank(incumbent.discoveryMethod);
    if (challengerRank < incumbentRank) {
      return true;
    }
    return challengerRank === incumbentRank && challenger.score > incumbent.score;
  };

  candidates.forEach((entry) => {
    if (shouldReplace(bestByUrl.get(entry.url), entry)) {
      bestByUrl.set(entry.url, entry);
    }
  });

  const byPriority = (left, right) => {
    if (right.score !== left.score) {
      return right.score - left.score;
    }
    if (left.discoveryMethod === right.discoveryMethod) {
      return left.url.localeCompare(right.url);
    }
    return discoveryMethodRank(left.discoveryMethod) - discoveryMethodRank(right.discoveryMethod);
  };

  const unique = Array.from(bestByUrl.values());
  unique.sort(byPriority);
  return unique;
}
|
|
746
|
+
function detectDocsUrls(readmeContent) {
|
|
747
|
+
return dedupeDocsCandidates(extractMarkdownLinks(readmeContent).filter((candidate) => {
|
|
711
748
|
try {
|
|
712
749
|
const url = new URL(candidate.url);
|
|
713
750
|
return url.protocol === "http:" || url.protocol === "https:";
|
|
714
751
|
} catch {
|
|
715
752
|
return false;
|
|
716
753
|
}
|
|
717
|
-
}).map((candidate) => ({ candidate, score: scoreDocsCandidate(candidate) })).filter((entry) => entry.score > 0).
|
|
718
|
-
if (right.score !== left.score) {
|
|
719
|
-
return right.score - left.score;
|
|
720
|
-
}
|
|
721
|
-
return left.candidate.url.localeCompare(right.candidate.url);
|
|
722
|
-
})[0];
|
|
723
|
-
if (best === void 0) {
|
|
724
|
-
return null;
|
|
725
|
-
}
|
|
726
|
-
return normalizeUrl(new URL(best.candidate.url));
|
|
754
|
+
}).map((candidate) => ({ candidate, score: scoreDocsCandidate(candidate) })).filter((entry) => entry.score > 0).map((entry) => createDocsCandidate(normalizeUrl(new URL(entry.candidate.url)), "readme_link", entry.score)));
|
|
727
755
|
}
|
|
728
756
|
function readRepositoryUrl(repository) {
|
|
729
757
|
if (typeof repository === "string" && repository.trim().length > 0) {
|
|
@@ -780,6 +808,55 @@ function detectPagesUrl(packageJson) {
|
|
|
780
808
|
}
|
|
781
809
|
return null;
|
|
782
810
|
}
|
|
811
|
+
/**
 * Score a package homepage URL as a potential documentation entry point.
 *
 * Scoring: +1 for https, +6 when the lower-cased normalized path matches
 * DOC_LABEL_PATTERN, +3 when the path equals or sits under one of
 * DOCS_PATH_HINTS, and -10 when the host matches EXCLUDED_DOC_HOST_PATTERN.
 *
 * @param {URL} homepage - Parsed homepage URL.
 * @returns {number} The accumulated score (may be negative).
 */
function scoreHomepageCandidate(homepage) {
  const hostname = homepage.hostname.toLowerCase();
  const pathname = normalizePathname(homepage.pathname).toLowerCase();

  const httpsBonus = homepage.protocol === "https:" ? 1 : 0;
  const labelBonus = DOC_LABEL_PATTERN.test(pathname) ? 6 : 0;
  const isUnderDocsHint = DOCS_PATH_HINTS.some(
    (hint) => pathname === hint || pathname.startsWith(`${hint}/`)
  );
  const hintBonus = isUnderDocsHint ? 3 : 0;
  const hostPenalty = EXCLUDED_DOC_HOST_PATTERN.test(hostname) ? 10 : 0;

  return httpsBonus + labelBonus + hintBonus - hostPenalty;
}
|
|
826
|
+
/**
 * Gather documentation URL candidates from every available signal.
 *
 * Candidates are collected in this order and then de-duplicated:
 *   1. the explicitly supplied docs URL (score 100),
 *   2. links detected in the README,
 *   3. the package.json `homepage` (only when it is not on *.github.io or
 *      *.gitlab.io and scores above 0),
 *   4. the pages URL derived by detectPagesUrl (score 1).
 *
 * @param {object} packageJson - Parsed package.json contents.
 * @param {string} readmeContent - README text passed to detectDocsUrls.
 * @param {string} [explicitDocsUrl] - Caller-provided docs URL, if any.
 * @returns {Array} De-duplicated candidates as returned by dedupeDocsCandidates.
 */
function collectDocsCandidates(packageJson, readmeContent, explicitDocsUrl) {
  const fromExplicit = [];
  if (explicitDocsUrl !== void 0) {
    try {
      const explicitUrl = normalizeUrl(new URL(explicitDocsUrl));
      fromExplicit.push(createDocsCandidate(explicitUrl, "explicit", 100));
    } catch {
      // An unparsable explicit URL is ignored; the other signals still apply.
    }
  }

  const fromReadme = detectDocsUrls(readmeContent);

  const fromHomepage = [];
  const rawHomepage = packageJson.homepage;
  if (typeof rawHomepage === "string" && rawHomepage.trim().length > 0) {
    try {
      const homepageUrl = new URL(rawHomepage.trim());
      const isPagesHost = /\.github\.io$/i.test(homepageUrl.hostname) || /\.gitlab\.io$/i.test(homepageUrl.hostname);
      if (!isPagesHost) {
        const homepageScore = scoreHomepageCandidate(homepageUrl);
        if (homepageScore > 0) {
          fromHomepage.push(createDocsCandidate(normalizeUrl(homepageUrl), "homepage", homepageScore));
        }
      }
    } catch {
      // An unparsable homepage is ignored.
    }
  }

  const fromPages = [];
  const pagesUrl = detectPagesUrl(packageJson);
  if (pagesUrl !== null) {
    fromPages.push(createDocsCandidate(pagesUrl, "derived_pages", 1));
  }

  return dedupeDocsCandidates([...fromExplicit, ...fromReadme, ...fromHomepage, ...fromPages]);
}
|
|
853
|
+
/**
 * Heuristically detect an HTML page that is only an empty single-page-app shell.
 *
 * A page counts as a shell when its markdown conversion, with whitespace
 * collapsed, is shorter than 200 characters AND the HTML contains a <div>
 * whose id is "app", "root" or "__next".
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {boolean} True when the page looks like an SPA mount point with little content.
 */
function isLikelySpaShell(html) {
  const collapsedText = convertHtmlToMarkdown(html).replace(/\s+/g, " ").trim();
  const hasSubstantialText = collapsedText.length >= 200;
  return hasSubstantialText ? false : /<div\b[^>]+id=["'](?:app|root|__next)["']/i.test(html);
}
|
|
783
860
|
function extractPrimaryHtml(html) {
|
|
784
861
|
const mainMatch = html.match(/<main\b[^>]*>([\s\S]*?)<\/main>/i);
|
|
785
862
|
if (mainMatch?.[1]) {
|
|
@@ -877,9 +954,10 @@ async function fetchHtmlPage(fetchImpl, url) {
|
|
|
877
954
|
if (!/text\/html|application\/xhtml\+xml/i.test(contentType)) {
|
|
878
955
|
return null;
|
|
879
956
|
}
|
|
957
|
+
const responseUrl = response.url.length > 0 ? response.url : url;
|
|
880
958
|
return {
|
|
881
959
|
html: await response.text(),
|
|
882
|
-
pageUrl: new URL(
|
|
960
|
+
pageUrl: new URL(responseUrl)
|
|
883
961
|
};
|
|
884
962
|
}
|
|
885
963
|
function createWebsiteSource(html, pageUrl) {
|
|
@@ -954,6 +1032,46 @@ async function discoverStructuredRouteUrls(fetchImpl, initialUrl, initialHtml) {
|
|
|
954
1032
|
}
|
|
955
1033
|
return [];
|
|
956
1034
|
}
|
|
1035
|
+
/**
 * Fetch a candidate URL and refine its score from what the page actually contains.
 *
 * Returns null when the page cannot be fetched as HTML, or when it looks like
 * an SPA shell and no structured route URLs were discovered. Otherwise the
 * candidate's score is increased by 6 when structured routes exist, by 4 when
 * at least one link falls inside the docs scope, and by 2 when the collapsed
 * markdown text is at least 200 characters long.
 *
 * @param {{url: string, discoveryMethod: string, score: number}} candidate
 * @param {{fetchImpl?: Function}} [options] - `fetchImpl` overrides the global fetch.
 * @returns {Promise<object|null>} The candidate with the normalized final page URL and updated score, or null.
 */
async function probeDocsCandidate(candidate, options = {}) {
  const fetchImpl = options.fetchImpl ?? fetch;
  const page = await fetchHtmlPage(fetchImpl, candidate.url);
  if (page === null) {
    return null;
  }
  const { html, pageUrl } = page;

  const structuredRouteUrls = await discoverStructuredRouteUrls(fetchImpl, pageUrl, html);
  const hasStructuredRoutes = structuredRouteUrls.length > 0;
  if (isLikelySpaShell(html) && !hasStructuredRoutes) {
    return null;
  }

  const links = extractHtmlLinks(html, pageUrl);
  const docsPrefix = deriveDocsPrefix(pageUrl, links);
  const docsScopedLinks = links.filter((link) => matchesDocsScope(link, pageUrl, docsPrefix));
  const markdown = convertHtmlToMarkdown(html).replace(/\s+/g, " ").trim();

  // Each satisfied signal contributes its bonus on top of the incoming score.
  const signals = [
    [hasStructuredRoutes, 6],
    [docsScopedLinks.length > 0, 4],
    [markdown.length >= 200, 2]
  ];
  let score = candidate.score;
  for (const [applies, bonus] of signals) {
    if (applies) {
      score += bonus;
    }
  }

  return { ...candidate, url: normalizeUrl(pageUrl), score };
}
|
|
1065
|
+
/**
 * Probe every candidate concurrently and keep the ones that resolve to usable pages.
 *
 * A candidate whose probe throws or returns null is dropped; the survivors
 * are passed through dedupeDocsCandidates.
 *
 * @param {Array<object>} candidates - Candidates to probe.
 * @param {object} [options] - Forwarded unchanged to probeDocsCandidate.
 * @returns {Promise<Array<object>>} De-duplicated, successfully probed candidates.
 */
async function probeDocsCandidates(candidates, options = {}) {
  // Best-effort probe: any failure is treated as "candidate unusable".
  const probeQuietly = async (candidate) => {
    try {
      return await probeDocsCandidate(candidate, options);
    } catch {
      return null;
    }
  };

  const outcomes = await Promise.all(candidates.map((candidate) => probeQuietly(candidate)));
  const usable = [];
  for (const outcome of outcomes) {
    if (outcome !== null) {
      usable.push(outcome);
    }
  }
  return dedupeDocsCandidates(usable);
}
|
|
957
1075
|
async function crawlDocsSite(entryUrl, options = {}) {
|
|
958
1076
|
const fetchImpl = options.fetchImpl ?? fetch;
|
|
959
1077
|
const maxPages = options.maxPages ?? DEFAULT_MAX_WEBSITE_PAGES;
|
|
@@ -1008,7 +1126,6 @@ async function crawlDocsSite(entryUrl, options = {}) {
|
|
|
1008
1126
|
continue;
|
|
1009
1127
|
}
|
|
1010
1128
|
visited.add(current);
|
|
1011
|
-
const currentUrl = new URL(current);
|
|
1012
1129
|
const page = await fetchHtmlPage(fetchImpl, current);
|
|
1013
1130
|
if (page === null) {
|
|
1014
1131
|
if (sources.length === 0) {
|
|
@@ -1017,7 +1134,7 @@ async function crawlDocsSite(entryUrl, options = {}) {
|
|
|
1017
1134
|
continue;
|
|
1018
1135
|
}
|
|
1019
1136
|
const html = page.html;
|
|
1020
|
-
const links = extractHtmlLinks(html,
|
|
1137
|
+
const links = extractHtmlLinks(html, page.pageUrl);
|
|
1021
1138
|
docsPrefix ??= deriveDocsPrefix(initialUrl, links);
|
|
1022
1139
|
for (const link of links) {
|
|
1023
1140
|
const normalized = normalizeUrl(link);
|
|
@@ -1031,10 +1148,10 @@ async function crawlDocsSite(entryUrl, options = {}) {
|
|
|
1031
1148
|
queue.push(normalized);
|
|
1032
1149
|
}
|
|
1033
1150
|
}
|
|
1034
|
-
if (!matchesDocsScope(
|
|
1151
|
+
if (!matchesDocsScope(page.pageUrl, initialUrl, docsPrefix)) {
|
|
1035
1152
|
continue;
|
|
1036
1153
|
}
|
|
1037
|
-
const source = createWebsiteSource(html,
|
|
1154
|
+
const source = createWebsiteSource(html, page.pageUrl);
|
|
1038
1155
|
if (source === null) {
|
|
1039
1156
|
continue;
|
|
1040
1157
|
}
|
|
@@ -1555,18 +1672,16 @@ function extractRepositoryUrl(repository) {
|
|
|
1555
1672
|
|
|
1556
1673
|
// src/lib/discoverSources.ts
|
|
1557
1674
|
var UNPKG_BASE2 = "https://unpkg.com";
|
|
1558
|
-
var SOURCE_MANIFEST_VERSION =
|
|
1675
|
+
var SOURCE_MANIFEST_VERSION = 4;
|
|
1559
1676
|
var TOP_LEVEL_DOC_ALLOWLIST = /* @__PURE__ */ new Set(["API.MD", "FAQ.MD", "MIGRATING.MD", "UPGRADING.MD", "CHANGELOG.MD", "CONTRIBUTING.MD"]);
|
|
1560
1677
|
var LOCAL_TS_DIR_EXCLUDES = /* @__PURE__ */ new Set([".git", ".windsurf", "coverage", "dist", "dist-test", "node_modules", "sandbox"]);
|
|
1678
|
+
// Upper bound on how many docs candidates get crawled; used as the slice limit in discoverWebsiteSourcesFromCandidates.
var MAX_WEBSITE_CANDIDATES = 2;
|
|
1561
1679
|
/**
 * Strip a single leading "/" from an origin path, if present.
 *
 * @param {string} origin - Origin path of a source.
 * @returns {string} The origin without its first leading slash.
 */
function normalizeOrigin(origin) {
  if (origin.startsWith("/")) {
    return origin.slice(1);
  }
  return origin;
}
|
|
1564
1682
|
/**
 * Produce the canonical (upper-cased) form of a top-level doc file name.
 *
 * The previous implementation first rewrote a case-insensitive ".md" suffix
 * to ".md" and then upper-cased the whole string; the rewrite was a no-op
 * because upper-casing overrides it, so the name is upper-cased directly.
 *
 * @param {string} path - File name such as "Changelog.md".
 * @returns {string} The upper-cased name, e.g. "CHANGELOG.MD".
 */
function canonicalTopLevelDocName(path) {
  return path.toUpperCase();
}
|
|
1567
|
-
function isSearchSource(source) {
|
|
1568
|
-
return source !== null;
|
|
1569
|
-
}
|
|
1570
1685
|
function cacheFileForOrigin(origin) {
|
|
1571
1686
|
const normalized = normalizeOrigin(origin);
|
|
1572
1687
|
if (normalized === "README.md") {
|
|
@@ -1577,39 +1692,84 @@ function cacheFileForOrigin(origin) {
|
|
|
1577
1692
|
/**
 * Derive a lower-case identifier from a source origin.
 *
 * The origin is normalized, every run of non-alphanumeric characters becomes
 * "__", leading and trailing underscore runs are removed, and the result is
 * lower-cased.
 *
 * @param {string} origin - Origin path of the source.
 * @returns {string} Identifier such as "docs__api__md".
 */
function createSourceId(origin) {
  const normalized = normalizeOrigin(origin);
  const underscored = normalized.replace(/[^a-zA-Z0-9]+/g, "__");
  const trimmed = underscored.replace(/^__+|__+$/g, "");
  return trimmed.toLowerCase();
}
|
|
1580
|
-
function createSource(sourceKind, origin, content) {
|
|
1695
|
+
function createSource(sourceKind, origin, content, discoveryMethod) {
|
|
1581
1696
|
const normalized = normalizeOrigin(origin);
|
|
1582
1697
|
return {
|
|
1583
1698
|
sourceId: createSourceId(normalized),
|
|
1584
1699
|
sourceKind,
|
|
1585
1700
|
origin: normalized,
|
|
1586
1701
|
title: posix2.basename(normalized),
|
|
1587
|
-
content
|
|
1702
|
+
content,
|
|
1703
|
+
discoveryMethod
|
|
1588
1704
|
};
|
|
1589
1705
|
}
|
|
1590
1706
|
function mapSourceKind(source, sourceKind, originPrefix) {
|
|
1591
1707
|
const origin = originPrefix === void 0 ? source.origin : `${originPrefix}/${source.origin}`;
|
|
1592
|
-
return createSourceWithTitle(sourceKind, origin, source.title, source.content);
|
|
1708
|
+
return createSourceWithTitle(sourceKind, origin, source.title, source.content, source.discoveryMethod);
|
|
1593
1709
|
}
|
|
1594
|
-
function createSourceWithTitle(sourceKind, origin, title, content) {
|
|
1710
|
+
function createSourceWithTitle(sourceKind, origin, title, content, discoveryMethod) {
|
|
1595
1711
|
const normalized = normalizeOrigin(origin);
|
|
1596
1712
|
return {
|
|
1597
1713
|
sourceId: createSourceId(normalized),
|
|
1598
1714
|
sourceKind,
|
|
1599
1715
|
origin: normalized,
|
|
1600
1716
|
title,
|
|
1601
|
-
content
|
|
1717
|
+
content,
|
|
1718
|
+
discoveryMethod
|
|
1602
1719
|
};
|
|
1603
1720
|
}
|
|
1604
1721
|
/**
 * Count the lines in a piece of text.
 *
 * @param {string} content - Text to measure.
 * @returns {number} 0 for an empty string, otherwise the number of "\n"-separated segments.
 */
function countLines(content) {
  if (content.length === 0) {
    return 0;
  }
  const segments = content.split("\n");
  return segments.length;
}
|
|
1607
1724
|
async function discoverWebsiteSourcesFromTarget(target, readme, canCache, options = {}) {
|
|
1608
|
-
const
|
|
1609
|
-
|
|
1725
|
+
const packageJson = await readPackageJsonFromTarget(target, canCache);
|
|
1726
|
+
const candidates = await probeDocsCandidates(collectDocsCandidates(packageJson, readme, options.docsUrl));
|
|
1727
|
+
if (candidates.length === 0) {
|
|
1610
1728
|
return [];
|
|
1611
1729
|
}
|
|
1612
|
-
return
|
|
1730
|
+
return discoverWebsiteSourcesFromCandidates(candidates);
|
|
1731
|
+
}
|
|
1732
|
+
/**
 * Crawl the top docs candidates and merge the pages they yield.
 *
 * Only the first MAX_WEBSITE_CANDIDATES candidates are crawled, concurrently.
 * Every crawled source is tagged with the discovery method of the candidate
 * it came from; a candidate whose crawl throws contributes no sources. The
 * combined list is passed through dedupeWebsiteSources.
 *
 * @param {Array<{url: string, discoveryMethod: string}>} candidates - Candidates, best first.
 * @param {object} [options] - Forwarded to crawlDocsSite. Defaults to {},
 *   which is equivalent to the previous behavior of calling crawlDocsSite
 *   with no options.
 * @returns {Promise<Array<object>>} De-duplicated website sources.
 */
async function discoverWebsiteSourcesFromCandidates(candidates, options = {}) {
  const selected = candidates.slice(0, MAX_WEBSITE_CANDIDATES);
  const crawled = await Promise.all(selected.map(async (candidate) => {
    try {
      const sources = await crawlDocsSite(candidate.url, options);
      return sources.map((source) => ({ ...source, discoveryMethod: candidate.discoveryMethod }));
    } catch {
      // Best effort: one failing site must not discard the other candidates' pages.
      return [];
    }
  }));
  return dedupeWebsiteSources(crawled.flat());
}
|
|
1743
|
+
/**
 * Remove website sources whose content is identical after whitespace normalization.
 *
 * Sources with blank content are dropped. When two sources share the same
 * normalized content, the one with the better discovery method (as ordered by
 * compareDiscoveryMethod) is kept; on a tie the first one seen stays.
 * The result is sorted by `origin` using localeCompare.
 *
 * @param {Array<{origin: string, content: string, discoveryMethod?: string}>} sources
 * @returns {Array<object>} Unique sources ordered by origin.
 */
function dedupeWebsiteSources(sources) {
  const byContent = /* @__PURE__ */ new Map();
  sources.forEach((source) => {
    const fingerprint = source.content.replace(/\s+/g, " ").trim();
    if (fingerprint.length === 0) {
      return;
    }
    const kept = byContent.get(fingerprint);
    const isBetter = kept === void 0 || compareDiscoveryMethod(kept.discoveryMethod, source.discoveryMethod) > 0;
    if (isBetter) {
      byContent.set(fingerprint, source);
    }
  });
  const unique = Array.from(byContent.values());
  unique.sort((left, right) => left.origin.localeCompare(right.origin));
  return unique;
}
|
|
1757
|
+
/**
 * Compare two discovery methods by priority.
 *
 * Ranks: "explicit" 0, "readme_link" 1, "homepage" 2, "derived_pages" 3,
 * anything else 4.
 *
 * @param {string} [left] - First discovery method.
 * @param {string} [right] - Second discovery method.
 * @returns {number} Negative when `left` is preferred, positive when `right` is, 0 when equal.
 */
function compareDiscoveryMethod(left, right) {
  const knownRanks = new Map([
    ["explicit", 0],
    ["readme_link", 1],
    ["homepage", 2],
    ["derived_pages", 3]
  ]);
  const fallbackRank = 4;
  const leftRank = knownRanks.get(left) ?? fallbackRank;
  const rightRank = knownRanks.get(right) ?? fallbackRank;
  return leftRank - rightRank;
}
|
|
1614
1774
|
async function discoverWikiSourcesFromTarget(target, canCache) {
|
|
1615
1775
|
const packageJson = await readPackageJsonFromTarget(target, canCache);
|
|
@@ -1696,17 +1856,6 @@ async function readPackageJsonFromTarget(target, canCache) {
|
|
|
1696
1856
|
const parsed = JSON.parse(raw);
|
|
1697
1857
|
return isPackageJsonLike(parsed) ? parsed : {};
|
|
1698
1858
|
}
|
|
1699
|
-
async function detectResolvedDocsUrl(target, readme, canCache, explicitDocsUrl) {
|
|
1700
|
-
if (explicitDocsUrl !== void 0) {
|
|
1701
|
-
return explicitDocsUrl;
|
|
1702
|
-
}
|
|
1703
|
-
const packageJson = await readPackageJsonFromTarget(target, canCache);
|
|
1704
|
-
const pagesUrl = detectPagesUrl(packageJson);
|
|
1705
|
-
if (pagesUrl !== null) {
|
|
1706
|
-
return pagesUrl;
|
|
1707
|
-
}
|
|
1708
|
-
return detectDocsUrl(readme);
|
|
1709
|
-
}
|
|
1710
1859
|
async function discoverJsDocSourcesFromTarget(target, packageJson, canCache) {
|
|
1711
1860
|
const fetcher = await discoverSourceCodeFetcher(target, packageJson, { canCache });
|
|
1712
1861
|
if (fetcher === null) {
|
|
@@ -1740,7 +1889,8 @@ function buildSourceCatalogEntries(sources) {
|
|
|
1740
1889
|
sourceKind: source.sourceKind,
|
|
1741
1890
|
origin: source.origin,
|
|
1742
1891
|
title: source.title,
|
|
1743
|
-
lineCount: countLines(source.content)
|
|
1892
|
+
lineCount: countLines(source.content),
|
|
1893
|
+
discoveryMethod: source.discoveryMethod
|
|
1744
1894
|
}));
|
|
1745
1895
|
}
|
|
1746
1896
|
async function collectWorkspacePackages(workspaceRoot) {
|
|
@@ -1847,11 +1997,19 @@ async function readManifestSources(pkg, version) {
|
|
|
1847
1997
|
sourceKind: entry.sourceKind,
|
|
1848
1998
|
origin: entry.origin,
|
|
1849
1999
|
title: entry.title,
|
|
1850
|
-
content
|
|
2000
|
+
content,
|
|
2001
|
+
discoveryMethod: entry.discoveryMethod
|
|
1851
2002
|
};
|
|
1852
2003
|
})
|
|
1853
2004
|
);
|
|
1854
|
-
|
|
2005
|
+
const resolvedSources = [];
|
|
2006
|
+
for (const source of sources) {
|
|
2007
|
+
if (source === null) {
|
|
2008
|
+
return null;
|
|
2009
|
+
}
|
|
2010
|
+
resolvedSources.push(source);
|
|
2011
|
+
}
|
|
2012
|
+
return resolvedSources;
|
|
1855
2013
|
}
|
|
1856
2014
|
async function writeManifestSources(pkg, version, sources) {
|
|
1857
2015
|
await Promise.all(
|
|
@@ -1864,7 +2022,8 @@ async function writeManifestSources(pkg, version, sources) {
|
|
|
1864
2022
|
sourceKind: source.sourceKind,
|
|
1865
2023
|
origin: source.origin,
|
|
1866
2024
|
title: source.title,
|
|
1867
|
-
cacheFile: cacheFileForOrigin(source.origin)
|
|
2025
|
+
cacheFile: cacheFileForOrigin(source.origin),
|
|
2026
|
+
discoveryMethod: source.discoveryMethod
|
|
1868
2027
|
}))
|
|
1869
2028
|
};
|
|
1870
2029
|
await writeCache(pkg, version, "sources-manifest.json", JSON.stringify(manifest, null, 2));
|
|
@@ -2465,7 +2624,8 @@ function extractMarkdownChunks(source) {
|
|
|
2465
2624
|
path: source.origin,
|
|
2466
2625
|
startLine: 1,
|
|
2467
2626
|
endLine: totalLines,
|
|
2468
|
-
content
|
|
2627
|
+
content,
|
|
2628
|
+
discoveryMethod: source.discoveryMethod
|
|
2469
2629
|
}];
|
|
2470
2630
|
}
|
|
2471
2631
|
return entries.map((entry, index) => ({
|
|
@@ -2478,7 +2638,8 @@ function extractMarkdownChunks(source) {
|
|
|
2478
2638
|
path: entry.path,
|
|
2479
2639
|
startLine: entry.startLine,
|
|
2480
2640
|
endLine: entry.endLine,
|
|
2481
|
-
content: sliceLines(source.content, entry)
|
|
2641
|
+
content: sliceLines(source.content, entry),
|
|
2642
|
+
discoveryMethod: source.discoveryMethod
|
|
2482
2643
|
}));
|
|
2483
2644
|
}
|
|
2484
2645
|
|
|
@@ -2782,7 +2943,7 @@ function stemPorter(word) {
|
|
|
2782
2943
|
// src/lib/searchDocs.ts
|
|
2783
2944
|
var STOPWORDS = /* @__PURE__ */ new Set(["a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "how", "in", "is", "it", "of", "on", "or", "that", "the", "to", "with"]);
|
|
2784
2945
|
var FIELD_WEIGHTS = { title: 5, path: 3, body: 1 };
|
|
2785
|
-
var SEARCH_INDEX_VERSION =
|
|
2946
|
+
var SEARCH_INDEX_VERSION = 5;
|
|
2786
2947
|
/**
 * Name of the cache file that holds the search index.
 *
 * @returns {string} Always "search-index.json".
 */
function searchIndexCacheFile() {
  const fileName = "search-index.json";
  return fileName;
}
|
|
@@ -2927,6 +3088,20 @@ function buildExampleMetadata(chunk) {
|
|
|
2927
3088
|
topicId
|
|
2928
3089
|
};
|
|
2929
3090
|
}
|
|
3091
|
+
/**
 * Ranking bonus for a chunk based on how its source was discovered.
 *
 * @param {string} [discoveryMethod] - Discovery method recorded on the chunk.
 * @returns {number} 0.3 for "explicit", 0.2 for "readme_link", 0.1 for "homepage",
 *   and 0 for "derived_pages" or any other value.
 */
function discoveryMethodScore(discoveryMethod) {
  const bonuses = new Map([
    ["explicit", 0.3],
    ["readme_link", 0.2],
    ["homepage", 0.1],
    ["derived_pages", 0]
  ]);
  return bonuses.get(discoveryMethod) ?? 0;
}
|
|
2930
3105
|
function rankIndexEntries(index, query) {
|
|
2931
3106
|
const queryTokens = [...new Set(tokenize(query))];
|
|
2932
3107
|
if (queryTokens.length === 0) {
|
|
@@ -2960,6 +3135,7 @@ function rankIndexEntries(index, query) {
|
|
|
2960
3135
|
score += 1.5;
|
|
2961
3136
|
}
|
|
2962
3137
|
}
|
|
3138
|
+
score += discoveryMethodScore(entry.chunk.discoveryMethod);
|
|
2963
3139
|
return {
|
|
2964
3140
|
chunk: entry.chunk,
|
|
2965
3141
|
indexPosition,
|
|
@@ -3866,7 +4042,7 @@ async function buildCompareVersionsResponse(pkg, v_old, v_new) {
|
|
|
3866
4042
|
function createServer() {
|
|
3867
4043
|
const server = new McpServer({
|
|
3868
4044
|
name: "soup-chop",
|
|
3869
|
-
version: "1.0.
|
|
4045
|
+
version: "1.0.5"
|
|
3870
4046
|
});
|
|
3871
4047
|
server.registerResource(
|
|
3872
4048
|
"capabilities",
|