@arabold/docs-mcp-server 1.25.3 → 1.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/android-icon-144x144.png +0 -0
- package/dist/android-icon-192x192.png +0 -0
- package/dist/android-icon-36x36.png +0 -0
- package/dist/android-icon-48x48.png +0 -0
- package/dist/android-icon-72x72.png +0 -0
- package/dist/android-icon-96x96.png +0 -0
- package/dist/apple-icon-114x114.png +0 -0
- package/dist/apple-icon-120x120.png +0 -0
- package/dist/apple-icon-144x144.png +0 -0
- package/dist/apple-icon-152x152.png +0 -0
- package/dist/apple-icon-180x180.png +0 -0
- package/dist/apple-icon-57x57.png +0 -0
- package/dist/apple-icon-60x60.png +0 -0
- package/dist/apple-icon-72x72.png +0 -0
- package/dist/apple-icon-76x76.png +0 -0
- package/dist/apple-icon-precomposed.png +0 -0
- package/dist/apple-icon.png +0 -0
- package/dist/assets/android-icon-144x144.png +0 -0
- package/dist/assets/android-icon-192x192.png +0 -0
- package/dist/assets/android-icon-36x36.png +0 -0
- package/dist/assets/android-icon-48x48.png +0 -0
- package/dist/assets/android-icon-72x72.png +0 -0
- package/dist/assets/android-icon-96x96.png +0 -0
- package/dist/assets/apple-icon-114x114.png +0 -0
- package/dist/assets/apple-icon-120x120.png +0 -0
- package/dist/assets/apple-icon-144x144.png +0 -0
- package/dist/assets/apple-icon-152x152.png +0 -0
- package/dist/assets/apple-icon-180x180.png +0 -0
- package/dist/assets/apple-icon-57x57.png +0 -0
- package/dist/assets/apple-icon-60x60.png +0 -0
- package/dist/assets/apple-icon-72x72.png +0 -0
- package/dist/assets/apple-icon-76x76.png +0 -0
- package/dist/assets/apple-icon-precomposed.png +0 -0
- package/dist/assets/apple-icon.png +0 -0
- package/dist/assets/favicon-16x16.png +0 -0
- package/dist/assets/favicon-32x32.png +0 -0
- package/dist/assets/favicon-96x96.png +0 -0
- package/dist/assets/favicon.ico +0 -0
- package/dist/assets/main.css +1 -1
- package/dist/assets/main.js +167 -81
- package/dist/assets/main.js.map +1 -1
- package/dist/assets/manifest.json +47 -0
- package/dist/assets/ms-icon-144x144.png +0 -0
- package/dist/assets/ms-icon-150x150.png +0 -0
- package/dist/assets/ms-icon-310x310.png +0 -0
- package/dist/assets/ms-icon-70x70.png +0 -0
- package/dist/favicon-16x16.png +0 -0
- package/dist/favicon-32x32.png +0 -0
- package/dist/favicon-96x96.png +0 -0
- package/dist/favicon.ico +0 -0
- package/dist/index.js +854 -148
- package/dist/index.js.map +1 -1
- package/dist/manifest.json +47 -0
- package/dist/ms-icon-144x144.png +0 -0
- package/dist/ms-icon-150x150.png +0 -0
- package/dist/ms-icon-310x310.png +0 -0
- package/dist/ms-icon-70x70.png +0 -0
- package/package.json +3 -7
- package/public/android-icon-144x144.png +0 -0
- package/public/android-icon-192x192.png +0 -0
- package/public/android-icon-36x36.png +0 -0
- package/public/android-icon-48x48.png +0 -0
- package/public/android-icon-72x72.png +0 -0
- package/public/android-icon-96x96.png +0 -0
- package/public/apple-icon-114x114.png +0 -0
- package/public/apple-icon-120x120.png +0 -0
- package/public/apple-icon-144x144.png +0 -0
- package/public/apple-icon-152x152.png +0 -0
- package/public/apple-icon-180x180.png +0 -0
- package/public/apple-icon-57x57.png +0 -0
- package/public/apple-icon-60x60.png +0 -0
- package/public/apple-icon-72x72.png +0 -0
- package/public/apple-icon-76x76.png +0 -0
- package/public/apple-icon-precomposed.png +0 -0
- package/public/apple-icon.png +0 -0
- package/public/assets/android-icon-144x144.png +0 -0
- package/public/assets/android-icon-192x192.png +0 -0
- package/public/assets/android-icon-36x36.png +0 -0
- package/public/assets/android-icon-48x48.png +0 -0
- package/public/assets/android-icon-72x72.png +0 -0
- package/public/assets/android-icon-96x96.png +0 -0
- package/public/assets/apple-icon-114x114.png +0 -0
- package/public/assets/apple-icon-120x120.png +0 -0
- package/public/assets/apple-icon-144x144.png +0 -0
- package/public/assets/apple-icon-152x152.png +0 -0
- package/public/assets/apple-icon-180x180.png +0 -0
- package/public/assets/apple-icon-57x57.png +0 -0
- package/public/assets/apple-icon-60x60.png +0 -0
- package/public/assets/apple-icon-72x72.png +0 -0
- package/public/assets/apple-icon-76x76.png +0 -0
- package/public/assets/apple-icon-precomposed.png +0 -0
- package/public/assets/apple-icon.png +0 -0
- package/public/assets/favicon-16x16.png +0 -0
- package/public/assets/favicon-32x32.png +0 -0
- package/public/assets/favicon-96x96.png +0 -0
- package/public/assets/favicon.ico +0 -0
- package/public/assets/main.css +1 -1
- package/public/assets/main.js +167 -81
- package/public/assets/main.js.map +1 -1
- package/public/assets/manifest.json +47 -0
- package/public/assets/ms-icon-144x144.png +0 -0
- package/public/assets/ms-icon-150x150.png +0 -0
- package/public/assets/ms-icon-310x310.png +0 -0
- package/public/assets/ms-icon-70x70.png +0 -0
- package/public/favicon-16x16.png +0 -0
- package/public/favicon-32x32.png +0 -0
- package/public/favicon-96x96.png +0 -0
- package/public/favicon.ico +0 -0
- package/public/manifest.json +47 -0
- package/public/ms-icon-144x144.png +0 -0
- package/public/ms-icon-150x150.png +0 -0
- package/public/ms-icon-310x310.png +0 -0
- package/public/ms-icon-70x70.png +0 -0
package/dist/index.js
CHANGED
|
@@ -24,9 +24,9 @@ import { v4 } from "uuid";
|
|
|
24
24
|
import { VirtualConsole, JSDOM } from "jsdom";
|
|
25
25
|
import mime from "mime";
|
|
26
26
|
import psl from "psl";
|
|
27
|
+
import { HeaderGenerator } from "header-generator";
|
|
27
28
|
import fs$1 from "node:fs/promises";
|
|
28
29
|
import axios from "axios";
|
|
29
|
-
import { HeaderGenerator } from "header-generator";
|
|
30
30
|
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
|
|
31
31
|
import remarkGfm from "remark-gfm";
|
|
32
32
|
import remarkHtml from "remark-html";
|
|
@@ -752,14 +752,14 @@ function extractProtocol(urlOrPath) {
|
|
|
752
752
|
}
|
|
753
753
|
}
|
|
754
754
|
const name = "@arabold/docs-mcp-server";
|
|
755
|
-
const version = "1.25.
|
|
755
|
+
const version = "1.25.3";
|
|
756
756
|
const description = "MCP server for fetching and searching documentation";
|
|
757
757
|
const type = "module";
|
|
758
758
|
const bin = { "docs-mcp-server": "dist/index.js" };
|
|
759
759
|
const license = "MIT";
|
|
760
760
|
const repository = { "type": "git", "url": "git+https://github.com/arabold/docs-mcp-server.git" };
|
|
761
761
|
const files = ["dist", "public", "db", "README.md", "LICENSE", "package.json"];
|
|
762
|
-
const scripts = { "prepare": "husky || true", "build": "vite build --config vite.config.web.ts && vite build", "start": "node --enable-source-maps dist/index.js", "cli": "node --enable-source-maps dist/index.js", "server": "node --enable-source-maps dist/index.ts", "web": "node --enable-source-maps dist/index.ts web", "dev": "
|
|
762
|
+
const scripts = { "prepare": "husky || true", "build": "vite build --config vite.config.web.ts && vite build", "start": "node --enable-source-maps dist/index.js", "cli": "node --enable-source-maps dist/index.js", "server": "node --enable-source-maps dist/index.ts", "web": "node --enable-source-maps dist/index.ts web", "dev": "npm-run-all --parallel dev:server dev:web", "dev:server": "vite-node --watch src/index.ts", "dev:web": "vite build --config vite.config.web.ts --watch", "test": "vitest run", "test:watch": "vitest", "test:coverage": "vitest run --coverage", "test:e2e": "vitest run --config test/vitest.config.ts", "test:e2e:watch": "vitest --config test/vitest.config.ts", "lint": "biome check .", "lint:fix": "biome check . --fix", "format": "biome format . --write", "postinstall": "echo 'Skipping Playwright browser install. See README.md for details.'" };
|
|
763
763
|
const dependencies = { "@fastify/formbody": "^8.0.2", "@fastify/static": "^8.2.0", "@joplin/turndown-plugin-gfm": "^1.0.62", "@kitajs/html": "^4.2.9", "@kitajs/ts-html-plugin": "^4.1.1", "@langchain/aws": "^0.1.13", "@langchain/google-genai": "^0.2.16", "@langchain/google-vertexai": "^0.2.16", "@langchain/openai": "^0.6.3", "@modelcontextprotocol/sdk": "^1.17.1", "@trpc/client": "^11.4.4", "@trpc/server": "^11.4.4", "alpinejs": "^3.14.9", "axios": "^1.11.0", "axios-retry": "^4.5.0", "better-sqlite3": "^12.2.0", "cheerio": "^1.1.2", "commander": "^14.0.0", "dompurify": "^3.2.6", "dotenv": "^17.2.1", "env-paths": "^3.0.0", "fastify": "^5.4.0", "flowbite": "^3.1.2", "fuse.js": "^7.1.0", "header-generator": "^2.1.69", "htmx.org": "^2.0.6", "iconv-lite": "^0.6.3", "jose": "^6.0.12", "jsdom": "^26.1.0", "langchain": "^0.3.30", "mime": "^4.0.7", "minimatch": "^10.0.1", "playwright": "^1.52.0", "posthog-node": "^5.7.0", "psl": "^1.15.0", "remark": "^15.0.1", "remark-gfm": "^4.0.1", "remark-html": "^16.0.1", "semver": "^7.7.2", "sqlite-vec": "^0.1.7-alpha.2", "tree-sitter": "^0.21.1", "tree-sitter-javascript": "^0.23.1", "tree-sitter-python": "^0.21.0", "tree-sitter-typescript": "^0.23.2", "turndown": "^7.2.0", "zod": "^4.0.14" };
|
|
764
764
|
const devDependencies = { "@biomejs/biome": "^2.1.3", "@commitlint/cli": "^19.8.1", "@commitlint/config-conventional": "^19.8.1", "@semantic-release/changelog": "^6.0.3", "@semantic-release/git": "^10.0.1", "@semantic-release/github": "^11.0.3", "@semantic-release/npm": "^12.0.2", "@tailwindcss/postcss": "^4.1.11", "@tailwindcss/vite": "^4.1.11", "@types/alpinejs": "^3.13.11", "@types/better-sqlite3": "^7.6.13", "@types/jsdom": "~21.1.7", "@types/lint-staged": "~13.3.0", "@types/node": "^24.1.0", "@types/node-fetch": "^2.6.13", "@types/psl": "^1.1.3", "@types/semver": "^7.7.0", "@types/turndown": "^5.0.5", "autoprefixer": "^10.4.21", "flowbite-typography": "^1.0.5", "husky": "^9.1.7", "lint-staged": "^16.1.2", "memfs": "^4.34.0", "npm-run-all": "^4.1.5", "postcss": "^8.5.6", "semantic-release": "^24.2.7", "tailwindcss": "^4.1.4", "typescript": "^5.9.2", "vite": "^6.3.5", "vite-node": "^3.1.2", "vite-plugin-dts": "^4.5.4", "vitest": "^3.2.4" };
|
|
765
765
|
const engines = { "node": ">=20.0.0" };
|
|
@@ -1407,6 +1407,17 @@ class RedirectError extends ScraperError {
|
|
|
1407
1407
|
this.statusCode = statusCode;
|
|
1408
1408
|
}
|
|
1409
1409
|
}
|
|
1410
|
+
class ChallengeError extends ScraperError {
|
|
1411
|
+
constructor(url, statusCode, challengeType) {
|
|
1412
|
+
super(
|
|
1413
|
+
`Challenge detected for ${url} (status: ${statusCode}, type: ${challengeType})`,
|
|
1414
|
+
false
|
|
1415
|
+
);
|
|
1416
|
+
this.url = url;
|
|
1417
|
+
this.statusCode = statusCode;
|
|
1418
|
+
this.challengeType = challengeType;
|
|
1419
|
+
}
|
|
1420
|
+
}
|
|
1410
1421
|
class MimeTypeUtils {
|
|
1411
1422
|
/**
|
|
1412
1423
|
* Parses a Content-Type header string into its MIME type and charset.
|
|
@@ -1683,6 +1694,132 @@ function extractPrimaryDomain(hostname) {
|
|
|
1683
1694
|
const domain = psl.get(hostname.toLowerCase());
|
|
1684
1695
|
return domain || hostname;
|
|
1685
1696
|
}
|
|
1697
|
+
class FingerprintGenerator {
|
|
1698
|
+
headerGenerator;
|
|
1699
|
+
/**
|
|
1700
|
+
* Creates an instance of FingerprintGenerator.
|
|
1701
|
+
* @param options Optional configuration for the header generator.
|
|
1702
|
+
*/
|
|
1703
|
+
constructor(options) {
|
|
1704
|
+
const defaultOptions = {
|
|
1705
|
+
browsers: [{ name: "chrome", minVersion: 100 }, "firefox", "safari"],
|
|
1706
|
+
devices: ["desktop", "mobile"],
|
|
1707
|
+
operatingSystems: ["windows", "linux", "macos", "android", "ios"],
|
|
1708
|
+
locales: ["en-US", "en"],
|
|
1709
|
+
httpVersion: "2"
|
|
1710
|
+
};
|
|
1711
|
+
this.headerGenerator = new HeaderGenerator({
|
|
1712
|
+
...defaultOptions,
|
|
1713
|
+
...options
|
|
1714
|
+
});
|
|
1715
|
+
}
|
|
1716
|
+
/**
|
|
1717
|
+
* Generates a set of realistic HTTP headers.
|
|
1718
|
+
* @returns A set of realistic HTTP headers.
|
|
1719
|
+
*/
|
|
1720
|
+
generateHeaders() {
|
|
1721
|
+
return this.headerGenerator.getHeaders();
|
|
1722
|
+
}
|
|
1723
|
+
}
|
|
1724
|
+
class BrowserFetcher {
|
|
1725
|
+
browser = null;
|
|
1726
|
+
page = null;
|
|
1727
|
+
fingerprintGenerator;
|
|
1728
|
+
constructor() {
|
|
1729
|
+
this.fingerprintGenerator = new FingerprintGenerator();
|
|
1730
|
+
}
|
|
1731
|
+
canFetch(source) {
|
|
1732
|
+
return source.startsWith("http://") || source.startsWith("https://");
|
|
1733
|
+
}
|
|
1734
|
+
async fetch(source, options) {
|
|
1735
|
+
try {
|
|
1736
|
+
await this.ensureBrowserReady();
|
|
1737
|
+
if (!this.page) {
|
|
1738
|
+
throw new ScraperError("Failed to create browser page", false);
|
|
1739
|
+
}
|
|
1740
|
+
if (options?.headers) {
|
|
1741
|
+
await this.page.setExtraHTTPHeaders(options.headers);
|
|
1742
|
+
}
|
|
1743
|
+
const timeout = options?.timeout || 3e4;
|
|
1744
|
+
logger.debug(`🌐 Navigating to ${source} with browser...`);
|
|
1745
|
+
const response = await this.page.goto(source, {
|
|
1746
|
+
waitUntil: "networkidle",
|
|
1747
|
+
timeout
|
|
1748
|
+
});
|
|
1749
|
+
if (!response) {
|
|
1750
|
+
throw new ScraperError(`Failed to navigate to ${source}`, false);
|
|
1751
|
+
}
|
|
1752
|
+
if (options?.followRedirects === false && response.status() >= 300 && response.status() < 400) {
|
|
1753
|
+
const location = response.headers().location;
|
|
1754
|
+
if (location) {
|
|
1755
|
+
throw new ScraperError(`Redirect blocked: ${source} -> ${location}`, false);
|
|
1756
|
+
}
|
|
1757
|
+
}
|
|
1758
|
+
const finalUrl = this.page.url();
|
|
1759
|
+
const content = await this.page.content();
|
|
1760
|
+
const contentBuffer = Buffer.from(content, "utf-8");
|
|
1761
|
+
const contentType = response.headers()["content-type"] || "text/html";
|
|
1762
|
+
const { mimeType, charset } = MimeTypeUtils.parseContentType(contentType);
|
|
1763
|
+
return {
|
|
1764
|
+
content: contentBuffer,
|
|
1765
|
+
mimeType,
|
|
1766
|
+
charset,
|
|
1767
|
+
encoding: void 0,
|
|
1768
|
+
// Browser handles encoding automatically
|
|
1769
|
+
source: finalUrl
|
|
1770
|
+
};
|
|
1771
|
+
} catch (error) {
|
|
1772
|
+
if (options?.signal?.aborted) {
|
|
1773
|
+
throw new ScraperError("Browser fetch cancelled", false);
|
|
1774
|
+
}
|
|
1775
|
+
logger.error(`❌ Browser fetch failed for ${source}: ${error}`);
|
|
1776
|
+
throw new ScraperError(
|
|
1777
|
+
`Browser fetch failed for ${source}: ${error instanceof Error ? error.message : String(error)}`,
|
|
1778
|
+
false,
|
|
1779
|
+
error instanceof Error ? error : void 0
|
|
1780
|
+
);
|
|
1781
|
+
}
|
|
1782
|
+
}
|
|
1783
|
+
async ensureBrowserReady() {
|
|
1784
|
+
if (!this.browser) {
|
|
1785
|
+
logger.debug("🚀 Launching browser...");
|
|
1786
|
+
this.browser = await chromium.launch({
|
|
1787
|
+
headless: true,
|
|
1788
|
+
args: [
|
|
1789
|
+
"--no-sandbox",
|
|
1790
|
+
"--disable-setuid-sandbox",
|
|
1791
|
+
"--disable-dev-shm-usage",
|
|
1792
|
+
"--disable-web-security",
|
|
1793
|
+
"--disable-features=site-per-process"
|
|
1794
|
+
]
|
|
1795
|
+
});
|
|
1796
|
+
}
|
|
1797
|
+
if (!this.page) {
|
|
1798
|
+
this.page = await this.browser.newPage();
|
|
1799
|
+
const dynamicHeaders = this.fingerprintGenerator.generateHeaders();
|
|
1800
|
+
await this.page.setExtraHTTPHeaders(dynamicHeaders);
|
|
1801
|
+
await this.page.setViewportSize({ width: 1920, height: 1080 });
|
|
1802
|
+
}
|
|
1803
|
+
}
|
|
1804
|
+
/**
|
|
1805
|
+
* Close the browser and clean up resources
|
|
1806
|
+
*/
|
|
1807
|
+
async close() {
|
|
1808
|
+
try {
|
|
1809
|
+
if (this.page) {
|
|
1810
|
+
await this.page.close();
|
|
1811
|
+
this.page = null;
|
|
1812
|
+
}
|
|
1813
|
+
if (this.browser) {
|
|
1814
|
+
await this.browser.close();
|
|
1815
|
+
this.browser = null;
|
|
1816
|
+
}
|
|
1817
|
+
logger.debug("🔒 Browser closed successfully");
|
|
1818
|
+
} catch (error) {
|
|
1819
|
+
logger.warn(`⚠️ Error closing browser: ${error}`);
|
|
1820
|
+
}
|
|
1821
|
+
}
|
|
1822
|
+
}
|
|
1686
1823
|
class FileFetcher {
|
|
1687
1824
|
canFetch(source) {
|
|
1688
1825
|
return source.startsWith("file://");
|
|
@@ -1716,33 +1853,6 @@ class FileFetcher {
|
|
|
1716
1853
|
}
|
|
1717
1854
|
}
|
|
1718
1855
|
}
|
|
1719
|
-
class FingerprintGenerator {
|
|
1720
|
-
headerGenerator;
|
|
1721
|
-
/**
|
|
1722
|
-
* Creates an instance of FingerprintGenerator.
|
|
1723
|
-
* @param options Optional configuration for the header generator.
|
|
1724
|
-
*/
|
|
1725
|
-
constructor(options) {
|
|
1726
|
-
const defaultOptions = {
|
|
1727
|
-
browsers: [{ name: "chrome", minVersion: 100 }, "firefox", "safari"],
|
|
1728
|
-
devices: ["desktop", "mobile"],
|
|
1729
|
-
operatingSystems: ["windows", "linux", "macos", "android", "ios"],
|
|
1730
|
-
locales: ["en-US", "en"],
|
|
1731
|
-
httpVersion: "2"
|
|
1732
|
-
};
|
|
1733
|
-
this.headerGenerator = new HeaderGenerator({
|
|
1734
|
-
...defaultOptions,
|
|
1735
|
-
...options
|
|
1736
|
-
});
|
|
1737
|
-
}
|
|
1738
|
-
/**
|
|
1739
|
-
* Generates a set of realistic HTTP headers.
|
|
1740
|
-
* @returns A set of realistic HTTP headers.
|
|
1741
|
-
*/
|
|
1742
|
-
generateHeaders() {
|
|
1743
|
-
return this.headerGenerator.getHeaders();
|
|
1744
|
-
}
|
|
1745
|
-
}
|
|
1746
1856
|
class HttpFetcher {
|
|
1747
1857
|
retryableStatusCodes = [
|
|
1748
1858
|
408,
|
|
@@ -1877,6 +1987,27 @@ class HttpFetcher {
|
|
|
1877
1987
|
throw new RedirectError(source, location, status);
|
|
1878
1988
|
}
|
|
1879
1989
|
}
|
|
1990
|
+
if (status === 403) {
|
|
1991
|
+
const cfMitigated = axiosError.response?.headers?.["cf-mitigated"];
|
|
1992
|
+
const server = axiosError.response?.headers?.server;
|
|
1993
|
+
let responseBody = "";
|
|
1994
|
+
if (axiosError.response?.data) {
|
|
1995
|
+
try {
|
|
1996
|
+
if (typeof axiosError.response.data === "string") {
|
|
1997
|
+
responseBody = axiosError.response.data;
|
|
1998
|
+
} else if (Buffer.isBuffer(axiosError.response.data)) {
|
|
1999
|
+
responseBody = axiosError.response.data.toString("utf-8");
|
|
2000
|
+
} else if (axiosError.response.data instanceof ArrayBuffer) {
|
|
2001
|
+
responseBody = Buffer.from(axiosError.response.data).toString("utf-8");
|
|
2002
|
+
}
|
|
2003
|
+
} catch {
|
|
2004
|
+
}
|
|
2005
|
+
}
|
|
2006
|
+
const isCloudflareChallenge = cfMitigated === "challenge" || server === "cloudflare" || responseBody.includes("Enable JavaScript and cookies to continue") || responseBody.includes("Just a moment...") || responseBody.includes("cf_chl_opt");
|
|
2007
|
+
if (isCloudflareChallenge) {
|
|
2008
|
+
throw new ChallengeError(source, status, "cloudflare");
|
|
2009
|
+
}
|
|
2010
|
+
}
|
|
1880
2011
|
if (attempt < maxRetries && (status === void 0 || this.retryableStatusCodes.includes(status))) {
|
|
1881
2012
|
const delay = baseDelay * 2 ** attempt;
|
|
1882
2013
|
logger.warn(
|
|
@@ -1898,6 +2029,52 @@ class HttpFetcher {
|
|
|
1898
2029
|
);
|
|
1899
2030
|
}
|
|
1900
2031
|
}
|
|
2032
|
+
class AutoDetectFetcher {
|
|
2033
|
+
httpFetcher = new HttpFetcher();
|
|
2034
|
+
browserFetcher = new BrowserFetcher();
|
|
2035
|
+
fileFetcher = new FileFetcher();
|
|
2036
|
+
/**
|
|
2037
|
+
* Check if this fetcher can handle the given source.
|
|
2038
|
+
* Returns true for any URL that any of the underlying fetchers can handle.
|
|
2039
|
+
*/
|
|
2040
|
+
canFetch(source) {
|
|
2041
|
+
return this.httpFetcher.canFetch(source) || this.browserFetcher.canFetch(source) || this.fileFetcher.canFetch(source);
|
|
2042
|
+
}
|
|
2043
|
+
/**
|
|
2044
|
+
* Fetch content from the source, automatically selecting the appropriate fetcher
|
|
2045
|
+
* and handling fallbacks when challenges are detected.
|
|
2046
|
+
*/
|
|
2047
|
+
async fetch(source, options) {
|
|
2048
|
+
if (this.fileFetcher.canFetch(source)) {
|
|
2049
|
+
logger.debug(`Using FileFetcher for: ${source}`);
|
|
2050
|
+
return this.fileFetcher.fetch(source, options);
|
|
2051
|
+
}
|
|
2052
|
+
if (this.httpFetcher.canFetch(source)) {
|
|
2053
|
+
try {
|
|
2054
|
+
logger.debug(`Using HttpFetcher for: ${source}`);
|
|
2055
|
+
return await this.httpFetcher.fetch(source, options);
|
|
2056
|
+
} catch (error) {
|
|
2057
|
+
if (error instanceof ChallengeError) {
|
|
2058
|
+
logger.info(
|
|
2059
|
+
`🔄 Challenge detected for ${source}, falling back to browser fetcher...`
|
|
2060
|
+
);
|
|
2061
|
+
return this.browserFetcher.fetch(source, options);
|
|
2062
|
+
}
|
|
2063
|
+
throw error;
|
|
2064
|
+
}
|
|
2065
|
+
}
|
|
2066
|
+
throw new Error(`No suitable fetcher found for URL: ${source}`);
|
|
2067
|
+
}
|
|
2068
|
+
/**
|
|
2069
|
+
* Close all underlying fetchers to prevent resource leaks.
|
|
2070
|
+
*/
|
|
2071
|
+
async close() {
|
|
2072
|
+
await Promise.allSettled([
|
|
2073
|
+
this.browserFetcher.close()
|
|
2074
|
+
// HttpFetcher and FileFetcher don't need explicit cleanup
|
|
2075
|
+
]);
|
|
2076
|
+
}
|
|
2077
|
+
}
|
|
1901
2078
|
class SplitterError extends Error {
|
|
1902
2079
|
}
|
|
1903
2080
|
class MinimumChunkSizeError extends SplitterError {
|
|
@@ -5765,7 +5942,7 @@ class BaseScraperStrategy {
|
|
|
5765
5942
|
async cleanup() {
|
|
5766
5943
|
}
|
|
5767
5944
|
}
|
|
5768
|
-
class GitHubScraperStrategy extends BaseScraperStrategy {
|
|
5945
|
+
class GitHubRepoScraperStrategy extends BaseScraperStrategy {
|
|
5769
5946
|
httpFetcher = new HttpFetcher();
|
|
5770
5947
|
pipelines;
|
|
5771
5948
|
resolvedBranch;
|
|
@@ -5799,9 +5976,18 @@ class GitHubScraperStrategy extends BaseScraperStrategy {
|
|
|
5799
5976
|
throw new Error(`Invalid GitHub repository URL: ${url}`);
|
|
5800
5977
|
}
|
|
5801
5978
|
const [, owner, repo] = match;
|
|
5802
|
-
const
|
|
5803
|
-
|
|
5804
|
-
|
|
5979
|
+
const segments = parsedUrl.pathname.split("/").filter(Boolean);
|
|
5980
|
+
if (segments.length >= 4 && segments[2] === "blob") {
|
|
5981
|
+
const branch2 = segments[3];
|
|
5982
|
+
const filePath = segments.length > 4 ? segments.slice(4).join("/") : void 0;
|
|
5983
|
+
return { owner, repo, branch: branch2, filePath, isBlob: true };
|
|
5984
|
+
}
|
|
5985
|
+
if (segments.length < 4 || segments[2] !== "tree") {
|
|
5986
|
+
return { owner, repo };
|
|
5987
|
+
}
|
|
5988
|
+
const branch = segments[3];
|
|
5989
|
+
const subPath = segments.length > 4 ? segments.slice(4).join("/") : void 0;
|
|
5990
|
+
return { owner, repo, branch, subPath };
|
|
5805
5991
|
}
|
|
5806
5992
|
/**
|
|
5807
5993
|
* Fetches the repository tree structure from GitHub API.
|
|
@@ -5999,13 +6185,24 @@ class GitHubScraperStrategy extends BaseScraperStrategy {
|
|
|
5999
6185
|
async processItem(item, options, _progressCallback, signal) {
|
|
6000
6186
|
const repoInfo = this.parseGitHubUrl(options.url);
|
|
6001
6187
|
if (item.depth === 0) {
|
|
6188
|
+
if ("isBlob" in repoInfo && repoInfo.isBlob) {
|
|
6189
|
+
if (repoInfo.filePath) {
|
|
6190
|
+
logger.info(
|
|
6191
|
+
`📄 Processing single file: ${repoInfo.owner}/${repoInfo.repo}/${repoInfo.filePath}`
|
|
6192
|
+
);
|
|
6193
|
+
return { links: [`github-file://${repoInfo.filePath}`] };
|
|
6194
|
+
} else {
|
|
6195
|
+
logger.warn(
|
|
6196
|
+
`⚠️ Blob URL without file path: ${options.url}. No files to process.`
|
|
6197
|
+
);
|
|
6198
|
+
return { links: [] };
|
|
6199
|
+
}
|
|
6200
|
+
}
|
|
6002
6201
|
logger.info(
|
|
6003
6202
|
`🗂️ Discovering repository structure for ${repoInfo.owner}/${repoInfo.repo}`
|
|
6004
6203
|
);
|
|
6005
6204
|
const { tree, resolvedBranch } = await this.fetchRepositoryTree(repoInfo, signal);
|
|
6006
|
-
const fileItems = tree.tree.filter(
|
|
6007
|
-
(treeItem) => this.shouldProcessFile(treeItem, options)
|
|
6008
|
-
);
|
|
6205
|
+
const fileItems = tree.tree.filter((treeItem) => this.isWithinSubPath(treeItem.path, repoInfo.subPath)).filter((treeItem) => this.shouldProcessFile(treeItem, options));
|
|
6009
6206
|
logger.info(
|
|
6010
6207
|
`📁 Found ${fileItems.length} processable files in repository (branch: ${resolvedBranch})`
|
|
6011
6208
|
);
|
|
@@ -6039,12 +6236,15 @@ class GitHubScraperStrategy extends BaseScraperStrategy {
|
|
|
6039
6236
|
logger.warn(`⚠️ Processing error for ${filePath}: ${err.message}`);
|
|
6040
6237
|
}
|
|
6041
6238
|
const githubUrl = `https://github.com/${repoInfo.owner}/${repoInfo.repo}/blob/${this.resolvedBranch || repoInfo.branch || "main"}/${filePath}`;
|
|
6239
|
+
const processedTitle = processed.metadata.title;
|
|
6240
|
+
const hasValidTitle = typeof processedTitle === "string" && processedTitle.trim() !== "";
|
|
6241
|
+
const fallbackTitle = filePath.split("/").pop() || "Untitled";
|
|
6042
6242
|
return {
|
|
6043
6243
|
document: {
|
|
6044
6244
|
content: typeof processed.textContent === "string" ? processed.textContent : "",
|
|
6045
6245
|
metadata: {
|
|
6046
6246
|
url: githubUrl,
|
|
6047
|
-
title:
|
|
6247
|
+
title: hasValidTitle ? processedTitle : fallbackTitle,
|
|
6048
6248
|
library: options.library,
|
|
6049
6249
|
version: options.version
|
|
6050
6250
|
},
|
|
@@ -6057,6 +6257,26 @@ class GitHubScraperStrategy extends BaseScraperStrategy {
|
|
|
6057
6257
|
}
|
|
6058
6258
|
return { document: void 0, links: [] };
|
|
6059
6259
|
}
|
|
6260
|
+
/**
|
|
6261
|
+
* Normalize a path by removing leading and trailing slashes.
|
|
6262
|
+
*/
|
|
6263
|
+
normalizePath(path2) {
|
|
6264
|
+
return path2.replace(/^\/+/, "").replace(/\/+$/, "");
|
|
6265
|
+
}
|
|
6266
|
+
isWithinSubPath(path2, subPath) {
|
|
6267
|
+
if (!subPath) {
|
|
6268
|
+
return true;
|
|
6269
|
+
}
|
|
6270
|
+
const trimmedSubPath = this.normalizePath(subPath);
|
|
6271
|
+
if (trimmedSubPath.length === 0) {
|
|
6272
|
+
return true;
|
|
6273
|
+
}
|
|
6274
|
+
const normalizedPath = this.normalizePath(path2);
|
|
6275
|
+
if (normalizedPath === trimmedSubPath) {
|
|
6276
|
+
return true;
|
|
6277
|
+
}
|
|
6278
|
+
return normalizedPath.startsWith(`${trimmedSubPath}/`);
|
|
6279
|
+
}
|
|
6060
6280
|
async scrape(options, progressCallback, signal) {
|
|
6061
6281
|
const url = new URL(options.url);
|
|
6062
6282
|
if (!url.hostname.includes("github.com")) {
|
|
@@ -6071,6 +6291,228 @@ class GitHubScraperStrategy extends BaseScraperStrategy {
|
|
|
6071
6291
|
await Promise.allSettled(this.pipelines.map((pipeline) => pipeline.close()));
|
|
6072
6292
|
}
|
|
6073
6293
|
}
|
|
6294
|
+
class GitHubWikiScraperStrategy extends BaseScraperStrategy {
|
|
6295
|
+
httpFetcher = new HttpFetcher();
|
|
6296
|
+
pipelines;
|
|
6297
|
+
constructor() {
|
|
6298
|
+
super();
|
|
6299
|
+
this.pipelines = PipelineFactory$1.createStandardPipelines();
|
|
6300
|
+
}
|
|
6301
|
+
canHandle(url) {
|
|
6302
|
+
try {
|
|
6303
|
+
const parsedUrl = new URL(url);
|
|
6304
|
+
const { hostname, pathname } = parsedUrl;
|
|
6305
|
+
return ["github.com", "www.github.com"].includes(hostname) && pathname.includes("/wiki") && pathname.match(/^\/([^/]+)\/([^/]+)\/wiki/) !== null;
|
|
6306
|
+
} catch {
|
|
6307
|
+
return false;
|
|
6308
|
+
}
|
|
6309
|
+
}
|
|
6310
|
+
/**
|
|
6311
|
+
* Parses a GitHub wiki URL to extract repository information.
|
|
6312
|
+
*/
|
|
6313
|
+
parseGitHubWikiUrl(url) {
|
|
6314
|
+
const parsedUrl = new URL(url);
|
|
6315
|
+
const match = parsedUrl.pathname.match(/^\/([^/]+)\/([^/]+)\/wiki/);
|
|
6316
|
+
if (!match) {
|
|
6317
|
+
throw new Error(`Invalid GitHub wiki URL: ${url}`);
|
|
6318
|
+
}
|
|
6319
|
+
const [, owner, repo] = match;
|
|
6320
|
+
return { owner, repo };
|
|
6321
|
+
}
|
|
6322
|
+
/**
|
|
6323
|
+
* Override shouldProcessUrl to only process URLs within the wiki scope.
|
|
6324
|
+
*/
|
|
6325
|
+
shouldProcessUrl(url, options) {
|
|
6326
|
+
try {
|
|
6327
|
+
const parsedUrl = new URL(url);
|
|
6328
|
+
const wikiInfo = this.parseGitHubWikiUrl(options.url);
|
|
6329
|
+
const expectedWikiPath = `/${wikiInfo.owner}/${wikiInfo.repo}/wiki`;
|
|
6330
|
+
if (!parsedUrl.pathname.startsWith(expectedWikiPath)) {
|
|
6331
|
+
return false;
|
|
6332
|
+
}
|
|
6333
|
+
const wikiPagePath = parsedUrl.pathname.replace(expectedWikiPath, "").replace(/^\//, "");
|
|
6334
|
+
return shouldIncludeUrl(
|
|
6335
|
+
wikiPagePath || "Home",
|
|
6336
|
+
options.includePatterns,
|
|
6337
|
+
options.excludePatterns
|
|
6338
|
+
);
|
|
6339
|
+
} catch {
|
|
6340
|
+
return false;
|
|
6341
|
+
}
|
|
6342
|
+
}
|
|
6343
|
+
async processItem(item, options, _progressCallback, signal) {
|
|
6344
|
+
const currentUrl = item.url;
|
|
6345
|
+
logger.info(
|
|
6346
|
+
`📖 Processing wiki page ${this.pageCount}/${options.maxPages}: ${currentUrl}`
|
|
6347
|
+
);
|
|
6348
|
+
try {
|
|
6349
|
+
const rawContent = await this.httpFetcher.fetch(currentUrl, { signal });
|
|
6350
|
+
let processed;
|
|
6351
|
+
for (const pipeline of this.pipelines) {
|
|
6352
|
+
if (pipeline.canProcess(rawContent)) {
|
|
6353
|
+
logger.debug(
|
|
6354
|
+
`Selected ${pipeline.constructor.name} for content type "${rawContent.mimeType}" (${currentUrl})`
|
|
6355
|
+
);
|
|
6356
|
+
const wikiOptions = { ...options, scrapeMode: ScrapeMode.Fetch };
|
|
6357
|
+
processed = await pipeline.process(rawContent, wikiOptions, this.httpFetcher);
|
|
6358
|
+
break;
|
|
6359
|
+
}
|
|
6360
|
+
}
|
|
6361
|
+
if (!processed) {
|
|
6362
|
+
logger.warn(
|
|
6363
|
+
`⚠️ Unsupported content type "${rawContent.mimeType}" for wiki page ${currentUrl}. Skipping processing.`
|
|
6364
|
+
);
|
|
6365
|
+
return { document: void 0, links: [] };
|
|
6366
|
+
}
|
|
6367
|
+
for (const err of processed.errors) {
|
|
6368
|
+
logger.warn(`⚠️ Processing error for ${currentUrl}: ${err.message}`);
|
|
6369
|
+
}
|
|
6370
|
+
const parsedUrl = new URL(currentUrl);
|
|
6371
|
+
const wikiInfo = this.parseGitHubWikiUrl(currentUrl);
|
|
6372
|
+
const wikiPagePath = parsedUrl.pathname.replace(`/${wikiInfo.owner}/${wikiInfo.repo}/wiki`, "").replace(/^\//, "");
|
|
6373
|
+
const pageTitle = wikiPagePath || "Home";
|
|
6374
|
+
const document2 = {
|
|
6375
|
+
content: typeof processed.textContent === "string" ? processed.textContent : "",
|
|
6376
|
+
metadata: {
|
|
6377
|
+
url: currentUrl,
|
|
6378
|
+
title: typeof processed.metadata.title === "string" && processed.metadata.title.trim() !== "" ? processed.metadata.title : pageTitle,
|
|
6379
|
+
library: options.library,
|
|
6380
|
+
version: options.version
|
|
6381
|
+
},
|
|
6382
|
+
contentType: rawContent.mimeType
|
|
6383
|
+
};
|
|
6384
|
+
const links = processed.links || [];
|
|
6385
|
+
const wikiLinks = links.filter((link) => {
|
|
6386
|
+
if (!link || link.trim() === "" || link === "invalid-url" || link === "not-a-url-at-all") {
|
|
6387
|
+
return false;
|
|
6388
|
+
}
|
|
6389
|
+
return true;
|
|
6390
|
+
}).map((link) => {
|
|
6391
|
+
try {
|
|
6392
|
+
return new URL(link, currentUrl).href;
|
|
6393
|
+
} catch {
|
|
6394
|
+
return null;
|
|
6395
|
+
}
|
|
6396
|
+
}).filter((link) => link !== null).filter((link) => {
|
|
6397
|
+
try {
|
|
6398
|
+
const linkUrl = new URL(link);
|
|
6399
|
+
return linkUrl.hostname === parsedUrl.hostname && linkUrl.pathname.startsWith(`/${wikiInfo.owner}/${wikiInfo.repo}/wiki`);
|
|
6400
|
+
} catch {
|
|
6401
|
+
return false;
|
|
6402
|
+
}
|
|
6403
|
+
});
|
|
6404
|
+
return { document: document2, links: wikiLinks };
|
|
6405
|
+
} catch (error) {
|
|
6406
|
+
logger.warn(`⚠️ Failed to process wiki page ${currentUrl}: ${error}`);
|
|
6407
|
+
return { document: void 0, links: [] };
|
|
6408
|
+
}
|
|
6409
|
+
}
|
|
6410
|
+
async scrape(options, progressCallback, signal) {
|
|
6411
|
+
const url = new URL(options.url);
|
|
6412
|
+
if (!url.hostname.includes("github.com") || !url.pathname.includes("/wiki")) {
|
|
6413
|
+
throw new Error("URL must be a GitHub wiki URL");
|
|
6414
|
+
}
|
|
6415
|
+
let startUrl = options.url;
|
|
6416
|
+
if (url.pathname.endsWith("/wiki") || url.pathname.endsWith("/wiki/")) {
|
|
6417
|
+
startUrl = url.pathname.endsWith("/") ? `${options.url}Home` : `${options.url}/Home`;
|
|
6418
|
+
}
|
|
6419
|
+
const wikiOptions = { ...options, url: startUrl };
|
|
6420
|
+
return super.scrape(wikiOptions, progressCallback, signal);
|
|
6421
|
+
}
|
|
6422
|
+
/**
|
|
6423
|
+
* Cleanup resources used by this strategy.
|
|
6424
|
+
*/
|
|
6425
|
+
async cleanup() {
|
|
6426
|
+
await Promise.allSettled(this.pipelines.map((pipeline) => pipeline.close()));
|
|
6427
|
+
}
|
|
6428
|
+
}
|
|
6429
|
+
/**
 * Strategy for base GitHub repository URLs (`https://github.com/<owner>/<repo>`).
 *
 * It delegates to two underlying strategies: the repository's wiki is attempted
 * first, then the repository itself is scraped with whatever page budget is
 * left. Progress reports from both phases are merged into a single stream for
 * the caller's progress callback.
 */
class GitHubScraperStrategy {
  /** Hostnames accepted by both `canHandle()` and `scrape()`. */
  static #GITHUB_HOSTS = ["github.com", "www.github.com"];

  /** Matches a bare `/owner/repo` path, with an optional trailing slash. */
  static #REPO_PATH_PATTERN = /^\/([^/]+)\/([^/]+)\/?$/;

  repoStrategy = new GitHubRepoScraperStrategy();
  wikiStrategy = new GitHubWikiScraperStrategy();

  /**
   * Reports whether `url` is a base repository URL on a supported GitHub host.
   *
   * @param url - Candidate URL string.
   * @returns `true` for `/<owner>/<repo>` paths on github.com / www.github.com;
   *   `false` otherwise, including when `url` cannot be parsed.
   */
  canHandle(url) {
    try {
      const { hostname, pathname } = new URL(url);
      if (!GitHubScraperStrategy.#GITHUB_HOSTS.includes(hostname)) {
        return false;
      }
      return GitHubScraperStrategy.#REPO_PATH_PATTERN.test(pathname);
    } catch {
      // An unparsable URL is simply not ours to handle.
      return false;
    }
  }

  /**
   * Scrapes the wiki (best effort) and then the repository for a base
   * repository URL.
   *
   * @param options - Scrape options. `options.url` must be a base repository
   *   URL; `options.maxPages` caps wiki and repository pages combined
   *   (falls back to 1000 when falsy).
   * @param progressCallback - Async callback receiving merged progress objects
   *   (`pagesScraped`, `totalPages`, `totalDiscovered` are adjusted during the
   *   repository phase to include the wiki phase's counts).
   * @param signal - Forwarded to both underlying strategies. If it exposes a
   *   truthy `aborted` flag after the wiki phase fails, the failure is rethrown
   *   instead of being treated as "wiki not available".
   * @throws {TypeError} If `options.url` cannot be parsed as a URL.
   * @throws {Error} If the URL is not on a supported GitHub host or is not a
   *   base repository URL; also rethrows any repository-phase failure.
   */
  async scrape(options, progressCallback, signal) {
    const url = new URL(options.url);
    // Exact host match, consistent with canHandle(); a substring test would
    // also accept unrelated hosts such as "evilgithub.com".
    if (!GitHubScraperStrategy.#GITHUB_HOSTS.includes(url.hostname)) {
      throw new Error("URL must be a GitHub URL");
    }
    const pathMatch = url.pathname.match(GitHubScraperStrategy.#REPO_PATH_PATTERN);
    if (!pathMatch) {
      throw new Error("URL must be a base GitHub repository URL");
    }
    const [, owner, repo] = pathMatch;
    logger.info(`🚀 Starting comprehensive GitHub scraping for ${owner}/${repo}`);

    let totalPagesDiscovered = 0;
    let wikiPagesScraped = 0;
    let wikiCompleted = false;
    let repoCompleted = false;

    // Wiki-phase reports pass through untouched while their counts are
    // recorded; repository-phase reports are offset by those recorded counts.
    const mergedProgressCallback = async (progress) => {
      let merged = progress;
      if (!wikiCompleted) {
        totalPagesDiscovered = progress.totalDiscovered;
        wikiPagesScraped = progress.pagesScraped;
      } else if (!repoCompleted) {
        merged = {
          ...progress,
          pagesScraped: wikiPagesScraped + progress.pagesScraped,
          totalPages: wikiPagesScraped + progress.totalPages,
          totalDiscovered: totalPagesDiscovered + progress.totalDiscovered
        };
      }
      await progressCallback(merged);
    };

    try {
      // Build the wiki URL from the parsed pieces so a query string or
      // fragment on options.url cannot end up in the middle of the path.
      const wikiUrl = new URL(`/${owner}/${repo}/wiki`, url.origin).href;
      const wikiOptions = { ...options, url: wikiUrl };
      logger.info(`📖 Attempting to scrape wiki for ${owner}/${repo}`);
      try {
        await this.wikiStrategy.scrape(wikiOptions, mergedProgressCallback, signal);
        wikiCompleted = true;
        logger.info(
          `✅ Completed wiki scraping for ${owner}/${repo} (${wikiPagesScraped} pages)`
        );
      } catch (error) {
        wikiCompleted = true;
        // A cancelled job must not continue into the repository phase.
        if (signal?.aborted) {
          throw error;
        }
        // Any other wiki failure is non-fatal: many repositories have no wiki.
        logger.info(`ℹ️ Wiki not available or accessible for ${owner}/${repo}: ${error}`);
      }

      const maxPages = options.maxPages || 1e3;
      const remainingPages = Math.max(0, maxPages - wikiPagesScraped);
      if (remainingPages > 0) {
        logger.info(
          `📂 Scraping repository code for ${owner}/${repo} (${remainingPages} pages remaining)`
        );
        const repoOptions = { ...options, maxPages: remainingPages };
        await this.repoStrategy.scrape(repoOptions, mergedProgressCallback, signal);
        repoCompleted = true;
        logger.info(`✅ Completed repository code scraping for ${owner}/${repo}`);
      } else {
        logger.info(
          `ℹ️ Skipping repository code scraping - page limit reached with wiki content`
        );
      }
      logger.info(`🎉 Comprehensive GitHub scraping completed for ${owner}/${repo}`);
    } catch (error) {
      logger.error(`❌ GitHub scraping failed for ${owner}/${repo}: ${error}`);
      throw error;
    }
  }

  /**
   * Cleanup resources used by both underlying strategies.
   * Both cleanups are awaited with `Promise.allSettled`, so a rejection from
   * one does not reject this method.
   */
  async cleanup() {
    await Promise.allSettled([this.repoStrategy.cleanup(), this.wikiStrategy.cleanup()]);
  }
}
|
|
6074
6516
|
class LocalFileStrategy extends BaseScraperStrategy {
|
|
6075
6517
|
fileFetcher = new FileFetcher();
|
|
6076
6518
|
pipelines;
|
|
@@ -6135,7 +6577,7 @@ class LocalFileStrategy extends BaseScraperStrategy {
|
|
|
6135
6577
|
}
|
|
6136
6578
|
}
|
|
6137
6579
|
class WebScraperStrategy extends BaseScraperStrategy {
|
|
6138
|
-
|
|
6580
|
+
fetcher = new AutoDetectFetcher();
|
|
6139
6581
|
shouldFollowLinkFn;
|
|
6140
6582
|
pipelines;
|
|
6141
6583
|
constructor(options = {}) {
|
|
@@ -6169,14 +6611,14 @@ class WebScraperStrategy extends BaseScraperStrategy {
|
|
|
6169
6611
|
headers: options.headers
|
|
6170
6612
|
// Forward custom headers
|
|
6171
6613
|
};
|
|
6172
|
-
const rawContent = await this.
|
|
6614
|
+
const rawContent = await this.fetcher.fetch(url, fetchOptions);
|
|
6173
6615
|
let processed;
|
|
6174
6616
|
for (const pipeline of this.pipelines) {
|
|
6175
6617
|
if (pipeline.canProcess(rawContent)) {
|
|
6176
6618
|
logger.debug(
|
|
6177
6619
|
`Selected ${pipeline.constructor.name} for content type "${rawContent.mimeType}" (${url})`
|
|
6178
6620
|
);
|
|
6179
|
-
processed = await pipeline.process(rawContent, options, this.
|
|
6621
|
+
processed = await pipeline.process(rawContent, options, this.fetcher);
|
|
6180
6622
|
break;
|
|
6181
6623
|
}
|
|
6182
6624
|
}
|
|
@@ -6225,10 +6667,13 @@ class WebScraperStrategy extends BaseScraperStrategy {
|
|
|
6225
6667
|
}
|
|
6226
6668
|
}
|
|
6227
6669
|
/**
|
|
6228
|
-
* Cleanup resources used by this strategy, specifically the pipeline browser instances.
|
|
6670
|
+
* Cleanup resources used by this strategy, specifically the pipeline browser instances and fetcher.
|
|
6229
6671
|
*/
|
|
6230
6672
|
async cleanup() {
|
|
6231
|
-
await Promise.allSettled(
|
|
6673
|
+
await Promise.allSettled([
|
|
6674
|
+
...this.pipelines.map((pipeline) => pipeline.close()),
|
|
6675
|
+
this.fetcher.close()
|
|
6676
|
+
]);
|
|
6232
6677
|
}
|
|
6233
6678
|
}
|
|
6234
6679
|
class NpmScraperStrategy {
|
|
@@ -6291,6 +6736,7 @@ class ScraperRegistry {
|
|
|
6291
6736
|
this.strategies = [
|
|
6292
6737
|
new NpmScraperStrategy(),
|
|
6293
6738
|
new PyPiScraperStrategy(),
|
|
6739
|
+
new GitHubWikiScraperStrategy(),
|
|
6294
6740
|
new GitHubScraperStrategy(),
|
|
6295
6741
|
new WebScraperStrategy(),
|
|
6296
6742
|
new LocalFileStrategy()
|
|
@@ -7629,17 +8075,17 @@ class LibraryNotFoundError extends ToolError {
|
|
|
7629
8075
|
}
|
|
7630
8076
|
class FetchUrlTool {
|
|
7631
8077
|
/**
|
|
7632
|
-
*
|
|
8078
|
+
* AutoDetectFetcher handles all URL types and fallback logic automatically.
|
|
7633
8079
|
*/
|
|
7634
|
-
|
|
8080
|
+
fetcher;
|
|
7635
8081
|
/**
|
|
7636
8082
|
* Collection of pipelines that will be tried in order for processing content.
|
|
7637
8083
|
* The first pipeline that can process the content type will be used.
|
|
7638
8084
|
* Currently includes HtmlPipeline, MarkdownPipeline, and TextPipeline (as fallback).
|
|
7639
8085
|
*/
|
|
7640
8086
|
pipelines;
|
|
7641
|
-
constructor(
|
|
7642
|
-
this.
|
|
8087
|
+
constructor(fetcher) {
|
|
8088
|
+
this.fetcher = fetcher;
|
|
7643
8089
|
const htmlPipeline = new HtmlPipeline();
|
|
7644
8090
|
const markdownPipeline = new MarkdownPipeline();
|
|
7645
8091
|
const textPipeline = new TextPipeline();
|
|
@@ -7653,24 +8099,21 @@ class FetchUrlTool {
|
|
|
7653
8099
|
*/
|
|
7654
8100
|
async execute(options) {
|
|
7655
8101
|
const { url, scrapeMode = ScrapeMode.Auto, headers } = options;
|
|
7656
|
-
|
|
7657
|
-
const fetcherIndex = canFetchResults.indexOf(true);
|
|
7658
|
-
if (fetcherIndex === -1) {
|
|
8102
|
+
if (!this.fetcher.canFetch(url)) {
|
|
7659
8103
|
throw new ToolError(
|
|
7660
8104
|
`Invalid URL: ${url}. Must be an HTTP/HTTPS URL or a file:// URL.`,
|
|
7661
8105
|
this.constructor.name
|
|
7662
8106
|
);
|
|
7663
8107
|
}
|
|
7664
|
-
const fetcher = this.fetchers[fetcherIndex];
|
|
7665
|
-
logger.debug(`Using fetcher "${fetcher.constructor.name}" for URL: ${url}`);
|
|
7666
8108
|
try {
|
|
7667
8109
|
logger.info(`📡 Fetching ${url}...`);
|
|
7668
|
-
const
|
|
8110
|
+
const fetchOptions = {
|
|
7669
8111
|
followRedirects: options.followRedirects ?? true,
|
|
7670
8112
|
maxRetries: 3,
|
|
7671
8113
|
headers
|
|
7672
8114
|
// propagate custom headers
|
|
7673
|
-
}
|
|
8115
|
+
};
|
|
8116
|
+
const rawContent = await this.fetcher.fetch(url, fetchOptions);
|
|
7674
8117
|
logger.info("🔄 Processing content...");
|
|
7675
8118
|
let processed;
|
|
7676
8119
|
for (const pipeline of this.pipelines) {
|
|
@@ -7692,7 +8135,7 @@ class FetchUrlTool {
|
|
|
7692
8135
|
headers
|
|
7693
8136
|
// propagate custom headers
|
|
7694
8137
|
},
|
|
7695
|
-
fetcher
|
|
8138
|
+
this.fetcher
|
|
7696
8139
|
);
|
|
7697
8140
|
break;
|
|
7698
8141
|
}
|
|
@@ -7732,7 +8175,10 @@ class FetchUrlTool {
|
|
|
7732
8175
|
this.constructor.name
|
|
7733
8176
|
);
|
|
7734
8177
|
} finally {
|
|
7735
|
-
await Promise.allSettled(
|
|
8178
|
+
await Promise.allSettled([
|
|
8179
|
+
...this.pipelines.map((pipeline) => pipeline.close()),
|
|
8180
|
+
this.fetcher.close()
|
|
8181
|
+
]);
|
|
7736
8182
|
}
|
|
7737
8183
|
}
|
|
7738
8184
|
}
|
|
@@ -8566,7 +9012,7 @@ async function initializeTools(docService, pipeline) {
|
|
|
8566
9012
|
cancelJob: new CancelJobTool(pipeline),
|
|
8567
9013
|
// clearCompletedJobs: new ClearCompletedJobsTool(pipeline),
|
|
8568
9014
|
remove: new RemoveTool(docService, pipeline),
|
|
8569
|
-
fetchUrl: new FetchUrlTool(new
|
|
9015
|
+
fetchUrl: new FetchUrlTool(new AutoDetectFetcher())
|
|
8570
9016
|
};
|
|
8571
9017
|
return tools;
|
|
8572
9018
|
}
|
|
@@ -8924,15 +9370,130 @@ const Layout = ({ title, version: version2, children }) => {
|
|
|
8924
9370
|
try {
|
|
8925
9371
|
const packageJson2 = JSON.parse(readFileSync("package.json", "utf-8"));
|
|
8926
9372
|
versionString = packageJson2.version;
|
|
9373
|
+
logger.debug(`Resolved version from package.json: ${versionString}`);
|
|
8927
9374
|
} catch (error) {
|
|
8928
9375
|
logger.error(`Error reading package.json: ${error}`);
|
|
8929
9376
|
}
|
|
8930
9377
|
}
|
|
9378
|
+
const versionInitializer = `versionUpdate({ currentVersion: ${versionString ? `'${versionString}'` : "null"} })`;
|
|
8931
9379
|
return /* @__PURE__ */ jsxs("html", { lang: "en", children: [
|
|
8932
9380
|
/* @__PURE__ */ jsxs("head", { children: [
|
|
8933
9381
|
/* @__PURE__ */ jsx("meta", { charset: "UTF-8" }),
|
|
8934
9382
|
/* @__PURE__ */ jsx("meta", { name: "viewport", content: "width=device-width, initial-scale=1.0" }),
|
|
8935
9383
|
/* @__PURE__ */ jsx("title", { safe: true, children: title }),
|
|
9384
|
+
/* @__PURE__ */ jsx(
|
|
9385
|
+
"link",
|
|
9386
|
+
{
|
|
9387
|
+
rel: "apple-touch-icon",
|
|
9388
|
+
sizes: "57x57",
|
|
9389
|
+
href: "/apple-icon-57x57.png"
|
|
9390
|
+
}
|
|
9391
|
+
),
|
|
9392
|
+
/* @__PURE__ */ jsx(
|
|
9393
|
+
"link",
|
|
9394
|
+
{
|
|
9395
|
+
rel: "apple-touch-icon",
|
|
9396
|
+
sizes: "60x60",
|
|
9397
|
+
href: "/apple-icon-60x60.png"
|
|
9398
|
+
}
|
|
9399
|
+
),
|
|
9400
|
+
/* @__PURE__ */ jsx(
|
|
9401
|
+
"link",
|
|
9402
|
+
{
|
|
9403
|
+
rel: "apple-touch-icon",
|
|
9404
|
+
sizes: "72x72",
|
|
9405
|
+
href: "/apple-icon-72x72.png"
|
|
9406
|
+
}
|
|
9407
|
+
),
|
|
9408
|
+
/* @__PURE__ */ jsx(
|
|
9409
|
+
"link",
|
|
9410
|
+
{
|
|
9411
|
+
rel: "apple-touch-icon",
|
|
9412
|
+
sizes: "76x76",
|
|
9413
|
+
href: "/apple-icon-76x76.png"
|
|
9414
|
+
}
|
|
9415
|
+
),
|
|
9416
|
+
/* @__PURE__ */ jsx(
|
|
9417
|
+
"link",
|
|
9418
|
+
{
|
|
9419
|
+
rel: "apple-touch-icon",
|
|
9420
|
+
sizes: "114x114",
|
|
9421
|
+
href: "/apple-icon-114x114.png"
|
|
9422
|
+
}
|
|
9423
|
+
),
|
|
9424
|
+
/* @__PURE__ */ jsx(
|
|
9425
|
+
"link",
|
|
9426
|
+
{
|
|
9427
|
+
rel: "apple-touch-icon",
|
|
9428
|
+
sizes: "120x120",
|
|
9429
|
+
href: "/apple-icon-120x120.png"
|
|
9430
|
+
}
|
|
9431
|
+
),
|
|
9432
|
+
/* @__PURE__ */ jsx(
|
|
9433
|
+
"link",
|
|
9434
|
+
{
|
|
9435
|
+
rel: "apple-touch-icon",
|
|
9436
|
+
sizes: "144x144",
|
|
9437
|
+
href: "/apple-icon-144x144.png"
|
|
9438
|
+
}
|
|
9439
|
+
),
|
|
9440
|
+
/* @__PURE__ */ jsx(
|
|
9441
|
+
"link",
|
|
9442
|
+
{
|
|
9443
|
+
rel: "apple-touch-icon",
|
|
9444
|
+
sizes: "152x152",
|
|
9445
|
+
href: "/apple-icon-152x152.png"
|
|
9446
|
+
}
|
|
9447
|
+
),
|
|
9448
|
+
/* @__PURE__ */ jsx(
|
|
9449
|
+
"link",
|
|
9450
|
+
{
|
|
9451
|
+
rel: "apple-touch-icon",
|
|
9452
|
+
sizes: "180x180",
|
|
9453
|
+
href: "/apple-icon-180x180.png"
|
|
9454
|
+
}
|
|
9455
|
+
),
|
|
9456
|
+
/* @__PURE__ */ jsx(
|
|
9457
|
+
"link",
|
|
9458
|
+
{
|
|
9459
|
+
rel: "icon",
|
|
9460
|
+
type: "image/png",
|
|
9461
|
+
sizes: "192x192",
|
|
9462
|
+
href: "/android-icon-192x192.png"
|
|
9463
|
+
}
|
|
9464
|
+
),
|
|
9465
|
+
/* @__PURE__ */ jsx(
|
|
9466
|
+
"link",
|
|
9467
|
+
{
|
|
9468
|
+
rel: "icon",
|
|
9469
|
+
type: "image/png",
|
|
9470
|
+
sizes: "32x32",
|
|
9471
|
+
href: "/favicon-32x32.png"
|
|
9472
|
+
}
|
|
9473
|
+
),
|
|
9474
|
+
/* @__PURE__ */ jsx(
|
|
9475
|
+
"link",
|
|
9476
|
+
{
|
|
9477
|
+
rel: "icon",
|
|
9478
|
+
type: "image/png",
|
|
9479
|
+
sizes: "96x96",
|
|
9480
|
+
href: "/favicon-96x96.png"
|
|
9481
|
+
}
|
|
9482
|
+
),
|
|
9483
|
+
/* @__PURE__ */ jsx(
|
|
9484
|
+
"link",
|
|
9485
|
+
{
|
|
9486
|
+
rel: "icon",
|
|
9487
|
+
type: "image/png",
|
|
9488
|
+
sizes: "16x16",
|
|
9489
|
+
href: "/favicon-16x16.png"
|
|
9490
|
+
}
|
|
9491
|
+
),
|
|
9492
|
+
/* @__PURE__ */ jsx("link", { rel: "shortcut icon", href: "/favicon.ico" }),
|
|
9493
|
+
/* @__PURE__ */ jsx("link", { rel: "manifest", href: "/manifest.json" }),
|
|
9494
|
+
/* @__PURE__ */ jsx("meta", { name: "msapplication-TileColor", content: "#ffffff" }),
|
|
9495
|
+
/* @__PURE__ */ jsx("meta", { name: "msapplication-TileImage", content: "/ms-icon-144x144.png" }),
|
|
9496
|
+
/* @__PURE__ */ jsx("meta", { name: "theme-color", content: "#ffffff" }),
|
|
8936
9497
|
/* @__PURE__ */ jsx("link", { rel: "stylesheet", href: "/assets/main.css" }),
|
|
8937
9498
|
/* @__PURE__ */ jsx("style", { children: `
|
|
8938
9499
|
.htmx-indicator {
|
|
@@ -8956,27 +9517,143 @@ const Layout = ({ title, version: version2, children }) => {
|
|
|
8956
9517
|
form .htmx-indicator .spinner { display: flex; }
|
|
8957
9518
|
form .htmx-indicator .search-text { display: none; }
|
|
8958
9519
|
form .spinner { display: none; }
|
|
8959
|
-
|
|
9520
|
+
` })
|
|
8960
9521
|
] }),
|
|
8961
9522
|
/* @__PURE__ */ jsxs("body", { class: "bg-gray-50 dark:bg-gray-900", children: [
|
|
8962
|
-
/* @__PURE__ */
|
|
8963
|
-
|
|
8964
|
-
|
|
8965
|
-
|
|
8966
|
-
|
|
8967
|
-
|
|
8968
|
-
|
|
8969
|
-
|
|
8970
|
-
|
|
8971
|
-
|
|
8972
|
-
|
|
8973
|
-
|
|
8974
|
-
|
|
8975
|
-
|
|
8976
|
-
|
|
8977
|
-
|
|
8978
|
-
|
|
8979
|
-
|
|
9523
|
+
/* @__PURE__ */ jsx(
|
|
9524
|
+
"header",
|
|
9525
|
+
{
|
|
9526
|
+
class: "bg-white border-b border-gray-200 dark:bg-gray-800 dark:border-gray-700",
|
|
9527
|
+
"x-data": versionInitializer,
|
|
9528
|
+
"x-init": "queueCheck()",
|
|
9529
|
+
children: /* @__PURE__ */ jsxs("div", { class: "container max-w-2xl mx-auto px-4 py-4", children: [
|
|
9530
|
+
/* @__PURE__ */ jsxs("div", { class: "hidden sm:flex items-center justify-between", children: [
|
|
9531
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center gap-3", children: [
|
|
9532
|
+
/* @__PURE__ */ jsxs(
|
|
9533
|
+
"a",
|
|
9534
|
+
{
|
|
9535
|
+
href: "https://grounded.tools",
|
|
9536
|
+
target: "_blank",
|
|
9537
|
+
rel: "noopener noreferrer",
|
|
9538
|
+
class: "text-xl font-medium text-gray-900 dark:text-white hover:text-primary-500 dark:hover:text-primary-400 transition-colors font-brand",
|
|
9539
|
+
children: [
|
|
9540
|
+
/* @__PURE__ */ jsx("span", { class: "text-primary-600 dark:text-primary-300", children: "grounded" }),
|
|
9541
|
+
/* @__PURE__ */ jsx("span", { class: "text-accent-500", children: "." }),
|
|
9542
|
+
/* @__PURE__ */ jsx("span", { class: "text-gray-900 dark:text-gray-100", children: "tools" })
|
|
9543
|
+
]
|
|
9544
|
+
}
|
|
9545
|
+
),
|
|
9546
|
+
/* @__PURE__ */ jsx("span", { class: "text-gray-400 dark:text-gray-400", children: "|" }),
|
|
9547
|
+
/* @__PURE__ */ jsx(
|
|
9548
|
+
"a",
|
|
9549
|
+
{
|
|
9550
|
+
href: "/",
|
|
9551
|
+
class: "text-lg font-semibold text-gray-900 dark:text-white hover:text-primary-500 dark:hover:text-primary-400 transition-colors font-brand",
|
|
9552
|
+
children: "Docs MCP Server"
|
|
9553
|
+
}
|
|
9554
|
+
),
|
|
9555
|
+
versionString ? /* @__PURE__ */ jsxs(
|
|
9556
|
+
"span",
|
|
9557
|
+
{
|
|
9558
|
+
safe: true,
|
|
9559
|
+
class: "text-sm font-normal text-gray-500 dark:text-slate-400",
|
|
9560
|
+
title: `Version ${versionString}`,
|
|
9561
|
+
children: [
|
|
9562
|
+
"v",
|
|
9563
|
+
versionString
|
|
9564
|
+
]
|
|
9565
|
+
}
|
|
9566
|
+
) : null
|
|
9567
|
+
] }),
|
|
9568
|
+
/* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsxs(
|
|
9569
|
+
"span",
|
|
9570
|
+
{
|
|
9571
|
+
"x-show": "hasUpdate",
|
|
9572
|
+
"x-cloak": true,
|
|
9573
|
+
class: "inline-flex items-center gap-2 rounded-full bg-amber-100 dark:bg-amber-500/20 px-3 py-1.5 text-sm font-medium text-amber-700 dark:text-amber-300 border border-amber-200 dark:border-amber-500/30",
|
|
9574
|
+
role: "status",
|
|
9575
|
+
"aria-live": "polite",
|
|
9576
|
+
children: [
|
|
9577
|
+
/* @__PURE__ */ jsx("span", { class: "flex h-4 w-4 items-center justify-center rounded-full bg-amber-500 text-amber-800 dark:text-amber-900 text-xs font-bold", children: "!" }),
|
|
9578
|
+
/* @__PURE__ */ jsx(
|
|
9579
|
+
"a",
|
|
9580
|
+
{
|
|
9581
|
+
"x-bind:href": "latestReleaseUrl",
|
|
9582
|
+
target: "_blank",
|
|
9583
|
+
rel: "noopener noreferrer",
|
|
9584
|
+
class: "hover:text-amber-800 dark:hover:text-amber-200 transition-colors",
|
|
9585
|
+
children: /* @__PURE__ */ jsx("span", { class: "mr-1", children: "Update available" })
|
|
9586
|
+
}
|
|
9587
|
+
)
|
|
9588
|
+
]
|
|
9589
|
+
}
|
|
9590
|
+
) })
|
|
9591
|
+
] }),
|
|
9592
|
+
/* @__PURE__ */ jsxs("div", { class: "sm:hidden space-y-2", children: [
|
|
9593
|
+
/* @__PURE__ */ jsx("div", { class: "flex justify-center", children: /* @__PURE__ */ jsxs(
|
|
9594
|
+
"a",
|
|
9595
|
+
{
|
|
9596
|
+
href: "https://grounded.tools",
|
|
9597
|
+
target: "_blank",
|
|
9598
|
+
rel: "noopener noreferrer",
|
|
9599
|
+
class: "text-xl font-medium text-gray-900 dark:text-white hover:text-primary-500 dark:hover:text-primary-400 transition-colors font-brand",
|
|
9600
|
+
children: [
|
|
9601
|
+
/* @__PURE__ */ jsx("span", { class: "text-primary-600 dark:text-primary-300", children: "grounded" }),
|
|
9602
|
+
/* @__PURE__ */ jsx("span", { class: "text-accent-500", children: "." }),
|
|
9603
|
+
/* @__PURE__ */ jsx("span", { class: "text-gray-900 dark:text-gray-100", children: "tools" })
|
|
9604
|
+
]
|
|
9605
|
+
}
|
|
9606
|
+
) }),
|
|
9607
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center justify-center gap-2", children: [
|
|
9608
|
+
/* @__PURE__ */ jsx(
|
|
9609
|
+
"a",
|
|
9610
|
+
{
|
|
9611
|
+
href: "/",
|
|
9612
|
+
class: "text-lg font-semibold text-gray-900 dark:text-white hover:text-primary-500 dark:hover:text-primary-400 transition-colors font-brand",
|
|
9613
|
+
children: "Docs MCP Server"
|
|
9614
|
+
}
|
|
9615
|
+
),
|
|
9616
|
+
versionString ? /* @__PURE__ */ jsxs(
|
|
9617
|
+
"span",
|
|
9618
|
+
{
|
|
9619
|
+
safe: true,
|
|
9620
|
+
class: "text-sm font-normal text-gray-500 dark:text-slate-400",
|
|
9621
|
+
title: `Version ${versionString}`,
|
|
9622
|
+
children: [
|
|
9623
|
+
"v",
|
|
9624
|
+
versionString
|
|
9625
|
+
]
|
|
9626
|
+
}
|
|
9627
|
+
) : null
|
|
9628
|
+
] }),
|
|
9629
|
+
/* @__PURE__ */ jsx("div", { class: "flex justify-center", children: /* @__PURE__ */ jsxs(
|
|
9630
|
+
"span",
|
|
9631
|
+
{
|
|
9632
|
+
"x-show": "hasUpdate",
|
|
9633
|
+
"x-cloak": true,
|
|
9634
|
+
class: "inline-flex items-center gap-2 rounded-full bg-amber-100 dark:bg-amber-500/20 px-3 py-1.5 text-sm font-medium text-amber-700 dark:text-amber-300 border border-amber-200 dark:border-amber-500/30",
|
|
9635
|
+
role: "status",
|
|
9636
|
+
"aria-live": "polite",
|
|
9637
|
+
children: [
|
|
9638
|
+
/* @__PURE__ */ jsx("span", { class: "flex h-4 w-4 items-center justify-center rounded-full bg-amber-500 text-amber-800 dark:text-amber-900 text-xs font-bold", children: "!" }),
|
|
9639
|
+
/* @__PURE__ */ jsx(
|
|
9640
|
+
"a",
|
|
9641
|
+
{
|
|
9642
|
+
"x-bind:href": "latestReleaseUrl",
|
|
9643
|
+
target: "_blank",
|
|
9644
|
+
rel: "noopener noreferrer",
|
|
9645
|
+
class: "hover:text-amber-800 dark:hover:text-amber-200 transition-colors",
|
|
9646
|
+
children: /* @__PURE__ */ jsx("span", { class: "mr-1", children: "Update available" })
|
|
9647
|
+
}
|
|
9648
|
+
)
|
|
9649
|
+
]
|
|
9650
|
+
}
|
|
9651
|
+
) })
|
|
9652
|
+
] })
|
|
9653
|
+
] })
|
|
9654
|
+
}
|
|
9655
|
+
),
|
|
9656
|
+
/* @__PURE__ */ jsx("div", { class: "container max-w-2xl mx-auto px-4 py-6", children: /* @__PURE__ */ jsx("main", { children }) }),
|
|
8980
9657
|
/* @__PURE__ */ jsx("script", { type: "module", src: "/assets/main.js" })
|
|
8981
9658
|
] })
|
|
8982
9659
|
] });
|
|
@@ -9069,7 +9746,7 @@ const VersionBadge = ({ version: version2 }) => {
|
|
|
9069
9746
|
if (!version2) {
|
|
9070
9747
|
return null;
|
|
9071
9748
|
}
|
|
9072
|
-
return /* @__PURE__ */ jsx("span", { class: "bg-
|
|
9749
|
+
return /* @__PURE__ */ jsx("span", { class: "bg-primary-100 text-primary-800 text-xs font-medium me-2 px-1.5 py-0.5 rounded dark:bg-primary-900 dark:text-primary-300", children: /* @__PURE__ */ jsx("span", { safe: true, children: version2 }) });
|
|
9073
9750
|
};
|
|
9074
9751
|
function getStatusClasses(status) {
|
|
9075
9752
|
const baseClasses = "px-1.5 py-0.5 text-xs font-medium rounded";
|
|
@@ -9478,7 +10155,7 @@ const ScrapeFormContent = ({
|
|
|
9478
10155
|
"x-model": "url",
|
|
9479
10156
|
"x-on:input": "checkUrlPath",
|
|
9480
10157
|
"x-on:paste": "$nextTick(() => checkUrlPath())",
|
|
9481
|
-
class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-
|
|
10158
|
+
class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
9482
10159
|
}
|
|
9483
10160
|
),
|
|
9484
10161
|
/* @__PURE__ */ jsx(
|
|
@@ -9519,7 +10196,7 @@ const ScrapeFormContent = ({
|
|
|
9519
10196
|
name: "library",
|
|
9520
10197
|
id: "library",
|
|
9521
10198
|
required: true,
|
|
9522
|
-
class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-
|
|
10199
|
+
class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
9523
10200
|
}
|
|
9524
10201
|
)
|
|
9525
10202
|
] }),
|
|
@@ -9541,7 +10218,7 @@ const ScrapeFormContent = ({
|
|
|
9541
10218
|
type: "text",
|
|
9542
10219
|
name: "version",
|
|
9543
10220
|
id: "version",
|
|
9544
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-
|
|
10221
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
9545
10222
|
}
|
|
9546
10223
|
)
|
|
9547
10224
|
] }),
|
|
@@ -9568,7 +10245,7 @@ const ScrapeFormContent = ({
|
|
|
9568
10245
|
id: "maxPages",
|
|
9569
10246
|
min: "1",
|
|
9570
10247
|
placeholder: "1000",
|
|
9571
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-
|
|
10248
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
9572
10249
|
}
|
|
9573
10250
|
)
|
|
9574
10251
|
] }),
|
|
@@ -9592,7 +10269,7 @@ const ScrapeFormContent = ({
|
|
|
9592
10269
|
id: "maxDepth",
|
|
9593
10270
|
min: "0",
|
|
9594
10271
|
placeholder: "3",
|
|
9595
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-
|
|
10272
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
9596
10273
|
}
|
|
9597
10274
|
)
|
|
9598
10275
|
] }),
|
|
@@ -9625,7 +10302,7 @@ const ScrapeFormContent = ({
|
|
|
9625
10302
|
{
|
|
9626
10303
|
name: "scope",
|
|
9627
10304
|
id: "scope",
|
|
9628
|
-
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-
|
|
10305
|
+
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
|
|
9629
10306
|
children: [
|
|
9630
10307
|
/* @__PURE__ */ jsx("option", { value: "subpages", selected: true, children: "Subpages (Default)" }),
|
|
9631
10308
|
/* @__PURE__ */ jsx("option", { value: "hostname", children: "Hostname" }),
|
|
@@ -9653,7 +10330,7 @@ const ScrapeFormContent = ({
|
|
|
9653
10330
|
id: "includePatterns",
|
|
9654
10331
|
rows: "2",
|
|
9655
10332
|
placeholder: "e.g. docs/* or /api\\/v1.*/",
|
|
9656
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-
|
|
10333
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
9657
10334
|
}
|
|
9658
10335
|
)
|
|
9659
10336
|
] }),
|
|
@@ -9676,7 +10353,7 @@ const ScrapeFormContent = ({
|
|
|
9676
10353
|
id: "excludePatterns",
|
|
9677
10354
|
rows: "5",
|
|
9678
10355
|
safe: true,
|
|
9679
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-
|
|
10356
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white font-mono text-xs",
|
|
9680
10357
|
children: defaultExcludePatternsText
|
|
9681
10358
|
}
|
|
9682
10359
|
),
|
|
@@ -9708,7 +10385,7 @@ const ScrapeFormContent = ({
|
|
|
9708
10385
|
{
|
|
9709
10386
|
name: "scrapeMode",
|
|
9710
10387
|
id: "scrapeMode",
|
|
9711
|
-
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-
|
|
10388
|
+
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-primary-500 focus:border-primary-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
|
|
9712
10389
|
children: [
|
|
9713
10390
|
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
|
|
9714
10391
|
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
|
|
@@ -9767,7 +10444,7 @@ const ScrapeFormContent = ({
|
|
|
9767
10444
|
"button",
|
|
9768
10445
|
{
|
|
9769
10446
|
type: "button",
|
|
9770
|
-
class: "mt-1 px-2 py-0.5 bg-
|
|
10447
|
+
class: "mt-1 px-2 py-0.5 bg-primary-100 dark:bg-primary-900 text-primary-700 dark:text-primary-200 rounded text-xs",
|
|
9771
10448
|
"x-on:click": "headers.push({ name: '', value: '' })",
|
|
9772
10449
|
children: "+ Add Header"
|
|
9773
10450
|
}
|
|
@@ -9782,7 +10459,7 @@ const ScrapeFormContent = ({
|
|
|
9782
10459
|
name: "followRedirects",
|
|
9783
10460
|
type: "checkbox",
|
|
9784
10461
|
checked: true,
|
|
9785
|
-
class: "h-4 w-4 text-
|
|
10462
|
+
class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
9786
10463
|
}
|
|
9787
10464
|
),
|
|
9788
10465
|
/* @__PURE__ */ jsx(
|
|
@@ -9802,7 +10479,7 @@ const ScrapeFormContent = ({
|
|
|
9802
10479
|
name: "ignoreErrors",
|
|
9803
10480
|
type: "checkbox",
|
|
9804
10481
|
checked: true,
|
|
9805
|
-
class: "h-4 w-4 text-
|
|
10482
|
+
class: "h-4 w-4 text-primary-600 focus:ring-primary-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
9806
10483
|
}
|
|
9807
10484
|
),
|
|
9808
10485
|
/* @__PURE__ */ jsx(
|
|
@@ -9820,7 +10497,7 @@ const ScrapeFormContent = ({
|
|
|
9820
10497
|
"button",
|
|
9821
10498
|
{
|
|
9822
10499
|
type: "submit",
|
|
9823
|
-
class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-
|
|
10500
|
+
class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-primary-600 hover:bg-primary-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-primary-500",
|
|
9824
10501
|
children: "Queue Job"
|
|
9825
10502
|
}
|
|
9826
10503
|
) })
|
|
@@ -10037,6 +10714,7 @@ const VersionDetailsRow = ({
|
|
|
10037
10714
|
"span",
|
|
10038
10715
|
{
|
|
10039
10716
|
"x-show": `$store.confirmingAction.type === 'version-delete' && $store.confirmingAction.id === '${libraryName}:${versionParam}' && !$store.confirmingAction.isDeleting`,
|
|
10717
|
+
class: "mx-1",
|
|
10040
10718
|
children: [
|
|
10041
10719
|
"Confirm?",
|
|
10042
10720
|
/* @__PURE__ */ jsx("span", { class: "sr-only", children: "Confirm delete" })
|
|
@@ -10061,34 +10739,48 @@ const VersionDetailsRow = ({
|
|
|
10061
10739
|
)
|
|
10062
10740
|
);
|
|
10063
10741
|
};
|
|
10064
|
-
const LibraryDetailCard = ({ library }) =>
|
|
10065
|
-
|
|
10066
|
-
|
|
10067
|
-
|
|
10068
|
-
|
|
10069
|
-
|
|
10070
|
-
|
|
10071
|
-
|
|
10072
|
-
|
|
10073
|
-
progress: v.progress,
|
|
10074
|
-
counts: {
|
|
10075
|
-
documents: v.documentCount,
|
|
10076
|
-
uniqueUrls: v.uniqueUrlCount
|
|
10077
|
-
},
|
|
10078
|
-
indexedAt: v.indexedAt,
|
|
10079
|
-
sourceUrl: v.sourceUrl ?? void 0
|
|
10080
|
-
};
|
|
10081
|
-
return /* @__PURE__ */ jsx(
|
|
10082
|
-
VersionDetailsRow,
|
|
10742
|
+
/**
 * Renders a card for one library: its name, a link to the first listed
 * version's source URL (when present), and one `VersionDetailsRow` per version.
 *
 * Versions are displayed in the reverse of the order given in
 * `library.versions`; the input array itself is left untouched.
 *
 * @param props.library - Object with a `name` and an optional `versions` array
 *   whose items provide `version`, `status`, `progress`, `documentCount`,
 *   `uniqueUrlCount`, `indexedAt` and optionally `sourceUrl`.
 * @returns The JSX element for the card.
 */
const LibraryDetailCard = ({ library }) => {
  // Reverse a copy: Array.prototype.reverse() works in place, so reversing
  // library.versions directly would mutate the caller's data and flip the
  // display order on every subsequent render of the same object.
  const versions = [...(library.versions ?? [])].reverse();
  // First entry after reversal; presumably the newest version, assuming the
  // input is ordered oldest-to-newest — TODO confirm against the data source.
  const latestVersion = versions[0];
  return (
    // Use Flowbite Card structure with updated padding and border, and white background
    /* @__PURE__ */ jsxs("div", { class: "block p-4 bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-300 dark:border-gray-600 mb-4", children: [
      /* @__PURE__ */ jsx("h3", { class: "text-lg font-medium text-gray-900 dark:text-white mb-1", children: /* @__PURE__ */ jsx("span", { safe: true, children: library.name }) }),
      latestVersion?.sourceUrl ? /* @__PURE__ */ jsx("div", { class: "text-sm text-gray-500 dark:text-gray-400", children: /* @__PURE__ */ jsx(
        "a",
        {
          href: latestVersion.sourceUrl,
          target: "_blank",
          class: "hover:underline",
          safe: true,
          children: latestVersion.sourceUrl
        }
      ) }) : null,
      /* @__PURE__ */ jsx("div", { class: "mt-2", children: versions.length > 0 ? versions.map((v) => {
        // Adapt the library-level version record to the shape that
        // VersionDetailsRow receives via its `version` prop.
        const adapted = {
          id: -1,
          ref: { library: library.name, version: v.version },
          status: v.status,
          progress: v.progress,
          counts: {
            documents: v.documentCount,
            uniqueUrls: v.uniqueUrlCount
          },
          indexedAt: v.indexedAt,
          sourceUrl: v.sourceUrl ?? void 0
        };
        return /* @__PURE__ */ jsx(
          VersionDetailsRow,
          {
            libraryName: library.name,
            version: adapted,
            showDelete: false
          }
        );
      }) : /* @__PURE__ */ jsx("p", { class: "text-sm text-gray-500 dark:text-gray-400 italic", children: "No versions indexed." }) })
    ] })
  );
};
|
|
10092
10784
|
const LibrarySearchCard = ({ library }) => {
|
|
10093
10785
|
return /* @__PURE__ */ jsxs("div", { class: "block p-4 bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-300 dark:border-gray-600 mb-4", children: [
|
|
10094
10786
|
/* @__PURE__ */ jsxs("h2", { class: "text-xl font-semibold mb-2 text-gray-900 dark:text-white", safe: true, children: [
|
|
@@ -10251,37 +10943,51 @@ function registerLibraryDetailRoutes(server, listLibrariesTool, searchTool) {
|
|
|
10251
10943
|
}
|
|
10252
10944
|
);
|
|
10253
10945
|
}
|
|
10254
|
-
const LibraryItem = ({ library }) =>
|
|
10255
|
-
|
|
10256
|
-
|
|
10257
|
-
|
|
10258
|
-
|
|
10259
|
-
|
|
10260
|
-
|
|
10261
|
-
|
|
10262
|
-
|
|
10263
|
-
|
|
10264
|
-
|
|
10265
|
-
|
|
10266
|
-
|
|
10267
|
-
|
|
10268
|
-
|
|
10269
|
-
|
|
10270
|
-
|
|
10271
|
-
|
|
10272
|
-
|
|
10273
|
-
|
|
10274
|
-
|
|
10275
|
-
|
|
10276
|
-
|
|
10277
|
-
}
|
|
10278
|
-
|
|
10279
|
-
|
|
10280
|
-
|
|
10281
|
-
|
|
10282
|
-
|
|
10283
|
-
|
|
10284
|
-
|
|
10946
|
+
const LibraryItem = ({ library }) => {
|
|
10947
|
+
const versions = library.versions?.reverse() || [];
|
|
10948
|
+
const latestVersion = versions[0];
|
|
10949
|
+
return (
|
|
10950
|
+
// Use Flowbite Card structure with updated padding and border, and white background
|
|
10951
|
+
/* @__PURE__ */ jsxs("div", { class: "block px-4 py-2 bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-300 dark:border-gray-600", children: [
|
|
10952
|
+
/* @__PURE__ */ jsx("h3", { class: "text-lg font-medium text-gray-900 dark:text-white", children: /* @__PURE__ */ jsx(
|
|
10953
|
+
"a",
|
|
10954
|
+
{
|
|
10955
|
+
href: `/libraries/${encodeURIComponent(library.name)}`,
|
|
10956
|
+
class: "hover:underline",
|
|
10957
|
+
children: /* @__PURE__ */ jsx("span", { safe: true, children: library.name })
|
|
10958
|
+
}
|
|
10959
|
+
) }),
|
|
10960
|
+
latestVersion?.sourceUrl ? /* @__PURE__ */ jsx("div", { class: "text-sm text-gray-500 dark:text-gray-400", children: /* @__PURE__ */ jsx(
|
|
10961
|
+
"a",
|
|
10962
|
+
{
|
|
10963
|
+
href: latestVersion.sourceUrl,
|
|
10964
|
+
target: "_blank",
|
|
10965
|
+
class: "hover:underline",
|
|
10966
|
+
safe: true,
|
|
10967
|
+
children: latestVersion.sourceUrl
|
|
10968
|
+
}
|
|
10969
|
+
) }) : null,
|
|
10970
|
+
/* @__PURE__ */ jsx("div", { class: "mt-2", children: versions.length > 0 ? versions.map((v) => {
|
|
10971
|
+
const adapted = {
|
|
10972
|
+
id: -1,
|
|
10973
|
+
ref: { library: library.name, version: v.version },
|
|
10974
|
+
status: v.status,
|
|
10975
|
+
progress: v.progress,
|
|
10976
|
+
counts: {
|
|
10977
|
+
documents: v.documentCount,
|
|
10978
|
+
uniqueUrls: v.uniqueUrlCount
|
|
10979
|
+
},
|
|
10980
|
+
indexedAt: v.indexedAt,
|
|
10981
|
+
sourceUrl: v.sourceUrl ?? void 0
|
|
10982
|
+
};
|
|
10983
|
+
return /* @__PURE__ */ jsx(VersionDetailsRow, { libraryName: library.name, version: adapted });
|
|
10984
|
+
}) : (
|
|
10985
|
+
// Display message if no versions are indexed
|
|
10986
|
+
/* @__PURE__ */ jsx("p", { class: "text-sm text-gray-500 dark:text-gray-400 italic", children: "No versions indexed." })
|
|
10987
|
+
) })
|
|
10988
|
+
] })
|
|
10989
|
+
);
|
|
10990
|
+
};
|
|
10285
10991
|
const LibraryList = ({ libraries }) => {
|
|
10286
10992
|
return /* @__PURE__ */ jsx(Fragment, { children: /* @__PURE__ */ jsx("div", { class: "space-y-2", children: libraries.map((library) => /* @__PURE__ */ jsx(LibraryItem, { library })) }) });
|
|
10287
10993
|
};
|
|
@@ -13064,7 +13770,7 @@ async function fetchUrlAction(url, options) {
|
|
|
13064
13770
|
hasHeaders: options.header.length > 0
|
|
13065
13771
|
});
|
|
13066
13772
|
const headers = parseHeaders(options.header);
|
|
13067
|
-
const fetchUrlTool = new FetchUrlTool(new
|
|
13773
|
+
const fetchUrlTool = new FetchUrlTool(new AutoDetectFetcher());
|
|
13068
13774
|
const content = await fetchUrlTool.execute({
|
|
13069
13775
|
url,
|
|
13070
13776
|
followRedirects: options.followRedirects,
|