@zhafron/mcp-web-search 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 tickernelz
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,140 @@
1
+ # mcp-web-search
2
+
3
+ MCP server: web search, Wikipedia summaries, and URL content extraction. No API keys required.
4
+
5
+ Version: 1.0.0
6
+
7
+ ## Features
8
+
9
+ - search_web - Two-tier web search (DuckDuckGo HTML / Puppeteer/Bing)
10
+ - fetch_url - Extract content from URLs with semantic truncation
11
+ - summarize_url - Fetch and summarize URL content
12
+ - wiki_get - Wikipedia summary by language
13
+ - wiki_multi - Wikipedia summaries in multiple languages
14
+
15
+ ## Requirements
16
+
17
+ - Node.js 18+
18
+ - Windows/macOS/Linux
19
+ - Chrome/Chromium (for deep search mode)
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ npm install
25
+ ```
26
+
27
+ ## Chrome Installation
28
+
29
+ | OS | Command |
30
+ |----|---------|
31
+ | Ubuntu/Debian | sudo apt install chromium-browser |
32
+ | Fedora | sudo dnf install chromium |
33
+ | Arch | sudo pacman -S chromium |
34
+ | macOS | brew install --cask google-chrome |
35
+
36
+ Custom path: `export CHROME_PATH=/path/to/chrome`
37
+
38
+ ## Commands
39
+
40
+ ```bash
41
+ npm run dev # Development
42
+ npm run build # Build
43
+ npm run start # Production
44
+ npm test # Run tests
45
+ npm run format # Format code
46
+ ```
47
+
48
+ ## Environment Variables
49
+
50
+ | Variable | Default | Description |
51
+ |----------|---------|-------------|
52
+ | USER_AGENT | mcp-web-search/1.0 | User agent string |
53
+ | HTTP_TIMEOUT | 15000 | Request timeout (ms) |
54
+ | MAX_RESULTS | 10 | Default search limit |
55
+ | LANG_DEFAULT | en | Default language |
56
+ | MAX_BYTES | 20971520 | Max download size |
57
+ | CHROME_PATH | auto-detect | Chrome executable path |
58
+
59
+ SSRF Protection: Blocks localhost, 127.0.0.1, ::1, .local domains.
60
+
61
+ ## Tool Reference
62
+
63
+ ### search_web
64
+
65
+ Two-tier web search.
66
+
67
+ Input: `{ q: string, limit?: number, lang?: string, mode?: "fast"|"deep"|"auto" }`
68
+
69
+ Output: `{ items: Array<{ title, url, snippet?, source }>, modeUsed, enginesUsed, escalated }`
70
+
71
+ Example: `{ "q": "Node.js LTS", "mode": "fast", "limit": 5 }`
72
+
73
+ ### fetch_url
74
+
75
+ Extract content with intelligent truncation.
76
+
77
+ Input: `{ url: string, mode?: "compact"|"standard"|"full", max_length?: number, format?: "markdown"|"text"|"html" }`
78
+
79
+ | Mode | Characters | Tokens | Use Case |
80
+ |------|------------|--------|----------|
81
+ | compact | ~3000 | ~750 | Quick summaries |
82
+ | standard | ~8000 | ~2000 | Balanced (default) |
83
+ | full | unlimited | - | Full content |
84
+
85
+ max_length: Exact character limit (1000-100000), overrides mode.
86
+
87
+ format: Output format (markdown, text, html). Default: markdown.
88
+
89
+ Truncation: Semantic chunking prioritizes headings, code blocks, conclusions.
90
+
91
+ Output: `{ markdown?, text?, format, url, title?, truncated?, original_length?, truncation_ratio? }`
92
+
93
+ Examples:
94
+ - `{ "url": "https://example.com", "mode": "compact" }`
95
+ - `{ "url": "https://example.com", "format": "text" }`
96
+ - `{ "url": "https://example.com", "format": "markdown" }`
97
+ - `{ "url": "https://example.com", "max_length": 5000 }`
98
+
99
+ ### summarize_url
100
+
101
+ Fetch and summarize URL content.
102
+
103
+ Input: `{ url: string }`
104
+
105
+ ### wiki_get
106
+
107
+ Wikipedia summary by language.
108
+
109
+ Input: `{ title: string, lang?: string }`
110
+
111
+ Output: `{ lang, title, url, description?, extract?, thumbnailUrl? }`
112
+
113
+ ### wiki_multi
114
+
115
+ Wikipedia summaries in multiple languages.
116
+
117
+ Input: `{ term: string, baseLang?: string, langs?: string[] }`
118
+
119
+ ## Quick Examples
120
+
121
+ ```
122
+ search_web: { "q": "App Intents", "mode": "deep", "limit": 5 }
123
+ fetch_url: { "url": "https://example.com", "mode": "compact" }
124
+ summarize_url: { "url": "https://python.org/pep-8" }
125
+ wiki_get: { "title": "Lambda calculus", "lang": "en" }
126
+ wiki_multi: { "term": "AI", "langs": ["en", "es", "fr"] }
127
+ ```
128
+
129
+ ## Troubleshooting
130
+
131
+ | Issue | Solution |
132
+ |-------|----------|
133
+ | Chrome not found | Install Chrome or set CHROME_PATH |
134
+ | CAPTCHA/blocks | Reduce frequency, use fast mode |
135
+ | Timeout | Increase HTTP_TIMEOUT, check MAX_BYTES |
136
+ | Blocked URL | SSRF protection, public URLs only |
137
+
138
+ ## License
139
+
140
+ MIT
@@ -0,0 +1,66 @@
1
+ import { existsSync } from "fs";
2
+ import { platform } from "os";
3
+ export class ChromeNotFoundError extends Error {
4
+ constructor(message) {
5
+ super(message);
6
+ this.name = "ChromeNotFoundError";
7
+ }
8
+ }
9
+ function getDefaultChromePaths() {
10
+ const plat = platform();
11
+ if (plat === "win32") {
12
+ return [
13
+ process.env.LOCALAPPDATA + "\\Google\\Chrome\\Application\\chrome.exe",
14
+ process.env.PROGRAMFILES + "\\Google\\Chrome\\Application\\chrome.exe",
15
+ process.env["PROGRAMFILES(X86)"] + "\\Google\\Chrome\\Application\\chrome.exe",
16
+ process.env.LOCALAPPDATA + "\\Chromium\\Application\\chrome.exe",
17
+ process.env.PROGRAMFILES + "\\Chromium\\Application\\chrome.exe",
18
+ process.env["PROGRAMFILES(X86)"] + "\\Chromium\\Application\\chrome.exe"
19
+ ].filter(Boolean);
20
+ }
21
+ if (plat === "darwin") {
22
+ return [
23
+ "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
24
+ "/Applications/Chromium.app/Contents/MacOS/Chromium",
25
+ process.env.HOME + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
26
+ process.env.HOME + "/Applications/Chromium.app/Contents/MacOS/Chromium"
27
+ ].filter(Boolean);
28
+ }
29
+ return [
30
+ "/usr/bin/google-chrome",
31
+ "/usr/bin/google-chrome-stable",
32
+ "/usr/bin/chromium",
33
+ "/usr/bin/chromium-browser",
34
+ "/snap/bin/chromium",
35
+ "/usr/local/bin/chrome",
36
+ "/usr/local/bin/chromium"
37
+ ];
38
+ }
39
+ export function findChrome() {
40
+ if (process.env.CHROME_PATH) {
41
+ if (existsSync(process.env.CHROME_PATH)) {
42
+ return process.env.CHROME_PATH;
43
+ }
44
+ throw new ChromeNotFoundError(`Chrome not found at CHROME_PATH: ${process.env.CHROME_PATH}`);
45
+ }
46
+ const paths = getDefaultChromePaths();
47
+ for (const path of paths) {
48
+ if (existsSync(path)) {
49
+ return path;
50
+ }
51
+ }
52
+ const plat = platform();
53
+ let installInstructions = "";
54
+ if (plat === "win32") {
55
+ installInstructions = "Download from: https://www.google.com/chrome/";
56
+ }
57
+ else if (plat === "darwin") {
58
+ installInstructions =
59
+ "Install via: brew install --cask google-chrome\nOr download from: https://www.google.com/chrome/";
60
+ }
61
+ else {
62
+ installInstructions =
63
+ "Install via:\n Ubuntu/Debian: sudo apt install chromium-browser\n Fedora: sudo dnf install chromium\n Arch: sudo pacman -S chromium\nOr download from: https://www.google.com/chrome/";
64
+ }
65
+ throw new ChromeNotFoundError(`Chrome/Chromium not found on system.\n\n${installInstructions}\n\nAlternatively, set CHROME_PATH environment variable to your Chrome executable.`);
66
+ }
@@ -0,0 +1,164 @@
1
+ import { JSDOM } from "jsdom";
2
+ import puppeteer from "puppeteer-core";
3
+ import { findChrome } from "./chrome.js";
4
+ function uaHeaders(lang = process.env.LANG_DEFAULT || "en") {
5
+ const ua = process.env.USER_AGENT || "mcp-web-search/1.0";
6
+ const acceptLang = lang === "en" ? "en-US,en;q=0.9" : `${lang};q=0.9,en;q=0.8`;
7
+ return { "User-Agent": ua, "Accept-Language": acceptLang };
8
+ }
9
+ function toMs(env, def) {
10
+ const n = Number(env);
11
+ return Number.isFinite(n) && n > 0 ? n : def;
12
+ }
13
+ async function fetchWithTimeout(input, init = {}, timeoutMs = 15000) {
14
+ const controller = new AbortController();
15
+ const t = setTimeout(() => controller.abort(), timeoutMs);
16
+ try {
17
+ return await fetch(input, { ...init, signal: controller.signal });
18
+ }
19
+ finally {
20
+ clearTimeout(t);
21
+ }
22
+ }
23
+ const HTTP_TIMEOUT = toMs(process.env.HTTP_TIMEOUT, 15000);
24
+ function decodeDuckDuckGoRedirect(href) {
25
+ try {
26
+ const u = new URL(href, "https://duckduckgo.com/");
27
+ if (u.hostname === "duckduckgo.com" && u.pathname.startsWith("/l/")) {
28
+ const real = u.searchParams.get("uddg");
29
+ if (real)
30
+ return decodeURIComponent(real);
31
+ }
32
+ return u.toString();
33
+ }
34
+ catch {
35
+ return href;
36
+ }
37
+ }
38
+ async function ddgHtmlSearch(q, limit, lang) {
39
+ const url = new URL("https://html.duckduckgo.com/html/");
40
+ url.searchParams.set("q", q);
41
+ const res = await fetchWithTimeout(url, { headers: uaHeaders(lang) }, HTTP_TIMEOUT);
42
+ if (!res.ok)
43
+ throw new Error(`DuckDuckGo HTML ${res.status}`);
44
+ const html = await res.text();
45
+ const dom = new JSDOM(html, { url: "https://duckduckgo.com/?q=" + encodeURIComponent(q) });
46
+ const doc = dom.window.document;
47
+ const anchors = Array.from(doc.querySelectorAll("a.result__a"));
48
+ const snippets = Array.from(doc.querySelectorAll(".result__snippet"));
49
+ const items = [];
50
+ for (let i = 0; i < anchors.length && items.length < limit; i++) {
51
+ const a = anchors[i];
52
+ const title = (a.textContent || "").trim();
53
+ const href = decodeDuckDuckGoRedirect(a.getAttribute("href") || "");
54
+ if (!title || !href)
55
+ continue;
56
+ const sn = (snippets[i]?.textContent || "").trim() || undefined;
57
+ try {
58
+ const u = new URL(href);
59
+ items.push({ title, url: u.toString(), snippet: sn, source: "ddg_html" });
60
+ }
61
+ catch { }
62
+ }
63
+ return items;
64
+ }
65
+ async function bingPuppeteerSearch(q, limit, lang) {
66
+ const chromePath = findChrome();
67
+ const browser = await puppeteer.launch({
68
+ executablePath: chromePath,
69
+ headless: true,
70
+ args: [
71
+ "--no-sandbox",
72
+ "--disable-setuid-sandbox",
73
+ "--disable-dev-shm-usage",
74
+ "--disable-accelerated-2d-canvas",
75
+ "--no-first-run",
76
+ "--no-zygote",
77
+ "--disable-gpu"
78
+ ]
79
+ });
80
+ try {
81
+ const page = await browser.newPage();
82
+ await page.setUserAgent((process.env.USER_AGENT || "mcp-web-search/1.0") + " Puppeteer");
83
+ await page.setExtraHTTPHeaders({
84
+ "Accept-Language": lang === "en" ? "en-US,en;q=0.9" : `${lang};q=0.9,en;q=0.8`
85
+ });
86
+ const url = new URL("https://www.bing.com/search");
87
+ url.searchParams.set("q", q);
88
+ if (lang)
89
+ url.searchParams.set("setlang", lang);
90
+ await page.goto(url.toString(), { waitUntil: "domcontentloaded", timeout: 30000 });
91
+ const results = await page.evaluate(maxResults => {
92
+ const items = [];
93
+ const cards = document.querySelectorAll("li.b_algo");
94
+ for (const card of Array.from(cards)) {
95
+ const anchor = card.querySelector("h2 a");
96
+ if (!anchor)
97
+ continue;
98
+ const title = anchor.textContent?.trim() || "";
99
+ const href = anchor.getAttribute("href");
100
+ if (!href || !title)
101
+ continue;
102
+ let snippet = "";
103
+ const captionP = card.querySelector("div.b_caption p");
104
+ if (captionP) {
105
+ snippet = captionP.textContent?.trim() || "";
106
+ }
107
+ else {
108
+ const snippetDiv = card.querySelector("div.b_snippet");
109
+ if (snippetDiv) {
110
+ snippet = snippetDiv.textContent?.trim() || "";
111
+ }
112
+ }
113
+ try {
114
+ new URL(href);
115
+ items.push({ title, url: href, snippet: snippet || undefined });
116
+ }
117
+ catch { }
118
+ if (items.length >= maxResults)
119
+ break;
120
+ }
121
+ return items;
122
+ }, limit);
123
+ return results.map(r => ({ ...r, source: "bing_puppeteer" }));
124
+ }
125
+ finally {
126
+ await browser.close();
127
+ }
128
+ }
129
+ export async function runTwoTierSearch(opts) {
130
+ const { q } = opts;
131
+ const limit = Math.max(1, Math.min(Number(opts.limit ?? (Number(process.env.MAX_RESULTS) || 10)), 50));
132
+ const lang = opts.lang ?? (process.env.LANG_DEFAULT || "en");
133
+ const mode = opts.mode ?? "auto";
134
+ const enginesUsed = [];
135
+ const diagnostics = {};
136
+ if (mode === "fast") {
137
+ const fast = await ddgHtmlSearch(q, limit, lang);
138
+ enginesUsed.push("ddg_html");
139
+ diagnostics["fastCount"] = fast.length;
140
+ return { items: fast, modeUsed: "fast", enginesUsed, escalated: false, diagnostics };
141
+ }
142
+ if (mode === "deep") {
143
+ const deep = await bingPuppeteerSearch(q, limit, lang);
144
+ enginesUsed.push("bing_puppeteer");
145
+ diagnostics["deepCount"] = deep.length;
146
+ return { items: deep, modeUsed: "deep", enginesUsed, escalated: false, diagnostics };
147
+ }
148
+ const fast = await ddgHtmlSearch(q, limit, lang);
149
+ enginesUsed.push("ddg_html");
150
+ diagnostics["fastCount"] = fast.length;
151
+ if (fast.length < Math.min(3, limit)) {
152
+ const deep = await bingPuppeteerSearch(q, limit, lang);
153
+ enginesUsed.push("bing_puppeteer");
154
+ diagnostics["deepCount"] = deep.length;
155
+ return {
156
+ items: [...fast, ...deep].slice(0, limit),
157
+ modeUsed: "auto",
158
+ enginesUsed,
159
+ escalated: true,
160
+ diagnostics
161
+ };
162
+ }
163
+ return { items: fast, modeUsed: "auto", enginesUsed, escalated: false, diagnostics };
164
+ }
@@ -0,0 +1,187 @@
1
+ import { JSDOM } from "jsdom";
2
+ import { Readability } from "@mozilla/readability";
3
+ import { extractWithReadabilityAlt } from "./extractors/readability-alt.js";
4
+ import { htmlToMarkdown } from "./extractors/markdown.js";
5
+ import { applySmartTruncation } from "./extractors/truncation.js";
6
+ function uaHeaders() {
7
+ const ua = process.env.USER_AGENT || "mcp-web-search/1.0";
8
+ const lang = process.env.LANG_DEFAULT || "en";
9
+ const accept = lang === "en" ? "en-US,en;q=0.9" : `${lang};q=0.9,en;q=0.8`;
10
+ return { "User-Agent": ua, "Accept-Language": accept };
11
+ }
12
+ function toMs(env, def) {
13
+ const n = Number(env);
14
+ return Number.isFinite(n) && n > 0 ? n : def;
15
+ }
16
+ async function fetchWithTimeout(input, init = {}, timeoutMs = 15000) {
17
+ const controller = new AbortController();
18
+ const t = setTimeout(() => controller.abort(), timeoutMs);
19
+ try {
20
+ return await fetch(input, { ...init, signal: controller.signal });
21
+ }
22
+ finally {
23
+ clearTimeout(t);
24
+ }
25
+ }
26
+ const HTTP_TIMEOUT = toMs(process.env.HTTP_TIMEOUT, 15000);
27
+ const MAX_BYTES = toMs(process.env.MAX_BYTES, 20 * 1024 * 1024);
28
+ function isBlockedHost(hostname) {
29
+ const lower = hostname.toLowerCase();
30
+ if (lower === "localhost" || lower === "127.0.0.1" || lower === "::1")
31
+ return true;
32
+ if (lower.endsWith(".local") || lower.endsWith(".localhost"))
33
+ return true;
34
+ return false;
35
+ }
36
+ function fallbackExtraction(html, url) {
37
+ try {
38
+ const dom = new JSDOM(html, { url });
39
+ const reader = new Readability(dom.window.document);
40
+ const article = reader.parse();
41
+ if (article) {
42
+ return {
43
+ title: article.title ?? undefined,
44
+ byline: article.byline ?? undefined,
45
+ siteName: article.siteName ?? undefined,
46
+ text: article.textContent ?? ""
47
+ };
48
+ }
49
+ const text = dom.window.document.body.textContent || "";
50
+ return { text, title: dom.window.document.title };
51
+ }
52
+ catch {
53
+ return { text: "" };
54
+ }
55
+ }
56
+ export async function fetchAndExtract(url, options) {
57
+ const u = new URL(url);
58
+ if (isBlockedHost(u.hostname)) {
59
+ throw new Error("Blocked localhost/private URL");
60
+ }
61
+ const res = await fetchWithTimeout(u.toString(), { redirect: "follow", headers: uaHeaders() }, HTTP_TIMEOUT);
62
+ if (!res.ok)
63
+ throw new Error(`Fetch ${res.status} for ${url}`);
64
+ const lenHeader = res.headers.get("content-length");
65
+ const len = Number(lenHeader || "0");
66
+ if (len > 0 && len > MAX_BYTES)
67
+ throw new Error(`Content too large: ${len} bytes`);
68
+ const ct = res.headers.get("content-type") || "";
69
+ const buf = Buffer.from(await res.arrayBuffer());
70
+ if (buf.byteLength > MAX_BYTES)
71
+ throw new Error(`Content too large (downloaded)`);
72
+ if (ct.includes("application/pdf") || u.pathname.toLowerCase().endsWith(".pdf")) {
73
+ const pdfParse = (await import("pdf-parse")).default;
74
+ const data = await pdfParse(buf);
75
+ const text = data.text || "";
76
+ const truncationResult = applySmartTruncation(text, "text", options);
77
+ return {
78
+ text: truncationResult.content,
79
+ url,
80
+ title: data.info?.Title,
81
+ length: data.numpages,
82
+ format: "text",
83
+ truncated: truncationResult.truncated,
84
+ original_length: truncationResult.original_length,
85
+ truncation_ratio: truncationResult.truncated
86
+ ? truncationResult.final_length / truncationResult.original_length
87
+ : undefined
88
+ };
89
+ }
90
+ const html = buf.toString("utf8");
91
+ const extracted = extractWithReadabilityAlt(html, url);
92
+ const requestedFormat = options?.format || "markdown";
93
+ const shouldReturnMarkdown = requestedFormat === "markdown";
94
+ const shouldReturnText = requestedFormat === "text";
95
+ const shouldReturnHtml = requestedFormat === "html";
96
+ if (extracted && extracted.textContent && extracted.textContent.length > 0) {
97
+ const markdown = htmlToMarkdown(extracted.content);
98
+ if (shouldReturnMarkdown && markdown) {
99
+ const truncationResult = applySmartTruncation(markdown, "markdown", options);
100
+ return {
101
+ title: extracted.title || undefined,
102
+ markdown: truncationResult.content,
103
+ url,
104
+ length: extracted.length,
105
+ format: "markdown",
106
+ truncated: truncationResult.truncated,
107
+ original_length: truncationResult.original_length,
108
+ truncation_ratio: truncationResult.truncated
109
+ ? truncationResult.final_length / truncationResult.original_length
110
+ : undefined
111
+ };
112
+ }
113
+ if (shouldReturnText) {
114
+ const truncationResult = applySmartTruncation(extracted.textContent, "text", options);
115
+ return {
116
+ title: extracted.title || undefined,
117
+ text: truncationResult.content,
118
+ url,
119
+ length: extracted.length,
120
+ format: "text",
121
+ truncated: truncationResult.truncated,
122
+ original_length: truncationResult.original_length,
123
+ truncation_ratio: truncationResult.truncated
124
+ ? truncationResult.final_length / truncationResult.original_length
125
+ : undefined
126
+ };
127
+ }
128
+ if (shouldReturnHtml && extracted.content) {
129
+ const truncationResult = applySmartTruncation(extracted.content, "markdown", options);
130
+ return {
131
+ title: extracted.title || undefined,
132
+ markdown: truncationResult.content,
133
+ url,
134
+ length: extracted.length,
135
+ format: "markdown",
136
+ truncated: truncationResult.truncated,
137
+ original_length: truncationResult.original_length,
138
+ truncation_ratio: truncationResult.truncated
139
+ ? truncationResult.final_length / truncationResult.original_length
140
+ : undefined
141
+ };
142
+ }
143
+ if (markdown) {
144
+ const truncationResult = applySmartTruncation(markdown, "markdown", options);
145
+ return {
146
+ title: extracted.title || undefined,
147
+ markdown: truncationResult.content,
148
+ url,
149
+ length: extracted.length,
150
+ format: "markdown",
151
+ truncated: truncationResult.truncated,
152
+ original_length: truncationResult.original_length,
153
+ truncation_ratio: truncationResult.truncated
154
+ ? truncationResult.final_length / truncationResult.original_length
155
+ : undefined
156
+ };
157
+ }
158
+ const truncationResult = applySmartTruncation(extracted.textContent, "text", options);
159
+ return {
160
+ title: extracted.title || undefined,
161
+ text: truncationResult.content,
162
+ url,
163
+ length: extracted.length,
164
+ format: "text",
165
+ truncated: truncationResult.truncated,
166
+ original_length: truncationResult.original_length,
167
+ truncation_ratio: truncationResult.truncated
168
+ ? truncationResult.final_length / truncationResult.original_length
169
+ : undefined
170
+ };
171
+ }
172
+ const fallback = fallbackExtraction(html, url);
173
+ const truncationResult = applySmartTruncation(fallback.text, "text", options);
174
+ return {
175
+ title: fallback.title,
176
+ byline: fallback.byline,
177
+ siteName: fallback.siteName,
178
+ text: truncationResult.content,
179
+ url,
180
+ format: "text",
181
+ truncated: truncationResult.truncated,
182
+ original_length: truncationResult.original_length,
183
+ truncation_ratio: truncationResult.truncated
184
+ ? truncationResult.final_length / truncationResult.original_length
185
+ : undefined
186
+ };
187
+ }
@@ -0,0 +1,40 @@
1
+ import TurndownService from "turndown";
2
+ const turndownService = new TurndownService({
3
+ headingStyle: "atx",
4
+ codeBlockStyle: "fenced",
5
+ bulletListMarker: "-",
6
+ emDelimiter: "*",
7
+ strongDelimiter: "**",
8
+ linkStyle: "inlined"
9
+ });
10
+ turndownService.addRule("removeEmptyElements", {
11
+ filter: (node) => {
12
+ return node.textContent?.trim() === "" && !["IMG", "BR", "HR"].includes(node.nodeName);
13
+ },
14
+ replacement: () => ""
15
+ });
16
+ turndownService.addRule("preserveCodeBlocks", {
17
+ filter: ["pre", "code"],
18
+ replacement: (content, node) => {
19
+ if (node.nodeName === "PRE") {
20
+ const code = node.querySelector("code");
21
+ const lang = code?.className.match(/language-(\w+)/)?.[1] || "";
22
+ return `\n\`\`\`${lang}\n${content}\n\`\`\`\n`;
23
+ }
24
+ return `\`${content}\``;
25
+ }
26
+ });
27
+ export function htmlToMarkdown(html) {
28
+ try {
29
+ if (!html || html.trim().length === 0)
30
+ return null;
31
+ const markdown = turndownService.turndown(html);
32
+ if (!markdown || markdown.trim().length === 0)
33
+ return null;
34
+ return markdown.trim();
35
+ }
36
+ catch (error) {
37
+ console.error("Markdown conversion failed:", error);
38
+ return null;
39
+ }
40
+ }
@@ -0,0 +1,110 @@
1
+ import { JSDOM } from "jsdom";
2
+ import { DEFAULT_CONFIG } from "./types.js";
3
+ function scoreNode(node, config) {
4
+ let score = 0;
5
+ const text = node.textContent || "";
6
+ const textLength = text.trim().length;
7
+ if (textLength < config.minTextLength)
8
+ return 0;
9
+ score += textLength * 0.1;
10
+ const tagName = node.tagName.toLowerCase();
11
+ const boost = config.tagBoosts[tagName] || 1.0;
12
+ score *= boost;
13
+ const className = node.className || "";
14
+ const id = node.id || "";
15
+ if (config.ignoreClasses.test(className) || config.ignoreClasses.test(id)) {
16
+ return 0;
17
+ }
18
+ const density = textLength / (node.children.length + 1);
19
+ score += density * 0.5;
20
+ const pCount = node.querySelectorAll("p").length;
21
+ score += pCount * 5;
22
+ return score;
23
+ }
24
+ function cleanNode(node) {
25
+ const toRemove = ["script", "style", "noscript", "iframe", "object", "embed"];
26
+ toRemove.forEach(tag => {
27
+ node.querySelectorAll(tag).forEach(el => el.remove());
28
+ });
29
+ node.querySelectorAll("*").forEach(el => {
30
+ const className = el.className || "";
31
+ const id = el.id || "";
32
+ if (/ads|advertisement|sponsor|promo/i.test(className) || /ads|advertisement/i.test(id)) {
33
+ el.remove();
34
+ }
35
+ });
36
+ }
37
+ function extractTextContent(node) {
38
+ let text = "";
39
+ function traverse(el) {
40
+ if (el.nodeType === 3) {
41
+ const content = el.textContent?.trim();
42
+ if (content)
43
+ text += content + " ";
44
+ }
45
+ else if (el.nodeType === 1) {
46
+ const element = el;
47
+ if (["P", "DIV", "BR", "H1", "H2", "H3", "H4", "H5", "H6"].includes(element.tagName)) {
48
+ text += "\n";
49
+ }
50
+ element.childNodes.forEach(child => traverse(child));
51
+ }
52
+ }
53
+ traverse(node);
54
+ return text.replace(/\s+/g, " ").trim();
55
+ }
56
+ export function extractWithReadabilityAlt(html, url, config = DEFAULT_CONFIG) {
57
+ try {
58
+ const dom = new JSDOM(html, { url });
59
+ const doc = dom.window.document;
60
+ const title = doc.title || "";
61
+ const candidates = Array.from(doc.querySelectorAll("article, section, main, div, [role='main']"));
62
+ if (candidates.length === 0) {
63
+ const body = doc.body;
64
+ if (!body)
65
+ return null;
66
+ cleanNode(body);
67
+ const textContent = extractTextContent(body);
68
+ return {
69
+ title,
70
+ textContent,
71
+ content: body.innerHTML,
72
+ length: textContent.length
73
+ };
74
+ }
75
+ let bestNode = null;
76
+ let bestScore = 0;
77
+ for (const candidate of candidates) {
78
+ const score = scoreNode(candidate, config);
79
+ if (score > bestScore) {
80
+ bestScore = score;
81
+ bestNode = candidate;
82
+ }
83
+ }
84
+ if (!bestNode || bestScore === 0) {
85
+ const body = doc.body;
86
+ if (!body)
87
+ return null;
88
+ cleanNode(body);
89
+ const textContent = extractTextContent(body);
90
+ return {
91
+ title,
92
+ textContent,
93
+ content: body.innerHTML,
94
+ length: textContent.length
95
+ };
96
+ }
97
+ cleanNode(bestNode);
98
+ const textContent = extractTextContent(bestNode);
99
+ return {
100
+ title,
101
+ textContent,
102
+ content: bestNode.innerHTML,
103
+ length: textContent.length
104
+ };
105
+ }
106
+ catch (error) {
107
+ console.error("Content extraction failed:", error);
108
+ return null;
109
+ }
110
+ }
@@ -0,0 +1,246 @@
1
+ const MODE_LIMITS = {
2
+ compact: 3000,
3
+ standard: 8000,
4
+ full: Infinity
5
+ };
6
+ const KEYWORDS = [
7
+ "summary",
8
+ "conclusion",
9
+ "important",
10
+ "overview",
11
+ "introduction",
12
+ "key",
13
+ "main",
14
+ "abstract"
15
+ ];
16
+ export function applySmartTruncation(content, format, options) {
17
+ const mode = options?.mode || "standard";
18
+ const maxLength = options?.max_length || MODE_LIMITS[mode] || MODE_LIMITS.standard;
19
+ if (maxLength === Infinity || content.length <= maxLength) {
20
+ return {
21
+ content,
22
+ truncated: false,
23
+ original_length: content.length,
24
+ final_length: content.length
25
+ };
26
+ }
27
+ if (format === "markdown") {
28
+ return truncateMarkdown(content, maxLength);
29
+ }
30
+ else {
31
+ return truncateText(content, maxLength);
32
+ }
33
+ }
34
+ export function truncateMarkdown(content, maxLength) {
35
+ const chunks = parseMarkdownChunks(content);
36
+ if (chunks.length === 0) {
37
+ return {
38
+ content: balancedTruncate(content, maxLength),
39
+ truncated: true,
40
+ original_length: content.length,
41
+ final_length: Math.min(content.length, maxLength)
42
+ };
43
+ }
44
+ chunks.forEach((chunk, idx) => {
45
+ chunk.score = scoreChunk(chunk, chunks.length);
46
+ chunk.position = idx;
47
+ });
48
+ const selected = selectChunks(chunks, maxLength);
49
+ const assembled = assembleChunks(selected);
50
+ return {
51
+ content: assembled,
52
+ truncated: true,
53
+ original_length: content.length,
54
+ final_length: assembled.length,
55
+ chunks_selected: selected.length,
56
+ chunks_total: chunks.length
57
+ };
58
+ }
59
+ export function truncateText(content, maxLength) {
60
+ const chunks = parseSentences(content);
61
+ if (chunks.length === 0) {
62
+ return {
63
+ content: balancedTruncate(content, maxLength),
64
+ truncated: true,
65
+ original_length: content.length,
66
+ final_length: Math.min(content.length, maxLength)
67
+ };
68
+ }
69
+ chunks.forEach((chunk, idx) => {
70
+ chunk.score = scoreChunk(chunk, chunks.length);
71
+ chunk.position = idx;
72
+ });
73
+ const selected = selectChunks(chunks, maxLength);
74
+ const assembled = assembleChunks(selected);
75
+ return {
76
+ content: assembled,
77
+ truncated: true,
78
+ original_length: content.length,
79
+ final_length: assembled.length,
80
+ chunks_selected: selected.length,
81
+ chunks_total: chunks.length
82
+ };
83
+ }
84
+ export function parseMarkdownChunks(markdown) {
85
+ const chunks = [];
86
+ const lines = markdown.split("\n");
87
+ let currentChunk = [];
88
+ let currentType = "paragraph";
89
+ let inCodeBlock = false;
90
+ const flushChunk = () => {
91
+ if (currentChunk.length > 0) {
92
+ const content = currentChunk.join("\n").trim();
93
+ if (content) {
94
+ chunks.push({
95
+ content,
96
+ type: currentType,
97
+ position: 0,
98
+ score: 0,
99
+ length: content.length
100
+ });
101
+ }
102
+ currentChunk = [];
103
+ }
104
+ };
105
+ for (const line of lines) {
106
+ if (line.startsWith("```")) {
107
+ if (inCodeBlock) {
108
+ currentChunk.push(line);
109
+ flushChunk();
110
+ inCodeBlock = false;
111
+ currentType = "paragraph";
112
+ }
113
+ else {
114
+ flushChunk();
115
+ inCodeBlock = true;
116
+ currentType = "code";
117
+ currentChunk.push(line);
118
+ }
119
+ continue;
120
+ }
121
+ if (inCodeBlock) {
122
+ currentChunk.push(line);
123
+ continue;
124
+ }
125
+ if (line.match(/^#{1,6}\s/)) {
126
+ flushChunk();
127
+ currentType = "heading";
128
+ currentChunk.push(line);
129
+ flushChunk();
130
+ currentType = "paragraph";
131
+ }
132
+ else if (line.match(/^[\s]*[-*+]\s/) || line.match(/^[\s]*\d+\.\s/)) {
133
+ if (currentType !== "list") {
134
+ flushChunk();
135
+ currentType = "list";
136
+ }
137
+ currentChunk.push(line);
138
+ }
139
+ else if (line.trim() === "") {
140
+ if (currentType === "list") {
141
+ flushChunk();
142
+ currentType = "paragraph";
143
+ }
144
+ }
145
+ else {
146
+ if (currentType === "list") {
147
+ flushChunk();
148
+ currentType = "paragraph";
149
+ }
150
+ currentChunk.push(line);
151
+ }
152
+ }
153
+ flushChunk();
154
+ return chunks;
155
+ }
156
+ export function parseSentences(text) {
157
+ const sentences = text.match(/[^.!?]+[.!?]+/g) || [];
158
+ return sentences.map(sentence => ({
159
+ content: sentence.trim(),
160
+ type: "text",
161
+ position: 0,
162
+ score: 0,
163
+ length: sentence.trim().length
164
+ }));
165
+ }
166
+ export function scoreChunk(chunk, totalChunks) {
167
+ let score = 0;
168
+ if (chunk.type === "heading") {
169
+ score += 20;
170
+ }
171
+ else if (chunk.type === "code") {
172
+ score += 10;
173
+ }
174
+ else if (chunk.type === "list") {
175
+ score += 5;
176
+ }
177
+ const positionRatio = chunk.position / Math.max(totalChunks - 1, 1);
178
+ if (positionRatio <= 0.15) {
179
+ score += 15;
180
+ }
181
+ else if (positionRatio >= 0.85) {
182
+ score += 12;
183
+ }
184
+ if (chunk.length >= 100 && chunk.length <= 1000) {
185
+ score += 5;
186
+ }
187
+ else if (chunk.length > 300) {
188
+ score += 3;
189
+ }
190
+ const lowerContent = chunk.content.toLowerCase();
191
+ for (const keyword of KEYWORDS) {
192
+ if (lowerContent.includes(keyword)) {
193
+ score += 5;
194
+ }
195
+ }
196
+ if (chunk.length < 50) {
197
+ score -= 10;
198
+ }
199
+ return score;
200
+ }
201
+ export function selectChunks(chunks, maxLength) {
202
+ const selected = [];
203
+ let currentLength = 0;
204
+ const firstHeading = chunks.find(c => c.type === "heading");
205
+ if (firstHeading) {
206
+ selected.push(firstHeading);
207
+ currentLength += firstHeading.length + 5;
208
+ }
209
+ const sortedChunks = [...chunks]
210
+ .filter(c => !selected.includes(c))
211
+ .sort((a, b) => b.score - a.score);
212
+ for (const chunk of sortedChunks) {
213
+ const chunkLength = chunk.length + 5;
214
+ if (currentLength + chunkLength <= maxLength) {
215
+ selected.push(chunk);
216
+ currentLength += chunkLength;
217
+ }
218
+ }
219
+ return selected.sort((a, b) => a.position - b.position);
220
+ }
221
+ export function assembleChunks(chunks) {
222
+ if (chunks.length === 0)
223
+ return "";
224
+ const parts = [];
225
+ let lastPosition = -1;
226
+ for (const chunk of chunks) {
227
+ if (lastPosition >= 0 && chunk.position > lastPosition + 1) {
228
+ parts.push("[...]");
229
+ }
230
+ parts.push(chunk.content);
231
+ lastPosition = chunk.position;
232
+ }
233
+ return parts.join("\n\n");
234
+ }
235
+ export function balancedTruncate(text, maxLength) {
236
+ if (text.length <= maxLength)
237
+ return text;
238
+ const startLen = Math.floor(maxLength * 0.4);
239
+ const middleLen = Math.floor(maxLength * 0.3);
240
+ const endLen = maxLength - startLen - middleLen - 10;
241
+ const start = text.slice(0, startLen);
242
+ const middleStart = Math.floor((text.length - middleLen) / 2);
243
+ const middle = text.slice(middleStart, middleStart + middleLen);
244
+ const end = text.slice(-endLen);
245
+ return `${start}\n[...]\n${middle}\n[...]\n${end}`;
246
+ }
@@ -0,0 +1,7 @@
1
// Default tuning constants for the HTML content extractor.
// NOTE(review): the consumers of these fields live outside this chunk —
// meanings below are inferred from the names; confirm against extract.js.
export const DEFAULT_CONFIG = {
    // Class-name pattern marking likely boilerplate (nav/ads/footers etc.).
    ignoreClasses: /nav|sidebar|ads|advertisement|footer|header|menu|comment/i,
    // presumably the minimum character count for text to count as content — verify.
    minTextLength: 50,
    // presumably the minimum text length for a column candidate — verify.
    columnMinText: 30,
    // presumably a ratio threshold for column-layout detection — verify.
    columnThreshold: 0.25,
    // Per-tag score multipliers favoring semantic container elements.
    tagBoosts: { article: 1.7, main: 1.5, section: 1.3 }
};
@@ -0,0 +1,92 @@
1
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
+ import { z } from "zod";
4
+ import { runTwoTierSearch } from "./engines.js";
5
+ import { fetchAndExtract } from "./extract.js";
6
// Parse an environment value as a positive integer, falling back to `def`
// when the value is missing, non-numeric, or not strictly positive.
const toInt = (v, def) => {
  const parsed = Number(v);
  if (!Number.isFinite(parsed)) return def;
  return parsed > 0 ? Math.floor(parsed) : def;
};
10
// Default result cap for search_web; overridable via the MAX_RESULTS env var.
const DEFAULT_LIMIT = toInt(process.env.MAX_RESULTS, 10);
// Single MCP server instance; all tools below are registered on it.
const server = new McpServer({ name: "mcp-web-search", version: "1.0.0" });
12
// search_web: two-tier search tool. The handler is defined separately from
// the registration so the schema literal stays readable.
const handleSearchWeb = async ({ q, limit = DEFAULT_LIMIT, lang = "en", mode = "auto" }) => {
  // Clamp the limit into the schema's [1, 50] range before searching.
  const clamped = Math.min(Math.max(1, limit), 50);
  const res = await runTwoTierSearch({ q, limit: clamped, lang, mode });
  // Trim the returned items to the caller's requested count.
  const payload = { ...res, items: res.items.slice(0, limit) };
  return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
};
server.registerTool("search_web", {
  title: "Web Search (Fast: DuckDuckGo, Deep: Puppeteer/Bing)",
  description: "Two-tier web search: runs fast DuckDuckGo HTML search by default, escalates to Puppeteer/Bing if results are insufficient. No API keys required.",
  inputSchema: {
    q: z.string(),
    limit: z.number().int().min(1).max(50).default(DEFAULT_LIMIT).optional(),
    lang: z.string().default("en").optional(),
    mode: z.enum(["fast", "deep", "auto"]).default("auto").optional()
  }
}, handleSearchWeb);
26
// fetch_url: retrieve a URL and return the extracted document as JSON text.
const handleFetchUrl = async ({ url, mode, max_length, format }) => {
  const doc = await fetchAndExtract(url, { mode, max_length, format });
  return { content: [{ type: "text", text: JSON.stringify(doc, null, 2) }] };
};
server.registerTool("fetch_url", {
  title: "Fetch and Extract URL Content",
  description: "Fetches content from a URL (HTML/PDF) and extracts readable text. Supports truncation modes: compact (~3000 chars), standard (~8000 chars, default), full (no truncation). Output formats: markdown (default), text, html.",
  inputSchema: {
    url: z.string().url(),
    mode: z.enum(["compact", "standard", "full"]).optional(),
    max_length: z.number().int().min(1000).max(100000).optional(),
    format: z.enum(["markdown", "text", "html"]).optional()
  }
}, handleFetchUrl);
39
// summarize_url: fetch a page, then ask the connected MCP client to
// summarize it via sampling (server.server.createMessage). Falls back to
// returning the raw leading content if sampling is unavailable or fails.
server.registerTool("summarize_url", {
    title: "Summarize URL Content",
    description: "Fetches content from a URL and generates a concise summary.",
    inputSchema: { url: z.string().url() }
}, async ({ url }) => {
    // Note: fetchAndExtract is outside the try, so fetch failures propagate
    // to the caller rather than hitting the fallback below.
    const doc = await fetchAndExtract(url);
    try {
        // Prefer markdown; fall back to plain text.
        const content = doc.markdown || doc.text || "";
        // Cap prompt content at 12k chars to keep the sampling request bounded.
        const prompt = `Provide a concise summary (<=10 sentences) of the following content:\n\nTitle: ${doc.title || "(none)"}\nURL: ${doc.url}\n\n--- Content ---\n${content.slice(0, 12000)}`;
        const resp = await server.server.createMessage({
            messages: [{ role: "user", content: { type: "text", text: prompt } }],
            maxTokens: 800
        });
        // Sampling may return non-text content; guard before reading .text.
        const text = resp.content && resp.content.type === "text"
            ? resp.content.text
            : "(unable to generate summary)";
        return { content: [{ type: "text", text }] };
    }
    catch {
        // Best-effort fallback: first 2000 chars of the extracted content.
        const fallback = (doc.markdown || doc.text || "").slice(0, 2000);
        return { content: [{ type: "text", text: fallback || "(no content to summarize)" }] };
    }
});
62
// wiki_get: single-language Wikipedia summary. The wikipedia module is
// imported lazily so server startup does not pay for it.
const handleWikiGet = async ({ title, lang = "en" }) => {
  const { wikiGetSummary } = await import("./wikipedia.js");
  const summary = await wikiGetSummary(title, lang);
  return { content: [{ type: "text", text: JSON.stringify(summary, null, 2) }] };
};
server.registerTool("wiki_get", {
  title: "Wikipedia: Get Summary",
  description: "Retrieves a Wikipedia summary for a given title. Supports multiple languages (default: en).",
  inputSchema: { title: z.string(), lang: z.string().default("en").optional() }
}, handleWikiGet);
71
// wiki_multi: summaries for one term across several languages, using the
// same lazily-imported wikipedia module as wiki_get.
const handleWikiMulti = async ({ term, baseLang = "en", langs = ["en"] }) => {
  const { wikiGetMultiSummary } = await import("./wikipedia.js");
  const out = await wikiGetMultiSummary(term, baseLang, langs);
  return { content: [{ type: "text", text: JSON.stringify(out, null, 2) }] };
};
server.registerTool("wiki_multi", {
  title: "Wikipedia: Multi-Language Summary",
  description: "Retrieves Wikipedia summaries in multiple languages for a given term. Uses langlinks to map titles accurately across languages.",
  inputSchema: {
    term: z.string(),
    baseLang: z.string().default("en").optional(),
    langs: z.array(z.string()).default(["en"]).optional()
  }
}, handleWikiMulti);
84
/** Connect the MCP server over stdio and keep the process alive. */
async function main() {
  const transport = new StdioServerTransport();
  await server.connect(transport);
  // Log to stderr: stdout is reserved for the MCP protocol stream.
  console.error("mcp-web-search ready (stdio)...");
}

// Fatal startup errors are reported and terminate the process.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});
@@ -0,0 +1,102 @@
1
// Build the default request headers: a configurable User-Agent plus an
// Accept-Language negotiated from the requested language (English keeps
// a region-qualified preference; other languages fall back to English).
function uaHeaders(lang = process.env.LANG_DEFAULT || "en") {
  const userAgent = process.env.USER_AGENT || "mcp-web-search/1.0";
  const acceptLanguage =
    lang === "en" ? "en-US,en;q=0.9" : `${lang};q=0.9,en;q=0.8`;
  return { "User-Agent": userAgent, "Accept-Language": acceptLanguage };
}
6
// Parse an env-supplied timeout in milliseconds; fall back to `def` when
// the value is missing, non-numeric, or not strictly positive.
function toMs(env, def) {
  const parsed = Number(env);
  if (Number.isFinite(parsed) && parsed > 0) return parsed;
  return def;
}
10
// fetch() wrapper that aborts the request once timeoutMs elapses.
// The abort timer is always cleared, whether the fetch resolves or rejects.
async function fetchWithTimeout(input, init = {}, timeoutMs = 15000) {
  const controller = new AbortController();
  const abortTimer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const response = await fetch(input, { ...init, signal: controller.signal });
    return response;
  } finally {
    clearTimeout(abortTimer);
  }
}
20
/**
 * Fetch a Wikipedia REST summary for `title` in `lang`.
 * On any failure (non-OK response, timeout, bad JSON) it degrades to a
 * minimal record containing just the language, title, and article URL.
 */
export async function wikiGetSummary(title, lang = "en") {
  const origin = `https://${lang}.wikipedia.org`;
  const articleUrl = `${origin}/wiki/${encodeURIComponent(title)}`;
  const minimal = () => ({ lang, title, url: articleUrl });

  const endpoint = new URL(`${origin}/api/rest_v1/page/summary/${encodeURIComponent(title)}`);
  try {
    const response = await fetchWithTimeout(
      endpoint,
      { headers: uaHeaders(lang) },
      toMs(process.env.HTTP_TIMEOUT, 15000)
    );
    if (!response.ok) return minimal();

    const body = (await response.json());
    return {
      lang,
      title: body.title ?? title,
      url: body.content_urls?.desktop?.page ?? articleUrl,
      description: body.description,
      extract: body.extract,
      thumbnailUrl: body.thumbnail?.source
    };
  } catch {
    return minimal();
  }
}
42
// Query the MediaWiki Action API for the langlinks of `baseTitle`,
// returning a { langCode: localizedTitle } map (empty on any failure).
async function wikiGetLanglinks(baseTitle, baseLang) {
  const endpoint = new URL(`https://${baseLang}.wikipedia.org/w/api.php`);
  const params = {
    action: "query",
    titles: baseTitle,
    prop: "langlinks",
    lllimit: "max",
    format: "json"
  };
  for (const [key, value] of Object.entries(params)) {
    endpoint.searchParams.set(key, value);
  }

  try {
    const response = await fetchWithTimeout(
      endpoint,
      { headers: uaHeaders(baseLang) },
      toMs(process.env.HTTP_TIMEOUT, 15000)
    );
    if (!response.ok) return {};

    const data = (await response.json());
    // The API keys results by page id; only one page is queried here.
    const pages = data?.query?.pages;
    const firstPage = pages && Object.values(pages)[0];
    const links = firstPage?.langlinks || [];

    const byLang = {};
    for (const link of links) byLang[link.lang] = link["*"];
    return byLang;
  } catch {
    return {};
  }
}
67
/**
 * Fetch Wikipedia summaries for `term` across several languages.
 *
 * Strategy: get the base-language summary first, then resolve localized
 * titles via langlinks; languages without a langlink fall back to querying
 * the original term directly. Per-language fetches run concurrently.
 *
 * Returns { baseLang, base, items, resolved } where `items` maps language
 * to a summary (or null on failure) and `resolved` records which title was
 * used and how it was found ("base" | "langlinks" | "direct" | "none").
 */
export async function wikiGetMultiSummary(term, baseLang = "en", langs = ["en"]) {
    // Normalize and dedupe the requested languages. NOTE(review): baseLang
    // itself is not trimmed/lowercased, so a caller passing "EN" alongside
    // langs ["en"] would query English twice under two keys — confirm intended.
    const want = Array.from(new Set(langs.map(s => s.trim().toLowerCase()).filter(Boolean)));
    if (!want.includes(baseLang))
        want.unshift(baseLang);
    const base = await wikiGetSummary(term, baseLang);
    // Map of target language -> localized title (may be empty on API failure).
    const langlinks = await wikiGetLanglinks(base.title, baseLang);
    const items = {};
    const resolved = {};
    items[baseLang] = base;
    resolved[baseLang] = { title: base.title, source: "base" };
    const tasks = want
        .filter(l => l !== baseLang)
        .map(async (l) => {
        let title;
        let source = "none";
        if (langlinks[l]) {
            title = langlinks[l];
            source = "langlinks";
        }
        else {
            // No langlink: try the original term on that language's wiki.
            title = term;
            source = "direct";
        }
        try {
            const sum = await wikiGetSummary(title, l);
            items[l] = sum;
            resolved[l] = { title: sum.title, source };
        }
        catch {
            // Defensive: wikiGetSummary catches internally, so this path
            // should be rare; record the failure explicitly.
            items[l] = null;
            resolved[l] = { title, source: "none" };
        }
    });
    await Promise.all(tasks);
    return { baseLang, base, items, resolved };
}
package/package.json ADDED
@@ -0,0 +1,64 @@
1
+ {
2
+ "name": "@zhafron/mcp-web-search",
3
+ "version": "1.0.0",
4
+ "type": "module",
5
+ "description": "MCP server: DuckDuckGo HTML search, Wikipedia summaries, and URL content extraction — no API keys required.",
6
+ "main": "dist/src/server.js",
7
+ "scripts": {
8
+ "build": "tsc -p .",
9
+ "start": "node dist/src/server.js",
10
+ "dev": "tsx src/server.ts",
11
+ "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js",
12
+ "test:watch": "node --experimental-vm-modules node_modules/jest/bin/jest.js --watch",
13
+ "format": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"",
14
+ "format:check": "prettier --check \"src/**/*.ts\" \"test/**/*.ts\""
15
+ },
16
+ "keywords": [
17
+ "mcp",
18
+ "modelcontextprotocol",
19
+ "web",
20
+ "search",
21
+ "extraction",
22
+ "html",
23
+ "pdf",
24
+ "readability",
25
+ "turndown",
26
+ "ai"
27
+ ],
28
+ "author": "tickernelz",
29
+ "license": "MIT",
30
+ "repository": {
31
+ "type": "git",
32
+ "url": "git+https://github.com/tickernelz/mcp-web-search.git"
33
+ },
34
+ "publishConfig": {
35
+ "access": "public"
36
+ },
37
+ "files": [
38
+ "dist",
39
+ "package.json",
40
+ "README.md",
41
+ "LICENSE"
42
+ ],
43
+ "dependencies": {
44
+ "@modelcontextprotocol/sdk": "^1.17.0",
45
+ "@mozilla/readability": "^0.6.0",
46
+ "jsdom": "^24.1.0",
47
+ "pdf-parse": "^1.1.1",
48
+ "puppeteer-core": "^23.11.1",
49
+ "turndown": "^7.2.2",
50
+ "zod": "^3.23.8"
51
+ },
52
+ "devDependencies": {
53
+ "@jest/globals": "^29.7.0",
54
+ "@types/jest": "^29.5.14",
55
+ "@types/jsdom": "^21.1.7",
56
+ "@types/node": "^20.11.30",
57
+ "@types/turndown": "^5.0.6",
58
+ "jest": "^29.7.0",
59
+ "prettier": "^3.8.0",
60
+ "ts-jest": "^29.2.5",
61
+ "tsx": "^4.19.0",
62
+ "typescript": "^5.5.4"
63
+ }
64
+ }