Package not found. Please check the package name and try again.

@kassol/mcp-searxng 1.0.3-custom.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,276 @@
1
+ import { isIP } from "node:net";
2
+ import { NodeHtmlMarkdown } from "node-html-markdown";
3
+ import { createProxyAgent, createDefaultAgent, ProxyType } from "./proxy.js";
4
+ import { logMessage } from "./logging.js";
5
+ import { urlCache } from "./cache.js";
6
+ import { getHttpSecurityConfig } from "./http-security.js";
7
+ import { mergeHeaders, parseHeadersFromEnv } from "./headers.js";
8
+ import { createURLFormatError, createURLSecurityPolicyError, createNetworkError, createServerError, createContentError, createConversionError, createTimeoutError, createEmptyContentWarning, createUnexpectedError } from "./error-handler.js";
9
/**
 * Reports whether a hostname names the local machine.
 *
 * The comparison is case-insensitive and ignores any trailing dots, so
 * "LOCALHOST." is treated exactly like "localhost".
 *
 * @param {string} hostname - Host portion of a URL.
 * @returns {boolean} True for "localhost" and for any "*.localhost" name.
 */
function isPrivateHostname(hostname) {
    const normalized = hostname.toLowerCase().replace(/\.+$/, "");
    if (normalized === "localhost") {
        return true;
    }
    return normalized.endsWith(".localhost");
}
13
/**
 * Reports whether a host string is an IPv4 literal in a non-public range.
 *
 * Ranges treated as private:
 *   - 0.0.0.0/8       ("this network"; blocked to match the IPv6 check,
 *                      which already rejects the unspecified address "::")
 *   - 10.0.0.0/8      (private)
 *   - 127.0.0.0/8     (loopback)
 *   - 169.254.0.0/16  (link-local)
 *   - 172.16.0.0/12   (private)
 *   - 192.168.0.0/16  (private)
 *
 * @param {string} hostname - Candidate host; anything that is not a valid
 *   dotted-quad IPv4 literal yields false.
 * @returns {boolean} True when the address falls in one of the ranges above.
 */
function isPrivateIpv4(hostname) {
    if (isIP(hostname) !== 4) {
        return false;
    }
    // isIP() has already validated the literal, so the octets parse cleanly.
    const [first, second] = hostname.split(".").map(Number);
    return (
        first === 0 ||
        first === 10 ||
        first === 127 ||
        (first === 192 && second === 168) ||
        (first === 172 && second >= 16 && second <= 31) ||
        (first === 169 && second === 254)
    );
}
23
/**
 * Reports whether a host string is an IPv6 literal in a non-public range.
 *
 * Accepts the literal with or without the square brackets that
 * `URL.hostname` puts around IPv6 addresses.
 *
 * Treated as private:
 *   - "::1" (loopback) and "::" (unspecified)
 *   - fc00::/7  (unique local addresses)
 *   - fe80::/10 (link-local)
 *   - IPv4-mapped addresses (::ffff:a.b.c.d) whose embedded IPv4 address is
 *     private according to isPrivateIpv4().
 *
 * The WHATWG URL parser serializes IPv4-mapped addresses in hexadecimal
 * ("[::ffff:127.0.0.1]" becomes "[::ffff:7f00:1]"), so both the dotted and
 * the hexadecimal spelling are recognised here.
 *
 * @param {string} hostname - Candidate host, optionally bracketed.
 * @returns {boolean} True when the address is in one of the ranges above;
 *   false for public addresses and for anything that is not an IPv6 literal.
 */
function isPrivateIPv6(hostname) {
    const isBracketed = hostname.startsWith("[") && hostname.endsWith("]");
    const addr = (isBracketed ? hostname.slice(1, -1) : hostname).toLowerCase();
    if (isIP(addr) !== 6) {
        return false;
    }
    if (addr === "::1" || addr === "::") {
        return true; // loopback / unspecified
    }
    // Require a full four-digit first group so that e.g. "fc::1" (00fc::)
    // is not mistaken for a unique local address.
    if (/^f[cd][0-9a-f]{2}:/.test(addr)) {
        return true; // ULA fc00::/7
    }
    if (/^fe[89ab][0-9a-f]:/.test(addr)) {
        return true; // link-local fe80::/10
    }
    // IPv4-mapped, dotted spelling: ::ffff:127.0.0.1
    const dotted = addr.match(/^::ffff:(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/);
    if (dotted) {
        return isPrivateIpv4(dotted[1]);
    }
    // IPv4-mapped, hexadecimal spelling as produced by URL.hostname: ::ffff:7f00:1
    const hex = addr.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/);
    if (hex) {
        const high = Number.parseInt(hex[1], 16);
        const low = Number.parseInt(hex[2], 16);
        const ipv4 = `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`;
        return isPrivateIpv4(ipv4);
    }
    return false;
}
44
/**
 * Enforces the private-address policy for an outgoing request.
 *
 * The check only applies when the HTTP security configuration has
 * `harden` enabled and `allowPrivateUrls` disabled; otherwise the function
 * returns without inspecting the URL.
 *
 * @param {URL} url - Parsed target URL; only its hostname is inspected.
 * @returns {void}
 * @throws The error built by createURLSecurityPolicyError() when the hostname
 *   is recognised as local/private by any of the hostname or IP checks.
 */
function assertUrlAllowed(url) {
    const policy = getHttpSecurityConfig();
    const blocksPrivateTargets = policy.harden && !policy.allowPrivateUrls;
    if (!blocksPrivateTargets) {
        return;
    }
    const host = url.hostname;
    const privateChecks = [isPrivateHostname, isPrivateIpv4, isPrivateIPv6];
    const targetsPrivateHost = privateChecks.some((check) => check(host));
    if (targetsPrivateHost) {
        throw createURLSecurityPolicyError(url.toString());
    }
}
53
/**
 * Returns a character window of `content`.
 *
 * @param {string} content - Text to slice.
 * @param {number} [startChar=0] - Offset of the first character to return;
 *   negative values are clamped to 0.
 * @param {number} [maxLength] - Maximum number of characters to return. A
 *   falsy value (undefined, 0) means "up to the end of the content".
 * @returns {string} The selected window, or "" when `startChar` is at or
 *   beyond the end of the content.
 */
function applyCharacterPagination(content, startChar = 0, maxLength) {
    const total = content.length;
    if (startChar >= total) {
        return "";
    }
    const from = Math.max(0, startChar);
    let until = total;
    if (maxLength) {
        until = Math.min(total, from + maxLength);
    }
    return content.slice(from, until);
}
61
/**
 * Escapes every regular-expression metacharacter in `str` so the result can
 * be embedded in a RegExp source and match the original text literally.
 *
 * @param {string} str - Text to escape.
 * @returns {string} `str` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(str) {
    const metacharacters = /[.*+?^${}()|[\]\\]/g;
    return str.replace(metacharacters, (ch) => `\\${ch}`);
}
64
/**
 * Extracts one section of a Markdown document.
 *
 * The section starts at the first ATX heading line ("#" to "######" followed
 * by whitespace) that contains `sectionHeading` (case-insensitive, matched
 * literally) and runs until the next heading of the same or a higher level,
 * or to the end of the document.
 *
 * Lines inside fenced code blocks (``` or ~~~) are never treated as headings,
 * so a shell comment such as "# install deps" inside a code sample neither
 * starts nor terminates a section. Likewise a line such as "#hashtag" (no
 * whitespace after the hashes) is not a heading.
 *
 * @param {string} markdownContent - Full Markdown text.
 * @param {string} sectionHeading - Text to look for in a heading line.
 * @returns {string} The section including its heading line, or "" when no
 *   heading matches.
 */
function extractSection(markdownContent, sectionHeading) {
    const lines = markdownContent.split('\n');
    const titlePattern = new RegExp(escapeRegExp(sectionHeading), 'i');
    const headingPattern = /^(#{1,6})\s/;
    const fencePattern = /^\s{0,3}(`{3,}|~{3,})/;
    let openFence = null; // marker that opened the current code fence, if any
    let startIndex = -1;
    let sectionLevel = 0;
    let endIndex = lines.length;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const fence = line.match(fencePattern);
        if (fence) {
            if (openFence === null) {
                openFence = fence[1];
            }
            else if (fence[1][0] === openFence[0] && fence[1].length >= openFence.length) {
                // A fence is closed by the same character, at least as long as the opener.
                openFence = null;
            }
            continue;
        }
        if (openFence !== null) {
            continue; // code sample content is never a heading
        }
        const heading = line.match(headingPattern);
        if (!heading) {
            continue;
        }
        const level = heading[1].length;
        if (startIndex === -1) {
            if (titlePattern.test(line)) {
                startIndex = i;
                sectionLevel = level;
            }
        }
        else if (level <= sectionLevel) {
            // Next heading of the same or a higher level ends the section.
            endIndex = i;
            break;
        }
    }
    if (startIndex === -1) {
        return "";
    }
    return lines.slice(startIndex, endIndex).join('\n');
}
93
/**
 * Selects paragraphs from Markdown text by 1-based position.
 *
 * Paragraphs are separated by one or more blank lines. A blank line may
 * contain whitespace and lines may end in either LF or CRLF.
 *
 * Supported range formats:
 *   - "3"    the third paragraph only
 *   - "2-4"  paragraphs two through four (inclusive)
 *   - "10-"  paragraph ten through the last paragraph
 *
 * @param {string} markdownContent - Markdown text to split into paragraphs.
 * @param {string} range - Range expression in one of the formats above.
 * @returns {string} The selected paragraphs joined by a blank line, or ""
 *   when the range is malformed, starts before 1 or past the last paragraph,
 *   or selects nothing.
 */
function extractParagraphRange(markdownContent, range) {
    const paragraphs = markdownContent
        .split(/\r?\n\s*\n/)
        .filter((paragraph) => paragraph.trim().length > 0);
    const rangeMatch = /^(\d+)(?:-(\d*))?$/.exec(String(range).trim());
    if (!rangeMatch) {
        return "";
    }
    const start = Number.parseInt(rangeMatch[1], 10) - 1; // convert to 0-based index
    const endStr = rangeMatch[2];
    if (start < 0 || start >= paragraphs.length) {
        return "";
    }
    if (endStr === undefined) {
        // Single paragraph (e.g. "3")
        return paragraphs[start];
    }
    if (endStr === "") {
        // Open-ended range (e.g. "10-")
        return paragraphs.slice(start).join('\n\n');
    }
    // Closed range (e.g. "1-5"); the 1-based inclusive end equals the
    // 0-based exclusive end expected by slice().
    const end = Number.parseInt(endStr, 10);
    return paragraphs.slice(start, end).join('\n\n');
}
119
/**
 * Lists the ATX heading lines ("#" to "######" followed by whitespace) of a
 * Markdown document, in document order.
 *
 * Lines inside fenced code blocks (``` or ~~~) are skipped so that comments
 * in code samples (e.g. "# install deps") are not reported as headings.
 *
 * @param {string} markdownContent - Markdown text to scan.
 * @returns {string} The heading lines joined by newlines, or the message
 *   "No headings found in the content." when there are none.
 */
function extractHeadings(markdownContent) {
    const fencePattern = /^\s{0,3}(`{3,}|~{3,})/;
    const headings = [];
    let openFence = null; // marker that opened the current code fence, if any
    for (const line of markdownContent.split('\n')) {
        const fence = line.match(fencePattern);
        if (fence) {
            if (openFence === null) {
                openFence = fence[1];
            }
            else if (fence[1][0] === openFence[0] && fence[1].length >= openFence.length) {
                // A fence is closed by the same character, at least as long as the opener.
                openFence = null;
            }
            continue;
        }
        if (openFence === null && /^#{1,6}\s/.test(line)) {
            headings.push(line);
        }
    }
    if (headings.length === 0) {
        return "No headings found in the content.";
    }
    return headings.join('\n');
}
127
/**
 * Narrows converted Markdown according to the caller's pagination options.
 *
 * Filters are applied in a fixed order:
 *   1. `readHeadings` - return only the heading list and ignore every other option.
 *   2. `section` - keep the matching section.
 *   3. `paragraphRange` - keep the requested paragraphs (of the section, if one was selected).
 *   4. `startChar` / `maxLength` - take a character window of what remains.
 *
 * @param {string} markdownContent - Full Markdown text.
 * @param {object} options - Pagination options; all fields are optional.
 * @returns {string} The filtered content, or a human-readable message when
 *   the requested section or paragraph range yields nothing.
 */
function applyPaginationOptions(markdownContent, options) {
    if (options.readHeadings) {
        return extractHeadings(markdownContent);
    }
    let text = markdownContent;
    if (options.section) {
        text = extractSection(text, options.section);
        if (text === "") {
            return `Section "${options.section}" not found in the content.`;
        }
    }
    if (options.paragraphRange) {
        text = extractParagraphRange(text, options.paragraphRange);
        if (text === "") {
            return `Paragraph range "${options.paragraphRange}" is invalid or out of bounds.`;
        }
    }
    const hasCharacterWindow = !(options.startChar === undefined && options.maxLength === undefined);
    return hasCharacterWindow
        ? applyCharacterPagination(text, options.startChar, options.maxLength)
        : text;
}
153
/**
 * Fetches a URL, converts the HTML response to Markdown and returns the
 * (optionally paginated) result.
 *
 * Flow:
 *   1. Return the cached Markdown for `url` if the URL cache has an entry.
 *   2. Parse the URL and apply the private-address policy (assertUrlAllowed).
 *   3. Fetch with an abort-based timeout, using a proxy dispatcher when one is
 *      configured and the default dispatcher otherwise. A User-Agent and extra
 *      headers are taken from the environment (URL_READER_USER_AGENT /
 *      USER_AGENT, URL_READER_HEADERS).
 *   4. Convert the body to Markdown, cache non-empty conversions and apply the
 *      pagination options.
 *
 * @param {object} mcpServer - Server handle passed through to logMessage().
 * @param {string} url - Absolute URL to read.
 * @param {number} [timeoutMs=10000] - Time after which the request is aborted.
 * @param {object} [paginationOptions={}] - Options forwarded to applyPaginationOptions().
 * @returns {Promise<string>} The paginated Markdown, or the value returned by
 *   createEmptyContentWarning() when conversion produced no content.
 * @throws Errors built by the error-handler helpers: URL format, URL security
 *   policy, network, server (non-2xx), content, conversion, timeout or
 *   unexpected error.
 */
export async function fetchAndConvertToMarkdown(mcpServer, url, timeoutMs = 10000, paginationOptions = {}) {
    const startTime = Date.now();
    logMessage(mcpServer, "info", `Fetching URL: ${url}`);
    // Check cache first
    const cachedEntry = urlCache.get(url);
    if (cachedEntry) {
        logMessage(mcpServer, "info", `Using cached content for URL: ${url}`);
        const result = applyPaginationOptions(cachedEntry.markdownContent, paginationOptions);
        const duration = Date.now() - startTime;
        logMessage(mcpServer, "info", `Processed cached URL: ${url} (${result.length} chars in ${duration}ms)`);
        return result;
    }
    // Validate URL format
    let parsedUrl;
    try {
        parsedUrl = new URL(url);
    }
    catch {
        logMessage(mcpServer, "error", `Invalid URL format: ${url}`);
        throw createURLFormatError(url);
    }
    // NOTE(review): only the initial URL is checked. fetch() follows redirects
    // by default, so a redirect to a private address is not re-validated here.
    assertUrlAllowed(parsedUrl);
    // Abort the request once the timeout elapses
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
    try {
        // Prepare request options with proxy support
        const requestOptions = {
            signal: controller.signal,
        };
        // Add proxy or default dispatcher (includes system CA certs for TLS)
        const proxyAgent = createProxyAgent(url, ProxyType.URL_READER);
        const dispatcher = proxyAgent ?? createDefaultAgent();
        if (dispatcher) {
            requestOptions.dispatcher = dispatcher;
        }
        // Add User-Agent header if configured (URL_READER_USER_AGENT takes priority over USER_AGENT)
        const userAgent = process.env.URL_READER_USER_AGENT || process.env.USER_AGENT;
        if (userAgent) {
            requestOptions.headers = {
                ...requestOptions.headers,
                'User-Agent': userAgent
            };
        }
        const additionalHeaders = parseHeadersFromEnv("URL_READER_HEADERS");
        if (Object.keys(additionalHeaders).length > 0) {
            requestOptions.headers = mergeHeaders(requestOptions.headers, additionalHeaders);
        }
        let response;
        try {
            // Fetch the URL with the abort signal
            response = await fetch(url, requestOptions);
        }
        catch (error) {
            // Let the timeout abort reach the outer handler so it is reported
            // via createTimeoutError() instead of being wrapped as a generic
            // network failure.
            if (error?.name === "AbortError") {
                throw error;
            }
            const context = {
                url,
                proxyAgent: !!dispatcher,
                timeout: timeoutMs
            };
            throw createNetworkError(error, context);
        }
        if (!response.ok) {
            let responseBody;
            try {
                responseBody = await response.text();
            }
            catch {
                // Best effort: the status code is still reported without the body.
                responseBody = '[Could not read response body]';
            }
            const context = { url };
            throw createServerError(response.status, response.statusText, responseBody, context);
        }
        // Retrieve HTML content
        let htmlContent;
        try {
            htmlContent = await response.text();
        }
        catch (error) {
            // A timeout while streaming the body is still a timeout.
            if (error?.name === "AbortError") {
                throw error;
            }
            throw createContentError(`Failed to read website content: ${error.message || 'Unknown error reading content'}`, url);
        }
        if (!htmlContent || htmlContent.trim().length === 0) {
            throw createContentError("Website returned empty content.", url);
        }
        // Convert HTML to Markdown
        let markdownContent;
        try {
            markdownContent = NodeHtmlMarkdown.translate(htmlContent);
        }
        catch (error) {
            throw createConversionError(error, url, htmlContent);
        }
        if (!markdownContent || markdownContent.trim().length === 0) {
            logMessage(mcpServer, "warning", `Empty content after conversion: ${url}`);
            // DON'T cache empty/failed conversions - return warning directly
            return createEmptyContentWarning(url, htmlContent.length, htmlContent);
        }
        // Only cache successful markdown conversion
        urlCache.set(url, htmlContent, markdownContent);
        // Apply pagination options
        const result = applyPaginationOptions(markdownContent, paginationOptions);
        const duration = Date.now() - startTime;
        logMessage(mcpServer, "info", `Successfully fetched and converted URL: ${url} (${result.length} chars in ${duration}ms)`);
        return result;
    }
    catch (error) {
        if (error.name === "AbortError") {
            logMessage(mcpServer, "error", `Timeout fetching URL: ${url} (${timeoutMs}ms)`);
            throw createTimeoutError(timeoutMs, url);
        }
        // Re-throw our enhanced errors
        if (error.name === 'MCPSearXNGError') {
            logMessage(mcpServer, "error", `Error fetching URL: ${url} - ${error.message}`);
            throw error;
        }
        // Catch any unexpected errors
        logMessage(mcpServer, "error", `Unexpected error fetching URL: ${url}`, error);
        const context = { url };
        throw createUnexpectedError(error, context);
    }
    finally {
        // Clean up the timeout to prevent memory leaks
        clearTimeout(timeoutId);
    }
}
package/package.json ADDED
@@ -0,0 +1,72 @@
1
+ {
2
+ "name": "@kassol/mcp-searxng",
3
+ "version": "1.0.3-custom.0",
4
+ "mcpName": "io.github.kassol/mcp-searxng",
5
+ "description": "MCP server for SearXNG integration",
6
+ "license": "MIT",
7
+ "author": "Ihor Sokoliuk (https://github.com/ihor-sokoliuk)",
8
+ "homepage": "https://github.com/kassol/mcp-searxng",
9
+ "bugs": "https://github.com/kassol/mcp-searxng/issues",
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "git+https://github.com/kassol/mcp-searxng.git"
13
+ },
14
+ "keywords": [
15
+ "mcp",
16
+ "modelcontextprotocol",
17
+ "searxng",
18
+ "search",
19
+ "web-search",
20
+ "claude",
21
+ "ai",
22
+ "pagination",
23
+ "smithery",
24
+ "url-reader"
25
+ ],
26
+ "type": "module",
27
+ "bin": {
28
+ "mcp-searxng": "dist/index.js"
29
+ },
30
+ "main": "dist/index.js",
31
+ "files": [
32
+ "dist"
33
+ ],
34
+ "publishConfig": {
35
+ "access": "public"
36
+ },
37
+ "engines": {
38
+ "node": ">=20"
39
+ },
40
+ "scripts": {
41
+ "build": "tsc && shx chmod +x dist/*.js",
42
+ "watch": "tsc --watch",
43
+ "test": "cross-env SEARXNG_URL=https://test-searx.example.com tsx __tests__/run-all.ts",
44
+ "test:coverage": "cross-env SEARXNG_URL=https://test-searx.example.com c8 --reporter=text tsx __tests__/run-all.ts",
45
+ "bootstrap": "npm install && npm run build",
46
+ "inspector": "DANGEROUSLY_OMIT_AUTH=true npx @modelcontextprotocol/inspector node dist/index.js",
47
+ "lint": "eslint src __tests__",
48
+ "postversion": "TAG=$(node scripts/update-version.js | tail -1) && git add src/index.ts .mcp/server.json && git commit --amend --no-edit && git tag -f $TAG"
49
+ },
50
+ "dependencies": {
51
+ "@modelcontextprotocol/sdk": "1.29.0",
52
+ "@types/cors": "^2.8.19",
53
+ "@types/express": "^5.0.6",
54
+ "cors": "^2.8.6",
55
+ "express": "^5.2.1",
56
+ "node-html-markdown": "^2.0.0",
57
+ "undici": "^7.24.0"
58
+ },
59
+ "devDependencies": {
60
+ "@types/node": "^22.17.2",
61
+ "@types/supertest": "^7.2.0",
62
+ "@typescript-eslint/eslint-plugin": "^8.58.0",
63
+ "@typescript-eslint/parser": "^8.58.0",
64
+ "c8": "^11.0.0",
65
+ "cross-env": "^10.1.0",
66
+ "eslint": "^10.1.0",
67
+ "shx": "^0.4.0",
68
+ "supertest": "^7.2.2",
69
+ "tsx": "^4.21.0",
70
+ "typescript": "^5.8.3"
71
+ }
72
+ }