Package not found. Please check the package name and try again.
@kassol/mcp-searxng 1.0.3-custom.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +255 -0
- package/dist/cache.d.ts +26 -0
- package/dist/cache.js +68 -0
- package/dist/error-handler.d.ts +29 -0
- package/dist/error-handler.js +148 -0
- package/dist/headers.d.ts +3 -0
- package/dist/headers.js +77 -0
- package/dist/http-security.d.ts +15 -0
- package/dist/http-security.js +52 -0
- package/dist/http-server.d.ts +3 -0
- package/dist/http-server.js +185 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.js +252 -0
- package/dist/logging.d.ts +6 -0
- package/dist/logging.js +35 -0
- package/dist/proxy.d.ts +40 -0
- package/dist/proxy.js +215 -0
- package/dist/resources.d.ts +2 -0
- package/dist/resources.js +114 -0
- package/dist/search.d.ts +2 -0
- package/dist/search.js +133 -0
- package/dist/tls-config.d.ts +19 -0
- package/dist/tls-config.js +49 -0
- package/dist/types.d.ts +18 -0
- package/dist/types.js +87 -0
- package/dist/url-reader.d.ts +10 -0
- package/dist/url-reader.js +276 -0
- package/package.json +72 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
import { isIP } from "node:net";
|
|
2
|
+
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
|
+
import { createProxyAgent, createDefaultAgent, ProxyType } from "./proxy.js";
|
|
4
|
+
import { logMessage } from "./logging.js";
|
|
5
|
+
import { urlCache } from "./cache.js";
|
|
6
|
+
import { getHttpSecurityConfig } from "./http-security.js";
|
|
7
|
+
import { mergeHeaders, parseHeadersFromEnv } from "./headers.js";
|
|
8
|
+
import { createURLFormatError, createURLSecurityPolicyError, createNetworkError, createServerError, createContentError, createConversionError, createTimeoutError, createEmptyContentWarning, createUnexpectedError } from "./error-handler.js";
|
|
9
|
+
/**
 * Reports whether a hostname refers to the local machine by name:
 * either "localhost" itself or any name under the ".localhost" suffix.
 * Letter case and trailing dots are ignored.
 *
 * @param {string} hostname - Host portion of a URL.
 * @returns {boolean} True for localhost-style names, false otherwise.
 */
function isPrivateHostname(hostname) {
    // Trailing dots ("localhost.") denote the same name, so drop them first.
    const normalized = hostname.replace(/\.+$/, "").toLowerCase();
    if (normalized === "localhost") {
        return true;
    }
    return normalized.endsWith(".localhost");
}
|
|
13
|
+
/**
 * Reports whether a dotted-decimal IPv4 literal falls in a range that must
 * not be reached from a hardened URL reader.
 *
 * Blocked ranges:
 *   - 0.0.0.0/8      "this host" (0.0.0.0 commonly reaches local services)
 *   - 10.0.0.0/8     private
 *   - 127.0.0.0/8    loopback
 *   - 169.254.0.0/16 link-local (includes cloud metadata endpoints)
 *   - 172.16.0.0/12  private
 *   - 192.168.0.0/16 private
 *
 * @param {string} hostname - Candidate host; anything that is not a valid
 *   IPv4 literal yields false.
 * @returns {boolean} True when the address is in one of the blocked ranges.
 */
function isPrivateIpv4(hostname) {
    if (isIP(hostname) !== 4) {
        return false;
    }
    // isIP() has validated the shape, so the octets are plain decimal numbers.
    const [first, second] = hostname.split(".").map(Number);
    return (first === 0 ||
        first === 10 ||
        first === 127 ||
        (first === 169 && second === 254) ||
        (first === 172 && second >= 16 && second <= 31) ||
        (first === 192 && second === 168));
}
|
|
23
|
+
/**
 * Reports whether an IPv6 literal (optionally wrapped in brackets, as
 * URL.hostname returns it) is loopback, unspecified, unique-local,
 * link-local, or an IPv4-mapped address whose embedded IPv4 is private.
 *
 * @param {string} hostname - Candidate host, e.g. "[::1]" or "fe80::1".
 * @returns {boolean} True when the address must be treated as private.
 */
function isPrivateIPv6(hostname) {
    // url.hostname wraps IPv6 in brackets (e.g. "[::1]") — strip them first
    const bracketed = hostname.startsWith("[") && hostname.endsWith("]");
    const addr = (bracketed ? hostname.slice(1, -1) : hostname).toLowerCase();
    if (isIP(addr) !== 6) {
        return false;
    }
    if (addr === "::1") {
        return true; // loopback
    }
    if (addr === "::") {
        return true; // unspecified
    }
    if (/^f[cd]/.test(addr)) {
        return true; // ULA fc00::/7
    }
    if (/^fe[89ab][0-9a-f]:/.test(addr)) {
        return true; // link-local fe80::/10
    }
    // IPv4-mapped ::ffff:<ipv4> in dotted form — delegate to the IPv4 check
    const dotted = addr.match(/^::ffff:(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/);
    if (dotted) {
        return isPrivateIpv4(dotted[1]);
    }
    // The URL parser re-serialises "[::ffff:127.0.0.1]" as "[::ffff:7f00:1]",
    // so a parsed URL never presents the dotted form. Decode the two trailing
    // hextets back into an IPv4 literal so mapped addresses cannot slip past.
    const hex = addr.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/);
    if (hex) {
        const high = Number.parseInt(hex[1], 16);
        const low = Number.parseInt(hex[2], 16);
        return isPrivateIpv4(`${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`);
    }
    return false;
}
|
|
44
|
+
/**
 * Enforces the private-address policy for an outgoing request.
 *
 * Does nothing unless the security config has `harden` enabled and
 * `allowPrivateUrls` disabled. Otherwise throws when the URL's hostname is
 * matched by any of the private-host predicates.
 *
 * @param {URL} url - Parsed target URL.
 * @throws The error built by createURLSecurityPolicyError for blocked hosts.
 */
function assertUrlAllowed(url) {
    const { harden, allowPrivateUrls } = getHttpSecurityConfig();
    const enforcing = harden && !allowPrivateUrls;
    if (!enforcing) {
        return;
    }
    const host = url.hostname;
    const predicates = [isPrivateHostname, isPrivateIpv4, isPrivateIPv6];
    const blocked = predicates.some((isPrivate) => isPrivate(host));
    if (blocked) {
        throw createURLSecurityPolicyError(url.toString());
    }
}
|
|
53
|
+
/**
 * Returns a character window of `content`.
 *
 * @param {string} content - Text to slice.
 * @param {number} [startChar=0] - Offset of the first character; negative
 *   values are clamped to 0, offsets at or past the end yield "".
 * @param {number} [maxLength] - Maximum characters to return; a falsy value
 *   (undefined or 0) means "through the end of the content".
 * @returns {string} The selected slice.
 */
function applyCharacterPagination(content, startChar = 0, maxLength) {
    const total = content.length;
    if (startChar >= total) {
        return "";
    }
    const begin = Math.max(0, startChar);
    let finish = total;
    if (maxLength) {
        finish = Math.min(total, begin + maxLength);
    }
    return content.slice(begin, finish);
}
|
|
61
|
+
/**
 * Escapes every regular-expression metacharacter in `str` with a backslash
 * so the result can be embedded in a RegExp source as a literal.
 *
 * @param {string} str - Raw text.
 * @returns {string} Text with metacharacters backslash-escaped.
 */
function escapeRegExp(str) {
    const metaChars = /[.*+?^${}()|[\]\\]/g;
    return str.replace(metaChars, (ch) => `\\${ch}`);
}
|
|
64
|
+
/**
 * Extracts one section from markdown: the first heading line whose text
 * contains `sectionHeading` (case-insensitive), plus everything up to the
 * next line that starts with the same number of '#' characters or fewer.
 *
 * @param {string} markdownContent - Full markdown text.
 * @param {string} sectionHeading - Text to look for inside a heading line.
 * @returns {string} The section including its heading, or "" if no heading
 *   matches.
 */
function extractSection(markdownContent, sectionHeading) {
    const rows = markdownContent.split('\n');
    const headingPattern = new RegExp(`^#{1,6}\\s*.*${escapeRegExp(sectionHeading)}.*$`, 'i');
    const first = rows.findIndex((row) => headingPattern.test(row));
    if (first === -1) {
        return "";
    }
    // Depth of the matched heading = number of leading '#' characters.
    const depth = (rows[first].match(/^#+/) || [''])[0].length;
    // Look for the next heading at the same or a shallower depth.
    const offset = rows.slice(first + 1).findIndex((row) => {
        const hashes = row.match(/^#+/);
        return hashes !== null && hashes[0].length <= depth;
    });
    const stop = offset === -1 ? rows.length : first + 1 + offset;
    return rows.slice(first, stop).join('\n');
}
|
|
93
|
+
/**
 * Selects paragraphs (blank-line separated, empty ones ignored) by a
 * 1-based range expression.
 *
 * Accepted forms: "3" (single paragraph), "10-" (from 10 to the end),
 * "1-5" (paragraphs 1 through 5 inclusive).
 *
 * @param {string} markdownContent - Markdown text.
 * @param {string} range - Range expression.
 * @returns {string} The selected paragraphs joined by blank lines, or ""
 *   when the expression is malformed or the start is out of bounds.
 */
function extractParagraphRange(markdownContent, range) {
    const blocks = markdownContent.split('\n\n').filter((block) => block.trim().length > 0);
    // Parse range (e.g., "1-5", "3", "10-")
    const parsed = range.match(/^(\d+)(?:-(\d*))?$/);
    if (!parsed) {
        return "";
    }
    const [, firstText, lastText] = parsed;
    const first = Number.parseInt(firstText, 10) - 1; // 1-based -> 0-based
    if (first < 0 || first >= blocks.length) {
        return "";
    }
    if (lastText === undefined) {
        // No dash at all: exactly one paragraph.
        return blocks[first] || "";
    }
    // An empty upper bound ("10-") means "through the last paragraph".
    const last = lastText === "" ? undefined : Number.parseInt(lastText, 10);
    return blocks.slice(first, last).join('\n\n');
}
|
|
119
|
+
/**
 * Collects every markdown heading line (one to six '#' followed by
 * whitespace) in document order.
 *
 * @param {string} markdownContent - Markdown text.
 * @returns {string} The heading lines joined by newlines, or a fixed notice
 *   when the content has none.
 */
function extractHeadings(markdownContent) {
    const found = [];
    for (const row of markdownContent.split('\n')) {
        if (/^#{1,6}\s/.test(row)) {
            found.push(row);
        }
    }
    return found.length > 0 ? found.join('\n') : "No headings found in the content.";
}
|
|
127
|
+
/**
 * Narrows converted markdown according to the caller's options.
 *
 * Order of application:
 *   1. `readHeadings` — returns only the heading list and ignores the rest.
 *   2. `section` — keeps the matching section.
 *   3. `paragraphRange` — keeps the selected paragraphs of what remains.
 *   4. `startChar` / `maxLength` — character window over the result.
 *
 * @param {string} markdownContent - Full markdown text.
 * @param {object} options - Any of readHeadings, section, paragraphRange,
 *   startChar, maxLength.
 * @returns {string} The narrowed content, or an explanatory message when a
 *   section or paragraph range selects nothing.
 */
function applyPaginationOptions(markdownContent, options) {
    // Heading listing short-circuits every other option.
    if (options.readHeadings) {
        return extractHeadings(markdownContent);
    }
    let output = markdownContent;
    if (options.section) {
        output = extractSection(output, options.section);
        if (output === "") {
            return `Section "${options.section}" not found in the content.`;
        }
    }
    if (options.paragraphRange) {
        output = extractParagraphRange(output, options.paragraphRange);
        if (output === "") {
            return `Paragraph range "${options.paragraphRange}" is invalid or out of bounds.`;
        }
    }
    // Character windowing runs last, and only when one of its knobs was given.
    const { startChar, maxLength } = options;
    const wantsCharWindow = !(startChar === undefined && maxLength === undefined);
    return wantsCharWindow
        ? applyCharacterPagination(output, startChar, maxLength)
        : output;
}
|
|
153
|
+
/**
 * Fetches a URL, converts the returned HTML to markdown, caches the
 * conversion, and returns the portion selected by `paginationOptions`.
 *
 * Flow: cache lookup → URL parsing → private-address policy check → fetch
 * (with proxy/default dispatcher, optional User-Agent and extra headers from
 * the environment) → HTML-to-markdown conversion → cache store → pagination.
 *
 * NOTE(review): only the initial URL is passed to assertUrlAllowed and no
 * `redirect` option is set on the request, so redirect targets followed by
 * fetch do not appear to be re-validated — confirm whether that is acceptable.
 *
 * @param mcpServer - Server handle forwarded to logMessage.
 * @param {string} url - Absolute URL to read.
 * @param {number} [timeoutMs=10000] - Abort the request after this many ms.
 * @param {object} [paginationOptions={}] - Options for applyPaginationOptions.
 * @returns {Promise<string>} The selected markdown, or the value produced by
 *   createEmptyContentWarning when conversion yields no content.
 * @throws Errors built by the error-handler factories: URL format, security
 *   policy, network, server (non-2xx), content, conversion, timeout, or
 *   unexpected.
 */
export async function fetchAndConvertToMarkdown(mcpServer, url, timeoutMs = 10000, paginationOptions = {}) {
    const startTime = Date.now();
    logMessage(mcpServer, "info", `Fetching URL: ${url}`);
    // Check cache first
    const cachedEntry = urlCache.get(url);
    if (cachedEntry) {
        logMessage(mcpServer, "info", `Using cached content for URL: ${url}`);
        const result = applyPaginationOptions(cachedEntry.markdownContent, paginationOptions);
        const duration = Date.now() - startTime;
        logMessage(mcpServer, "info", `Processed cached URL: ${url} (${result.length} chars in ${duration}ms)`);
        return result;
    }
    // Validate URL format
    let parsedUrl;
    try {
        parsedUrl = new URL(url);
    }
    catch {
        logMessage(mcpServer, "error", `Invalid URL format: ${url}`);
        throw createURLFormatError(url);
    }
    assertUrlAllowed(parsedUrl);
    // The controller is aborted only by the timer below, so an AbortError
    // anywhere in this function means the request timed out.
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
    try {
        // Prepare request options with proxy support
        const requestOptions = {
            signal: controller.signal,
        };
        // Add proxy or default dispatcher (includes system CA certs for TLS)
        const proxyAgent = createProxyAgent(url, ProxyType.URL_READER);
        const dispatcher = proxyAgent ?? createDefaultAgent();
        if (dispatcher) {
            requestOptions.dispatcher = dispatcher;
        }
        // Add User-Agent header if configured (URL_READER_USER_AGENT takes priority over USER_AGENT)
        const userAgent = process.env.URL_READER_USER_AGENT || process.env.USER_AGENT;
        if (userAgent) {
            requestOptions.headers = {
                ...requestOptions.headers,
                'User-Agent': userAgent
            };
        }
        const additionalHeaders = parseHeadersFromEnv("URL_READER_HEADERS");
        if (Object.keys(additionalHeaders).length > 0) {
            requestOptions.headers = mergeHeaders(requestOptions.headers, additionalHeaders);
        }
        let response;
        try {
            // Fetch the URL with the abort signal
            response = await fetch(url, requestOptions);
        }
        catch (error) {
            // Let aborts reach the outer handler; wrapping them here would
            // report a timeout as a generic network failure.
            if (error?.name === "AbortError") {
                throw error;
            }
            const context = {
                url,
                proxyAgent: !!dispatcher,
                timeout: timeoutMs
            };
            throw createNetworkError(error, context);
        }
        if (!response.ok) {
            let responseBody;
            try {
                responseBody = await response.text();
            }
            catch {
                // Best effort: the status line alone is still worth reporting.
                responseBody = '[Could not read response body]';
            }
            const context = { url };
            throw createServerError(response.status, response.statusText, responseBody, context);
        }
        // Retrieve HTML content
        let htmlContent;
        try {
            htmlContent = await response.text();
        }
        catch (error) {
            // A timeout while streaming the body is still a timeout.
            if (error?.name === "AbortError") {
                throw error;
            }
            throw createContentError(`Failed to read website content: ${error.message || 'Unknown error reading content'}`, url);
        }
        if (!htmlContent || htmlContent.trim().length === 0) {
            throw createContentError("Website returned empty content.", url);
        }
        // Convert HTML to Markdown
        let markdownContent;
        try {
            markdownContent = NodeHtmlMarkdown.translate(htmlContent);
        }
        catch (error) {
            throw createConversionError(error, url, htmlContent);
        }
        if (!markdownContent || markdownContent.trim().length === 0) {
            logMessage(mcpServer, "warning", `Empty content after conversion: ${url}`);
            // DON'T cache empty/failed conversions - return warning directly
            return createEmptyContentWarning(url, htmlContent.length, htmlContent);
        }
        // Only cache successful markdown conversion
        urlCache.set(url, htmlContent, markdownContent);
        // Apply pagination options
        const result = applyPaginationOptions(markdownContent, paginationOptions);
        const duration = Date.now() - startTime;
        logMessage(mcpServer, "info", `Successfully fetched and converted URL: ${url} (${result.length} chars in ${duration}ms)`);
        return result;
    }
    catch (error) {
        if (error.name === "AbortError") {
            logMessage(mcpServer, "error", `Timeout fetching URL: ${url} (${timeoutMs}ms)`);
            throw createTimeoutError(timeoutMs, url);
        }
        // Re-throw our enhanced errors
        if (error.name === 'MCPSearXNGError') {
            logMessage(mcpServer, "error", `Error fetching URL: ${url} - ${error.message}`);
            throw error;
        }
        // Catch any unexpected errors
        logMessage(mcpServer, "error", `Unexpected error fetching URL: ${url}`, error);
        const context = { url };
        throw createUnexpectedError(error, context);
    }
    finally {
        // Clean up the timeout to prevent memory leaks
        clearTimeout(timeoutId);
    }
}
|
package/package.json
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@kassol/mcp-searxng",
|
|
3
|
+
"version": "1.0.3-custom.0",
|
|
4
|
+
"mcpName": "io.github.kassol/mcp-searxng",
|
|
5
|
+
"description": "MCP server for SearXNG integration",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"author": "Ihor Sokoliuk (https://github.com/ihor-sokoliuk)",
|
|
8
|
+
"homepage": "https://github.com/kassol/mcp-searxng",
|
|
9
|
+
"bugs": "https://github.com/kassol/mcp-searxng/issues",
|
|
10
|
+
"repository": {
|
|
11
|
+
"type": "git",
|
|
12
|
+
"url": "git+https://github.com/kassol/mcp-searxng.git"
|
|
13
|
+
},
|
|
14
|
+
"keywords": [
|
|
15
|
+
"mcp",
|
|
16
|
+
"modelcontextprotocol",
|
|
17
|
+
"searxng",
|
|
18
|
+
"search",
|
|
19
|
+
"web-search",
|
|
20
|
+
"claude",
|
|
21
|
+
"ai",
|
|
22
|
+
"pagination",
|
|
23
|
+
"smithery",
|
|
24
|
+
"url-reader"
|
|
25
|
+
],
|
|
26
|
+
"type": "module",
|
|
27
|
+
"bin": {
|
|
28
|
+
"mcp-searxng": "dist/index.js"
|
|
29
|
+
},
|
|
30
|
+
"main": "dist/index.js",
|
|
31
|
+
"files": [
|
|
32
|
+
"dist"
|
|
33
|
+
],
|
|
34
|
+
"publishConfig": {
|
|
35
|
+
"access": "public"
|
|
36
|
+
},
|
|
37
|
+
"engines": {
|
|
38
|
+
"node": ">=20"
|
|
39
|
+
},
|
|
40
|
+
"scripts": {
|
|
41
|
+
"build": "tsc && shx chmod +x dist/*.js",
|
|
42
|
+
"watch": "tsc --watch",
|
|
43
|
+
"test": "cross-env SEARXNG_URL=https://test-searx.example.com tsx __tests__/run-all.ts",
|
|
44
|
+
"test:coverage": "cross-env SEARXNG_URL=https://test-searx.example.com c8 --reporter=text tsx __tests__/run-all.ts",
|
|
45
|
+
"bootstrap": "npm install && npm run build",
|
|
46
|
+
"inspector": "DANGEROUSLY_OMIT_AUTH=true npx @modelcontextprotocol/inspector node dist/index.js",
|
|
47
|
+
"lint": "eslint src __tests__",
|
|
48
|
+
"postversion": "TAG=$(node scripts/update-version.js | tail -1) && git add src/index.ts .mcp/server.json && git commit --amend --no-edit && git tag -f $TAG"
|
|
49
|
+
},
|
|
50
|
+
"dependencies": {
|
|
51
|
+
"@modelcontextprotocol/sdk": "1.29.0",
|
|
52
|
+
"@types/cors": "^2.8.19",
|
|
53
|
+
"@types/express": "^5.0.6",
|
|
54
|
+
"cors": "^2.8.6",
|
|
55
|
+
"express": "^5.2.1",
|
|
56
|
+
"node-html-markdown": "^2.0.0",
|
|
57
|
+
"undici": "^7.24.0"
|
|
58
|
+
},
|
|
59
|
+
"devDependencies": {
|
|
60
|
+
"@types/node": "^22.17.2",
|
|
61
|
+
"@types/supertest": "^7.2.0",
|
|
62
|
+
"@typescript-eslint/eslint-plugin": "^8.58.0",
|
|
63
|
+
"@typescript-eslint/parser": "^8.58.0",
|
|
64
|
+
"c8": "^11.0.0",
|
|
65
|
+
"cross-env": "^10.1.0",
|
|
66
|
+
"eslint": "^10.1.0",
|
|
67
|
+
"shx": "^0.4.0",
|
|
68
|
+
"supertest": "^7.2.2",
|
|
69
|
+
"tsx": "^4.21.0",
|
|
70
|
+
"typescript": "^5.8.3"
|
|
71
|
+
}
|
|
72
|
+
}
|