oremus-web-search 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +204 -0
- package/dist/config.js +23 -0
- package/dist/index.js +106 -0
- package/dist/resources.js +28 -0
- package/dist/searxng.js +48 -0
- package/dist/streamableHttpMcpClient.js +157 -0
- package/dist/types.js +85 -0
- package/package.json +44 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Oremus Labs
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
package/README.md
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# oremus-web-search
|
|
2
|
+
|
|
3
|
+
An MCP server that exposes:
|
|
4
|
+
|
|
5
|
+
- `web_search`: web search via a configurable SearXNG instance (JSON API).
|
|
6
|
+
- `fetch_and_extract`: main-content extraction via a configurable Trafilatura MCP server (Streamable HTTP).
|
|
7
|
+
- `rotate_vpn`: asks Trafilatura to rotate its VPN/proxy egress.
|
|
8
|
+
|
|
9
|
+
This is designed to be run with `npx` as an MCP server (stdio transport).
|
|
10
|
+
|
|
11
|
+
## Why this exists
|
|
12
|
+
|
|
13
|
+
- SearXNG is great for finding URLs.
|
|
14
|
+
- Trafilatura is great at extracting clean article text and metadata.
|
|
15
|
+
- This server provides a single MCP endpoint that combines both.
|
|
16
|
+
|
|
17
|
+
## Install / Run
|
|
18
|
+
|
|
19
|
+
### Option A (recommended): no-token install via GitHub Release tarball
|
|
20
|
+
|
|
21
|
+
This avoids GitHub Packages auth requirements and “just works” with `npx`:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
SEARXNG_URL="https://search.oremuslabs.app" \
|
|
25
|
+
TRAFILATURA_MCP_URL="https://trafilatura.oremuslabs.app/mcp" \
|
|
26
|
+
npx -y https://github.com/Oremus-Labs/web-search-mcp/releases/latest/download/web-search-mcp.tgz
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
If you want a pinned version, use the versioned asset under the tag, e.g.:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
npx -y https://github.com/Oremus-Labs/web-search-mcp/releases/download/v0.1.1/oremus-labs-web-search-mcp-0.1.1.tgz
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Option B: npm (no token required)
|
|
36
|
+
|
|
37
|
+
Once published to the public npm registry, this should work without any auth:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
SEARXNG_URL="https://search.oremuslabs.app" \
|
|
41
|
+
TRAFILATURA_MCP_URL="https://trafilatura.oremuslabs.app/mcp" \
|
|
42
|
+
npx -y oremus-web-search
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Option C: GitHub Packages
|
|
46
|
+
|
|
47
|
+
GitHub Packages’ npm registry typically requires authentication (`read:packages`) to install.
|
|
48
|
+
|
|
49
|
+
## Configuration
|
|
50
|
+
|
|
51
|
+
Required environment variables:
|
|
52
|
+
|
|
53
|
+
- `SEARXNG_URL`
|
|
54
|
+
- Base URL for your SearXNG instance.
|
|
55
|
+
- The server calls `${SEARXNG_URL}/search?format=json&...`.
|
|
56
|
+
- You may also set `SEARXNG_URL` to the full `/search` endpoint.
|
|
57
|
+
- `TRAFILATURA_MCP_URL`
|
|
58
|
+
- Full MCP endpoint URL for Trafilatura (must include the MCP path), e.g. `http://...:8090/mcp`.
|
|
59
|
+
|
|
60
|
+
Optional environment variables:
|
|
61
|
+
|
|
62
|
+
- `USER_AGENT` (default: `oremus-web-search`)
|
|
63
|
+
- `TRAFILATURA_BEARER_TOKEN` (adds `Authorization: Bearer ...` when calling Trafilatura MCP)
|
|
64
|
+
|
|
65
|
+
## Tools
|
|
66
|
+
|
|
67
|
+
### `web_search`
|
|
68
|
+
|
|
69
|
+
Input (matches the common SearXNG MCP shape):
|
|
70
|
+
|
|
71
|
+
- `query` (string, required)
|
|
72
|
+
- `pageno` (number, optional)
|
|
73
|
+
- `time_range` (`day|month|year`, optional)
|
|
74
|
+
- `language` (string, optional)
|
|
75
|
+
- `safesearch` (`0|1|2`, optional)
|
|
76
|
+
|
|
77
|
+
Output:
|
|
78
|
+
|
|
79
|
+
- A single `text` block formatted as:
|
|
80
|
+
- `Title: ...`
|
|
81
|
+
- `Description: ...`
|
|
82
|
+
- `URL: ...`
|
|
83
|
+
- `Relevance Score: ...`
|
|
84
|
+
|
|
85
|
+
### `fetch_and_extract`
|
|
86
|
+
|
|
87
|
+
Input:
|
|
88
|
+
|
|
89
|
+
- `url` (string, required)
|
|
90
|
+
- `include_comments` (boolean, optional)
|
|
91
|
+
- `include_tables` (boolean, optional)
|
|
92
|
+
- `use_proxy` (boolean, optional)
|
|
93
|
+
|
|
94
|
+
Output:
|
|
95
|
+
|
|
96
|
+
- Pass-through of the Trafilatura MCP server tool result (typically a single `text` block containing JSON).
|
|
97
|
+
|
|
98
|
+
### `rotate_vpn`
|
|
99
|
+
|
|
100
|
+
Input:
|
|
101
|
+
|
|
102
|
+
- none
|
|
103
|
+
|
|
104
|
+
Output:
|
|
105
|
+
|
|
106
|
+
- Pass-through of the Trafilatura MCP server tool result.
|
|
107
|
+
|
|
108
|
+
Notes:
|
|
109
|
+
|
|
110
|
+
- This tool is intentionally exposed through Trafilatura (in-cluster) so you don't need to expose a public REST endpoint for VPN rotation.
|
|
111
|
+
- Rotation is disruptive to in-flight requests; only call it when you’re getting blocked/rate-limited.
|
|
112
|
+
|
|
113
|
+
## Local development
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
cd web-search-mcp
|
|
117
|
+
npm install
|
|
118
|
+
npm run build
|
|
119
|
+
SEARXNG_URL="http://127.0.0.1:18080" TRAFILATURA_MCP_URL="http://127.0.0.1:18090/mcp" npm run inspector
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Kubernetes access (typical)
|
|
123
|
+
|
|
124
|
+
If your Trafilatura MCP server is only exposed as an in-cluster Service, run it through a port-forward:
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
kubectl -n searxng port-forward svc/searxng-trafilatura-mcp 18090:8090
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Then set:
|
|
131
|
+
|
|
132
|
+
- `TRAFILATURA_MCP_URL=http://127.0.0.1:18090/mcp`
|
|
133
|
+
|
|
134
|
+
## Use in Codex CLI
|
|
135
|
+
|
|
136
|
+
Add a server entry to `~/.codex/config.toml`:
|
|
137
|
+
|
|
138
|
+
```toml
|
|
139
|
+
[mcp_servers.web_search]
|
|
140
|
+
command = "npx"
|
|
141
|
+
args = ["-y", "https://github.com/Oremus-Labs/web-search-mcp/releases/latest/download/web-search-mcp.tgz"]
|
|
142
|
+
env = { "SEARXNG_URL" = "https://search.oremuslabs.app", "TRAFILATURA_MCP_URL" = "https://trafilatura.oremuslabs.app/mcp" }
|
|
143
|
+
startup_timeout_sec = 30
|
|
144
|
+
tool_timeout_sec = 120
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
If you published to npm and want the simplest setup:
|
|
148
|
+
|
|
149
|
+
```toml
|
|
150
|
+
[mcp_servers.web_search]
|
|
151
|
+
command = "npx"
|
|
152
|
+
args = ["-y", "oremus-web-search"]
|
|
153
|
+
env = { "SEARXNG_URL" = "https://search.oremuslabs.app", "TRAFILATURA_MCP_URL" = "https://trafilatura.oremuslabs.app/mcp" }
|
|
154
|
+
startup_timeout_sec = 30
|
|
155
|
+
tool_timeout_sec = 120
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Restart Codex CLI after editing.
|
|
159
|
+
|
|
160
|
+
## Use in Claude Code
|
|
161
|
+
|
|
162
|
+
Add a server entry to your Claude Code MCP config (commonly `.mcp.json` in your project root, or wherever you keep your Claude configuration):
|
|
163
|
+
|
|
164
|
+
### Option A (Release tarball)
|
|
165
|
+
|
|
166
|
+
```json
|
|
167
|
+
{
|
|
168
|
+
"mcpServers": {
|
|
169
|
+
"web-search": {
|
|
170
|
+
"command": "npx",
|
|
171
|
+
"args": [
|
|
172
|
+
"-y",
|
|
173
|
+
"https://github.com/Oremus-Labs/web-search-mcp/releases/latest/download/web-search-mcp.tgz"
|
|
174
|
+
],
|
|
175
|
+
"env": {
|
|
176
|
+
"SEARXNG_URL": "https://search.oremuslabs.app",
|
|
177
|
+
"TRAFILATURA_MCP_URL": "https://trafilatura.oremuslabs.app/mcp"
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Option B (npm)
|
|
185
|
+
|
|
186
|
+
```json
|
|
187
|
+
{
|
|
188
|
+
"mcpServers": {
|
|
189
|
+
"web-search": {
|
|
190
|
+
"command": "npx",
|
|
191
|
+
"args": ["-y", "oremus-web-search"],
|
|
192
|
+
"env": {
|
|
193
|
+
"SEARXNG_URL": "https://search.oremuslabs.app",
|
|
194
|
+
"TRAFILATURA_MCP_URL": "https://trafilatura.oremuslabs.app/mcp"
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## Notes
|
|
202
|
+
|
|
203
|
+
- This server uses stdio transport (default) so it works with MCP clients that launch subprocesses.
|
|
204
|
+
- Trafilatura is called through its MCP Streamable HTTP endpoint; this repo’s Kubernetes deployment exposes it as `svc/searxng-trafilatura-mcp` in namespace `searxng`.
|
package/dist/config.js
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Builds the server configuration from a map of environment variables.
 *
 * Every value is whitespace-trimmed before use. A `USER_AGENT` that is unset,
 * empty, or whitespace-only falls back to the default, so an empty
 * `User-Agent` header value is never produced.
 *
 * @param {Record<string, string | undefined>} env - Environment map, e.g. `process.env`.
 * @returns {{searxngUrl: string, searxngUserAgent: string, trafilaturaMcpUrl: string, trafilaturaBearerToken: (string|undefined)}}
 *   The resolved configuration; `trafilaturaBearerToken` is `undefined` when no token is set.
 * @throws {Error} When `SEARXNG_URL` or `TRAFILATURA_MCP_URL` is missing or blank.
 */
export function loadConfig(env) {
    // Unset, non-string, and whitespace-only values all count as "not provided".
    const read = (key) => {
        const raw = env[key];
        return typeof raw === "string" ? raw.trim() : "";
    };
    const searxngUrl = read("SEARXNG_URL");
    const trafilaturaMcpUrl = read("TRAFILATURA_MCP_URL");
    if (!searxngUrl) {
        throw new Error("Missing required env var: SEARXNG_URL");
    }
    if (!trafilaturaMcpUrl) {
        throw new Error("Missing required env var: TRAFILATURA_MCP_URL");
    }
    return {
        searxngUrl,
        // Apply the default AFTER trimming so "   " does not become an empty header.
        searxngUserAgent: read("USER_AGENT") || "oremus-web-search",
        trafilaturaMcpUrl,
        trafilaturaBearerToken: read("TRAFILATURA_BEARER_TOKEN") || undefined,
    };
}
|
|
17
|
+
/**
 * Returns the SearXNG `/search` endpoint for a configured URL.
 *
 * Accepts either the instance base URL or the full `/search` endpoint.
 * Trailing slashes on the path are dropped, and `/search` is appended unless
 * the path already ends with it. A query string or fragment on the configured
 * URL is kept in place after the path rather than having `/search` glued onto
 * the end of it.
 *
 * @param {string} searxngUrl - Base URL or full search endpoint of the instance.
 * @returns {string} The URL whose path ends in `/search`.
 */
export function normalizeSearxngSearchEndpoint(searxngUrl) {
    // Split off "?query" / "#fragment" so only the path part is normalized.
    const cut = searxngUrl.search(/[?#]/);
    const pathPart = cut === -1 ? searxngUrl : searxngUrl.slice(0, cut);
    const suffix = cut === -1 ? "" : searxngUrl.slice(cut);
    const trimmed = pathPart.replace(/\/+$/, "");
    const endpoint = trimmed.endsWith("/search") ? trimmed : `${trimmed}/search`;
    return `${endpoint}${suffix}`;
}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
3
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
|
+
import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ReadResourceRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
|
|
5
|
+
import { loadConfig } from "./config.js";
|
|
6
|
+
import { StreamableHttpMcpClient } from "./streamableHttpMcpClient.js";
|
|
7
|
+
import { webSearch } from "./searxng.js";
|
|
8
|
+
import { FETCH_AND_EXTRACT_TOOL, ROTATE_VPN_TOOL, WEB_SEARCH_TOOL, isFetchAndExtractArgs, isRotateVpnArgs, isWebSearchArgs, } from "./types.js";
|
|
9
|
+
import { createConfigResource, createHelpResource } from "./resources.js";
|
|
10
|
+
// Version string reported in the MCP server info and printed in the startup
// banner. It matches the "version" field of this package's package.json.
const packageVersion = "0.1.3";
|
|
11
|
+
/**
 * Builds the MCP server, registers its tool and resource handlers, and
 * connects it to the stdio transport.
 *
 * Tools:
 * - `web_search` is answered locally by querying SearXNG.
 * - `fetch_and_extract` and `rotate_vpn` are forwarded to the Trafilatura MCP
 *   server and their results relayed back to the caller.
 *
 * Resources: `config://server-config` (JSON) and `help://usage-guide` (Markdown).
 *
 * @returns {Promise<void>} Resolves once the server is connected to the transport.
 * @throws {Error} When required configuration is missing (see `loadConfig`).
 */
async function main() {
    const config = loadConfig(process.env);
    const trafilaturaClient = new StreamableHttpMcpClient({
        url: config.trafilaturaMcpUrl,
        bearerToken: config.trafilaturaBearerToken,
    });
    const server = new Server({ name: "oremus-web-search", version: packageVersion }, {
        capabilities: {
            logging: {},
            resources: {},
            tools: { listChanged: false },
        },
    });
    // Relays an upstream tool result. `isError` is carried over so a failure
    // reported by Trafilatura is not presented to the client as a success, and
    // a missing result/content degrades to an empty content list.
    const relayToolResult = (result) => ({
        content: result?.content ?? [],
        ...(result?.isError ? { isError: true } : {}),
    });
    server.setRequestHandler(ListToolsRequestSchema, async () => {
        return { tools: [WEB_SEARCH_TOOL, FETCH_AND_EXTRACT_TOOL, ROTATE_VPN_TOOL] };
    });
    server.setRequestHandler(CallToolRequestSchema, async (request) => {
        const { name, arguments: args } = request.params;
        if (name === "web_search") {
            if (!isWebSearchArgs(args)) {
                throw new Error("Invalid arguments for web_search");
            }
            const result = await webSearch(config.searxngUrl, config.searxngUserAgent, args);
            return { content: [{ type: "text", text: result }] };
        }
        if (name === "fetch_and_extract") {
            if (!isFetchAndExtractArgs(args)) {
                throw new Error("Invalid arguments for fetch_and_extract");
            }
            // Omitted options are sent explicitly with the defaults the tool schema advertises.
            const result = await trafilaturaClient.callTool("fetch_and_extract", {
                url: args.url,
                include_comments: args.include_comments ?? false,
                include_tables: args.include_tables ?? false,
                use_proxy: args.use_proxy ?? true,
            });
            return relayToolResult(result);
        }
        if (name === "rotate_vpn") {
            if (!isRotateVpnArgs(args)) {
                throw new Error("Invalid arguments for rotate_vpn");
            }
            const result = await trafilaturaClient.callTool("rotate_vpn", {});
            return relayToolResult(result);
        }
        throw new Error(`Unknown tool: ${name}`);
    });
    server.setRequestHandler(ListResourcesRequestSchema, async () => {
        return {
            resources: [
                {
                    uri: "config://server-config",
                    mimeType: "application/json",
                    name: "Server Configuration",
                    description: "Current server configuration and environment variables",
                },
                {
                    uri: "help://usage-guide",
                    mimeType: "text/markdown",
                    name: "Usage Guide",
                    description: "How to use this server effectively",
                },
            ],
        };
    });
    server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
        const { uri } = request.params;
        if (uri === "config://server-config") {
            return {
                contents: [{ uri, mimeType: "application/json", text: createConfigResource(config) }],
            };
        }
        if (uri === "help://usage-guide") {
            return {
                contents: [{ uri, mimeType: "text/markdown", text: createHelpResource() }],
            };
        }
        throw new Error(`Unknown resource: ${uri}`);
    });
    if (process.stdin.isTTY) {
        // A human started the server in a terminal. The banner goes to stderr
        // because stdout is the JSON-RPC channel of the stdio transport and
        // must carry nothing but protocol messages.
        console.error(`oremus-web-search v${packageVersion} - Ready`);
        console.error(`SearXNG URL: ${config.searxngUrl}`);
        console.error(`Trafilatura MCP URL: ${config.trafilaturaMcpUrl}`);
        console.error("Waiting for MCP client connection via STDIO...\n");
    }
    const transport = new StdioServerTransport();
    await server.connect(transport);
}
|
|
95
|
+
// Any error that escapes to the process level, or a failed startup, is fatal:
// log it to stderr with a label and exit with status 1.
const exitWithError = (label) => (detail) => {
    console.error(label, detail);
    process.exit(1);
};
process.on("uncaughtException", exitWithError("Uncaught Exception:"));
process.on("unhandledRejection", exitWithError("Unhandled Rejection:"));
main().catch(exitWithError("Failed to start server:"));
|
package/dist/resources.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
 * Serializes the server configuration for the `config://server-config` resource.
 *
 * The bearer token itself is not included; only a boolean saying whether one
 * is configured.
 *
 * @param {{searxngUrl: string, searxngUserAgent: string, trafilaturaMcpUrl: string, trafilaturaBearerToken?: string}} config
 * @returns {string} Pretty-printed JSON (2-space indent).
 */
export function createConfigResource(config) {
    const { searxngUrl, searxngUserAgent, trafilaturaMcpUrl, trafilaturaBearerToken } = config;
    const view = {
        searxngUrl,
        searxngUserAgent,
        trafilaturaMcpUrl,
        trafilaturaBearerTokenConfigured: Boolean(trafilaturaBearerToken),
    };
    return JSON.stringify(view, null, 2);
}
|
|
9
|
+
/**
 * Returns the Markdown text served as the `help://usage-guide` resource:
 * the list of tools plus the required and optional environment variables.
 *
 * @returns {string} Markdown document ending with a newline.
 */
export function createHelpResource() {
    const guide = `# oremus-web-search

This MCP server exposes tools:

- \`web_search\` → calls the configured SearXNG instance JSON API.
- \`fetch_and_extract\` → delegates to the configured Trafilatura MCP server (Streamable HTTP).
- \`rotate_vpn\` → asks Trafilatura to rotate its VPN/proxy egress.

## Required environment variables

- \`SEARXNG_URL\`: Base URL of your SearXNG instance (e.g. \`https://search.oremuslabs.app\`).
- \`TRAFILATURA_MCP_URL\`: MCP endpoint URL for Trafilatura (must include the MCP path, e.g. \`http://...:8090/mcp\`).

## Optional environment variables

- \`USER_AGENT\`: User-Agent string for SearXNG HTTP requests.
- \`TRAFILATURA_BEARER_TOKEN\`: If your Trafilatura MCP endpoint requires Authorization.
`;
    return guide;
}
|
package/dist/searxng.js
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { normalizeSearxngSearchEndpoint } from "./config.js";
|
|
2
|
+
/**
 * Renders SearXNG result objects as plain text.
 *
 * Each result becomes four labelled lines (Title, Description, URL,
 * Relevance Score); results are separated by a blank line. Missing fields
 * render as empty strings, and numeric scores are fixed to three decimals.
 *
 * @param {Array<{title?: string, content?: string, url?: string, score?: (number|string)}>} results
 * @returns {string} The formatted text, or an empty string for an empty list.
 */
function formatResults(results) {
    const blocks = [];
    for (const entry of results) {
        const heading = entry.title ?? "";
        const summary = entry.content ?? "";
        const link = entry.url ?? "";
        let relevance;
        if (typeof entry.score === "number") {
            relevance = entry.score.toFixed(3);
        }
        else {
            relevance = entry.score ?? "";
        }
        const lines = [
            `Title: ${heading}`,
            `Description: ${summary}`,
            `URL: ${link}`,
            `Relevance Score: ${relevance}`,
        ];
        blocks.push(lines.join("\n"));
    }
    return blocks.join("\n\n");
}
|
|
18
|
+
/**
 * Runs a search against the SearXNG JSON API and returns formatted text.
 *
 * Optional filters (`pageno`, `time_range`, `language`, `safesearch`) are
 * forwarded only when provided. "Provided" means not `undefined`, `null`, or
 * an empty string, so a numeric `0` (e.g. `safesearch: 0`) is still sent.
 *
 * @param {string} searxngUrl - Base URL or full `/search` endpoint of the instance.
 * @param {string} userAgent - Value for the `User-Agent` request header.
 * @param {{query: string, pageno?: number, time_range?: string, language?: string, safesearch?: (string|number)}} args
 * @returns {Promise<string>} Formatted results, or `"No results."` when there are none.
 * @throws {Error} On a non-2xx HTTP status (message includes up to 500 chars of the body).
 */
export async function webSearch(searxngUrl, userAgent, args) {
    const url = new URL(normalizeSearxngSearchEndpoint(searxngUrl));
    url.searchParams.set("q", args.query);
    url.searchParams.set("format", "json");
    for (const key of ["pageno", "time_range", "language", "safesearch"]) {
        const value = args[key];
        // Explicit checks instead of truthiness so that 0 is not dropped.
        if (value !== undefined && value !== null && value !== "") {
            url.searchParams.set(key, String(value));
        }
    }
    const resp = await fetch(url.toString(), {
        method: "GET",
        headers: {
            "User-Agent": userAgent,
            Accept: "application/json",
        },
    });
    if (!resp.ok) {
        // Best effort: the body is only used to enrich the error message.
        const text = await resp.text().catch(() => "");
        throw new Error(`SearXNG HTTP ${resp.status}: ${text.slice(0, 500)}`);
    }
    const data = await resp.json();
    // Tolerate a payload without a usable `results` array.
    const results = Array.isArray(data?.results) ? data.results : [];
    if (!results.length) {
        return "No results.";
    }
    return formatResults(results);
}
|
package/dist/streamableHttpMcpClient.js
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
 * Extracts the bare media type from a Content-Type header value: everything
 * before the first `;`, trimmed and lower-cased.
 *
 * @param {string | null | undefined} value - Raw header value.
 * @returns {string} The media type, or `""` when the header is absent or empty.
 */
function contentTypeBase(value) {
    if (value) {
        const [mediaType] = value.split(";");
        return mediaType?.trim().toLowerCase() ?? "";
    }
    return "";
}
|
|
6
|
+
/**
 * Reads a byte stream to the end and decodes it as UTF-8 text.
 *
 * Decoding is done in streaming mode, so a multi-byte character split across
 * two chunks is decoded correctly. The reader's lock on the stream is always
 * released, including when a read fails.
 *
 * @param {ReadableStream<Uint8Array> | null | undefined} stream - e.g. `response.body`.
 * @returns {Promise<string>} The decoded text, or `""` when there is no stream.
 */
async function readAllText(stream) {
    if (!stream) {
        return "";
    }
    const reader = stream.getReader();
    const decoder = new TextDecoder();
    let text = "";
    try {
        for (;;) {
            const { done, value } = await reader.read();
            if (done) {
                break;
            }
            text += decoder.decode(value, { stream: true });
        }
    }
    finally {
        reader.releaseLock();
    }
    // Flush any bytes the decoder is still holding from the last chunk.
    return text + decoder.decode();
}
|
|
21
|
+
/**
 * Extracts the data payloads from a complete Server-Sent Events body.
 *
 * Events are separated by blank lines. Within one event, every `data:` line
 * contributes to a single payload, joined with `\n`, so a payload spread over
 * several `data:` lines is returned as one message rather than as fragments.
 * Other fields (`event:`, `id:`, comments) are ignored, and events without
 * data are skipped.
 *
 * @param {string} sseText - The full text of a `text/event-stream` response.
 * @returns {string[]} One string per event that carried non-empty data.
 */
function parseSseMessages(sseText) {
    const messages = [];
    // Normalize CRLF and lone CR terminators to LF before splitting.
    const normalized = sseText.replace(/\r\n?/g, "\n");
    for (const rawEvent of normalized.split(/\n{2,}/)) {
        const dataLines = [];
        for (const line of rawEvent.trim().split("\n")) {
            if (!line.startsWith("data:")) {
                continue;
            }
            const value = line.slice("data:".length);
            // Drop the single optional space that may follow the colon.
            dataLines.push(value.startsWith(" ") ? value.slice(1) : value);
        }
        const payload = dataLines.join("\n").trim();
        if (payload) {
            messages.push(payload);
        }
    }
    return messages;
}
|
|
36
|
+
/**
 * Minimal MCP client that sends JSON-RPC messages to a server with HTTP POST
 * and accepts either a JSON or a `text/event-stream` response.
 *
 * The initialize handshake runs lazily before the first tool call and is
 * performed once per session, whether or not the server hands out an
 * `mcp-session-id`. When the server answers 404 to a request that carried a
 * session id, the session is forgotten so the next call starts a new one.
 */
export class StreamableHttpMcpClient {
    url;
    authorization;
    sessionId;
    protocolVersion;
    nextId = 1;
    // In-flight handshake promise, shared by concurrent callers.
    initializing;
    // True once initialize + notifications/initialized have both succeeded.
    initialized = false;
    /**
     * @param {{url: string, bearerToken?: string}} options - Endpoint URL and
     *   optional token sent as `Authorization: Bearer <token>`.
     */
    constructor(options) {
        this.url = options.url;
        this.authorization = options.bearerToken ? `Bearer ${options.bearerToken}` : undefined;
    }
    /**
     * Builds the request headers for the current session state.
     *
     * @param {Record<string, string>} [extra] - Headers merged over the base
     *   Accept/Content-Type pair; auth, session and protocol headers still win.
     * @returns {Record<string, string>}
     */
    headers(extra) {
        const headers = {
            Accept: "application/json, text/event-stream",
            "Content-Type": "application/json",
            ...(extra ?? {}),
        };
        if (this.authorization) {
            headers.Authorization = this.authorization;
        }
        if (this.sessionId) {
            headers["mcp-session-id"] = this.sessionId;
        }
        if (this.protocolVersion) {
            headers["mcp-protocol-version"] = this.protocolVersion;
        }
        return headers;
    }
    /**
     * Sends one JSON-RPC message and returns the matching response.
     *
     * @param {object} message - JSON-RPC request (has `id`) or notification (no `id`).
     * @returns {Promise<object | null>} The parsed response, or `null` for a notification.
     * @throws {Error} On HTTP status >= 400, an unexpected content type, or an
     *   SSE body that contains no response with the request's id.
     */
    async post(message) {
        const resp = await fetch(this.url, {
            method: "POST",
            headers: this.headers(),
            body: JSON.stringify(message),
        });
        const newSessionId = resp.headers.get("mcp-session-id");
        if (newSessionId) {
            this.sessionId = newSessionId;
        }
        if (resp.status >= 400) {
            if (resp.status === 404 && this.sessionId) {
                // The server no longer knows this session: forget it so the
                // next call performs a fresh handshake.
                this.sessionId = undefined;
                this.protocolVersion = undefined;
                this.initialized = false;
            }
            const body = await resp.text().catch(() => "");
            throw new Error(`MCP HTTP ${resp.status}: ${body.slice(0, 500)}`);
        }
        // Notifications often return 202 with no body.
        if (!("id" in message)) {
            // Discard whatever body there is so it does not stay unread.
            await resp.body?.cancel().catch(() => { });
            return null;
        }
        const ct = contentTypeBase(resp.headers.get("content-type"));
        if (ct === "application/json") {
            return (await resp.json());
        }
        if (ct === "text/event-stream") {
            const sseText = await readAllText(resp.body);
            for (const data of parseSseMessages(sseText)) {
                let parsed;
                try {
                    parsed = JSON.parse(data);
                }
                catch {
                    // Not JSON: skip this event and keep looking.
                    continue;
                }
                // The stream may carry other messages; pick the one answering this request.
                if (parsed && parsed.id === message.id) {
                    return parsed;
                }
            }
            throw new Error("MCP SSE response did not include a matching JSON-RPC response");
        }
        const body = await resp.text().catch(() => "");
        throw new Error(`Unexpected MCP response content-type '${ct}': ${body.slice(0, 200)}`);
    }
    /**
     * Performs the initialize handshake if it has not completed yet.
     * Concurrent callers share a single in-flight handshake.
     *
     * @returns {Promise<void>}
     * @throws {Error} When the server returns no response or a JSON-RPC error.
     */
    async ensureInitialized() {
        if (this.initialized) {
            return;
        }
        if (this.initializing) {
            return this.initializing;
        }
        this.initializing = (async () => {
            // Start clean so a half-finished earlier attempt leaves no stale headers.
            this.sessionId = undefined;
            this.protocolVersion = undefined;
            const initReq = {
                jsonrpc: "2.0",
                id: this.nextId++,
                method: "initialize",
                params: {
                    protocolVersion: "2025-11-25",
                    capabilities: {},
                    clientInfo: { name: "oremus-web-search", version: "0.1.3" },
                },
            };
            const initResp = await this.post(initReq);
            if (!initResp) {
                throw new Error("Initialize returned no response");
            }
            if (initResp.error) {
                throw new Error(`Initialize error: ${initResp.error.message}`);
            }
            const negotiated = initResp.result?.protocolVersion;
            if (typeof negotiated === "string") {
                this.protocolVersion = negotiated;
            }
            await this.post({
                jsonrpc: "2.0",
                method: "notifications/initialized",
                params: {},
            });
            this.initialized = true;
        })().finally(() => {
            this.initializing = undefined;
        });
        return this.initializing;
    }
    /**
     * Calls a tool on the remote MCP server.
     *
     * @param {string} name - Tool name.
     * @param {object} args - Tool arguments, sent as `params.arguments`.
     * @returns {Promise<object>} The `result` member of the JSON-RPC response.
     * @throws {Error} When the handshake fails, the server returns no
     *   response, or the response carries a JSON-RPC error.
     */
    async callTool(name, args) {
        await this.ensureInitialized();
        const req = {
            jsonrpc: "2.0",
            id: this.nextId++,
            method: "tools/call",
            params: {
                name,
                arguments: args,
            },
        };
        const resp = await this.post(req);
        if (!resp) {
            throw new Error("tools/call returned no response");
        }
        if (resp.error) {
            throw new Error(`tools/call error: ${resp.error.message}`);
        }
        return resp.result;
    }
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
 * Checks that a tool-call argument value is usable for `web_search`:
 * a non-null object with a string `query`. Optional fields are not inspected.
 *
 * @param {unknown} args - The raw `arguments` value of the tool call.
 * @returns {boolean}
 */
export function isWebSearchArgs(args) {
    if (typeof args !== "object" || args === null) {
        return false;
    }
    if (!("query" in args)) {
        return false;
    }
    return typeof args.query === "string";
}
|
|
7
|
+
/**
 * Tool descriptor for `web_search`: name, description, and the JSON Schema
 * of its input. Only `query` is required.
 */
export const WEB_SEARCH_TOOL = {
    name: "web_search",
    description: "Performs a web search using a SearXNG instance. Returns a formatted text block similar to `searxng_web_search`.",
    inputSchema: {
        type: "object",
        properties: {
            query: { type: "string", description: "The search query." },
            pageno: { type: "number", description: "Search page number (starts at 1).", default: 1 },
            time_range: {
                type: "string",
                description: "Time range of search (day, month, year).",
                enum: ["day", "month", "year"],
            },
            language: {
                type: "string",
                description: "Language code for search results (e.g., 'en', 'fr', 'de'). Default is instance-dependent.",
                default: "all",
            },
            // Levels are spelled as strings in the schema, not numbers.
            safesearch: {
                type: "string",
                description: "Safe search filter level (0: None, 1: Moderate, 2: Strict).",
                enum: ["0", "1", "2"],
                default: "0",
            },
        },
        required: ["query"],
    },
};
|
|
42
|
+
/**
 * Checks that a tool-call argument value is usable for `fetch_and_extract`:
 * a non-null object with a string `url`. Optional flags are not inspected.
 *
 * @param {unknown} args - The raw `arguments` value of the tool call.
 * @returns {boolean}
 */
export function isFetchAndExtractArgs(args) {
    if (typeof args !== "object" || args === null) {
        return false;
    }
    if (!("url" in args)) {
        return false;
    }
    return typeof args.url === "string";
}
|
|
48
|
+
/**
 * Tool descriptor for `fetch_and_extract`: name, description, and the JSON
 * Schema of its input. Only `url` is required; the three flags are optional
 * booleans with advertised defaults.
 */
export const FETCH_AND_EXTRACT_TOOL = {
    name: "fetch_and_extract",
    description: "Fetches a URL and extracts the main content and metadata using the configured Trafilatura MCP server.",
    inputSchema: {
        type: "object",
        properties: {
            url: {
                type: "string",
                description: "The URL of the page to extract.",
            },
            include_comments: { type: "boolean", description: "Whether to include comment sections at the bottom of articles.", default: false },
            include_tables: { type: "boolean", description: "Extract text from HTML <table> elements.", default: false },
            use_proxy: { type: "boolean", description: "Whether the Trafilatura service should route via its proxy pool.", default: true },
        },
        required: ["url"],
    },
};
|
|
74
|
+
/**
 * Checks the argument value of a `rotate_vpn` call. The tool takes no
 * parameters, so the arguments may be omitted entirely or be any non-null
 * object.
 *
 * @param {unknown} args - The raw `arguments` value of the tool call.
 * @returns {boolean}
 */
export function isRotateVpnArgs(args) {
    if (args === undefined) {
        return true;
    }
    return args !== null && typeof args === "object";
}
|
|
77
|
+
/**
 * Tool descriptor for `rotate_vpn`. The input schema is an object with no
 * properties and no additional properties allowed.
 */
export const ROTATE_VPN_TOOL = {
    name: "rotate_vpn",
    description: "Requests the Trafilatura service to rotate its VPN/proxy egress (useful when rate-limited).",
    inputSchema: { type: "object", properties: {}, additionalProperties: false },
};
|
package/package.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "oremus-web-search",
|
|
3
|
+
"version": "0.1.3",
|
|
4
|
+
"description": "MCP server that combines SearXNG web search with Trafilatura extraction",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "git+ssh://git@github.com/Oremus-Labs/web-search-mcp.git"
|
|
10
|
+
},
|
|
11
|
+
"homepage": "https://github.com/Oremus-Labs/web-search-mcp",
|
|
12
|
+
"bugs": {
|
|
13
|
+
"url": "https://github.com/Oremus-Labs/web-search-mcp/issues"
|
|
14
|
+
},
|
|
15
|
+
"bin": {
|
|
16
|
+
"oremus-web-search": "dist/index.js"
|
|
17
|
+
},
|
|
18
|
+
"main": "dist/index.js",
|
|
19
|
+
"files": [
|
|
20
|
+
"dist",
|
|
21
|
+
"README.md",
|
|
22
|
+
"LICENSE"
|
|
23
|
+
],
|
|
24
|
+
"engines": {
|
|
25
|
+
"node": ">=20"
|
|
26
|
+
},
|
|
27
|
+
"scripts": {
|
|
28
|
+
"build": "tsc && shx chmod +x dist/*.js",
|
|
29
|
+
"clean": "shx rm -rf dist",
|
|
30
|
+
"dev": "tsx src/index.ts",
|
|
31
|
+
"inspector": "npx @modelcontextprotocol/inspector node dist/index.js",
|
|
32
|
+
"prepack": "npm run clean && npm run build",
|
|
33
|
+
"smoke": "tsx scripts/smoke.ts"
|
|
34
|
+
},
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"@modelcontextprotocol/sdk": "1.25.2"
|
|
37
|
+
},
|
|
38
|
+
"devDependencies": {
|
|
39
|
+
"@types/node": "^22.17.2",
|
|
40
|
+
"shx": "^0.4.0",
|
|
41
|
+
"tsx": "^4.20.5",
|
|
42
|
+
"typescript": "^5.8.3"
|
|
43
|
+
}
|
|
44
|
+
}
|