oremus-web-search 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/dist/index.js +10 -1
- package/dist/streamableHttpMcpClient.js +48 -19
- package/dist/types.js +36 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -8,6 +8,13 @@ An MCP server that exposes:
|
|
|
8
8
|
|
|
9
9
|
This is designed to be run with `npx` as an MCP server (stdio transport).
|
|
10
10
|
|
|
11
|
+
## Client setup (Codex / Claude / others)
|
|
12
|
+
|
|
13
|
+
- Codex CLI: see “Use in Codex CLI” below.
|
|
14
|
+
- Claude Code: see `web-search-mcp/CLAUDE.md:1` or copy `web-search-mcp/.mcp.json.example:1` to your project as `.mcp.json`.
|
|
15
|
+
- Copilot instructions: see `web-search-mcp/.github/copilot-instructions.md:1`.
|
|
16
|
+
- Gemini instructions: see `web-search-mcp/GEMINI.md:1`.
|
|
17
|
+
|
|
11
18
|
## Why this exists
|
|
12
19
|
|
|
13
20
|
- SearXNG is great for finding URLs.
|
|
@@ -90,6 +97,13 @@ Input:
|
|
|
90
97
|
- `include_comments` (boolean, optional)
|
|
91
98
|
- `include_tables` (boolean, optional)
|
|
92
99
|
- `use_proxy` (boolean, optional)
|
|
100
|
+
- `max_chars` (number, optional): cap returned text fields
|
|
101
|
+
- `start_char` (number, optional): paging offset used with `max_chars`
|
|
102
|
+
- `plain_text_fallback` (boolean, optional): if text/plain + extraction is empty, return raw body as text
|
|
103
|
+
- `rewrite_github_blob_to_raw` (boolean, optional): rewrite GitHub `.../blob/...` URLs to `raw.githubusercontent.com/...`
|
|
104
|
+
- `fetch_timeout_seconds` (number, optional): per-attempt HTTP timeout
|
|
105
|
+
- `max_fetch_bytes` (number, optional): cap download size (may truncate HTML)
|
|
106
|
+
- `max_total_seconds` (number, optional): best-effort overall time budget
|
|
93
107
|
|
|
94
108
|
Output:
|
|
95
109
|
|
package/dist/index.js
CHANGED
|
@@ -7,7 +7,7 @@ import { StreamableHttpMcpClient } from "./streamableHttpMcpClient.js";
|
|
|
7
7
|
import { webSearch } from "./searxng.js";
|
|
8
8
|
import { FETCH_AND_EXTRACT_TOOL, ROTATE_VPN_TOOL, WEB_SEARCH_TOOL, isFetchAndExtractArgs, isRotateVpnArgs, isWebSearchArgs, } from "./types.js";
|
|
9
9
|
import { createConfigResource, createHelpResource } from "./resources.js";
|
|
10
|
-
const packageVersion = "0.1.3";
|
|
10
|
+
const packageVersion = "0.1.5";
|
|
11
11
|
async function main() {
|
|
12
12
|
const config = loadConfig(process.env);
|
|
13
13
|
const trafilaturaClient = new StreamableHttpMcpClient({
|
|
@@ -40,6 +40,15 @@ async function main() {
|
|
|
40
40
|
include_comments: args.include_comments ?? false,
|
|
41
41
|
include_tables: args.include_tables ?? false,
|
|
42
42
|
use_proxy: args.use_proxy ?? true,
|
|
43
|
+
max_chars: args.max_chars,
|
|
44
|
+
start_char: args.start_char,
|
|
45
|
+
max_fetch_bytes: args.max_fetch_bytes,
|
|
46
|
+
fetch_timeout_seconds: args.fetch_timeout_seconds,
|
|
47
|
+
user_agent: args.user_agent,
|
|
48
|
+
accept_language: args.accept_language,
|
|
49
|
+
plain_text_fallback: args.plain_text_fallback,
|
|
50
|
+
rewrite_github_blob_to_raw: args.rewrite_github_blob_to_raw,
|
|
51
|
+
max_total_seconds: args.max_total_seconds,
|
|
43
52
|
});
|
|
44
53
|
return { content: result.content };
|
|
45
54
|
}
|
|
package/dist/streamableHttpMcpClient.js
CHANGED
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
class McpHttpError extends Error {
|
|
2
|
+
status;
|
|
3
|
+
body;
|
|
4
|
+
constructor(status, body) {
|
|
5
|
+
super(`MCP HTTP ${status}: ${body.slice(0, 500)}`);
|
|
6
|
+
this.name = "McpHttpError";
|
|
7
|
+
this.status = status;
|
|
8
|
+
this.body = body;
|
|
9
|
+
}
|
|
10
|
+
}
|
|
1
11
|
function contentTypeBase(value) {
|
|
2
12
|
if (!value)
|
|
3
13
|
return "";
|
|
@@ -44,6 +54,11 @@ export class StreamableHttpMcpClient {
|
|
|
44
54
|
this.url = options.url;
|
|
45
55
|
this.authorization = options.bearerToken ? `Bearer ${options.bearerToken}` : undefined;
|
|
46
56
|
}
|
|
57
|
+
resetSession() {
|
|
58
|
+
this.sessionId = undefined;
|
|
59
|
+
this.protocolVersion = undefined;
|
|
60
|
+
this.initializing = undefined;
|
|
61
|
+
}
|
|
47
62
|
headers(extra) {
|
|
48
63
|
const headers = {
|
|
49
64
|
Accept: "application/json, text/event-stream",
|
|
@@ -70,7 +85,7 @@ export class StreamableHttpMcpClient {
|
|
|
70
85
|
const ct = contentTypeBase(resp.headers.get("content-type"));
|
|
71
86
|
if (resp.status >= 400) {
|
|
72
87
|
const body = await resp.text().catch(() => "");
|
|
73
|
-
throw new
|
|
88
|
+
throw new McpHttpError(resp.status, body);
|
|
74
89
|
}
|
|
75
90
|
// Notifications often return 202 with no body.
|
|
76
91
|
if (!("id" in message)) {
|
|
@@ -110,9 +125,9 @@ export class StreamableHttpMcpClient {
|
|
|
110
125
|
id: initId,
|
|
111
126
|
method: "initialize",
|
|
112
127
|
params: {
|
|
113
|
-
protocolVersion: "2025-
|
|
128
|
+
protocolVersion: "2025-03-26",
|
|
114
129
|
capabilities: {},
|
|
115
|
-
clientInfo: { name: "oremus-web-search", version: "0.1.3" },
|
|
130
|
+
clientInfo: { name: "oremus-web-search", version: "0.1.5" },
|
|
116
131
|
},
|
|
117
132
|
};
|
|
118
133
|
const initResp = await this.post(initReq);
|
|
@@ -136,22 +151,36 @@ export class StreamableHttpMcpClient {
|
|
|
136
151
|
return this.initializing;
|
|
137
152
|
}
|
|
138
153
|
async callTool(name, args) {
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
154
|
+
const attemptCall = async () => {
|
|
155
|
+
await this.ensureInitialized();
|
|
156
|
+
const id = this.nextId++;
|
|
157
|
+
const req = {
|
|
158
|
+
jsonrpc: "2.0",
|
|
159
|
+
id,
|
|
160
|
+
method: "tools/call",
|
|
161
|
+
params: {
|
|
162
|
+
name,
|
|
163
|
+
arguments: args,
|
|
164
|
+
},
|
|
165
|
+
};
|
|
166
|
+
const resp = await this.post(req);
|
|
167
|
+
if (!resp)
|
|
168
|
+
throw new Error("tools/call returned no response");
|
|
169
|
+
if (resp.error)
|
|
170
|
+
throw new Error(`tools/call error: ${resp.error.message}`);
|
|
171
|
+
return resp.result;
|
|
149
172
|
};
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
173
|
+
try {
|
|
174
|
+
return await attemptCall();
|
|
175
|
+
}
|
|
176
|
+
catch (err) {
|
|
177
|
+
if (err instanceof McpHttpError &&
|
|
178
|
+
err.status === 400 &&
|
|
179
|
+
/no valid session id/i.test(err.body)) {
|
|
180
|
+
this.resetSession();
|
|
181
|
+
return await attemptCall();
|
|
182
|
+
}
|
|
183
|
+
throw err;
|
|
184
|
+
}
|
|
156
185
|
}
|
|
157
186
|
}
|
package/dist/types.js
CHANGED
|
@@ -67,6 +67,42 @@ export const FETCH_AND_EXTRACT_TOOL = {
|
|
|
67
67
|
description: "Whether the Trafilatura service should route via its proxy pool.",
|
|
68
68
|
default: true,
|
|
69
69
|
},
|
|
70
|
+
max_chars: {
|
|
71
|
+
type: "number",
|
|
72
|
+
description: "Maximum number of characters to return for extracted text fields.",
|
|
73
|
+
},
|
|
74
|
+
start_char: {
|
|
75
|
+
type: "number",
|
|
76
|
+
description: "Starting character offset for extracted text fields (used with max_chars).",
|
|
77
|
+
},
|
|
78
|
+
max_fetch_bytes: {
|
|
79
|
+
type: "number",
|
|
80
|
+
description: "Maximum number of bytes to download before extraction (may truncate HTML).",
|
|
81
|
+
},
|
|
82
|
+
fetch_timeout_seconds: {
|
|
83
|
+
type: "number",
|
|
84
|
+
description: "HTTP fetch timeout in seconds (per attempt).",
|
|
85
|
+
},
|
|
86
|
+
user_agent: {
|
|
87
|
+
type: "string",
|
|
88
|
+
description: "Override User-Agent header for the upstream fetch.",
|
|
89
|
+
},
|
|
90
|
+
accept_language: {
|
|
91
|
+
type: "string",
|
|
92
|
+
description: "Optional Accept-Language header for the upstream fetch.",
|
|
93
|
+
},
|
|
94
|
+
plain_text_fallback: {
|
|
95
|
+
type: "boolean",
|
|
96
|
+
description: "If the upstream is text/plain and extraction is empty, return the raw body as text.",
|
|
97
|
+
},
|
|
98
|
+
rewrite_github_blob_to_raw: {
|
|
99
|
+
type: "boolean",
|
|
100
|
+
description: "If the URL is a GitHub blob page, rewrite to raw.githubusercontent.com before fetching.",
|
|
101
|
+
},
|
|
102
|
+
max_total_seconds: {
|
|
103
|
+
type: "number",
|
|
104
|
+
description: "Maximum total time budget in seconds for all attempts (best-effort).",
|
|
105
|
+
},
|
|
70
106
|
},
|
|
71
107
|
required: ["url"],
|
|
72
108
|
},
|