oremus-web-search 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,6 +8,13 @@ An MCP server that exposes:
8
8
 
9
9
  This is designed to be run with `npx` as an MCP server (stdio transport).
10
10
 
11
+ ## Client setup (Codex / Claude / others)
12
+
13
+ - Codex CLI: see “Use in Codex CLI” below.
14
+ - Claude Code: see `web-search-mcp/CLAUDE.md:1` or copy `web-search-mcp/.mcp.json.example:1` to your project as `.mcp.json`.
15
+ - Copilot instructions: see `web-search-mcp/.github/copilot-instructions.md:1`.
16
+ - Gemini instructions: see `web-search-mcp/GEMINI.md:1`.
17
+
11
18
  ## Why this exists
12
19
 
13
20
  - SearXNG is great for finding URLs.
@@ -90,6 +97,13 @@ Input:
90
97
  - `include_comments` (boolean, optional)
91
98
  - `include_tables` (boolean, optional)
92
99
  - `use_proxy` (boolean, optional)
100
+ - `max_chars` (number, optional): cap returned text fields
101
+ - `start_char` (number, optional): paging offset used with `max_chars`
102
+ - `plain_text_fallback` (boolean, optional): if the upstream response is `text/plain` and extraction is empty, return the raw body as text
103
+ - `rewrite_github_blob_to_raw` (boolean, optional): rewrite GitHub `.../blob/...` URLs to `raw.githubusercontent.com/...`
104
+ - `fetch_timeout_seconds` (number, optional): per-attempt HTTP timeout
105
+ - `max_fetch_bytes` (number, optional): cap download size (may truncate HTML)
106
+ - `max_total_seconds` (number, optional): best-effort overall time budget
93
107
 
94
108
  Output:
95
109
 
package/dist/index.js CHANGED
@@ -7,7 +7,7 @@ import { StreamableHttpMcpClient } from "./streamableHttpMcpClient.js";
7
7
  import { webSearch } from "./searxng.js";
8
8
  import { FETCH_AND_EXTRACT_TOOL, ROTATE_VPN_TOOL, WEB_SEARCH_TOOL, isFetchAndExtractArgs, isRotateVpnArgs, isWebSearchArgs, } from "./types.js";
9
9
  import { createConfigResource, createHelpResource } from "./resources.js";
10
- const packageVersion = "0.1.3";
10
+ const packageVersion = "0.1.5";
11
11
  async function main() {
12
12
  const config = loadConfig(process.env);
13
13
  const trafilaturaClient = new StreamableHttpMcpClient({
@@ -40,6 +40,15 @@ async function main() {
40
40
  include_comments: args.include_comments ?? false,
41
41
  include_tables: args.include_tables ?? false,
42
42
  use_proxy: args.use_proxy ?? true,
43
+ max_chars: args.max_chars,
44
+ start_char: args.start_char,
45
+ max_fetch_bytes: args.max_fetch_bytes,
46
+ fetch_timeout_seconds: args.fetch_timeout_seconds,
47
+ user_agent: args.user_agent,
48
+ accept_language: args.accept_language,
49
+ plain_text_fallback: args.plain_text_fallback,
50
+ rewrite_github_blob_to_raw: args.rewrite_github_blob_to_raw,
51
+ max_total_seconds: args.max_total_seconds,
43
52
  });
44
53
  return { content: result.content };
45
54
  }
@@ -1,3 +1,13 @@
1
+ class McpHttpError extends Error {
2
+ status;
3
+ body;
4
+ constructor(status, body) {
5
+ super(`MCP HTTP ${status}: ${body.slice(0, 500)}`);
6
+ this.name = "McpHttpError";
7
+ this.status = status;
8
+ this.body = body;
9
+ }
10
+ }
1
11
  function contentTypeBase(value) {
2
12
  if (!value)
3
13
  return "";
@@ -44,6 +54,11 @@ export class StreamableHttpMcpClient {
44
54
  this.url = options.url;
45
55
  this.authorization = options.bearerToken ? `Bearer ${options.bearerToken}` : undefined;
46
56
  }
57
+ resetSession() {
58
+ this.sessionId = undefined;
59
+ this.protocolVersion = undefined;
60
+ this.initializing = undefined;
61
+ }
47
62
  headers(extra) {
48
63
  const headers = {
49
64
  Accept: "application/json, text/event-stream",
@@ -70,7 +85,7 @@ export class StreamableHttpMcpClient {
70
85
  const ct = contentTypeBase(resp.headers.get("content-type"));
71
86
  if (resp.status >= 400) {
72
87
  const body = await resp.text().catch(() => "");
73
- throw new Error(`MCP HTTP ${resp.status}: ${body.slice(0, 500)}`);
88
+ throw new McpHttpError(resp.status, body);
74
89
  }
75
90
  // Notifications often return 202 with no body.
76
91
  if (!("id" in message)) {
@@ -110,9 +125,9 @@ export class StreamableHttpMcpClient {
110
125
  id: initId,
111
126
  method: "initialize",
112
127
  params: {
113
- protocolVersion: "2025-11-25",
128
+ protocolVersion: "2025-03-26",
114
129
  capabilities: {},
115
- clientInfo: { name: "oremus-web-search", version: "0.1.3" },
130
+ clientInfo: { name: "oremus-web-search", version: "0.1.5" },
116
131
  },
117
132
  };
118
133
  const initResp = await this.post(initReq);
@@ -136,22 +151,36 @@ export class StreamableHttpMcpClient {
136
151
  return this.initializing;
137
152
  }
138
153
  async callTool(name, args) {
139
- await this.ensureInitialized();
140
- const id = this.nextId++;
141
- const req = {
142
- jsonrpc: "2.0",
143
- id,
144
- method: "tools/call",
145
- params: {
146
- name,
147
- arguments: args,
148
- },
154
+ const attemptCall = async () => {
155
+ await this.ensureInitialized();
156
+ const id = this.nextId++;
157
+ const req = {
158
+ jsonrpc: "2.0",
159
+ id,
160
+ method: "tools/call",
161
+ params: {
162
+ name,
163
+ arguments: args,
164
+ },
165
+ };
166
+ const resp = await this.post(req);
167
+ if (!resp)
168
+ throw new Error("tools/call returned no response");
169
+ if (resp.error)
170
+ throw new Error(`tools/call error: ${resp.error.message}`);
171
+ return resp.result;
149
172
  };
150
- const resp = await this.post(req);
151
- if (!resp)
152
- throw new Error("tools/call returned no response");
153
- if (resp.error)
154
- throw new Error(`tools/call error: ${resp.error.message}`);
155
- return resp.result;
173
+ try {
174
+ return await attemptCall();
175
+ }
176
+ catch (err) {
177
+ if (err instanceof McpHttpError &&
178
+ err.status === 400 &&
179
+ /no valid session id/i.test(err.body)) {
180
+ this.resetSession();
181
+ return await attemptCall();
182
+ }
183
+ throw err;
184
+ }
156
185
  }
157
186
  }
package/dist/types.js CHANGED
@@ -67,6 +67,42 @@ export const FETCH_AND_EXTRACT_TOOL = {
67
67
  description: "Whether the Trafilatura service should route via its proxy pool.",
68
68
  default: true,
69
69
  },
70
+ max_chars: {
71
+ type: "number",
72
+ description: "Maximum number of characters to return for extracted text fields.",
73
+ },
74
+ start_char: {
75
+ type: "number",
76
+ description: "Starting character offset for extracted text fields (used with max_chars).",
77
+ },
78
+ max_fetch_bytes: {
79
+ type: "number",
80
+ description: "Maximum number of bytes to download before extraction (may truncate HTML).",
81
+ },
82
+ fetch_timeout_seconds: {
83
+ type: "number",
84
+ description: "HTTP fetch timeout in seconds (per attempt).",
85
+ },
86
+ user_agent: {
87
+ type: "string",
88
+ description: "Override User-Agent header for the upstream fetch.",
89
+ },
90
+ accept_language: {
91
+ type: "string",
92
+ description: "Optional Accept-Language header for the upstream fetch.",
93
+ },
94
+ plain_text_fallback: {
95
+ type: "boolean",
96
+ description: "If the upstream is text/plain and extraction is empty, return the raw body as text.",
97
+ },
98
+ rewrite_github_blob_to_raw: {
99
+ type: "boolean",
100
+ description: "If the URL is a GitHub blob page, rewrite to raw.githubusercontent.com before fetching.",
101
+ },
102
+ max_total_seconds: {
103
+ type: "number",
104
+ description: "Maximum total time budget in seconds for all attempts (best-effort).",
105
+ },
70
106
  },
71
107
  required: ["url"],
72
108
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "oremus-web-search",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "MCP server that combines SearXNG web search with Trafilatura extraction",
5
5
  "license": "MIT",
6
6
  "type": "module",