oremus-web-search 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/dist/index.js +10 -1
- package/dist/streamableHttpMcpClient.js +1 -1
- package/dist/types.js +36 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -90,6 +90,13 @@ Input:
|
|
|
90
90
|
- `include_comments` (boolean, optional)
|
|
91
91
|
- `include_tables` (boolean, optional)
|
|
92
92
|
- `use_proxy` (boolean, optional)
|
|
93
|
+
- `max_chars` (number, optional): cap returned text fields
|
|
94
|
+
- `start_char` (number, optional): paging offset used with `max_chars`
|
|
95
|
+
- `plain_text_fallback` (boolean, optional): if text/plain + extraction is empty, return raw body as text
|
|
96
|
+
- `rewrite_github_blob_to_raw` (boolean, optional): rewrite GitHub `.../blob/...` URLs to `raw.githubusercontent.com/...`
|
|
97
|
+
- `fetch_timeout_seconds` (number, optional): per-attempt HTTP timeout
|
|
98
|
+
- `max_fetch_bytes` (number, optional): cap download size (may truncate HTML)
|
|
99
|
+
- `max_total_seconds` (number, optional): best-effort overall time budget
|
|
93
100
|
|
|
94
101
|
Output:
|
|
95
102
|
|
package/dist/index.js
CHANGED
|
@@ -7,7 +7,7 @@ import { StreamableHttpMcpClient } from "./streamableHttpMcpClient.js";
|
|
|
7
7
|
import { webSearch } from "./searxng.js";
|
|
8
8
|
import { FETCH_AND_EXTRACT_TOOL, ROTATE_VPN_TOOL, WEB_SEARCH_TOOL, isFetchAndExtractArgs, isRotateVpnArgs, isWebSearchArgs, } from "./types.js";
|
|
9
9
|
import { createConfigResource, createHelpResource } from "./resources.js";
|
|
10
|
-
const packageVersion = "0.1.3";
|
|
10
|
+
const packageVersion = "0.1.4";
|
|
11
11
|
async function main() {
|
|
12
12
|
const config = loadConfig(process.env);
|
|
13
13
|
const trafilaturaClient = new StreamableHttpMcpClient({
|
|
@@ -40,6 +40,15 @@ async function main() {
|
|
|
40
40
|
include_comments: args.include_comments ?? false,
|
|
41
41
|
include_tables: args.include_tables ?? false,
|
|
42
42
|
use_proxy: args.use_proxy ?? true,
|
|
43
|
+
max_chars: args.max_chars,
|
|
44
|
+
start_char: args.start_char,
|
|
45
|
+
max_fetch_bytes: args.max_fetch_bytes,
|
|
46
|
+
fetch_timeout_seconds: args.fetch_timeout_seconds,
|
|
47
|
+
user_agent: args.user_agent,
|
|
48
|
+
accept_language: args.accept_language,
|
|
49
|
+
plain_text_fallback: args.plain_text_fallback,
|
|
50
|
+
rewrite_github_blob_to_raw: args.rewrite_github_blob_to_raw,
|
|
51
|
+
max_total_seconds: args.max_total_seconds,
|
|
43
52
|
});
|
|
44
53
|
return { content: result.content };
|
|
45
54
|
}
|
package/dist/streamableHttpMcpClient.js
CHANGED
|
@@ -112,7 +112,7 @@ export class StreamableHttpMcpClient {
|
|
|
112
112
|
params: {
|
|
113
113
|
protocolVersion: "2025-11-25",
|
|
114
114
|
capabilities: {},
|
|
115
|
-
clientInfo: { name: "oremus-web-search", version: "0.1.3" },
|
|
115
|
+
clientInfo: { name: "oremus-web-search", version: "0.1.4" },
|
|
116
116
|
},
|
|
117
117
|
};
|
|
118
118
|
const initResp = await this.post(initReq);
|
package/dist/types.js
CHANGED
|
@@ -67,6 +67,42 @@ export const FETCH_AND_EXTRACT_TOOL = {
|
|
|
67
67
|
description: "Whether the Trafilatura service should route via its proxy pool.",
|
|
68
68
|
default: true,
|
|
69
69
|
},
|
|
70
|
+
max_chars: {
|
|
71
|
+
type: "number",
|
|
72
|
+
description: "Maximum number of characters to return for extracted text fields.",
|
|
73
|
+
},
|
|
74
|
+
start_char: {
|
|
75
|
+
type: "number",
|
|
76
|
+
description: "Starting character offset for extracted text fields (used with max_chars).",
|
|
77
|
+
},
|
|
78
|
+
max_fetch_bytes: {
|
|
79
|
+
type: "number",
|
|
80
|
+
description: "Maximum number of bytes to download before extraction (may truncate HTML).",
|
|
81
|
+
},
|
|
82
|
+
fetch_timeout_seconds: {
|
|
83
|
+
type: "number",
|
|
84
|
+
description: "HTTP fetch timeout in seconds (per attempt).",
|
|
85
|
+
},
|
|
86
|
+
user_agent: {
|
|
87
|
+
type: "string",
|
|
88
|
+
description: "Override User-Agent header for the upstream fetch.",
|
|
89
|
+
},
|
|
90
|
+
accept_language: {
|
|
91
|
+
type: "string",
|
|
92
|
+
description: "Optional Accept-Language header for the upstream fetch.",
|
|
93
|
+
},
|
|
94
|
+
plain_text_fallback: {
|
|
95
|
+
type: "boolean",
|
|
96
|
+
description: "If the upstream is text/plain and extraction is empty, return the raw body as text.",
|
|
97
|
+
},
|
|
98
|
+
rewrite_github_blob_to_raw: {
|
|
99
|
+
type: "boolean",
|
|
100
|
+
description: "If the URL is a GitHub blob page, rewrite to raw.githubusercontent.com before fetching.",
|
|
101
|
+
},
|
|
102
|
+
max_total_seconds: {
|
|
103
|
+
type: "number",
|
|
104
|
+
description: "Maximum total time budget in seconds for all attempts (best-effort).",
|
|
105
|
+
},
|
|
70
106
|
},
|
|
71
107
|
required: ["url"],
|
|
72
108
|
},
|