@endday/search-mcp 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +4724 -0
- package/dist/search-mcp.js +4715 -0
- package/package.json +14 -14
- package/data/blocklist.generated.js +0 -2
- package/envs.js +0 -129
- package/index.js +0 -6
- package/mcp/search-mcp.js +0 -8
- package/src/content/extract.impl.js +0 -228
- package/src/content/extract.js +0 -1
- package/src/content/fetch.impl.js +0 -400
- package/src/content/fetch.js +0 -1
- package/src/core/crypto.js +0 -7
- package/src/core/errors.impl.js +0 -52
- package/src/core/errors.js +0 -1
- package/src/core/html.impl.js +0 -69
- package/src/core/html.js +0 -1
- package/src/mcp/config.js +0 -75
- package/src/mcp/format.js +0 -44
- package/src/mcp/index.js +0 -10
- package/src/mcp/local/content.js +0 -26
- package/src/mcp/local/search.js +0 -233
- package/src/mcp/schemas.js +0 -132
- package/src/mcp/server.js +0 -97
- package/src/mcp/tools/content.js +0 -31
- package/src/mcp/tools/jinaContent.js +0 -38
- package/src/mcp/tools/newsSearch.js +0 -22
- package/src/mcp/tools/webSearch.js +0 -57
- package/src/platform/auth.impl.js +0 -166
- package/src/platform/auth.js +0 -1
- package/src/platform/cache.impl.js +0 -166
- package/src/platform/cache.js +0 -1
- package/src/platform/health.impl.js +0 -133
- package/src/platform/health.js +0 -1
- package/src/platform/http.impl.js +0 -108
- package/src/platform/http.js +0 -1
- package/src/platform/logger.impl.js +0 -51
- package/src/platform/logger.js +0 -1
- package/src/platform/metrics.impl.js +0 -43
- package/src/platform/metrics.js +0 -1
- package/src/platform/nodeHttpClient.js +0 -104
- package/src/platform/rateLimit.impl.js +0 -141
- package/src/platform/rateLimit.js +0 -1
- package/src/platform/requestContext.impl.js +0 -10
- package/src/platform/requestContext.js +0 -1
- package/src/platform/session.impl.js +0 -198
- package/src/platform/session.js +0 -1
- package/src/platform/stateKv.impl.js +0 -18
- package/src/platform/stateKv.js +0 -1
- package/src/platform/tasks.impl.js +0 -17
- package/src/platform/tasks.js +0 -1
- package/src/routes/requestParams.impl.js +0 -12
- package/src/routes/requestParams.js +0 -1
- package/src/search/engineRegistry.impl.js +0 -117
- package/src/search/engineRegistry.js +0 -1
- package/src/search/engineRequest.impl.js +0 -377
- package/src/search/engineRequest.js +0 -1
- package/src/search/engineUtils.impl.js +0 -227
- package/src/search/engineUtils.js +0 -1
- package/src/search/engines/baidu.impl.js +0 -145
- package/src/search/engines/baidu.js +0 -2
- package/src/search/engines/bing.impl.js +0 -509
- package/src/search/engines/bing.js +0 -2
- package/src/search/engines/brave.impl.js +0 -223
- package/src/search/engines/brave.js +0 -2
- package/src/search/engines/duckduckgo.impl.js +0 -164
- package/src/search/engines/duckduckgo.js +0 -2
- package/src/search/engines/mojeek.impl.js +0 -115
- package/src/search/engines/mojeek.js +0 -2
- package/src/search/engines/qwant.impl.js +0 -188
- package/src/search/engines/qwant.js +0 -2
- package/src/search/engines/startpage.impl.js +0 -237
- package/src/search/engines/startpage.js +0 -2
- package/src/search/engines/toutiao.impl.js +0 -265
- package/src/search/engines/toutiao.js +0 -2
- package/src/search/engines/yahoo.impl.js +0 -379
- package/src/search/engines/yahoo.js +0 -2
- package/src/search/gateway.impl.js +0 -423
- package/src/search/gateway.js +0 -1
- package/src/search/ranking.impl.js +0 -381
- package/src/search/ranking.js +0 -1
- package/src/search/requestPolicy.impl.js +0 -137
- package/src/search/requestPolicy.js +0 -1
- package/src/search/upstreamSession.impl.js +0 -148
- package/src/search/upstreamSession.js +0 -1
- /package/{index.d.ts → dist/index.d.ts} +0 -0
|
@@ -1,400 +0,0 @@
|
|
|
1
|
-
import { ApiError } from "../core/errors.js";
|
|
2
|
-
import { extractPageContent } from "./extract.js";
|
|
3
|
-
import { getRandomBrowserProfile } from "../search/engineUtils.js";
|
|
4
|
-
import { normalizePositiveInteger } from "../routes/requestParams.js";
|
|
5
|
-
import { fetchWithOptionalCurlImpersonate } from "../platform/nodeHttpClient.js";
|
|
6
|
-
|
|
7
|
-
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
|
|
8
|
-
const MAX_SAFE_REDIRECTS = 5;
|
|
9
|
-
|
|
10
|
-
/**
 * Parses a dotted-decimal IPv4 literal.
 *
 * @param {*} value - Candidate address; falsy values are treated as "".
 * @returns {{octets: number[], valid: boolean} | null} `null` when the text is
 *   not four dot-separated groups of 1-3 digits; otherwise the numeric octets
 *   and a flag telling whether every octet lies within 0-255.
 */
function parseIpv4Address(value) {
  const text = String(value || "");
  const groups = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(text);

  if (groups === null) {
    return null;
  }

  // Drop the full-match entry and keep only the four captured groups.
  const [, ...digits] = groups;
  const octets = digits.map((digit) => Number.parseInt(digit, 10));
  const valid = !octets.some((octet) => octet < 0 || octet > 255);

  return { octets, valid };
}
|
|
25
|
-
|
|
26
|
-
/**
 * Tells whether an IPv4 address belongs to a range that must never be fetched.
 *
 * Blocked ranges: 0.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 127.0.0.0/8,
 * 169.254.0.0/16, 172.16.0.0/12, 192.0.0.0/24, 192.0.2.0/24, 192.168.0.0/16,
 * 198.18.0.0/15, 198.51.100.0/24, 203.0.113.0/24 and everything from
 * 224.0.0.0 upwards.
 *
 * Only 192.0.0.0/24 and 192.0.2.0/24 are special-purpose inside 192.0.0.0/16;
 * the rest of that /16 is ordinary public address space, so it is no longer
 * rejected wholesale.
 *
 * @param {number[]} octets - The four octets of the address, most significant first.
 * @returns {boolean} `true` when the address is in a blocked range.
 */
function isBlockedIpv4Address(octets) {
  const [first, second, third] = octets;

  return (
    first === 0 ||
    first === 10 ||
    first === 127 ||
    (first === 100 && second >= 64 && second <= 127) ||
    (first === 169 && second === 254) ||
    (first === 172 && second >= 16 && second <= 31) ||
    (first === 192 && second === 0 && (third === 0 || third === 2)) ||
    (first === 192 && second === 168) ||
    (first === 198 && (second === 18 || second === 19)) ||
    (first === 198 && second === 51 && third === 100) ||
    (first === 203 && second === 0 && third === 113) ||
    first >= 224
  );
}
|
|
42
|
-
|
|
43
|
-
/**
 * Expands an IPv6 literal into its eight 16-bit groups.
 *
 * Surrounding square brackets and a `%zone` suffix are ignored, a trailing
 * dotted IPv4 part is folded into the last two groups, and a single `::`
 * is expanded to the number of zero groups needed to reach eight.
 *
 * @param {*} value - Candidate address; falsy values are treated as "".
 * @returns {number[] | null} Eight numbers, or `null` when the text contains
 *   no colon or is not a well-formed IPv6 literal.
 */
function parseIpv6Hextets(value) {
  const [address] = String(value || "")
    .toLowerCase()
    .replace(/^\[|\]$/g, "")
    .split("%");

  if (!address.includes(":")) {
    return null;
  }

  let expanded = address;
  const dottedTail = /(\d{1,3}(?:\.\d{1,3}){3})$/.exec(address);

  if (dottedTail !== null) {
    const [, dotted] = dottedTail;
    const ipv4 = parseIpv4Address(dotted);
    if (!ipv4?.valid) {
      return null;
    }

    // Rewrite "a.b.c.d" as the two hexadecimal groups it occupies.
    const [a, b, c, d] = ipv4.octets;
    const high = ((a << 8) | b).toString(16);
    const low = ((c << 8) | d).toString(16);
    expanded = `${address.slice(0, address.length - dotted.length)}${high}:${low}`;
  }

  const halves = expanded.split("::");
  if (halves.length > 2) {
    return null;
  }

  const compressed = halves.length === 2;
  const head = halves[0] ? halves[0].split(":") : [];
  const tail = compressed && halves[1] ? halves[1].split(":") : [];
  const groups = [...head, ...tail];

  if (!groups.every((group) => /^[0-9a-f]{1,4}$/.test(group))) {
    return null;
  }

  if (compressed) {
    // "::" has to stand for at least one zero group.
    if (8 - groups.length < 1) {
      return null;
    }
  } else if (groups.length !== 8) {
    return null;
  }

  const zeroCount = compressed ? 8 - groups.length : 0;
  const toNumber = (group) => Number.parseInt(group, 16);

  return [...head.map(toNumber), ...Array(zeroCount).fill(0), ...tail.map(toNumber)];
}
|
|
100
|
-
|
|
101
|
-
/**
 * Extracts the IPv4 address carried in the last 32 bits of an IPv6 address
 * when the first six groups match one of three prefixes: `::ffff:0:0/96`,
 * all-zero (`::/96`), or `64:ff9b::/96`.
 *
 * @param {number[] | null | undefined} hextets - Eight 16-bit groups.
 * @returns {number[] | null} The four IPv4 octets, or `null` when the input is
 *   not eight groups long or matches none of the prefixes.
 */
function getEmbeddedIpv4FromIpv6(hextets) {
  if (!hextets || hextets.length !== 8) {
    return null;
  }

  const allZero = (groups) => groups.every((group) => group === 0);
  const prefix = hextets.slice(0, 6);

  const mapped = allZero(prefix.slice(0, 5)) && prefix[5] === 0xffff;
  const compatible = allZero(prefix);
  const nat64 = prefix[0] === 0x0064 && prefix[1] === 0xff9b && allZero(prefix.slice(2));

  if (!(mapped || compatible || nat64)) {
    return null;
  }

  const [high, low] = hextets.slice(6);
  return [high >> 8, high & 255, low >> 8, low & 255];
}
|
|
123
|
-
|
|
124
|
-
/**
 * Tells whether an IPv6 address must never be fetched.
 *
 * Blocked: addresses whose first seven groups are zero (this covers `::` and
 * `::1`), fc00::/7, fe80::/10, ff00::/8, and any address that embeds an IPv4
 * address which is itself blocked.
 *
 * @param {number[] | null | undefined} hextets - Eight 16-bit groups.
 * @returns {boolean} `true` when blocked; `false` for anything that is not an
 *   eight-group array.
 */
function isBlockedIpv6Address(hextets) {
  if (!hextets || hextets.length !== 8) {
    return false;
  }

  const [first] = hextets;
  const embeddedIpv4 = getEmbeddedIpv4FromIpv6(hextets);
  const isZero = (group) => group === 0;

  if (hextets.every(isZero)) {
    return true;
  }
  if (hextets.slice(0, 7).every(isZero)) {
    return true;
  }
  if ((first & 0xfe00) === 0xfc00) {
    return true;
  }
  if ((first & 0xffc0) === 0xfe80) {
    return true;
  }
  if ((first & 0xff00) === 0xff00) {
    return true;
  }

  return embeddedIpv4 ? isBlockedIpv4Address(embeddedIpv4) : false;
}
|
|
141
|
-
|
|
142
|
-
/**
 * Decides whether a URL hostname points at the local machine or a private
 * network and must therefore be rejected.
 *
 * The hostname is lower-cased, stripped of IPv6 brackets and of any trailing
 * root dot(s). Without the last step a fully qualified spelling such as
 * "localhost." or "printer.local." would slip past the name checks.
 *
 * @param {string} hostname - Host component of a parsed URL.
 * @returns {boolean} `true` for `localhost`, `*.localhost`, `*.local`,
 *   malformed or blocked IPv4 literals, and blocked IPv6 literals.
 */
function isBlockedTargetHostname(hostname) {
  const normalized = hostname
    .toLowerCase()
    .replace(/^\[|\]$/g, "")
    .replace(/\.+$/, "");

  if (
    normalized === "localhost" ||
    normalized.endsWith(".localhost") ||
    normalized.endsWith(".local")
  ) {
    return true;
  }

  const parsedIpv4 = parseIpv4Address(normalized);
  if (parsedIpv4) {
    // Four numeric groups with an out-of-range octet are rejected as well.
    return !parsedIpv4.valid || isBlockedIpv4Address(parsedIpv4.octets);
  }

  return isBlockedIpv6Address(parseIpv6Hextets(normalized));
}
|
|
160
|
-
|
|
161
|
-
/**
 * Builds the request headers of a top-level browser navigation for the given
 * browser profile.
 *
 * Client-hint headers are added only when the profile carries `sec-ch-ua`.
 * Each companion hint is copied only when it is actually present, so the
 * result never contains `undefined` header values.
 *
 * @param {{ua: string, headers?: Object<string, string>}} profile - Browser
 *   profile supplying the user agent and optional lower-cased client hints.
 * @returns {Object<string, string>} Header name to value map.
 */
function buildNavigationHeaders(profile) {
  const headers = {
    Accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "User-Agent": profile.ua,
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
  };

  const hints = profile.headers;
  if (hints?.["sec-ch-ua"]) {
    headers["Sec-Ch-Ua"] = hints["sec-ch-ua"];

    const companions = [
      ["Sec-Ch-Ua-Platform", "sec-ch-ua-platform"],
      ["Sec-Ch-Ua-Mobile", "sec-ch-ua-mobile"],
    ];
    for (const [headerName, hintName] of companions) {
      if (hints[hintName] != null) {
        headers[headerName] = hints[hintName];
      }
    }
  }

  return headers;
}
|
|
181
|
-
|
|
182
|
-
/**
 * Validates a user-supplied URL and returns its normalized string form.
 *
 * @param {*} value - Raw URL; falsy values are treated as "" and the text is trimmed.
 * @returns {string} The serialized form produced by the `URL` parser.
 * @throws {ApiError} Status 400 with code `MISSING_URL` when empty, or
 *   `INVALID_URL` when the text cannot be parsed, uses a scheme other than
 *   http/https, or targets a blocked hostname.
 */
export function normalizeTargetUrl(value) {
  // Every failure here is a 400 validation error; only code and message vary.
  const validationError = (code, message) =>
    new ApiError({ status: 400, code, category: "validation", message });

  const candidate = String(value || "").trim();
  if (!candidate) {
    throw validationError("MISSING_URL", "Please provide 'url' parameter");
  }

  let target;
  try {
    target = new URL(candidate);
  } catch (_) {
    throw validationError("INVALID_URL", "The 'url' parameter must be a valid URL");
  }

  const { protocol, hostname } = target;
  if (protocol !== "http:" && protocol !== "https:") {
    throw validationError("INVALID_URL", "Only http and https URLs are supported");
  }

  if (isBlockedTargetHostname(hostname)) {
    throw validationError(
      "INVALID_URL",
      "Localhost and private network URLs are not supported"
    );
  }

  return target.toString();
}
|
|
226
|
-
|
|
227
|
-
/**
 * Resolves and validates the redirect target of a response.
 *
 * @param {Response} response - Response fetched with manual redirect handling.
 * @param {string} currentUrl - URL that produced the response; base for
 *   relative `Location` values.
 * @returns {string | null} The validated absolute redirect URL, or `null` when
 *   the status is not a redirect status or no `Location` header is present.
 * @throws {ApiError} When the resolved target fails `normalizeTargetUrl`.
 */
function getSafeRedirectUrl(response, currentUrl) {
  const location = REDIRECT_STATUSES.has(response.status)
    ? response.headers.get("location")
    : null;

  if (!location) {
    return null;
  }

  const resolved = new URL(location, currentUrl);
  return normalizeTargetUrl(resolved.toString());
}
|
|
239
|
-
|
|
240
|
-
/**
 * Cancels the body stream of a response, ignoring any failure.
 *
 * @param {Response} response - Response whose body is no longer needed.
 * @returns {Promise<void>} Resolves once cancellation was attempted; never rejects.
 */
async function cancelResponseBody(response) {
  try {
    const stream = response.body;
    if (stream != null) {
      await stream.cancel();
    }
  } catch (_) {
    // Cancellation is best effort; a failure here must not affect the caller.
  }
}
|
|
247
|
-
|
|
248
|
-
/**
 * Fetches a URL while following redirects manually, so that every hop is
 * validated by `getSafeRedirectUrl` before it is requested.
 *
 * @param {string} targetUrl - URL to fetch; validated with `normalizeTargetUrl`.
 * @param {object} [init={}] - Fetch options; `redirect` is always forced to "manual".
 * @returns {Promise<Response>} The first response that does not yield a redirect target.
 * @throws {ApiError} `TOO_MANY_REDIRECTS` (508) when more than
 *   `MAX_SAFE_REDIRECTS` redirects are encountered, or a validation error for
 *   an unsafe hop.
 */
async function fetchWithSafeRedirects(targetUrl, init = {}) {
  let nextUrl = normalizeTargetUrl(targetUrl);
  // One request for the initial URL plus one per permitted redirect.
  let requestsLeft = MAX_SAFE_REDIRECTS + 1;

  while (requestsLeft > 0) {
    requestsLeft -= 1;

    const response = await fetchWithOptionalCurlImpersonate(nextUrl, {
      ...init,
      redirect: "manual",
    });
    const redirectUrl = getSafeRedirectUrl(response, nextUrl);

    if (!redirectUrl) {
      return response;
    }

    // The redirect response's body is not needed; release it before the next hop.
    await cancelResponseBody(response);
    nextUrl = redirectUrl;
  }

  throw new ApiError({
    status: 508,
    code: "TOO_MANY_REDIRECTS",
    category: "upstream",
    message: "Target URL redirected too many times",
  });
}
|
|
273
|
-
|
|
274
|
-
/**
 * Walks the redirect chain of a URL with a one-byte range request and
 * discards the final body. Its only effect is to throw when any hop is unsafe
 * or the chain is too long.
 *
 * @param {string} targetUrl - URL whose redirect chain is checked.
 * @returns {Promise<void>}
 * @throws {ApiError} Propagated from `fetchWithSafeRedirects`.
 */
export async function verifySafeRedirectChain(targetUrl) {
  const { ua } = getRandomBrowserProfile("default");
  const probeHeaders = {
    Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    Range: "bytes=0-0",
    "User-Agent": ua,
  };

  const finalResponse = await fetchWithSafeRedirects(targetUrl, {
    method: "GET",
    headers: probeHeaders,
  });

  await cancelResponseBody(finalResponse);
}
|
|
287
|
-
|
|
288
|
-
/**
 * Reads a response body as text while enforcing a byte limit.
 *
 * The declared `Content-Length` is checked first; the body is then streamed
 * and decoding stops as soon as the received byte count exceeds the limit.
 * Whenever the limit is hit, the remaining body is cancelled (best effort) so
 * the underlying stream is released rather than left pending, and the size
 * error is always the one reported to the caller.
 *
 * @param {Response} response - Upstream response to read.
 * @param {number} maxBytes - Maximum number of body bytes to accept.
 * @returns {Promise<string>} The decoded body text.
 * @throws {ApiError} `CONTENT_TOO_LARGE` (413) when the body exceeds `maxBytes`.
 */
async function readResponseTextWithLimit(response, maxBytes) {
  const tooLarge = () =>
    new ApiError({
      status: 413,
      code: "CONTENT_TOO_LARGE",
      category: "upstream",
      message: `Upstream response is larger than ${maxBytes} bytes`,
    });

  const contentLength = Number.parseInt(
    response.headers.get("content-length") || "0",
    10
  );

  if (contentLength > maxBytes) {
    // Release the unread body before bailing out.
    try {
      await response.body?.cancel();
    } catch (_) {
      // best effort
    }
    throw tooLarge();
  }

  const encoder = new TextEncoder();

  if (!response.body) {
    // No stream available: fall back to reading the whole text and measuring it.
    const text = await response.text();
    if (encoder.encode(text).length > maxBytes) {
      throw tooLarge();
    }
    return text;
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  const chunks = [];
  let receivedBytes = 0;

  while (true) {
    const { done, value } = await reader.read();
    if (done) {
      break;
    }

    // Chunks that are not byte arrays are encoded so they can be counted and decoded.
    const chunk = value instanceof Uint8Array ? value : encoder.encode(value);
    receivedBytes += chunk.byteLength;

    if (receivedBytes > maxBytes) {
      try {
        await reader.cancel();
      } catch (_) {
        // best effort; the size error below is what the caller needs to see
      }
      throw tooLarge();
    }

    // stream: true keeps multi-byte sequences split across chunks intact.
    chunks.push(decoder.decode(chunk, { stream: true }));
  }

  chunks.push(decoder.decode());
  return chunks.join("");
}
|
|
346
|
-
|
|
347
|
-
/**
 * Resolves the byte limit for content fetching from request parameters.
 *
 * @param {{max_bytes?: *, maxBytes?: *}} params - Request parameters;
 *   `max_bytes` wins over `maxBytes` when it is truthy.
 * @returns {number} Result of `normalizePositiveInteger` called with a
 *   fallback of 1,500,000 and bounds of 50,000 and 5,000,000.
 */
export function normalizeContentMaxBytes(params) {
  const requested = params.max_bytes || params.maxBytes;
  const bounds = { min: 50_000, max: 5_000_000 };

  return normalizePositiveInteger(requested, 1_500_000, bounds);
}
|
|
353
|
-
|
|
354
|
-
/**
 * Fetches a page with browser-like navigation headers and extracts its content.
 *
 * On every early exit (non-OK status, unsupported content type) the upstream
 * body is cancelled before the error is thrown, so an unread response is not
 * left holding its stream open.
 *
 * @param {string} targetUrl - URL to fetch; validated with `normalizeTargetUrl`.
 * @param {number} maxBytes - Maximum number of body bytes to read.
 * @returns {Promise<object>} The `extractPageContent` payload extended with
 *   `requested_url`, `content_type` (or `null`) and `max_bytes`.
 * @throws {ApiError} `UPSTREAM_HTTP_ERROR` for non-OK responses (5xx is
 *   reported as 502), `UNSUPPORTED_CONTENT_TYPE` (415) for a non-HTML/XML
 *   type, plus anything thrown while fetching or reading.
 */
export async function fetchReadableContent(targetUrl, maxBytes) {
  const normalizedTargetUrl = normalizeTargetUrl(targetUrl);
  const profile = getRandomBrowserProfile("default");
  const contentHeaders = buildNavigationHeaders(profile);
  const upstreamResponse = await fetchWithSafeRedirects(normalizedTargetUrl, {
    headers: contentHeaders,
  });
  const contentType = upstreamResponse.headers.get("content-type") || "";

  if (!upstreamResponse.ok) {
    await cancelResponseBody(upstreamResponse);
    throw new ApiError({
      status: upstreamResponse.status >= 500 ? 502 : upstreamResponse.status,
      code: "UPSTREAM_HTTP_ERROR",
      category: "upstream",
      message: `Upstream returned HTTP ${upstreamResponse.status}`,
      details: {
        upstream_status: upstreamResponse.status,
      },
    });
  }

  // A missing content type is accepted; only an explicit non-HTML/XML type is rejected.
  if (
    contentType &&
    !/text\/html|application\/xhtml\+xml|application\/xml|text\/xml/i.test(contentType)
  ) {
    await cancelResponseBody(upstreamResponse);
    throw new ApiError({
      status: 415,
      code: "UNSUPPORTED_CONTENT_TYPE",
      category: "upstream",
      message: `Unsupported content type: ${contentType}`,
    });
  }

  const html = await readResponseTextWithLimit(upstreamResponse, maxBytes);

  const payload = await extractPageContent(
    html,
    upstreamResponse.url || normalizedTargetUrl
  );

  return {
    ...payload,
    requested_url: normalizedTargetUrl,
    content_type: contentType || null,
    max_bytes: maxBytes,
  };
}
|
package/src/content/fetch.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export * from "./fetch.impl.js";
|
package/src/core/crypto.js
DELETED
package/src/core/errors.impl.js
DELETED
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
/**
 * Error carrying the HTTP status, machine-readable code, category and optional
 * details of a failure.
 */
export class ApiError extends Error {
  /**
   * @param {object} [options]
   * @param {number} [options.status=500] - HTTP status.
   * @param {string} [options.code="INTERNAL_ERROR"] - Machine-readable error code.
   * @param {string} [options.message="Unexpected error"] - Human-readable message.
   * @param {string} [options.category="internal"] - Error category.
   * @param {*} [options.details] - Optional extra data.
   */
  constructor(options = {}) {
    const {
      status = 500,
      code = "INTERNAL_ERROR",
      message = "Unexpected error",
      category = "internal",
      details,
    } = options;

    super(message);
    Object.assign(this, { name: "ApiError", status, code, category, details });
  }
}
|
|
17
|
-
|
|
18
|
-
/**
 * Converts any thrown value into an `ApiError`.
 *
 * @param {*} error - The caught value.
 * @param {object} [context]
 * @param {string} [context.engine] - Engine name; when truthy it is added to
 *   the details and to the timeout message.
 * @returns {ApiError} The input itself when it already is an `ApiError`; a 504
 *   `UPSTREAM_TIMEOUT` for errors named "AbortError"; otherwise a 502
 *   `UPSTREAM_ERROR` carrying the original message.
 */
export function normalizeError(error, { engine } = {}) {
  if (error instanceof ApiError) {
    return error;
  }

  const details = engine ? { engine } : undefined;
  const aborted = error?.name === "AbortError";

  if (aborted) {
    const message = engine
      ? `${engine} request timed out`
      : "Upstream request timed out";
    return new ApiError({
      status: 504,
      code: "UPSTREAM_TIMEOUT",
      category: "upstream",
      message,
      details,
    });
  }

  return new ApiError({
    status: 502,
    code: "UPSTREAM_ERROR",
    category: "upstream",
    message: error?.message || "Upstream request failed",
    details,
  });
}
|
|
43
|
-
|
|
44
|
-
/**
 * Builds the serializable error payload for any thrown value.
 *
 * @param {*} error - The caught value; normalized with `normalizeError`.
 * @returns {{error: string, code: string, message: string, details?: *}}
 *   `details` is present only when the normalized error has truthy details.
 */
export function toErrorPayload(error) {
  const { category, code, message, details } = normalizeError(error);
  const payload = { error: category, code, message };

  if (details) {
    payload.details = details;
  }

  return payload;
}
|
package/src/core/errors.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export * from "./errors.impl.js";
|
package/src/core/html.impl.js
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import { parse } from "node-html-parser";
|
|
2
|
-
|
|
3
|
-
/**
 * Parses an HTML string with the project's fixed parser options: comments are
 * dropped, tag-name case is preserved, and `script`/`pre` are configured as
 * block-text elements while `noscript`/`style` are not.
 *
 * @param {string} html - Markup to parse.
 * @returns {*} The value returned by `parse`.
 */
export function parseHtml(html) {
  const blockTextElements = {
    script: true,
    noscript: false,
    style: false,
    pre: true,
  };
  const options = {
    comment: false,
    lowerCaseTagName: false,
    blockTextElements,
  };

  return parse(html, options);
}
|
|
15
|
-
|
|
16
|
-
/**
 * Reduces an HTML fragment to plain text with normalized whitespace.
 *
 * @param {*} input - Fragment to clean; falsy values yield "".
 * @returns {string} The text of the fragment with non-breaking spaces turned
 *   into spaces, whitespace runs collapsed to one space, and the ends trimmed.
 */
export function cleanText(input) {
  if (!input) {
    return "";
  }

  // Wrapping in a <span> gives the parser a single element to read `.text` from.
  const { text } = parse(`<span>${String(input)}</span>`);
  const withoutNbsp = text.replace(/\u00a0/g, " ");
  const collapsed = withoutNbsp.replace(/\s+/g, " ");

  return collapsed.trim();
}
|
|
27
|
-
|
|
28
|
-
/**
 * Extracts the bracket-balanced segment that starts at `startIndex`.
 *
 * The character at `startIndex` is the opening bracket; its partner is "}"
 * for "{" and "]" for anything else. Brackets inside double-quoted strings
 * are ignored, and a backslash makes the following character be skipped.
 *
 * @param {string} source - Text containing the segment.
 * @param {number} startIndex - Index of the opening bracket.
 * @returns {string} The substring from the opening bracket through its matching close.
 * @throws {Error} When the end of `source` is reached before the segment closes.
 */
export function extractBalancedSegment(source, startIndex) {
  const openChar = source[startIndex];
  const closeChar = openChar === "{" ? "}" : "]";

  let nesting = 0;
  let insideString = false;
  let skipNext = false;
  let cursor = startIndex;

  while (cursor < source.length) {
    const current = source[cursor];
    cursor += 1; // from here on, cursor points one past `current`

    if (skipNext) {
      skipNext = false;
    } else if (current === "\\") {
      skipNext = true;
    } else if (current === '"') {
      insideString = !insideString;
    } else if (!insideString) {
      if (current === openChar) {
        nesting += 1;
      } else if (current === closeChar) {
        nesting -= 1;
        if (nesting === 0) {
          return source.slice(startIndex, cursor);
        }
      }
    }
  }

  throw new Error("Unable to extract balanced JSON segment");
}
|
package/src/core/html.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export * from "./html.impl.js";
|
package/src/mcp/config.js
DELETED
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
import { env, setEnv } from "../../envs.js";
|
|
2
|
-
import { createHash } from "node:crypto";
|
|
3
|
-
|
|
4
|
-
/**
 * Derives a default local client id from the `SEARCH_MCP_SESSION_ID`
 * environment variable (or "" when unset), the process id and the current
 * working directory.
 *
 * @returns {string} "mcp-local:" followed by the first 12 hex characters of
 *   the SHA-256 digest of those three values joined with ":".
 */
function buildDefaultLocalClientId() {
  const sessionId = process.env.SEARCH_MCP_SESSION_ID || "";
  const seed = `${sessionId}:${process.pid}:${process.cwd()}`;

  const hash = createHash("sha256");
  hash.update(seed);
  const shortDigest = hash.digest("hex").slice(0, 12);

  return `mcp-local:${shortDigest}`;
}
|
|
14
|
-
|
|
15
|
-
/**
 * Resolves the server mode. The requested value is ignored: the function
 * returns "local" for every input.
 *
 * @param {*} value - Requested mode (unused).
 * @returns {string} Always "local".
 */
function normalizeMode(value) {
  const resolvedMode = "local";
  return resolvedMode;
}
|
|
18
|
-
|
|
19
|
-
/**
 * Copies the recognized configuration variables that are set in `process.env`
 * into the shared env via `setEnv`.
 *
 * `SUPPORTED_ENGINES` is always included: when the process does not provide a
 * truthy value, the current `env.SUPPORTED_ENGINES` list joined with "," is used.
 */
function applyLocalEnvFromProcess() {
  const forwardedKeys = [
    "SUPPORTED_ENGINES",
    "DEFAULT_ENGINES",
    "DEFAULT_ENGINES_ZH",
    "DEFAULT_ENGINES_NON_ZH",
    "DEFAULT_LANGUAGE",
    "DEFAULT_TIMEOUT",
    "FALLBACK_MIN_RESULTS",
    "FALLBACK_MIN_CONTRIBUTING_ENGINES",
    "SEARCH_PRIMARY_TIERS",
    "SEARCH_SECONDARY_TIERS",
    "SEARCH_EXPERIMENTAL_TIERS",
    "EDGE_CACHE_TTL_SECONDS",
    "CACHE_TTL_SECONDS",
    "STALE_CACHE_TTL_SECONDS",
    "UPSTREAM_RETRY_ATTEMPTS",
    "UPSTREAM_RETRY_DELAY_MS",
    "UPSTREAM_PRIMARY_RETRY_ATTEMPTS",
    "UPSTREAM_SECONDARY_RETRY_ATTEMPTS",
    "UPSTREAM_EXPERIMENTAL_RETRY_ATTEMPTS",
    "UPSTREAM_MIN_REQUEST_INTERVAL_MS",
    "UPSTREAM_PRIMARY_MIN_REQUEST_INTERVAL_MS",
    "UPSTREAM_SECONDARY_MIN_REQUEST_INTERVAL_MS",
    "UPSTREAM_EXPERIMENTAL_MIN_REQUEST_INTERVAL_MS",
    "HEALTH_FAILURE_THRESHOLD",
    "HEALTH_COOLDOWN_SECONDS",
  ];

  // Only variables that are actually defined are forwarded.
  const overrides = Object.fromEntries(
    forwardedKeys
      .filter((key) => process.env[key] !== undefined)
      .map((key) => [key, process.env[key]])
  );

  overrides.SUPPORTED_ENGINES ||= env.SUPPORTED_ENGINES.join(",");
  setEnv(overrides);
}
|
|
58
|
-
|
|
59
|
-
/**
 * Loads the MCP server configuration from the process environment and, as a
 * side effect, applies the environment overrides to the shared env.
 *
 * @returns {{mode: string, jinaApiKey: string, jinaBaseUrl: string,
 *   upstreamClient: string, localClientId: string, allEngines: string[]}}
 *   `localClientId` falls back from `SEARCH_MCP_CLIENT_ID` to
 *   `SEARCH_MCP_SESSION_ID` to a generated id; `allEngines` is a copy of
 *   `env.SUPPORTED_ENGINES` taken after the overrides are applied.
 */
export function loadMcpConfig() {
  const vars = process.env;
  const mode = normalizeMode(vars.SEARCH_MCP_MODE);

  // Must run before env.SUPPORTED_ENGINES is read below.
  applyLocalEnvFromProcess();

  const config = { mode };
  config.jinaApiKey = vars.JINA_API_KEY || "";
  config.jinaBaseUrl = vars.JINA_BASE_URL || "https://r.jina.ai/";
  config.upstreamClient = vars.SEARCH_MCP_UPSTREAM_CLIENT || "auto";
  config.localClientId =
    vars.SEARCH_MCP_CLIENT_ID ||
    vars.SEARCH_MCP_SESSION_ID ||
    buildDefaultLocalClientId();
  config.allEngines = [...env.SUPPORTED_ENGINES];

  return config;
}
|
package/src/mcp/format.js
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
// Bounds and default for the `max_chars` option.
export const MAX_CHARS_MIN = 500;
export const MAX_CHARS_MAX = 20000;
export const MAX_CHARS_DEFAULT = 4000;

/**
 * Clamps a requested character limit into the supported range.
 *
 * @param {*} value - Requested limit.
 * @returns {number} `MAX_CHARS_DEFAULT` when `value` is not an integer,
 *   otherwise `value` clamped to [`MAX_CHARS_MIN`, `MAX_CHARS_MAX`].
 */
export function normalizeMaxChars(value) {
  if (!Number.isInteger(value)) {
    return MAX_CHARS_DEFAULT;
  }
  if (value < MAX_CHARS_MIN) {
    return MAX_CHARS_MIN;
  }
  if (value > MAX_CHARS_MAX) {
    return MAX_CHARS_MAX;
  }
  return value;
}
|
|
10
|
-
|
|
11
|
-
/**
 * Truncates text to at most `maxChars` UTF-16 code units and appends a note
 * stating how many were dropped.
 *
 * If the cut would fall between the two halves of a surrogate pair, it is
 * moved back by one unit so the excerpt never ends in a lone surrogate.
 *
 * @param {string} text - Text to truncate.
 * @param {number} maxChars - Maximum number of code units to keep.
 * @returns {string} `text` unchanged when it fits; otherwise the kept prefix
 *   followed by "\n[...N chars truncated]".
 */
export function truncateText(text, maxChars) {
  if (!(text.length > maxChars)) {
    return text;
  }

  let cut = maxChars;
  if (cut > 0) {
    const lastKept = text.charCodeAt(cut - 1);
    const firstDropped = text.charCodeAt(cut);
    const splitsPair =
      lastKept >= 0xd800 &&
      lastKept <= 0xdbff &&
      firstDropped >= 0xdc00 &&
      firstDropped <= 0xdfff;
    if (splitsPair) {
      cut -= 1;
    }
  }

  return `${text.slice(0, cut)}\n[...${text.length - cut} chars truncated]`;
}
|
|
16
|
-
|
|
17
|
-
/**
 * Renders search results as one numbered line per result.
 *
 * Line layout: `N. title[ | source: S][ | published: P][ | description] | url[ type(score)]`.
 * The description is limited to 120 characters. The trailing `type(score)`
 * tag is shown only when the score is positive or the source type is known.
 *
 * @param {{results: Array<object>}} result - Search result set.
 * @returns {string} The lines joined with "\n".
 */
export function formatSearchResponse(result) {
  const renderLine = (item, index) => {
    const segments = [`${index + 1}. ${item.title}`];

    if (item.source_name) {
      segments.push(` | source: ${item.source_name}`);
    }
    if (item.published_text) {
      segments.push(` | published: ${item.published_text}`);
    }

    const summary = String(item.description || "").slice(0, 120);
    if (summary) {
      segments.push(` | ${summary}`);
    }

    segments.push(` | ${item.url}`);

    const score = item.authority_score ?? 0;
    const type = item.source_type || "unknown";
    if (score > 0 || type !== "unknown") {
      segments.push(` ${type}(${score})`);
    }

    return segments.join("");
  };

  return result.results.map(renderLine).join("\n");
}
|
|
31
|
-
|
|
32
|
-
/**
 * Renders extracted page content as an optional header line followed by the
 * (possibly truncated) text.
 *
 * @param {object} result - Extraction result; `text` is preferred over `excerpt`.
 * @param {number} maxChars - Character limit passed to `truncateText`.
 * @returns {string} "# title url (extractor)" (only the parts that are
 *   present, space-separated), a newline and the text; or the text alone
 *   when no header part is present.
 */
export function formatContentResponse(result, maxChars) {
  const headerParts = [];
  if (result.title) {
    headerParts.push(`# ${result.title}`);
  }
  if (result.url) {
    headerParts.push(result.url);
  }
  if (result.extractor) {
    headerParts.push(`(${result.extractor})`);
  }
  const header = headerParts.join(" ");

  const body = truncateText(String(result.text || result.excerpt || ""), maxChars);

  if (!header) {
    return body;
  }
  return `${header}\n${body}`;
}
|
package/src/mcp/index.js
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
import { loadMcpConfig } from "./config.js";
|
|
2
|
-
import { createServer, startServer } from "./server.js";
|
|
3
|
-
|
|
4
|
-
/**
 * Entry point: loads the configuration, creates and starts the MCP server,
 * then writes a start-up notice and the active mode to stderr.
 *
 * @returns {Promise<void>}
 */
export async function main() {
  const config = loadMcpConfig();
  await startServer(createServer(config));

  const notices = ["Search MCP Server running on stdio", `Mode: ${config.mode}`];
  for (const notice of notices) {
    console.error(notice);
  }
}
|
package/src/mcp/local/content.js
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
import { extractPageContent } from "../../content/extract.js";
|
|
2
|
-
import {
|
|
3
|
-
fetchReadableContent,
|
|
4
|
-
normalizeContentMaxBytes,
|
|
5
|
-
normalizeTargetUrl,
|
|
6
|
-
} from "../../content/fetch.js";
|
|
7
|
-
|
|
8
|
-
/**
 * Reads the mandatory `url` argument of a tool call.
 *
 * @param {{url?: *}} args - Tool arguments.
 * @returns {string} The non-empty `url` string.
 * @throws {Error} "url required" when `url` is missing, empty or not a string.
 */
export function requireUrl(args) {
  const { url } = args;
  const isUsable = typeof url === "string" && url !== "";

  if (!isUsable) {
    throw new Error("url required");
  }

  return url;
}
|
|
15
|
-
|
|
16
|
-
/**
 * Fetches and extracts readable content for a URL.
 *
 * @param {string} targetUrl - URL to fetch.
 * @param {{max_bytes?: *}} [options={}] - Optional byte limit, normalized with
 *   `normalizeContentMaxBytes`.
 * @returns {Promise<object>} The result of `fetchReadableContent`.
 */
export async function contentLocal(targetUrl, options = {}) {
  const { max_bytes } = options;
  const byteLimit = normalizeContentMaxBytes({ max_bytes });

  return fetchReadableContent(targetUrl, byteLimit);
}
|
|
22
|
-
|
|
23
|
-
/**
 * Extracts page content from HTML that was already obtained.
 *
 * @param {string} targetUrl - URL the HTML belongs to; validated and
 *   normalized with `normalizeTargetUrl`.
 * @param {string} html - Markup to extract from.
 * @returns {Promise<object>} The result of `extractPageContent`.
 */
export async function extractHtmlLocal(targetUrl, html) {
  return extractPageContent(html, normalizeTargetUrl(targetUrl));
}
|