@endday/search-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +674 -0
- package/README.md +117 -0
- package/README.zh.md +116 -0
- package/data/blocklist.generated.js +2 -0
- package/envs.js +129 -0
- package/index.d.ts +191 -0
- package/index.js +6 -0
- package/mcp/search-mcp.js +8 -0
- package/package.json +71 -0
- package/src/content/extract.impl.js +228 -0
- package/src/content/extract.js +1 -0
- package/src/content/fetch.impl.js +400 -0
- package/src/content/fetch.js +1 -0
- package/src/core/crypto.js +7 -0
- package/src/core/errors.impl.js +52 -0
- package/src/core/errors.js +1 -0
- package/src/core/html.impl.js +69 -0
- package/src/core/html.js +1 -0
- package/src/mcp/config.js +75 -0
- package/src/mcp/format.js +44 -0
- package/src/mcp/index.js +10 -0
- package/src/mcp/local/content.js +26 -0
- package/src/mcp/local/search.js +233 -0
- package/src/mcp/schemas.js +132 -0
- package/src/mcp/server.js +97 -0
- package/src/mcp/tools/content.js +31 -0
- package/src/mcp/tools/jinaContent.js +38 -0
- package/src/mcp/tools/newsSearch.js +22 -0
- package/src/mcp/tools/webSearch.js +57 -0
- package/src/platform/auth.impl.js +166 -0
- package/src/platform/auth.js +1 -0
- package/src/platform/cache.impl.js +166 -0
- package/src/platform/cache.js +1 -0
- package/src/platform/health.impl.js +133 -0
- package/src/platform/health.js +1 -0
- package/src/platform/http.impl.js +108 -0
- package/src/platform/http.js +1 -0
- package/src/platform/logger.impl.js +51 -0
- package/src/platform/logger.js +1 -0
- package/src/platform/metrics.impl.js +43 -0
- package/src/platform/metrics.js +1 -0
- package/src/platform/nodeHttpClient.js +104 -0
- package/src/platform/rateLimit.impl.js +141 -0
- package/src/platform/rateLimit.js +1 -0
- package/src/platform/requestContext.impl.js +10 -0
- package/src/platform/requestContext.js +1 -0
- package/src/platform/session.impl.js +198 -0
- package/src/platform/session.js +1 -0
- package/src/platform/stateKv.impl.js +18 -0
- package/src/platform/stateKv.js +1 -0
- package/src/platform/tasks.impl.js +17 -0
- package/src/platform/tasks.js +1 -0
- package/src/routes/requestParams.impl.js +12 -0
- package/src/routes/requestParams.js +1 -0
- package/src/search/engineRegistry.impl.js +117 -0
- package/src/search/engineRegistry.js +1 -0
- package/src/search/engineRequest.impl.js +377 -0
- package/src/search/engineRequest.js +1 -0
- package/src/search/engineUtils.impl.js +227 -0
- package/src/search/engineUtils.js +1 -0
- package/src/search/engines/baidu.impl.js +145 -0
- package/src/search/engines/baidu.js +2 -0
- package/src/search/engines/bing.impl.js +509 -0
- package/src/search/engines/bing.js +2 -0
- package/src/search/engines/brave.impl.js +223 -0
- package/src/search/engines/brave.js +2 -0
- package/src/search/engines/duckduckgo.impl.js +164 -0
- package/src/search/engines/duckduckgo.js +2 -0
- package/src/search/engines/mojeek.impl.js +115 -0
- package/src/search/engines/mojeek.js +2 -0
- package/src/search/engines/qwant.impl.js +188 -0
- package/src/search/engines/qwant.js +2 -0
- package/src/search/engines/startpage.impl.js +237 -0
- package/src/search/engines/startpage.js +2 -0
- package/src/search/engines/toutiao.impl.js +265 -0
- package/src/search/engines/toutiao.js +2 -0
- package/src/search/engines/yahoo.impl.js +379 -0
- package/src/search/engines/yahoo.js +2 -0
- package/src/search/gateway.impl.js +423 -0
- package/src/search/gateway.js +1 -0
- package/src/search/ranking.impl.js +381 -0
- package/src/search/ranking.js +1 -0
- package/src/search/requestPolicy.impl.js +137 -0
- package/src/search/requestPolicy.js +1 -0
- package/src/search/upstreamSession.impl.js +148 -0
- package/src/search/upstreamSession.js +1 -0
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
import { ApiError } from "../../core/errors.js";
|
|
2
|
+
import {
|
|
3
|
+
fetchSearchText,
|
|
4
|
+
isChallengeResponse,
|
|
5
|
+
throwBlockedUpstreamError,
|
|
6
|
+
} from "../engineRequest.js";
|
|
7
|
+
import {
|
|
8
|
+
ensureAbsoluteUrl,
|
|
9
|
+
mapLanguage,
|
|
10
|
+
mapTimeRange,
|
|
11
|
+
resolvePageNumber,
|
|
12
|
+
} from "../engineUtils.js";
|
|
13
|
+
import { cleanText, parseHtml } from "../../core/html.js";
|
|
14
|
+
import { normalizeResults } from "../ranking.js";
|
|
15
|
+
|
|
16
|
+
/**
 * Lookup table handed to `mapTimeRange` in `searchYahoo`; a non-empty mapped
 * value is sent as the `btf` query parameter of the web search request.
 */
const YAHOO_TIME_RANGE = {
  day: "d",
  week: "w",
  month: "m",
  year: "y",
};
|
|
22
|
+
|
|
23
|
+
/**
 * Lookup table handed to `mapLanguage` (with "any" as its third argument) in
 * `getYahooLanguageParts`. The resulting value is embedded in the `sB` cookie
 * as `vl=lang_<value>` and is used as the key into `YAHOO_LANGUAGE_DOMAIN`.
 */
const YAHOO_LANGUAGE = {
  all: "any",
  en: "en",
  "en-us": "en",
  "en-gb": "en",
  zh: "zh_chs",
  "zh-cn": "zh_chs",
  "zh-tw": "zh_cht",
  fr: "fr",
  de: "de",
  es: "es",
  it: "it",
  ja: "ja",
  ko: "ko",
  pt: "pt",
};
|
|
39
|
+
|
|
40
|
+
/**
 * Search hostnames keyed by the upper-cased region segment of the language
 * tag (the second "-"-separated part). `resolveYahooDomain` checks this table
 * first. Both "GB" and "UK" resolve to the same host.
 */
const YAHOO_REGION_DOMAIN = {
  AR: "ar.search.yahoo.com",
  BR: "br.search.yahoo.com",
  CA: "ca.search.yahoo.com",
  CL: "cl.search.yahoo.com",
  CO: "co.search.yahoo.com",
  DE: "de.search.yahoo.com",
  ES: "espanol.search.yahoo.com",
  FR: "fr.search.yahoo.com",
  GB: "uk.search.yahoo.com",
  HK: "hk.search.yahoo.com",
  IN: "in.search.yahoo.com",
  MX: "mx.search.yahoo.com",
  PE: "pe.search.yahoo.com",
  PH: "ph.search.yahoo.com",
  SG: "sg.search.yahoo.com",
  TH: "th.search.yahoo.com",
  TW: "tw.search.yahoo.com",
  UK: "uk.search.yahoo.com",
  VE: "ve.search.yahoo.com",
};
|
|
61
|
+
|
|
62
|
+
/**
 * Search hostnames keyed by the mapped Yahoo language value. Consulted by
 * `resolveYahooDomain` when the region has no entry in `YAHOO_REGION_DOMAIN`.
 *
 * NOTE(review): several keys (bg, cs, da, el, ...) are not values of
 * `YAHOO_LANGUAGE`; whether `mapLanguage` can ever return them depends on its
 * implementation — confirm.
 */
const YAHOO_LANGUAGE_DOMAIN = {
  any: "search.yahoo.com",
  en: "search.yahoo.com",
  zh_chs: "hk.search.yahoo.com",
  zh_cht: "tw.search.yahoo.com",
  bg: "search.yahoo.com",
  cs: "search.yahoo.com",
  da: "search.yahoo.com",
  el: "search.yahoo.com",
  et: "search.yahoo.com",
  he: "search.yahoo.com",
  hr: "search.yahoo.com",
  ja: "search.yahoo.com",
  ko: "search.yahoo.com",
  sk: "search.yahoo.com",
  sl: "search.yahoo.com",
};
|
|
79
|
+
|
|
80
|
+
/**
 * Regular expressions passed to `isChallengeResponse` to spot CAPTCHA markup:
 * an attribute `name` equal to "captcha", or an `id` whose value contains
 * "captcha" (both case-insensitive).
 */
const YAHOO_CHALLENGE_PATTERNS = [
  /name=["']captcha["']/i,
  /id=["'][^"']*captcha[^"']*["']/i,
];
|
|
84
|
+
|
|
85
|
+
/**
 * Decide whether an upstream payload looks like a Yahoo bot challenge.
 *
 * The payload counts as a challenge when the shared `isChallengeResponse`
 * helper matches one of `YAHOO_CHALLENGE_PATTERNS`, or when it contains a
 * "verify you are human" / "unusual traffic" notice together with a `<form>`
 * tag.
 *
 * @param {*} source - Response body; falsy values are treated as "".
 * @returns {*} The helper's truthy result, or a boolean from the text checks.
 */
function isYahooChallengeResponse(source) {
  const body = String(source || "");

  const captchaMatch = isChallengeResponse(body, YAHOO_CHALLENGE_PATTERNS);
  if (captchaMatch) {
    return captchaMatch;
  }

  const hasChallengeNotice =
    /verify you are human/i.test(body) || /unusual traffic/i.test(body);
  if (!hasChallengeNotice) {
    return false;
  }

  // A notice alone is not enough; the page must also carry a form.
  return /<form\b/i.test(body);
}
|
|
94
|
+
|
|
95
|
+
/**
 * Report a blocked Yahoo HTML response by delegating to
 * `throwBlockedUpstreamError` with the engine label and surface.
 */
function throwYahooChallengeError() {
  const blockedContext = { engine: "Yahoo", surface: "html" };
  throwBlockedUpstreamError(blockedContext);
}
|
|
101
|
+
|
|
102
|
+
/**
 * Split a language tag into the pieces the Yahoo adapter needs.
 *
 * The tag is trimmed and lower-cased, then split on "-": the first segment is
 * the language, the second (upper-cased) the region.
 *
 * @param {*} language - Language tag such as "en", "zh-TW" or "all"; falsy
 *   values are treated as "".
 * @returns {{lang: string, region: string, yahooLanguage: string}} `lang`
 *   falls back to "en" when the first segment is empty, `region` is "" when
 *   absent, and `yahooLanguage` is whatever `mapLanguage` returns for the
 *   normalized tag (called with `YAHOO_LANGUAGE` and "any").
 */
function getYahooLanguageParts(language) {
  const normalized = String(language || "").trim().toLowerCase();
  const [lang, region = ""] = normalized.split("-");

  return {
    // `split` always yields a string as its first element, so a destructuring
    // default (which only applies to `undefined`) would never fire; use `||`
    // so an empty tag really falls back to "en".
    lang: lang || "en",
    region: region.toUpperCase(),
    yahooLanguage: mapLanguage(normalized, YAHOO_LANGUAGE, "any"),
  };
}
|
|
112
|
+
|
|
113
|
+
/**
 * Pick the Yahoo search hostname for a language tag.
 *
 * Resolution order: region table, then language table, then a
 * `<lang>.search.yahoo.com` subdomain built from the language segment.
 *
 * @param {*} language - Language tag forwarded to `getYahooLanguageParts`.
 * @returns {string} Hostname to use for the search request.
 */
function resolveYahooDomain(language) {
  const { lang, region, yahooLanguage } = getYahooLanguageParts(language);

  const mappedDomain =
    YAHOO_REGION_DOMAIN[region] || YAHOO_LANGUAGE_DOMAIN[yahooLanguage];
  if (mappedDomain) {
    return mappedDomain;
  }

  // `lang` comes from the caller-supplied language tag and ends up in the URL
  // authority, so only a plain 2-3 letter code may become a subdomain.
  // Anything else (empty, or containing ".", "/", "@", ...) falls back to the
  // generic host instead of producing an invalid or foreign hostname.
  return /^[a-z]{2,3}$/.test(lang)
    ? `${lang}.search.yahoo.com`
    : "search.yahoo.com";
}
|
|
122
|
+
|
|
123
|
+
/**
 * Resolve the hostname used for news searches. Currently this is exactly the
 * web search hostname returned by `resolveYahooDomain`.
 *
 * NOTE(review): `parseYahooNewsResults` resolves relative links against
 * "https://news.search.yahoo.com", while news requests are sent to the host
 * returned here — confirm which news host is intended.
 *
 * @param {*} language - Language tag forwarded to `resolveYahooDomain`.
 * @returns {string} Hostname for the news search request.
 */
function resolveYahooNewsDomain(language) {
  return resolveYahooDomain(language);
}
|
|
126
|
+
|
|
127
|
+
/**
 * Build the value of Yahoo's `sB` preferences cookie for a language tag.
 *
 * The value is a fixed "&"-joined list of settings in which only the
 * `vl=lang_<yahooLanguage>` entry varies with the input.
 *
 * @param {*} language - Language tag forwarded to `getYahooLanguageParts`.
 * @returns {string} Cookie value.
 */
function buildYahooCookie(language) {
  const preferredLanguage = getYahooLanguageParts(language).yahooLanguage;

  return `v=1&vm=p&fl=1&vl=lang_${preferredLanguage}&pn=10&rw=new&userset=1`;
}
|
|
140
|
+
|
|
141
|
+
/**
 * Unwrap the target URL from a Yahoo redirect link.
 *
 * Looks for the "/RU=" marker, takes the text from the first "http" after it
 * up to the nearest following "/RS" or "/RK" marker (or the end of the
 * string), and percent-decodes it.
 *
 * @param {*} rawUrl - Link taken from a result; falsy values become "".
 * @returns {string} The decoded target; the undecoded slice if decoding
 *   fails; or the trimmed input when there is no "/RU=" marker or no "http"
 *   after it.
 */
export function extractYahooRedirectUrl(rawUrl) {
  const input = String(rawUrl || "").trim();

  const ruAt = input.indexOf("/RU=");
  const targetStart =
    ruAt === -1 ? -1 : input.indexOf("http", ruAt + "/RU=".length);
  if (targetStart === -1) {
    return input;
  }

  // The target ends at whichever terminator appears first after its start.
  let targetEnd = input.length;
  for (const terminator of ["/RS", "/RK"]) {
    const at = input.indexOf(terminator, targetStart);
    if (at > targetStart && at < targetEnd) {
      targetEnd = at;
    }
  }

  const encodedTarget = input.slice(targetStart, targetEnd);
  try {
    return decodeURIComponent(encodedTarget);
  } catch (_) {
    // Malformed percent-encoding: return the raw slice rather than failing.
    return encodedTarget;
  }
}
|
|
166
|
+
|
|
167
|
+
/**
 * Work out the title of a single Yahoo web result.
 *
 * The link's `aria-label` wins when present. Otherwise the first heading
 * found under the result node supplies the title (its `innerHTML`, then its
 * `text`), with the link's own `text` as the last resort.
 *
 * @param {object} node - Result container exposing `querySelector`.
 * @param {object} linkNode - Result anchor exposing `getAttribute` and `text`.
 * @returns {*} Whatever `cleanText` returns for the chosen raw title.
 */
function extractYahooTitle(node, linkNode) {
  const label = linkNode.getAttribute("aria-label");
  if (label) {
    return cleanText(label);
  }

  // Try the heading selectors from most to least specific; stop at first hit.
  let heading = null;
  for (const selector of [".compTitle h3 a", ".compTitle a h3", "h3"]) {
    heading = node.querySelector(selector);
    if (heading) {
      break;
    }
  }

  const rawTitle = heading?.innerHTML || heading?.text || linkNode.text || "";
  return cleanText(rawTitle);
}
|
|
180
|
+
|
|
181
|
+
/**
 * Parse a Yahoo web search results page into normalized results.
 *
 * Each `div.algo-sr` element that has a title link yields one entry with
 * `title`, `url` (redirect wrapper removed) and `description`. The collected
 * entries are passed through `normalizeResults`.
 *
 * @param {*} html - Page markup returned by Yahoo.
 * @returns {Array} The list returned by `normalizeResults`.
 * @throws {ApiError} 502 `UPSTREAM_PARSE_ERROR` when the normalized list is
 *   empty. A challenge page is reported via `throwYahooChallengeError`.
 */
export function parseYahooResults(html) {
  if (isYahooChallengeResponse(html)) {
    throwYahooChallengeError();
  }

  const collected = [];

  for (const block of parseHtml(html).querySelectorAll("div.algo-sr")) {
    const anchor =
      block.querySelector(".compTitle h3 a[href]") ||
      block.querySelector(".compTitle a[href]");
    if (!anchor) {
      // No usable title link in this block: skip it.
      continue;
    }

    const href = ensureAbsoluteUrl(
      anchor.getAttribute("href"),
      "https://search.yahoo.com"
    );
    const snippet = block.querySelector(".compText");

    const title = extractYahooTitle(block, anchor);
    const url = extractYahooRedirectUrl(href);
    const description = cleanText(snippet?.innerHTML || snippet?.text || "");

    collected.push({ title, url, description });
  }

  const normalized = normalizeResults(collected);
  if (normalized.length > 0) {
    return normalized;
  }

  throw new ApiError({
    status: 502,
    code: "UPSTREAM_PARSE_ERROR",
    category: "upstream",
    message: "Yahoo parser could not find organic results",
  });
}
|
|
226
|
+
|
|
227
|
+
/**
 * Parse a Yahoo news results page into normalized results.
 *
 * Each `.NewsArticle` element that has a headline link yields one entry with
 * `title`, `url` (redirect wrapper removed), `description`, `source_name` and
 * `published_text` (a trailing "·" / "•" separator is stripped). The
 * collected entries are passed through `normalizeResults`.
 *
 * @param {*} html - Page markup returned by Yahoo.
 * @returns {Array} The list returned by `normalizeResults`.
 * @throws {ApiError} 502 `UPSTREAM_PARSE_ERROR` when the normalized list is
 *   empty. A challenge page is reported via `throwYahooChallengeError`.
 */
export function parseYahooNewsResults(html) {
  if (isYahooChallengeResponse(html)) {
    throwYahooChallengeError();
  }

  const collected = [];

  for (const article of parseHtml(html).querySelectorAll(".NewsArticle")) {
    const anchor =
      article.querySelector(".s-title a[href]") ||
      article.querySelector(".compArticleList a[href]");
    if (!anchor) {
      // No headline link in this article: skip it.
      continue;
    }

    // Cleaned content of a child element: innerHTML first, then text, then "".
    const readField = (selector) => {
      const element = article.querySelector(selector);
      return cleanText(element?.innerHTML || element?.text || "");
    };

    const href = ensureAbsoluteUrl(
      anchor.getAttribute("href"),
      "https://news.search.yahoo.com"
    );

    const title = cleanText(anchor.innerHTML || anchor.text || "");
    const url = extractYahooRedirectUrl(href);
    const description = readField(".s-desc");
    const sourceName = readField(".s-source");
    const publishedText = readField(".s-time").replace(/\s*[·•]\s*$/, "");

    collected.push({
      title,
      url,
      description,
      source_name: sourceName,
      published_text: publishedText,
    });
  }

  const normalized = normalizeResults(collected);
  if (normalized.length > 0) {
    return normalized;
  }

  throw new ApiError({
    status: 502,
    code: "UPSTREAM_PARSE_ERROR",
    category: "upstream",
    message: "Yahoo News parser could not find organic results",
  });
}
|
|
283
|
+
|
|
284
|
+
/**
 * Run a Yahoo search (web or news) and return the parsed results.
 *
 * Web searches add the `btf` time filter when `mapTimeRange` yields a value
 * and page through results with `b`/`pz`/`bct`/`xargs` (page 0 sends an empty
 * `iscqry` instead). News searches only add `fr=news`; `time_range` and
 * `pageno` are not applied to them.
 *
 * @param {object} params
 * @param {string} [params.vertical="web"] - "news" selects the news flow;
 *   every other value uses the web flow.
 * @param {string} params.query - Search terms, sent as `p`.
 * @param {*} [params.language] - Language tag used for the host and cookie.
 * @param {*} [params.time_range] - Key into `YAHOO_TIME_RANGE` (web only).
 * @param {*} [params.pageno] - Page number, resolved via `resolvePageNumber`.
 * @param {*} [params.signal] - Forwarded to `fetchSearchText`.
 * @param {*} [params.runtimeContext] - Forwarded to `fetchSearchText`.
 * @returns {Promise<Array>} Results from `parseYahooNewsResults` or
 *   `parseYahooResults`.
 */
async function searchYahoo(params) {
  const {
    vertical = "web",
    query,
    language,
    time_range,
    pageno,
    signal,
    runtimeContext,
  } = params;
  const page = resolvePageNumber(pageno);
  const isNews = vertical === "news";

  const domain = isNews
    ? resolveYahooNewsDomain(language)
    : resolveYahooDomain(language);

  const searchUrl = new URL(`https://${domain}/search`);
  const query_ = searchUrl.searchParams;
  query_.set("p", query);

  if (isNews) {
    query_.set("fr", "news");
  } else {
    const timeFilter = mapTimeRange(time_range, YAHOO_TIME_RANGE);
    if (timeFilter) {
      query_.set("btf", timeFilter);
    }

    if (page === 0) {
      query_.set("iscqry", "");
    } else {
      // Result offset for the requested page, at 7 results per page.
      const offset = (page + 1) * 7 + 1;
      query_.set("b", String(offset));
      query_.set("pz", "7");
      query_.set("bct", "0");
      query_.set("xargs", "0");
    }
  }

  const html = await fetchSearchText(searchUrl.toString(), {
    engine: "yahoo",
    engineLabel: "Yahoo",
    signal,
    language,
    cookies: {
      sB: buildYahooCookie(language),
    },
    referrer: `https://${domain}/`,
    runtimeContext,
    blockedStatuses: [403, 429],
    isBlocked: isYahooChallengeResponse,
    blockedSurface: "html",
  });

  return isNews ? parseYahooNewsResults(html) : parseYahooResults(html);
}
|
|
355
|
+
|
|
356
|
+
/**
 * Adapter descriptor for the Yahoo engine, bundling its identity, request
 * policy, capability flags and the `searchYahoo` entry point.
 *
 * The `supports.news` overrides match `searchYahoo`'s news branch, which
 * applies neither the time filter nor the paging parameters.
 *
 * NOTE(review): how `priority`, `tier` and `requestPolicy` are interpreted is
 * decided by the consumer of this object, which is not visible in this file.
 */
export const yahooAdapter = {
  name: "yahoo",
  label: "Yahoo",
  priority: 55,
  tier: "secondary",
  requestPolicy: {
    retryAttempts: 1,
    minRequestIntervalMs: 200,
  },
  supports: {
    verticals: ["web", "news"],
    language: true,
    time_range: true,
    pageno: true,
    // Per-vertical overrides for news searches.
    news: {
      time_range: false,
      pageno: false,
    },
  },
  // Always reports the engine as available.
  isAvailable: () => true,
  search: searchYahoo,
};

// The search function is also exposed as the module's default export.
export default searchYahoo;
|