@endday/search-mcp 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +4724 -0
- package/{mcp → dist}/search-mcp.js +1 -2
- package/package.json +14 -14
- package/data/blocklist.generated.js +0 -2
- package/envs.js +0 -129
- package/index.js +0 -6
- package/src/content/extract.impl.js +0 -228
- package/src/content/extract.js +0 -1
- package/src/content/fetch.impl.js +0 -400
- package/src/content/fetch.js +0 -1
- package/src/core/crypto.js +0 -7
- package/src/core/errors.impl.js +0 -52
- package/src/core/errors.js +0 -1
- package/src/core/html.impl.js +0 -69
- package/src/core/html.js +0 -1
- package/src/mcp/config.js +0 -75
- package/src/mcp/format.js +0 -44
- package/src/mcp/index.js +0 -10
- package/src/mcp/local/content.js +0 -26
- package/src/mcp/local/search.js +0 -233
- package/src/mcp/schemas.js +0 -132
- package/src/mcp/server.js +0 -97
- package/src/mcp/tools/content.js +0 -31
- package/src/mcp/tools/jinaContent.js +0 -38
- package/src/mcp/tools/newsSearch.js +0 -22
- package/src/mcp/tools/webSearch.js +0 -57
- package/src/platform/auth.impl.js +0 -166
- package/src/platform/auth.js +0 -1
- package/src/platform/cache.impl.js +0 -166
- package/src/platform/cache.js +0 -1
- package/src/platform/health.impl.js +0 -133
- package/src/platform/health.js +0 -1
- package/src/platform/http.impl.js +0 -108
- package/src/platform/http.js +0 -1
- package/src/platform/logger.impl.js +0 -51
- package/src/platform/logger.js +0 -1
- package/src/platform/metrics.impl.js +0 -43
- package/src/platform/metrics.js +0 -1
- package/src/platform/nodeHttpClient.js +0 -104
- package/src/platform/rateLimit.impl.js +0 -141
- package/src/platform/rateLimit.js +0 -1
- package/src/platform/requestContext.impl.js +0 -10
- package/src/platform/requestContext.js +0 -1
- package/src/platform/session.impl.js +0 -198
- package/src/platform/session.js +0 -1
- package/src/platform/stateKv.impl.js +0 -18
- package/src/platform/stateKv.js +0 -1
- package/src/platform/tasks.impl.js +0 -17
- package/src/platform/tasks.js +0 -1
- package/src/routes/requestParams.impl.js +0 -12
- package/src/routes/requestParams.js +0 -1
- package/src/search/engineRegistry.impl.js +0 -117
- package/src/search/engineRegistry.js +0 -1
- package/src/search/engineRequest.impl.js +0 -377
- package/src/search/engineRequest.js +0 -1
- package/src/search/engineUtils.impl.js +0 -227
- package/src/search/engineUtils.js +0 -1
- package/src/search/engines/baidu.impl.js +0 -145
- package/src/search/engines/baidu.js +0 -2
- package/src/search/engines/bing.impl.js +0 -509
- package/src/search/engines/bing.js +0 -2
- package/src/search/engines/brave.impl.js +0 -223
- package/src/search/engines/brave.js +0 -2
- package/src/search/engines/duckduckgo.impl.js +0 -164
- package/src/search/engines/duckduckgo.js +0 -2
- package/src/search/engines/mojeek.impl.js +0 -115
- package/src/search/engines/mojeek.js +0 -2
- package/src/search/engines/qwant.impl.js +0 -188
- package/src/search/engines/qwant.js +0 -2
- package/src/search/engines/startpage.impl.js +0 -237
- package/src/search/engines/startpage.js +0 -2
- package/src/search/engines/toutiao.impl.js +0 -265
- package/src/search/engines/toutiao.js +0 -2
- package/src/search/engines/yahoo.impl.js +0 -379
- package/src/search/engines/yahoo.js +0 -2
- package/src/search/gateway.impl.js +0 -423
- package/src/search/gateway.js +0 -1
- package/src/search/ranking.impl.js +0 -381
- package/src/search/ranking.js +0 -1
- package/src/search/requestPolicy.impl.js +0 -137
- package/src/search/requestPolicy.js +0 -1
- package/src/search/upstreamSession.impl.js +0 -148
- package/src/search/upstreamSession.js +0 -1
- /package/{index.d.ts → dist/index.d.ts} +0 -0
|
@@ -1,381 +0,0 @@
|
|
|
1
|
-
import { GENERATED_BLOCKLIST } from "../../data/blocklist.generated.js";
|
|
2
|
-
|
|
3
|
-
/**
 * Normalizes raw engine results into a common `{ title, url, description }`
 * shape, keeping any other fields the engine supplied.
 *
 * Field aliases are resolved in priority order:
 *   title       <- title | name
 *   url         <- url | link | href
 *   description <- description | content | snippet
 *
 * Entries that end up without a URL or without a title are dropped.
 *
 * @param {Array<object>|null|undefined} results Raw results from one engine.
 *   Anything that is not an array (e.g. an engine that returned nothing) is
 *   treated as an empty list instead of throwing.
 * @returns {Array<object>} Normalized results with trimmed string fields.
 */
export const normalizeResults = (results) =>
  (Array.isArray(results) ? results : [])
    .map((result) => {
      const {
        title,
        name,
        url,
        link,
        href,
        description,
        content,
        snippet,
        ...rest
      } = result || {};

      return {
        ...rest,
        title: String(title || name || "").trim(),
        url: String(url || link || href || "").trim(),
        description: String(description || content || snippet || "").trim(),
      };
    })
    .filter((result) => result.url && result.title);
|
|
26
|
-
|
|
27
|
-
// Exact query-string keys treated as click/campaign tracking noise.
const TRACKING_QUERY_PARAMS = new Set();
for (const trackingKey of [
  "fbclid",
  "gclid",
  "msclkid",
  "mc_cid",
  "mc_eid",
  "ref_src",
  "srsltid",
]) {
  TRACKING_QUERY_PARAMS.add(trackingKey);
}
|
|
36
|
-
// Domains taken from the generated blocklist; an empty set when the
// blocklist has no `domains` entry.
const BLOCKED_HOSTS = new Set(GENERATED_BLOCKLIST.domains || []);
|
|
37
|
-
// Per-domain authority rules. Each rule carries:
//   domains         - hostnames the rule applies to (subdomains match too)
//   source_type     - label attached to matching results
//   authority_score - base score added to matching results
//   ai_model_boost  - optional extra score for AI/model-related queries
//
// Rules are looked up with a first-match scan and a hostname matches a domain
// when it equals it or is a subdomain of it. Subdomain-specific rules are
// therefore listed BEFORE the rule for their parent domain; otherwise the
// parent rule would always win and the specific rule could never be reached.
const SOURCE_AUTHORITY_RULES = [
  {
    domains: ["developers.cloudflare.com", "platform.openai.com"],
    source_type: "official",
    authority_score: 90,
  },
  {
    domains: ["cloudflare.com", "openai.com"],
    source_type: "official",
    authority_score: 85,
  },
  {
    domains: ["deepseek.com"],
    source_type: "official",
    authority_score: 90,
    ai_model_boost: 20,
  },
  {
    domains: ["huggingface.co"],
    source_type: "model_repo",
    authority_score: 70,
    ai_model_boost: 15,
  },
  {
    domains: ["github.com"],
    source_type: "code_repo",
    authority_score: 55,
    ai_model_boost: 10,
  },
  {
    domains: ["arxiv.org", "openreview.net"],
    source_type: "paper",
    authority_score: 60,
    ai_model_boost: 15,
  },
  {
    domains: [
      "artificialanalysis.ai",
      "lmarena.ai",
      "livebench.ai",
      "paperswithcode.com",
      "scale.com",
      "swebench.com",
      "vals.ai",
    ],
    source_type: "benchmark",
    authority_score: 55,
    ai_model_boost: 15,
  },
  {
    domains: ["semianalysis.com"],
    source_type: "analysis",
    authority_score: 50,
    ai_model_boost: 10,
  },
  {
    domains: [
      "apnews.com",
      "bloomberg.com",
      "ft.com",
      "reuters.com",
      "theverge.com",
      "wsj.com",
    ],
    source_type: "media",
    authority_score: 35,
  },
  {
    domains: [
      "caixin.com",
      "eet-china.com",
      "infoq.cn",
      "news.cn",
      "stcn.com",
      "yicai.com",
    ],
    source_type: "media",
    authority_score: 28,
  },
];
|
|
117
|
-
// Detects queries about AI models. The bare token "ai" is anchored on word
// boundaries (with "openai" kept as an explicit form) so that ordinary words
// which merely contain the letters "ai" ("paint", "email", "maintenance") do
// not trigger the AI-model boost. The remaining keywords stay substring
// matches so that forms such as "gpt-4", "chatgpt" or "models" still hit.
const AI_MODEL_QUERY_RE =
  /\b(?:open)?ai\b|agent|benchmark|deepseek|gpt|llm|model|性能|模型|推理|评测|测评|基准|代码|上下文|开源/i;
// Path (or path + query) that points at a PDF: ".pdf" at the end or right
// before a "?" / "#".
const PDF_PATH_RE = /\.pdf(?:$|[?#])/i;
// Runs of two or more Han characters (CJK Ext-A, unified, compatibility).
// Global flag: intended for String.prototype.match, which returns all runs.
const HAN_SEGMENT_RE = /[\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]{2,}/gu;
// Whole query consists only of Latin letters, digits, whitespace and common
// punctuation.
const MOSTLY_LATIN_QUERY_RE = /^[\p{Script=Latin}\p{Number}\s'".,!?():/_+-]+$/u;
|
|
122
|
-
|
|
123
|
-
/**
 * Strips every trailing "/" from a URL path.
 *
 * @param {string} pathname Path component of a URL.
 * @returns {string} The path without trailing slashes, or "/" when nothing
 *   is left.
 */
function normalizeUrlPath(pathname) {
  let end = pathname.length;
  while (end > 0 && pathname[end - 1] === "/") {
    end -= 1;
  }

  return end === 0 ? "/" : pathname.slice(0, end);
}
|
|
127
|
-
|
|
128
|
-
/**
 * Tells whether `hostname` is `domain` itself or one of its subdomains.
 *
 * @param {string} hostname Hostname to test.
 * @param {string} domain Domain to match against.
 * @returns {boolean} True on an exact or subdomain match.
 */
function hostnameMatches(hostname, domain) {
  if (hostname === domain) {
    return true;
  }

  // The leading dot prevents "notexample.com" from matching "example.com".
  return hostname.endsWith(`.${domain}`);
}
|
|
131
|
-
|
|
132
|
-
/**
 * Finds the first authority rule with a domain that matches `hostname`.
 *
 * @param {string} hostname Lower-cased hostname of a result URL.
 * @returns {object|undefined} The first matching rule in declaration order,
 *   or undefined when no rule applies.
 */
function getMatchedAuthorityRule(hostname) {
  for (const rule of SOURCE_AUTHORITY_RULES) {
    for (const domain of rule.domains) {
      if (hostnameMatches(hostname, domain)) {
        return rule;
      }
    }
  }

  return undefined;
}
|
|
137
|
-
|
|
138
|
-
/**
 * Classifies a result URL and computes its authority score.
 *
 * The score starts from the matching rule's `authority_score` (0 when no
 * rule matches), then applies -5 for "m." mobile hosts, -15 for PDF links
 * and, for AI/model-related queries, the rule's optional `ai_model_boost`.
 *
 * @param {string} rawUrl URL of the result.
 * @param {string} query Original search query.
 * @returns {{source_type: string, authority_score: number}} Rule type and
 *   score; "document"/"unknown" when no rule matches; "unknown" with score 0
 *   when the URL cannot be parsed.
 */
function getSourceAuthority(rawUrl, query) {
  try {
    const parsed = new URL(rawUrl);
    const host = parsed.hostname.toLowerCase();
    const matchedRule = getMatchedAuthorityRule(host);
    const aiQuery = AI_MODEL_QUERY_RE.test(String(query || ""));
    const isPdf = PDF_PATH_RE.test(`${parsed.pathname}${parsed.search}`);

    let mobilePenalty = 0;
    if (host.startsWith("m.")) {
      mobilePenalty = -5;
    }
    let pdfPenalty = 0;
    if (isPdf) {
      pdfPenalty = -15;
    }

    if (matchedRule) {
      const aiBoost = aiQuery ? matchedRule.ai_model_boost || 0 : 0;
      return {
        source_type: matchedRule.source_type,
        authority_score:
          matchedRule.authority_score + mobilePenalty + pdfPenalty + aiBoost,
      };
    }

    return {
      source_type: isPdf ? "document" : "unknown",
      authority_score: mobilePenalty + pdfPenalty,
    };
  } catch (_) {
    // Unparseable URL: neutral classification.
    return { source_type: "unknown", authority_score: 0 };
  }
}
|
|
171
|
-
|
|
172
|
-
/**
 * Checks a hostname and each of its parent domains (down to the last two
 * labels) against the blocklist.
 *
 * @param {string} hostname Hostname to check; case-insensitive.
 * @returns {boolean} True when the hostname or a parent domain is blocked.
 */
function isBlockedHostname(hostname) {
  let labels = String(hostname || "").toLowerCase().split(".");

  // Drop the left-most label each round: a.b.example.com -> b.example.com -> example.com.
  while (labels.length >= 2) {
    if (BLOCKED_HOSTS.has(labels.join("."))) {
      return true;
    }
    labels = labels.slice(1);
  }

  return false;
}
|
|
184
|
-
|
|
185
|
-
/**
 * Tells whether a URL points at a blocked host.
 *
 * @param {string} rawUrl URL to check.
 * @returns {boolean} True when the URL's hostname is blocked; false for
 *   unparseable URLs.
 */
function isBlockedUrl(rawUrl) {
  let hostname;
  try {
    ({ hostname } = new URL(rawUrl));
  } catch (_) {
    // Not a valid absolute URL: nothing to block.
    return false;
  }

  return isBlockedHostname(hostname);
}
|
|
193
|
-
|
|
194
|
-
/**
 * Produces a canonical form of a URL for de-duplication: fragment removed,
 * hostname lower-cased, trailing slashes stripped from the path, and
 * tracking query parameters (`utm_*` plus TRACKING_QUERY_PARAMS) removed.
 *
 * @param {string} rawUrl URL to canonicalize.
 * @returns {string} Canonical URL, or the trimmed input string when it is
 *   not a parseable URL.
 */
export function canonicalizeUrl(rawUrl) {
  let parsed;
  try {
    parsed = new URL(rawUrl);
  } catch (_) {
    return String(rawUrl || "").trim();
  }

  parsed.hash = "";
  parsed.hostname = parsed.hostname.toLowerCase();
  parsed.pathname = normalizeUrlPath(parsed.pathname);

  const isTrackingKey = (key) =>
    key.startsWith("utm_") || TRACKING_QUERY_PARAMS.has(key);

  // Snapshot the keys first: deleting while iterating the live view skips entries.
  for (const key of new Set(parsed.searchParams.keys())) {
    if (isTrackingKey(key)) {
      parsed.searchParams.delete(key);
    }
  }

  return parsed.toString();
}
|
|
212
|
-
|
|
213
|
-
/**
 * Splits a query into unique match tokens: lower-cased whitespace-separated
 * words of at least two characters, followed by every run of two or more
 * Han characters found in the query.
 *
 * @param {string} query Search query.
 * @returns {string[]} Unique tokens in first-seen order.
 */
function tokenizeQuery(query) {
  const text = String(query || "");
  const unique = new Set();

  for (const word of text.toLowerCase().split(/\s+/)) {
    const trimmed = word.trim();
    if (trimmed.length >= 2) {
      unique.add(trimmed);
    }
  }

  for (const segment of text.match(HAN_SEGMENT_RE) || []) {
    unique.add(segment);
  }

  return [...unique];
}
|
|
225
|
-
|
|
226
|
-
/**
 * Reduces text to a comparison key: lower-cased with everything except
 * letters, numbers and Han characters removed.
 *
 * @param {*} value Text to normalize; falsy values become "".
 * @returns {string} The comparison key.
 */
function normalizeComparableText(value) {
  const lowered = String(value || "").toLowerCase();
  const nonComparable =
    /[^\p{Letter}\p{Number}\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]+/gu;

  return lowered.replace(nonComparable, "");
}
|
|
231
|
-
|
|
232
|
-
/**
 * Tells whether a query is written in Latin script: it must consist only of
 * characters allowed by MOSTLY_LATIN_QUERY_RE and contain at least one
 * ASCII letter.
 *
 * @param {string} query Search query.
 * @returns {boolean} True for Latin-script queries.
 */
function isMostlyLatinQuery(query) {
  const text = String(query || "").trim();
  if (!MOSTLY_LATIN_QUERY_RE.test(text)) {
    return false;
  }

  return /[a-z]/i.test(text);
}
|
|
236
|
-
|
|
237
|
-
/**
 * Returns a score penalty for text that contains a substantial amount of
 * Han characters.
 *
 * @param {string} text Text to inspect.
 * @returns {number} -30 when the Han runs (2+ characters each) total six or
 *   more characters, otherwise 0.
 */
function getHanPenalty(text) {
  const segments = String(text || "").match(HAN_SEGMENT_RE) || [];
  const hanLength = segments.reduce(
    (total, segment) => total + segment.length,
    0
  );

  return hanLength >= 6 ? -30 : 0;
}
|
|
241
|
-
|
|
242
|
-
/**
 * Computes the relevance score of one result (without source authority).
 *
 * Components, summed:
 *  - engine priority and a position bonus (30 at index 0, minus 2 per rank);
 *  - 6 per query token found in the title, 2 per token in the description;
 *  - whole-phrase matches (phrase of 4+ chars): title 35, description 10,
 *    URL 20, plus 20 more for "official" sources on a title/URL match;
 *  - for Latin-script queries: a per-engine adjustment and, for non-official
 *    sources, the Han-text penalty;
 *  - for other queries: -60 when nothing in the result matches the query.
 *
 * @param {object} params
 * @param {string[]} params.queryTokens Tokens from tokenizeQuery.
 * @param {string} params.queryPhrase Query normalized by normalizeComparableText.
 * @param {string} params.query Original query.
 * @param {string} params.title Result title.
 * @param {string} params.description Result description.
 * @param {string} params.url Canonical result URL.
 * @param {string} params.engine Engine name that produced the result.
 * @param {string} params.sourceType Source type from the authority lookup.
 * @param {number} params.enginePriority Priority of the engine.
 * @param {number} params.position Zero-based rank within the engine's results.
 * @returns {number} The relevance score.
 */
function calculateResultScore({
  queryTokens,
  queryPhrase,
  query,
  title,
  description,
  url,
  engine,
  sourceType,
  enginePriority,
  position,
}) {
  const lowerTitle = title.toLowerCase();
  const lowerDescription = description.toLowerCase();

  const countTokenHits = (haystack) =>
    queryTokens.filter((token) => haystack.includes(token)).length;
  const titleMatches = countTokenHits(lowerTitle);
  const descriptionMatches = countTokenHits(lowerDescription);

  // Very short phrases are too ambiguous for whole-phrase matching.
  const phraseUsable = queryPhrase.length >= 4;
  const containsPhrase = (text) =>
    phraseUsable && normalizeComparableText(text).includes(queryPhrase);
  const titlePhraseMatch = containsPhrase(title);
  const descriptionPhraseMatch = containsPhrase(description);
  const urlPhraseMatch = containsPhrase(url);

  const anythingMatched =
    titleMatches > 0 ||
    descriptionMatches > 0 ||
    titlePhraseMatch ||
    descriptionPhraseMatch ||
    urlPhraseMatch;

  let officialPhraseBoost = 0;
  if (sourceType === "official" && (titlePhraseMatch || urlPhraseMatch)) {
    officialPhraseBoost = 20;
  }

  const latinQuery = isMostlyLatinQuery(query);

  let englishEngineBoost = 0;
  if (latinQuery) {
    englishEngineBoost =
      {
        brave: 18,
        bing: 15,
        yahoo: 2,
        mojeek: -4,
        baidu: -20,
      }[engine] || 0;
  }

  let latinQueryHanPenalty = 0;
  if (latinQuery && sourceType !== "official") {
    latinQueryHanPenalty = getHanPenalty(`${title} ${description}`);
  }

  let hanQueryNoMatchPenalty = 0;
  if (!latinQuery && queryTokens.length > 0 && !anythingMatched) {
    hanQueryNoMatchPenalty = -60;
  }

  return (
    enginePriority +
    Math.max(0, 30 - position * 2) +
    titleMatches * 6 +
    descriptionMatches * 2 +
    (titlePhraseMatch ? 35 : 0) +
    (descriptionPhraseMatch ? 10 : 0) +
    (urlPhraseMatch ? 20 : 0) +
    officialPhraseBoost +
    englishEngineBoost +
    latinQueryHanPenalty +
    hanQueryNoMatchPenalty
  );
}
|
|
311
|
-
|
|
312
|
-
// Engines whose registry priority is below this value are treated as low
// quality for the "only one engine returned it" penalty.
const LOW_QUALITY_ENGINE_THRESHOLD = 80;

/**
 * Merges results from several engines into one ranked list.
 *
 * Every result is normalized, its URL canonicalized, blocked hosts are
 * dropped, and a score (relevance + source authority) is computed. Results
 * sharing a canonical URL are collapsed to the highest-scoring candidate; a
 * kept candidate without a description borrows one from a lower-scoring
 * duplicate. URLs returned by exactly one engine whose priority is below
 * LOW_QUALITY_ENGINE_THRESHOLD are penalized. The internal score is removed
 * from the returned objects.
 *
 * @param {object} params
 * @param {Iterable<{engine: string, results: Array<object>}>} params.engineResults
 *   Raw results grouped by engine.
 * @param {string} params.query Original search query.
 * @param {Object<string, {priority?: number}>} params.registry Engine
 *   registry keyed by engine name.
 * @returns {Array<object>} Results sorted by descending score.
 */
export function dedupeAndRankResults({ engineResults, query, registry }) {
  const queryTokens = tokenizeQuery(query);
  const queryPhrase = normalizeComparableText(query);
  const deduped = new Map();
  // Canonical URL -> set of DISTINCT engines that returned it. A plain
  // occurrence counter would treat a URL that one engine lists twice (for
  // example two links that differ only in tracking parameters) as confirmed
  // by a second engine and wrongly skip the single-engine penalty.
  const urlEngines = new Map();

  for (const { engine, results } of engineResults) {
    const enginePriority = registry[engine]?.priority || 0;

    normalizeResults(results).forEach((result, index) => {
      const canonicalUrl = canonicalizeUrl(result.url);
      if (isBlockedUrl(canonicalUrl)) {
        return;
      }

      const sourceAuthority = getSourceAuthority(canonicalUrl, query);
      const candidate = {
        ...result,
        url: canonicalUrl,
        engine,
        ...sourceAuthority,
        score:
          calculateResultScore({
            queryTokens,
            queryPhrase,
            query,
            title: result.title,
            description: result.description,
            url: canonicalUrl,
            engine,
            sourceType: sourceAuthority.source_type,
            enginePriority,
            position: index,
          }) + sourceAuthority.authority_score,
      };

      let engines = urlEngines.get(canonicalUrl);
      if (!engines) {
        engines = new Set();
        urlEngines.set(canonicalUrl, engines);
      }
      engines.add(engine);

      const existing = deduped.get(canonicalUrl);
      if (!existing || candidate.score > existing.score) {
        deduped.set(canonicalUrl, candidate);
      } else if (!existing.description && candidate.description) {
        existing.description = candidate.description;
      }
    });
  }

  // Penalize results that only appear in a single low-quality engine
  // (likely irrelevant / low-quality results that no other engine confirmed).
  const LOW_PRIORITY_SOLE_PENALTY = -40;

  return [...deduped.values()]
    .map((result) => {
      const enginePriority = registry[result.engine]?.priority || 0;
      const engineCount = urlEngines.get(result.url)?.size || 1;

      if (engineCount === 1 && enginePriority < LOW_QUALITY_ENGINE_THRESHOLD) {
        return { ...result, score: result.score + LOW_PRIORITY_SOLE_PENALTY };
      }

      return result;
    })
    .sort((left, right) => right.score - left.score)
    .map(({ score, ...result }) => result);
}
|
package/src/search/ranking.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export * from "./ranking.impl.js";
|
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
import { env } from "../../envs.js";
|
|
2
|
-
|
|
3
|
-
// Per-engine throttle state: engine name -> earliest timestamp (ms since
// epoch) at which the next upstream request may start.
const upstreamThrottleState = new Map();
|
|
4
|
-
|
|
5
|
-
/**
 * Parses a base-10 integer that must be zero or greater.
 *
 * @param {*} value Raw value (typically an env string); null/undefined
 *   means "use the fallback".
 * @param {number} fallback Value used when `value` is missing, not numeric
 *   or negative.
 * @returns {number} The parsed integer or the fallback.
 */
function parseNonNegativeInt(value, fallback) {
  const raw = value ?? String(fallback);
  const parsed = Number.parseInt(raw, 10);
  const usable = !Number.isNaN(parsed) && !(parsed < 0);

  return usable ? parsed : fallback;
}
|
|
13
|
-
|
|
14
|
-
/**
 * Forgets all recorded per-engine throttle timestamps.
 *
 * @returns {void}
 */
export function resetUpstreamRequestPolicyState() {
  upstreamThrottleState.clear();
}
|
|
17
|
-
|
|
18
|
-
/**
 * Returns the lower-cased tier of an engine adapter.
 *
 * @param {{tier?: string}|null|undefined} adapter Engine adapter.
 * @returns {string} The adapter's tier, or "secondary" when the adapter is
 *   missing or declares no tier.
 */
export function getEngineTier(adapter) {
  const declared = adapter?.tier;
  const tier = declared ? declared : "secondary";

  return String(tier).toLowerCase();
}
|
|
21
|
-
|
|
22
|
-
/**
 * Resolves the upstream request policy of an engine.
 *
 * Each setting comes from the adapter's own `requestPolicy` when present
 * (not null/undefined); otherwise from the environment:
 *  - retryAttempts: UPSTREAM_<TIER>_RETRY_ATTEMPTS, then
 *    UPSTREAM_RETRY_ATTEMPTS, then 1
 *  - retryDelayMs: UPSTREAM_RETRY_DELAY_MS, then 200
 *  - minRequestIntervalMs: UPSTREAM_<TIER>_MIN_REQUEST_INTERVAL_MS, then
 *    UPSTREAM_MIN_REQUEST_INTERVAL_MS, then 150
 *
 * @param {object} [adapter={}] Engine adapter.
 * @returns {{tier: string, retryAttempts: number, retryDelayMs: number,
 *   minRequestIntervalMs: number}} The effective policy.
 */
export function buildEnginePolicy(adapter = {}) {
  const tier = getEngineTier(adapter);
  const overrides = adapter.requestPolicy || {};

  // Tier-specific env value first, then the global one, then the built-in default.
  const tierSetting = (tierVar, globalValue, builtin) =>
    parseNonNegativeInt(env[tierVar], parseNonNegativeInt(globalValue(), builtin));

  const retryAttempts =
    overrides.retryAttempts ??
    tierSetting(
      `UPSTREAM_${tier.toUpperCase()}_RETRY_ATTEMPTS`,
      () => env.UPSTREAM_RETRY_ATTEMPTS,
      1
    );

  const retryDelayMs =
    overrides.retryDelayMs ??
    parseNonNegativeInt(env.UPSTREAM_RETRY_DELAY_MS, 200);

  const minRequestIntervalMs =
    overrides.minRequestIntervalMs ??
    tierSetting(
      `UPSTREAM_${tier.toUpperCase()}_MIN_REQUEST_INTERVAL_MS`,
      () => env.UPSTREAM_MIN_REQUEST_INTERVAL_MS,
      150
    );

  return { tier, retryAttempts, retryDelayMs, minRequestIntervalMs };
}
|
|
48
|
-
|
|
49
|
-
/**
 * Buckets engine names by the tier of their registry adapter.
 *
 * @param {Iterable<string>} engineNames Engines to group.
 * @param {Object<string, object>} registry Engine registry keyed by name.
 * @returns {Map<string, string[]>} Tier -> engine names, in first-seen
 *   order for both tiers and names.
 */
export function groupEnginesByTier(engineNames, registry) {
  const byTier = new Map();

  for (const name of engineNames) {
    const tier = getEngineTier(registry[name]);
    const bucket = byTier.get(tier);
    if (bucket) {
      bucket.push(name);
    } else {
      byTier.set(tier, [name]);
    }
  }

  return byTier;
}
|
|
64
|
-
|
|
65
|
-
/**
 * Orders the tiers present in `groups` for execution: tiers listed in
 * SEARCH_PRIMARY_TIERS first, then SEARCH_SECONDARY_TIERS, then
 * SEARCH_EXPERIMENTAL_TIERS (each in configured order), followed by any
 * tier not listed anywhere, sorted alphabetically.
 *
 * @param {Map<string, string[]>} groups Engines grouped by tier.
 * @returns {string[]} Tier names in execution order.
 */
export function getTierExecutionOrder(groups) {
  const toTierSet = (configured) =>
    new Set((configured || []).map((item) => String(item).toLowerCase()));

  const configuredSets = [
    toTierSet(env.SEARCH_PRIMARY_TIERS),
    toTierSet(env.SEARCH_SECONDARY_TIERS),
    toTierSet(env.SEARCH_EXPERIMENTAL_TIERS),
  ];

  // Tiers that exist in `groups` but appear in none of the configured lists.
  const unlisted = [...groups.keys()].filter(
    (tier) => !configuredSets.some((tierSet) => tierSet.has(tier))
  );

  const ordered = [];
  for (const tierSet of configuredSets) {
    for (const tier of tierSet) {
      if (groups.has(tier)) {
        ordered.push(tier);
      }
    }
  }

  return [...ordered, ...unlisted.sort()];
}
|
|
92
|
-
|
|
93
|
-
/**
 * Delays the caller so that requests to one engine start at least
 * `policy.minRequestIntervalMs` apart.
 *
 * The caller's slot is reserved synchronously, before any awaiting.
 * Reading the state, sleeping, and only then writing it back would let
 * several concurrent callers observe the same "next allowed" time, sleep
 * for the same duration and hit the upstream together, defeating the
 * throttle.
 *
 * @param {string} engineName Engine being called.
 * @param {{minRequestIntervalMs?: number}} [policy] Request policy; a
 *   missing or non-positive interval disables throttling.
 * @returns {Promise<void>} Resolves when the request may be sent.
 */
export async function enforceEngineThrottle(engineName, policy) {
  const minInterval = parseNonNegativeInt(policy?.minRequestIntervalMs, 0);
  if (minInterval <= 0) {
    return;
  }

  const key = String(engineName);
  const now = Date.now();
  // Our slot is the later of "now" and the end of the last reserved slot.
  const slotAt = Math.max(now, upstreamThrottleState.get(key) || 0);

  // Publish the next free slot before yielding so concurrent callers queue
  // up behind this one instead of sharing its slot.
  upstreamThrottleState.set(key, slotAt + minInterval);

  const delay = slotAt - now;
  if (delay > 0) {
    await new Promise((resolve) => setTimeout(resolve, delay));
  }
}
|
|
110
|
-
|
|
111
|
-
/**
 * Decides whether a failed upstream request should be retried.
 *
 * @param {object} error Failure; `code`, `status` and
 *   `details.upstream_status` are inspected.
 * @param {number} attempt Number of retries already made.
 * @param {{retryAttempts?: number}} [policy] Request policy.
 * @returns {boolean} False once the retry budget is used up or when the
 *   upstream blocked the request; true for aborts/timeouts, for HTTP 408,
 *   429 and 5xx, and when no status is known.
 */
export function shouldRetryUpstream(error, attempt, policy) {
  const maxAttempts = parseNonNegativeInt(policy?.retryAttempts, 0);
  if (attempt >= maxAttempts) {
    return false;
  }

  const status = error?.status || error?.details?.upstream_status || 0;
  const code = String(error?.code || "");

  switch (code) {
    case "UPSTREAM_BLOCKED":
      return false;
    case "ABORT_ERR":
    case "TIMEOUT":
      return true;
    default:
      break;
  }

  // Status 0 means no HTTP status was available at all.
  return [0, 408, 429].includes(status) || status >= 500;
}
|
|
129
|
-
|
|
130
|
-
/**
 * Waits `policy.retryDelayMs` milliseconds before a retry.
 *
 * @param {{retryDelayMs?: number}} [policy] Request policy; a missing or
 *   non-positive delay means no wait.
 * @returns {Promise<void>} Resolves after the delay.
 */
export async function sleepBeforeRetry(policy) {
  const waitMs = parseNonNegativeInt(policy?.retryDelayMs, 0);

  if (waitMs > 0) {
    await new Promise((resolve) => {
      setTimeout(resolve, waitMs);
    });
  }
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export * from "./requestPolicy.impl.js";
|
|
@@ -1,148 +0,0 @@
|
|
|
1
|
-
import { env } from "../../envs.js";
|
|
2
|
-
import { sha256Hex } from "../core/crypto.js";
|
|
3
|
-
import { getStateKv, normalizeExpirationTtl } from "../platform/stateKv.js";
|
|
4
|
-
|
|
5
|
-
// Prefix of every key this module writes to the state KV.
const UPSTREAM_SESSION_PREFIX = "upstream-session:v1";
|
|
6
|
-
// In-process session store: "<clientId>:<engine>" -> { value, expiresAt },
// where expiresAt is in epoch seconds.
const memoryState = new Map();
|
|
7
|
-
|
|
8
|
-
/**
 * Current time as whole seconds since the Unix epoch.
 *
 * @returns {number} Epoch seconds, rounded down.
 */
function nowSeconds() {
  const millis = Date.now();

  return Math.floor(millis / 1000);
}
|
|
11
|
-
|
|
12
|
-
/**
 * Builds the in-memory store key for a client/engine pair.
 *
 * @param {string} clientId Client identifier.
 * @param {string} engine Engine name.
 * @returns {string} "<clientId>:<engine>".
 */
function buildMemoryKey(clientId, engine) {
  return "".concat(clientId, ":", engine);
}
|
|
15
|
-
|
|
16
|
-
/**
 * Builds the KV key for a client/engine pair: the session prefix followed
 * by the SHA-256 hex digest of "<clientId>:<engine>".
 *
 * @param {string} clientId Client identifier.
 * @param {string} engine Engine name.
 * @returns {Promise<string>} The KV key.
 */
async function buildKvKey(clientId, engine) {
  const digest = await sha256Hex(`${clientId}:${engine}`);

  return `${UPSTREAM_SESSION_PREFIX}:${digest}`;
}
|
|
19
|
-
|
|
20
|
-
/**
 * Maps a seed string to an index in `[0, size)` deterministically, using a
 * 32-bit polynomial hash (multiplier 31) over the seed's UTF-16 code units.
 *
 * @param {string} seed Seed; falsy seeds hash as "default".
 * @param {number} size Number of slots.
 * @returns {number} Index for the seed; 0 when `size` is falsy or below 1.
 */
function pickStableIndex(seed, size) {
  if (!size || size < 1) {
    return 0;
  }

  const text = String(seed || "default");
  // split("") yields UTF-16 code units, matching index-based charCodeAt.
  const hash = text
    .split("")
    .reduce((acc, unit) => (acc * 31 + unit.charCodeAt(0)) >>> 0, 0);

  return hash % size;
}
|
|
33
|
-
|
|
34
|
-
/**
 * Empties the in-memory session store.
 *
 * @returns {void}
 */
export function resetUpstreamSessionState() {
  memoryState.clear();
}
|
|
37
|
-
|
|
38
|
-
/**
 * Reads a session from the state KV.
 *
 * @param {string} clientId Client identifier.
 * @param {string} engine Engine name.
 * @returns {Promise<object|null>} The value read as JSON, or null when no KV
 *   is available or the read fails.
 */
async function loadFromKv(clientId, engine) {
  const store = getStateKv();
  if (!store) {
    return null;
  }

  try {
    const key = await buildKvKey(clientId, engine);
    return await store.get(key, "json");
  } catch (_) {
    // Best effort: a failed read is treated as "nothing stored".
    return null;
  }
}
|
|
50
|
-
|
|
51
|
-
/**
 * Writes a session to the state KV as JSON, with an expiration TTL taken
 * from UPSTREAM_SESSION_TTL_SECONDS (3600 passed as the default).
 *
 * @param {string} clientId Client identifier.
 * @param {string} engine Engine name.
 * @param {object} value Session to store.
 * @returns {Promise<boolean>} True when written; false when no KV is
 *   available or the write fails.
 */
async function saveToKv(clientId, engine, value) {
  const store = getStateKv();
  if (!store) {
    return false;
  }

  try {
    const key = await buildKvKey(clientId, engine);
    const payload = JSON.stringify(value);
    const expirationTtl = normalizeExpirationTtl(
      env.UPSTREAM_SESSION_TTL_SECONDS,
      3600
    );
    await store.put(key, payload, { expirationTtl });
    return true;
  } catch (_) {
    // Best effort: the caller decides what to do when the write fails.
    return false;
  }
}
|
|
66
|
-
|
|
67
|
-
/**
 * Reads a session from the in-memory store, evicting it when expired.
 *
 * @param {string} clientId Client identifier.
 * @param {string} engine Engine name.
 * @returns {object|null} The stored session, or null when absent or expired.
 */
function loadFromMemory(clientId, engine) {
  const key = buildMemoryKey(clientId, engine);
  const entry = memoryState.get(key);
  if (!entry) {
    return null;
  }

  const expired = entry.expiresAt <= nowSeconds();
  if (!expired) {
    return entry.value;
  }

  memoryState.delete(key);
  return null;
}
|
|
80
|
-
|
|
81
|
-
/**
 * Stores a session in the in-memory store with an expiry derived from
 * UPSTREAM_SESSION_TTL_SECONDS (3600 passed as the default).
 *
 * @param {string} clientId Client identifier.
 * @param {string} engine Engine name.
 * @param {object} value Session to store.
 * @returns {void}
 */
function saveToMemory(clientId, engine, value) {
  const key = buildMemoryKey(clientId, engine);
  const expiresAt =
    nowSeconds() + normalizeExpirationTtl(env.UPSTREAM_SESSION_TTL_SECONDS, 3600);

  memoryState.set(key, { value, expiresAt });
}
|
|
87
|
-
|
|
88
|
-
/**
 * Returns the upstream session of a client for an engine, creating one when
 * none is stored.
 *
 * A stored session (KV first, then memory) with a `profileId` is returned
 * as is. Otherwise a profile is chosen deterministically from `profiles`
 * based on "<clientId>:<engine>" and a fresh session is persisted to the
 * KV, or to memory when the KV write does not succeed.
 *
 * @param {string} clientId Client identifier.
 * @param {string} engine Engine name.
 * @param {Array<{id: string}>} [profiles=[]] Candidate profiles.
 * @returns {Promise<object|null>} The session, or null when `clientId`,
 *   `engine` or `profiles` is missing/empty.
 */
export async function getUpstreamSession(clientId, engine, profiles = []) {
  const hasInputs =
    Boolean(clientId) && Boolean(engine) && profiles.length !== 0;
  if (!hasInputs) {
    return null;
  }

  const fromKv = await loadFromKv(clientId, engine);
  const stored = fromKv || loadFromMemory(clientId, engine);
  if (stored?.profileId) {
    return stored;
  }

  const slot = pickStableIndex(`${clientId}:${engine}`, profiles.length);
  const profile = profiles[slot];
  const session = {
    profileId: profile.id,
    createdAt: nowSeconds(),
    lastUsedAt: nowSeconds(),
    cookies: {},
  };

  const persisted = await saveToKv(clientId, engine, session);
  if (!persisted) {
    saveToMemory(clientId, engine, session);
  }

  return session;
}
|
|
113
|
-
|
|
114
|
-
/**
 * Applies a patch to the stored upstream session of a client for an engine.
 *
 * The current session is read from the KV, then memory; when none exists a
 * new one is started from `patch.profileId`. Top-level fields of `patch`
 * override the current ones, cookies are merged key by key, and
 * `lastUsedAt` is refreshed. The result is persisted to the KV, or to
 * memory when the KV write does not succeed.
 *
 * @param {string} clientId Client identifier.
 * @param {string} engine Engine name.
 * @param {object} [patch={}] Fields to merge into the session.
 * @returns {Promise<object|null>} The updated session, or null when
 *   `clientId` or `engine` is missing.
 */
export async function updateUpstreamSession(clientId, engine, patch = {}) {
  if (!(clientId && engine)) {
    return null;
  }

  let current = await loadFromKv(clientId, engine);
  if (!current) {
    current = loadFromMemory(clientId, engine);
  }
  if (!current) {
    current = {
      profileId: patch.profileId || "",
      createdAt: nowSeconds(),
      cookies: {},
    };
  }

  const mergedCookies = {
    ...(current.cookies || {}),
    ...(patch.cookies || {}),
  };
  const next = {
    ...current,
    ...patch,
    cookies: mergedCookies,
    lastUsedAt: nowSeconds(),
  };

  const persisted = await saveToKv(clientId, engine, next);
  if (!persisted) {
    saveToMemory(clientId, engine, next);
  }

  return next;
}
|
|
143
|
-
|
|
144
|
-
/**
 * Creates a function that, when called, applies `patch` to the upstream
 * session of the given client and engine. Nothing is written until the
 * returned function is invoked.
 *
 * @param {string} clientId Client identifier.
 * @param {string} engine Engine name.
 * @param {object} [patch={}] Fields to merge into the session.
 * @returns {function(): Promise<void>} The deferred writer.
 */
export function createDeferredUpstreamSessionWriter(clientId, engine, patch = {}) {
  const writeUpstreamSession = async () => {
    await updateUpstreamSession(clientId, engine, patch);
  };

  return writeUpstreamSession;
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export * from "./upstreamSession.impl.js";
|
|
File without changes
|