@endday/search-mcp 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +4724 -0
- package/dist/search-mcp.js +4715 -0
- package/package.json +14 -14
- package/data/blocklist.generated.js +0 -2
- package/envs.js +0 -129
- package/index.js +0 -6
- package/mcp/search-mcp.js +0 -8
- package/src/content/extract.impl.js +0 -228
- package/src/content/extract.js +0 -1
- package/src/content/fetch.impl.js +0 -400
- package/src/content/fetch.js +0 -1
- package/src/core/crypto.js +0 -7
- package/src/core/errors.impl.js +0 -52
- package/src/core/errors.js +0 -1
- package/src/core/html.impl.js +0 -69
- package/src/core/html.js +0 -1
- package/src/mcp/config.js +0 -75
- package/src/mcp/format.js +0 -44
- package/src/mcp/index.js +0 -10
- package/src/mcp/local/content.js +0 -26
- package/src/mcp/local/search.js +0 -233
- package/src/mcp/schemas.js +0 -132
- package/src/mcp/server.js +0 -97
- package/src/mcp/tools/content.js +0 -31
- package/src/mcp/tools/jinaContent.js +0 -38
- package/src/mcp/tools/newsSearch.js +0 -22
- package/src/mcp/tools/webSearch.js +0 -57
- package/src/platform/auth.impl.js +0 -166
- package/src/platform/auth.js +0 -1
- package/src/platform/cache.impl.js +0 -166
- package/src/platform/cache.js +0 -1
- package/src/platform/health.impl.js +0 -133
- package/src/platform/health.js +0 -1
- package/src/platform/http.impl.js +0 -108
- package/src/platform/http.js +0 -1
- package/src/platform/logger.impl.js +0 -51
- package/src/platform/logger.js +0 -1
- package/src/platform/metrics.impl.js +0 -43
- package/src/platform/metrics.js +0 -1
- package/src/platform/nodeHttpClient.js +0 -104
- package/src/platform/rateLimit.impl.js +0 -141
- package/src/platform/rateLimit.js +0 -1
- package/src/platform/requestContext.impl.js +0 -10
- package/src/platform/requestContext.js +0 -1
- package/src/platform/session.impl.js +0 -198
- package/src/platform/session.js +0 -1
- package/src/platform/stateKv.impl.js +0 -18
- package/src/platform/stateKv.js +0 -1
- package/src/platform/tasks.impl.js +0 -17
- package/src/platform/tasks.js +0 -1
- package/src/routes/requestParams.impl.js +0 -12
- package/src/routes/requestParams.js +0 -1
- package/src/search/engineRegistry.impl.js +0 -117
- package/src/search/engineRegistry.js +0 -1
- package/src/search/engineRequest.impl.js +0 -377
- package/src/search/engineRequest.js +0 -1
- package/src/search/engineUtils.impl.js +0 -227
- package/src/search/engineUtils.js +0 -1
- package/src/search/engines/baidu.impl.js +0 -145
- package/src/search/engines/baidu.js +0 -2
- package/src/search/engines/bing.impl.js +0 -509
- package/src/search/engines/bing.js +0 -2
- package/src/search/engines/brave.impl.js +0 -223
- package/src/search/engines/brave.js +0 -2
- package/src/search/engines/duckduckgo.impl.js +0 -164
- package/src/search/engines/duckduckgo.js +0 -2
- package/src/search/engines/mojeek.impl.js +0 -115
- package/src/search/engines/mojeek.js +0 -2
- package/src/search/engines/qwant.impl.js +0 -188
- package/src/search/engines/qwant.js +0 -2
- package/src/search/engines/startpage.impl.js +0 -237
- package/src/search/engines/startpage.js +0 -2
- package/src/search/engines/toutiao.impl.js +0 -265
- package/src/search/engines/toutiao.js +0 -2
- package/src/search/engines/yahoo.impl.js +0 -379
- package/src/search/engines/yahoo.js +0 -2
- package/src/search/gateway.impl.js +0 -423
- package/src/search/gateway.js +0 -1
- package/src/search/ranking.impl.js +0 -381
- package/src/search/ranking.js +0 -1
- package/src/search/requestPolicy.impl.js +0 -137
- package/src/search/requestPolicy.js +0 -1
- package/src/search/upstreamSession.impl.js +0 -148
- package/src/search/upstreamSession.js +0 -1
- /package/{index.d.ts → dist/index.d.ts} +0 -0
|
@@ -1,423 +0,0 @@
|
|
|
1
|
-
import { env } from "../../envs.js";
|
|
2
|
-
import { getCachedSearchResponse } from "../platform/cache.js";
|
|
3
|
-
import { ApiError, normalizeError } from "../core/errors.js";
|
|
4
|
-
import { getEngineRegistry, resolveEngineSelection } from "./engineRegistry.js";
|
|
5
|
-
import {
|
|
6
|
-
createDeferredEngineFailureRecorder,
|
|
7
|
-
createDeferredEngineSuccessRecorder,
|
|
8
|
-
recordEngineFailure,
|
|
9
|
-
recordEngineSuccess,
|
|
10
|
-
} from "../platform/health.js";
|
|
11
|
-
import { dedupeAndRankResults, canonicalizeUrl } from "./ranking.js";
|
|
12
|
-
import {
|
|
13
|
-
buildEnginePolicy,
|
|
14
|
-
getTierExecutionOrder,
|
|
15
|
-
groupEnginesByTier,
|
|
16
|
-
} from "./requestPolicy.js";
|
|
17
|
-
import { createDeferredCachedSearchResponseWriter } from "../platform/cache.js";
|
|
18
|
-
import { runDeferredTask } from "../platform/tasks.js";
|
|
19
|
-
import { logWarn } from "../platform/logger.js";
|
|
20
|
-
import { recordMetric, recordTiming } from "../platform/metrics.js";
|
|
21
|
-
|
|
22
|
-
/**
 * Parse a value as a base-10 integer that must be zero or greater.
 *
 * @param {*} value - Raw input. When it is `null` or `undefined`, the
 *   stringified fallback is parsed in its place.
 * @param {number} fallback - Returned untouched whenever parsing fails or the
 *   parsed number is negative.
 * @returns {number} The parsed integer, or `fallback`.
 */
function parseNonNegativeInt(value, fallback) {
  const source = value ?? String(fallback);
  const candidate = Number.parseInt(source, 10);
  const isAcceptable = !Number.isNaN(candidate) && candidate >= 0;

  return isAcceptable ? candidate : fallback;
}
|
|
30
|
-
|
|
31
|
-
/**
 * Parse a value as a base-10 integer that must be one or greater.
 *
 * @param {*} value - Raw input. When it is `null` or `undefined`, the
 *   stringified fallback is parsed in its place.
 * @param {number} fallback - Returned untouched whenever parsing fails or the
 *   parsed number is below 1.
 * @returns {number} The parsed integer, or `fallback`.
 */
function parsePositiveInt(value, fallback) {
  const source = value ?? String(fallback);
  const candidate = Number.parseInt(source, 10);
  const isAcceptable = !Number.isNaN(candidate) && candidate >= 1;

  return isAcceptable ? candidate : fallback;
}
|
|
39
|
-
|
|
40
|
-
/**
 * Split the requested engines into those that can serve this request and
 * those that must be skipped because they declare a capability as unsupported.
 *
 * An engine's capabilities come from `registry[name].supports`. When that
 * object holds an object under the current vertical, its entries override the
 * base entries. A capability only disqualifies an engine when it is explicitly
 * `false`; engines with no adapter or no `supports` declaration pass through.
 *
 * @param {Iterable<string>} engineNames - Engine names, checked in order.
 * @param {Object} registry - Engine adapters keyed by engine name.
 * @param {Object} options
 * @param {string} [options.vertical="web"] - Vertical used to pick overrides.
 * @param {*} [options.time_range] - Any truthy value means a time range was requested.
 * @param {*} [options.pageno] - Page number; a value above 0 requires paging support.
 * @returns {{enabledEngines: string[], skippedEngines: Array<{engine: string, reason: string}>}}
 */
function filterEnginesByCapabilities(
  engineNames,
  registry,
  { vertical = "web", time_range, pageno }
) {
  const wantsLaterPage = parseNonNegativeInt(pageno, 0) > 0;
  const wantsTimeRange = Boolean(time_range);

  // Merge vertical-specific overrides on top of the engine-wide declaration.
  const resolveCapabilities = (name) => {
    const declared = registry[name]?.supports || {};
    const perVertical = declared[vertical];
    if (typeof perVertical !== "object") {
      return declared;
    }
    return { ...declared, ...perVertical };
  };

  // The time-range check takes precedence over the paging check.
  const findSkipReason = (capabilities) => {
    if (wantsTimeRange && capabilities.time_range === false) {
      return "unsupported_time_range";
    }
    if (wantsLaterPage && capabilities.pageno === false) {
      return "unsupported_pageno";
    }
    return null;
  };

  const enabledEngines = [];
  const skippedEngines = [];

  for (const name of engineNames) {
    const reason = findSkipReason(resolveCapabilities(name));
    if (reason === null) {
      enabledEngines.push(name);
    } else {
      skippedEngines.push({ engine: name, reason });
    }
  }

  return { enabledEngines, skippedEngines };
}
|
|
81
|
-
|
|
82
|
-
/**
 * Start one engine search guarded by a timeout and return a handle to it.
 *
 * The returned promise settles with an outcome object instead of rejecting:
 * `{ engine, results, duration_ms, tier }` on success, or
 * `{ engine, error, duration_ms, tier }` when the search fails, times out or
 * is aborted. The error is passed through `normalizeError`.
 *
 * @param {Object} adapter - Engine adapter exposing `name` and `search(options)`.
 * @param {Object} params - Search parameters forwarded to `adapter.search`,
 *   extended with `signal` (abort signal) and `requestPolicy`.
 * @returns {{engine: string, promise: Promise<Object>, abort: Function}}
 *   Handle carrying the engine name, the outcome promise and an abort trigger.
 */
function startEngineSearch(adapter, params) {
  // Build the policy before arming the timer so a throwing policy builder
  // cannot leave a pending timeout behind.
  const policy = buildEnginePolicy(adapter);

  // A missing, non-numeric or non-positive DEFAULT_TIMEOUT falls back to
  // 4000 ms. Passing NaN or 0 to setTimeout would fire almost immediately and
  // abort every engine before it could answer.
  const timeoutMs = parsePositiveInt(env.DEFAULT_TIMEOUT, 4000);
  const controller = new AbortController();
  const startedAt = Date.now();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

  const describeOutcome = (details) => ({
    engine: adapter.name,
    ...details,
    duration_ms: Date.now() - startedAt,
    tier: policy.tier,
  });

  // Running the call inside an async function turns a synchronous throw (or a
  // non-promise return value) from adapter.search into a regular outcome, and
  // the finally block always releases the timer.
  const runSearch = async () => {
    try {
      const results = await adapter.search({
        ...params,
        signal: controller.signal,
        requestPolicy: policy,
      });
      return describeOutcome({ results });
    } catch (error) {
      return describeOutcome({
        error: normalizeError(error, { engine: adapter.name }),
      });
    } finally {
      clearTimeout(timeoutId);
    }
  };

  return {
    engine: adapter.name,
    promise: runSearch(),
    abort: () => controller.abort(),
  };
}
|
|
115
|
-
|
|
116
|
-
/**
 * Assemble the search response payload.
 *
 * @param {Object} parts
 * @param {string} [parts.vertical="web"] - Vertical echoed back in the payload.
 * @param {*} parts.query - Query echoed back in the payload.
 * @param {Array} parts.enabledEngines - Reported as `enabled_engines`.
 * @param {Array} parts.skippedEngines - Reported as `skipped_engines`.
 * @param {Iterable} parts.unresponsiveEngines - Reported as `unresponsive_engines`
 *   with duplicates removed, keeping first-seen order.
 * @param {Array} parts.results - Result list; its length becomes `number_of_results`.
 * @returns {Object} The response payload.
 */
function buildSearchResponse({
  vertical = "web",
  query,
  enabledEngines,
  skippedEngines,
  unresponsiveEngines,
  results,
}) {
  const resultCount = results.length;
  const distinctUnresponsive = Array.from(new Set(unresponsiveEngines));

  const payload = {};
  payload.vertical = vertical;
  payload.query = query;
  payload.number_of_results = resultCount;
  payload.enabled_engines = enabledEngines;
  payload.skipped_engines = skippedEngines;
  payload.unresponsive_engines = distinctUnresponsive;
  payload.results = results;
  return payload;
}
|
|
134
|
-
|
|
135
|
-
/**
 * Assemble the metadata object that accompanies a search response, mapping
 * the camelCase inputs onto snake_case keys.
 *
 * @param {Object} parts
 * @param {*} parts.cacheStatus - Reported as `cache_status`.
 * @param {*} [parts.cacheLayer="none"] - Reported as `cache_layer`.
 * @param {*} parts.fallbackOrder - Reported as `fallback_order`.
 * @param {*} parts.fallbackPath - Reported as `fallback_path`.
 * @param {*} parts.engineTimings - Reported as `engine_timings`.
 * @param {*} [parts.strategy="tiered"] - Reported as `strategy`.
 * @returns {Object} The metadata object.
 */
function buildSearchMeta({
  cacheStatus,
  cacheLayer = "none",
  fallbackOrder,
  fallbackPath,
  engineTimings,
  strategy = "tiered",
}) {
  const meta = {};
  meta.cache_status = cacheStatus;
  meta.cache_layer = cacheLayer;
  meta.fallback_order = fallbackOrder;
  meta.fallback_path = fallbackPath;
  meta.engine_timings = engineTimings;
  meta.strategy = strategy;
  return meta;
}
|
|
152
|
-
|
|
153
|
-
/**
 * Abort every search that is still tracked as in flight.
 *
 * @param {Map<string, {abort: Function}>} activeSearches - Search handles
 *   keyed by engine name; each handle's `abort()` is invoked in insertion order.
 * @returns {void}
 */
function abortActiveSearches(activeSearches) {
  activeSearches.forEach((task) => {
    task.abort();
  });
}
|
|
158
|
-
|
|
159
|
-
/**
 * Query engines tier by tier, running all engines of one tier concurrently,
 * and stop as soon as enough results have been gathered.
 *
 * "Enough" means at least `FALLBACK_MIN_RESULTS` (default 6, minimum 1)
 * distinct canonical URLs, contributed by at least
 * `FALLBACK_MIN_CONTRIBUTING_ENGINES` engines (default 2, capped at the number
 * of engines). Tiers are visited in the order given by
 * `getTierExecutionOrder`. Searches still in flight when a tier is left are
 * aborted. Collected results are deduplicated and ranked once at the end.
 *
 * @param {Object} options
 * @param {Object} options.registry - Engine adapters keyed by engine name.
 * @param {string[]} options.engines - Engines eligible for this request.
 * @param {string} options.vertical - Forwarded to each engine search.
 * @param {string} options.query - Search query.
 * @param {*} options.language - Forwarded to each engine search.
 * @param {*} options.time_range - Forwarded to each engine search.
 * @param {*} options.pageno - Forwarded to each engine search.
 * @param {*} options.clientId - Forwarded to each engine search.
 * @param {*} options.runtimeContext - Passed to logging and deferred tasks.
 * @returns {Promise<{results: Array, unresponsiveEngines: string[], meta: {fallbackPath: string[], engineTimings: Object[]}}>}
 */
async function runParallelSearch({
  registry,
  engines,
  vertical,
  query,
  language,
  time_range,
  pageno,
  clientId,
  runtimeContext,
}) {
  // Parse through the shared helper so a non-numeric setting falls back to 6.
  // A NaN threshold would make hasEnoughResults() permanently false and force
  // every tier to run. Negative values also fall back to the default.
  const minResults = Math.max(
    1,
    parseNonNegativeInt(env.FALLBACK_MIN_RESULTS, 6)
  );
  const minContributingEngines = Math.min(
    engines.length,
    parsePositiveInt(env.FALLBACK_MIN_CONTRIBUTING_ENGINES, 2)
  );
  const engineResults = [];
  const unresponsiveEngines = [];
  const engineTimings = [];
  const fallbackPath = [];
  const canonicalUrls = new Set();
  const tierGroups = groupEnginesByTier(engines, registry);
  const tierOrder = getTierExecutionOrder(tierGroups);

  const hasEnoughResults = () =>
    canonicalUrls.size >= minResults &&
    engineResults.length >= minContributingEngines;

  for (const tier of tierOrder) {
    if (hasEnoughResults()) {
      break;
    }

    const tierEngines = tierGroups.get(tier) || [];
    if (tierEngines.length === 0) {
      continue;
    }

    fallbackPath.push(...tierEngines);
    const activeSearches = new Map();
    for (const engineName of tierEngines) {
      activeSearches.set(
        engineName,
        startEngineSearch(registry[engineName], {
          vertical,
          query,
          language,
          time_range,
          pageno,
          clientId,
          runtimeContext,
        })
      );
    }

    // Handle outcomes one at a time, in completion order, until the tier is
    // drained or the thresholds are met.
    while (activeSearches.size > 0 && !hasEnoughResults()) {
      const completionPromises = [...activeSearches.values()].map(
        (task) => task.promise
      );
      const outcome = await Promise.race(completionPromises);
      // Guarded once and reused below: a success outcome without a results
      // array counts as zero results instead of throwing mid-loop.
      const resultCount = outcome.results?.length || 0;

      activeSearches.delete(outcome.engine);
      engineTimings.push({
        engine: outcome.engine,
        duration_ms: outcome.duration_ms,
        status: outcome.error ? outcome.error.code : "ok",
        result_count: resultCount,
        tier: outcome.tier,
      });

      if (outcome.error) {
        logWarn("search.engine_failed", {
          engine: outcome.engine,
          tier: outcome.tier,
          code: outcome.error.code,
          duration_ms: outcome.duration_ms,
        }, undefined, runtimeContext);
        await runDeferredTask(
          runtimeContext,
          `health-failure:${outcome.engine}`,
          createDeferredEngineFailureRecorder(outcome.engine)
        );
        unresponsiveEngines.push(outcome.engine);
        continue;
      }

      await runDeferredTask(
        runtimeContext,
        `health-success:${outcome.engine}`,
        createDeferredEngineSuccessRecorder(outcome.engine)
      );

      if (resultCount > 0) {
        engineResults.push({
          engine: outcome.engine,
          results: outcome.results,
        });
        for (const result of outcome.results) {
          canonicalUrls.add(
            canonicalizeUrl(result.url || result.link || result.href || "")
          );
        }
      }
    }

    // Anything still running is no longer needed for this request.
    if (activeSearches.size > 0) {
      abortActiveSearches(activeSearches);
    }

    if (hasEnoughResults()) {
      break;
    }
  }

  // Dedupe and rank once, after all collected results
  const aggregatedResults = dedupeAndRankResults({
    engineResults,
    query,
    registry,
  });

  return {
    results: aggregatedResults,
    unresponsiveEngines,
    meta: {
      fallbackPath,
      engineTimings,
    },
  };
}
|
|
290
|
-
|
|
291
|
-
/**
 * Run a search across the selected engines and return the response together
 * with metadata describing how it was produced.
 *
 * Flow: resolve the engine selection and drop engines that cannot serve the
 * request; return a cached response on a cache hit; otherwise run the tiered
 * search. If that search yields no results, at least one engine failed and a
 * stale cache entry exists, the stale response is returned
 * (`stale-if-error`). Otherwise a fresh response is built and a cache write
 * is scheduled.
 *
 * Per-engine timings are recorded for every search that actually ran,
 * including the stale-if-error path.
 *
 * @param {Object} options
 * @param {string} [options.vertical="web"] - Search vertical.
 * @param {string} options.query - Search query.
 * @param {*} options.engines - Requested engines, passed to `resolveEngineSelection`.
 * @param {*} options.language - Forwarded to the engines; part of the cache key.
 * @param {*} options.time_range - Forwarded to the engines; part of the cache key.
 * @param {*} options.pageno - Forwarded to the engines; part of the cache key.
 * @param {*} options.clientId - Forwarded to the engines.
 * @param {*} options.runtimeContext - Passed to deferred tasks and metrics.
 * @returns {Promise<{response: Object, meta: Object}>}
 * @throws {ApiError} `NO_ENGINES_AVAILABLE` (status 400) when no engine remains
 *   after selection and capability filtering.
 */
export async function searchAllWithMeta({
  vertical = "web",
  query,
  engines,
  language,
  time_range,
  pageno,
  clientId,
  runtimeContext,
}) {
  const registry = getEngineRegistry();
  const engineSelection = resolveEngineSelection(engines, { vertical });
  const capabilitySelection = filterEnginesByCapabilities(
    engineSelection.enabledEngines,
    registry,
    {
      vertical,
      time_range,
      pageno,
    }
  );
  const enabledEngines = capabilitySelection.enabledEngines;
  const skippedEngines = [
    ...engineSelection.skippedEngines,
    ...capabilitySelection.skippedEngines,
  ];

  if (enabledEngines.length === 0) {
    throw new ApiError({
      status: 400,
      code: "NO_ENGINES_AVAILABLE",
      category: "validation",
      message: "No requested search engines are available for these parameters",
    });
  }

  const cacheParams = {
    vertical,
    query,
    requested_engines: engineSelection.requestedEngines,
    engines: enabledEngines,
    language,
    time_range,
    pageno,
  };
  const cachedResponse = await getCachedSearchResponse(cacheParams);
  if (cachedResponse?.state === "hit") {
    return {
      response: cachedResponse.response,
      meta: buildSearchMeta({
        cacheStatus: "hit",
        cacheLayer: cachedResponse.layer || "unknown",
        fallbackOrder: enabledEngines,
        fallbackPath: [],
        engineTimings: [],
        strategy: "cache-hit",
      }),
    };
  }

  const fallbackOrder = enabledEngines;
  const searchOutcome = await runParallelSearch({
    registry,
    engines: fallbackOrder,
    vertical,
    query,
    language,
    time_range,
    pageno,
    clientId,
    runtimeContext,
  });
  const { fallbackPath, engineTimings } = searchOutcome.meta;

  // Shared by both return paths below so engine timings are never dropped.
  const recordEngineTimings = () => {
    for (const timing of engineTimings) {
      recordTiming(runtimeContext, "search.engine", timing.duration_ms, {
        engine: timing.engine,
        tier: timing.tier,
        status: timing.status,
        result_count: timing.result_count,
      });
    }
  };

  const searchFailed =
    searchOutcome.results.length === 0 &&
    searchOutcome.unresponsiveEngines.length > 0;

  if (searchFailed && cachedResponse?.state === "stale") {
    // Previously this path returned without recording timings, hiding the
    // failing engines from metrics exactly when they matter most.
    recordEngineTimings();
    return {
      response: cachedResponse.response,
      meta: buildSearchMeta({
        cacheStatus: "stale-if-error",
        cacheLayer: cachedResponse.layer || "kv",
        fallbackOrder,
        fallbackPath,
        engineTimings,
        strategy: "tiered",
      }),
    };
  }

  const response = buildSearchResponse({
    vertical,
    query,
    enabledEngines,
    skippedEngines,
    unresponsiveEngines: searchOutcome.unresponsiveEngines,
    results: searchOutcome.results,
  });

  // NOTE(review): an empty response produced while engines were failing is
  // still handed to the cache writer here — confirm the writer's handling of
  // such responses is intentional.
  await runDeferredTask(
    runtimeContext,
    "search-cache-write",
    createDeferredCachedSearchResponseWriter(cacheParams, response)
  );
  recordMetric(runtimeContext, "search.cache_write_scheduled", {
    cache_layer: "edge+kv",
  });
  recordEngineTimings();

  return {
    response,
    meta: buildSearchMeta({
      cacheStatus: cachedResponse?.state === "stale" ? "revalidated" : "miss",
      cacheLayer: cachedResponse?.layer || "none",
      fallbackOrder,
      fallbackPath,
      engineTimings,
      strategy: "tiered",
    }),
  };
}
|
|
419
|
-
|
|
420
|
-
/**
 * Run a search and return only the response payload, discarding the metadata
 * that `searchAllWithMeta` produces alongside it.
 *
 * @param {Object} params - Passed unchanged to `searchAllWithMeta`.
 * @returns {Promise<Object>} The `response` part of the search outcome.
 */
export async function searchAll(params) {
  const outcome = await searchAllWithMeta(params);
  return outcome.response;
}
|
package/src/search/gateway.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export * from "./gateway.impl.js";
|