@endday/search-mcp 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/dist/index.js +4724 -0
  2. package/{mcp → dist}/search-mcp.js +1 -2
  3. package/package.json +14 -14
  4. package/data/blocklist.generated.js +0 -2
  5. package/envs.js +0 -129
  6. package/index.js +0 -6
  7. package/src/content/extract.impl.js +0 -228
  8. package/src/content/extract.js +0 -1
  9. package/src/content/fetch.impl.js +0 -400
  10. package/src/content/fetch.js +0 -1
  11. package/src/core/crypto.js +0 -7
  12. package/src/core/errors.impl.js +0 -52
  13. package/src/core/errors.js +0 -1
  14. package/src/core/html.impl.js +0 -69
  15. package/src/core/html.js +0 -1
  16. package/src/mcp/config.js +0 -75
  17. package/src/mcp/format.js +0 -44
  18. package/src/mcp/index.js +0 -10
  19. package/src/mcp/local/content.js +0 -26
  20. package/src/mcp/local/search.js +0 -233
  21. package/src/mcp/schemas.js +0 -132
  22. package/src/mcp/server.js +0 -97
  23. package/src/mcp/tools/content.js +0 -31
  24. package/src/mcp/tools/jinaContent.js +0 -38
  25. package/src/mcp/tools/newsSearch.js +0 -22
  26. package/src/mcp/tools/webSearch.js +0 -57
  27. package/src/platform/auth.impl.js +0 -166
  28. package/src/platform/auth.js +0 -1
  29. package/src/platform/cache.impl.js +0 -166
  30. package/src/platform/cache.js +0 -1
  31. package/src/platform/health.impl.js +0 -133
  32. package/src/platform/health.js +0 -1
  33. package/src/platform/http.impl.js +0 -108
  34. package/src/platform/http.js +0 -1
  35. package/src/platform/logger.impl.js +0 -51
  36. package/src/platform/logger.js +0 -1
  37. package/src/platform/metrics.impl.js +0 -43
  38. package/src/platform/metrics.js +0 -1
  39. package/src/platform/nodeHttpClient.js +0 -104
  40. package/src/platform/rateLimit.impl.js +0 -141
  41. package/src/platform/rateLimit.js +0 -1
  42. package/src/platform/requestContext.impl.js +0 -10
  43. package/src/platform/requestContext.js +0 -1
  44. package/src/platform/session.impl.js +0 -198
  45. package/src/platform/session.js +0 -1
  46. package/src/platform/stateKv.impl.js +0 -18
  47. package/src/platform/stateKv.js +0 -1
  48. package/src/platform/tasks.impl.js +0 -17
  49. package/src/platform/tasks.js +0 -1
  50. package/src/routes/requestParams.impl.js +0 -12
  51. package/src/routes/requestParams.js +0 -1
  52. package/src/search/engineRegistry.impl.js +0 -117
  53. package/src/search/engineRegistry.js +0 -1
  54. package/src/search/engineRequest.impl.js +0 -377
  55. package/src/search/engineRequest.js +0 -1
  56. package/src/search/engineUtils.impl.js +0 -227
  57. package/src/search/engineUtils.js +0 -1
  58. package/src/search/engines/baidu.impl.js +0 -145
  59. package/src/search/engines/baidu.js +0 -2
  60. package/src/search/engines/bing.impl.js +0 -509
  61. package/src/search/engines/bing.js +0 -2
  62. package/src/search/engines/brave.impl.js +0 -223
  63. package/src/search/engines/brave.js +0 -2
  64. package/src/search/engines/duckduckgo.impl.js +0 -164
  65. package/src/search/engines/duckduckgo.js +0 -2
  66. package/src/search/engines/mojeek.impl.js +0 -115
  67. package/src/search/engines/mojeek.js +0 -2
  68. package/src/search/engines/qwant.impl.js +0 -188
  69. package/src/search/engines/qwant.js +0 -2
  70. package/src/search/engines/startpage.impl.js +0 -237
  71. package/src/search/engines/startpage.js +0 -2
  72. package/src/search/engines/toutiao.impl.js +0 -265
  73. package/src/search/engines/toutiao.js +0 -2
  74. package/src/search/engines/yahoo.impl.js +0 -379
  75. package/src/search/engines/yahoo.js +0 -2
  76. package/src/search/gateway.impl.js +0 -423
  77. package/src/search/gateway.js +0 -1
  78. package/src/search/ranking.impl.js +0 -381
  79. package/src/search/ranking.js +0 -1
  80. package/src/search/requestPolicy.impl.js +0 -137
  81. package/src/search/requestPolicy.js +0 -1
  82. package/src/search/upstreamSession.impl.js +0 -148
  83. package/src/search/upstreamSession.js +0 -1
  84. /package/{index.d.ts → dist/index.d.ts} +0 -0
@@ -1,223 +0,0 @@
1
- import { ApiError } from "../../core/errors.js";
2
- import {
3
- fetchSearchText,
4
- isChallengeResponse,
5
- throwBlockedUpstreamError,
6
- } from "../engineRequest.js";
7
- import {
8
- ensureAbsoluteUrl,
9
- mapLanguage,
10
- mapTimeRange,
11
- } from "../engineUtils.js";
12
- import { cleanText, parseHtml } from "../../core/html.js";
13
- import { normalizeResults } from "../ranking.js";
14
-
15
/**
 * Brave time-filter codes, looked up through mapTimeRange and sent as the
 * `tf` query parameter. Frozen so the shared table cannot be mutated.
 */
const BRAVE_TIME_RANGE = Object.freeze({
  day: "pd",
  week: "pw",
  month: "pm",
  year: "py",
});

/**
 * Values for Brave's `ui_lang` cookie, looked up through mapLanguage.
 */
const BRAVE_LANGUAGE = Object.freeze({
  en: "en-us",
  "en-us": "en-us",
  "en-gb": "en-gb",
  zh: "zh-hans",
  "zh-cn": "zh-hans",
  "zh-tw": "zh-hant",
});

/**
 * Values for Brave's `country` cookie, looked up through mapLanguage.
 */
const BRAVE_COUNTRY = Object.freeze({
  en: "us",
  "en-us": "us",
  "en-gb": "gb",
  zh: "cn",
  "zh-cn": "cn",
  "zh-tw": "tw",
});

/**
 * Markup patterns handed to isChallengeResponse to spot a captcha page.
 * None of the patterns use the `g`/`y` flags, so `.test()` stays stateless.
 */
const BRAVE_CHALLENGE_PATTERNS = Object.freeze([
  /name=["']captcha["']/i,
  /id=["'][^"']*captcha[^"']*["']/i,
]);
44
-
45
/**
 * Decides whether a Brave response body is a bot-challenge page.
 *
 * A body counts as a challenge when isChallengeResponse matches one of
 * BRAVE_CHALLENGE_PATTERNS, or when it mentions "verify you are human" /
 * "unusual traffic" and also contains a `<form` tag.
 *
 * @param {*} source - Response body; falsy values are treated as "".
 * @returns {*} Truthy when the body looks like a challenge page.
 */
function isBraveChallengeResponse(source) {
  const body = String(source || "");

  const patternHit = isChallengeResponse(body, BRAVE_CHALLENGE_PATTERNS);
  if (patternHit) {
    return patternHit;
  }

  const mentionsHumanCheck =
    /verify you are human/i.test(body) || /unusual traffic/i.test(body);

  return mentionsHumanCheck && /<form\b/i.test(body);
}
54
-
55
/**
 * Hands Brave's engine label and the "html" surface to
 * throwBlockedUpstreamError.
 */
function throwBraveChallengeError() {
  const blockedDetails = { engine: "Brave", surface: "html" };
  throwBlockedUpstreamError(blockedDetails);
}
61
-
62
/**
 * Extracts web results from a Brave search HTML page.
 *
 * Only `.snippet` nodes whose `data-type` is "web" are considered; a node
 * without both a link and a title element is skipped.
 *
 * @param {string} html - Raw HTML body returned by Brave.
 * @returns {*} Result of normalizeResults over the collected
 *   `{ title, url, description }` entries.
 * @throws {ApiError} 502 UPSTREAM_PARSE_ERROR when nothing was extracted.
 */
export function parseBraveResults(html) {
  if (isBraveChallengeResponse(html)) {
    throwBraveChallengeError();
  }

  const webSnippets = parseHtml(html)
    .querySelectorAll(".snippet")
    .filter((snippet) => snippet.getAttribute("data-type") === "web");

  const collected = [];

  webSnippets.forEach((snippet) => {
    const anchor =
      snippet.querySelector("a.l1[href]") || snippet.querySelector("a[href]");
    const heading =
      snippet.querySelector(".title") ||
      snippet.querySelector(".search-snippet-title");

    if (!anchor || !heading) {
      return;
    }

    const blurb =
      snippet.querySelector(".generic-snippet .content") ||
      snippet.querySelector(".content");

    const title = cleanText(heading.innerHTML || heading.text);
    const url = ensureAbsoluteUrl(
      anchor.getAttribute("href"),
      "https://search.brave.com"
    );
    const description = cleanText(blurb?.innerHTML || blurb?.text || "");

    collected.push({ title, url, description });
  });

  if (collected.length > 0) {
    return normalizeResults(collected);
  }

  throw new ApiError({
    status: 502,
    code: "UPSTREAM_PARSE_ERROR",
    category: "upstream",
    message: "Brave parser could not find organic results",
  });
}
109
-
110
/**
 * Extracts news results from a Brave News HTML page.
 *
 * Only `.snippet` nodes whose `data-type` is "news" are considered; a node
 * without both a link and a `.title` element is skipped.
 *
 * @param {string} html - Raw HTML body returned by Brave.
 * @returns {*} Result of normalizeResults over the collected
 *   `{ title, url, description, source_name, published_text }` entries.
 * @throws {ApiError} 502 UPSTREAM_PARSE_ERROR when nothing was extracted.
 */
export function parseBraveNewsResults(html) {
  if (isBraveChallengeResponse(html)) {
    throwBraveChallengeError();
  }

  // Text of an optional node, or "" when the node is missing.
  const textOf = (element) =>
    cleanText(element?.innerHTML || element?.text || "");

  const newsSnippets = parseHtml(html)
    .querySelectorAll('.snippet')
    .filter((snippet) => snippet.getAttribute("data-type") === "news");

  const collected = [];

  newsSnippets.forEach((snippet) => {
    const anchor =
      snippet.querySelector("a.l1[href]") || snippet.querySelector("a[href]");
    const heading = snippet.querySelector(".title");

    if (!anchor || !heading) {
      return;
    }

    const blurb =
      snippet.querySelector(".generic-snippet .description") ||
      snippet.querySelector(".content");
    const publisher = snippet.querySelector(
      ".site-name-content .desktop-small-semibold"
    );
    const published =
      snippet.querySelector(
        ".site-name-content .desktop-small-regular.t-tertiary"
      ) || snippet.querySelector(".age-snippet");

    collected.push({
      title: cleanText(heading.innerHTML || heading.text),
      url: ensureAbsoluteUrl(
        anchor.getAttribute("href"),
        "https://search.brave.com"
      ),
      description: textOf(blurb),
      source_name: textOf(publisher),
      published_text: textOf(published),
    });
  });

  if (collected.length > 0) {
    return normalizeResults(collected);
  }

  throw new ApiError({
    status: 502,
    code: "UPSTREAM_PARSE_ERROR",
    category: "upstream",
    message: "Brave News parser could not find organic results",
  });
}
161
-
162
/**
 * Runs a Brave search (web or news) and parses the returned HTML.
 *
 * @param {object} params
 * @param {string} [params.vertical="web"] - "news" targets the /news page;
 *   anything else targets /search.
 * @param {string} params.query - Search terms, sent as `q`.
 * @param {string} [params.language] - Mapped to the `country` and `ui_lang`
 *   cookies and also forwarded to fetchSearchText.
 * @param {string} [params.time_range] - Mapped through BRAVE_TIME_RANGE to
 *   the `tf` parameter when a mapping exists.
 * @param {*} [params.signal] - Forwarded to fetchSearchText.
 * @param {*} [params.runtimeContext] - Forwarded to fetchSearchText.
 * @returns {Promise<*>} Parsed results from the matching Brave parser.
 */
async function searchBrave(params) {
  const {
    vertical = "web",
    query,
    language,
    time_range,
    signal,
    runtimeContext,
  } = params;

  const wantsNews = vertical === "news";
  const endpoint = wantsNews
    ? "https://search.brave.com/news"
    : "https://search.brave.com/search";

  const requestUrl = new URL(endpoint);
  const fixedParams = [
    ["q", query],
    ["spellcheck", "0"],
    ["source", "web"],
    ["summary", "0"],
  ];
  for (const [key, value] of fixedParams) {
    requestUrl.searchParams.set(key, value);
  }

  const tf = mapTimeRange(time_range, BRAVE_TIME_RANGE);
  if (tf) {
    requestUrl.searchParams.set("tf", tf);
  }

  const cookies = {
    country: mapLanguage(language, BRAVE_COUNTRY, "us"),
    ui_lang: mapLanguage(language, BRAVE_LANGUAGE, "en-us"),
    useLocation: "0",
    summarizer: "0",
    safesearch: "off",
  };

  const body = await fetchSearchText(requestUrl.toString(), {
    engine: "brave",
    engineLabel: "Brave",
    signal,
    language,
    cookies,
    referrer: "https://search.brave.com/",
    runtimeContext,
    blockedStatuses: [403, 429],
    isBlocked: isBraveChallengeResponse,
    blockedSurface: "html",
  });

  if (wantsNews) {
    return parseBraveNewsResults(body);
  }
  return parseBraveResults(body);
}
200
-
201
/**
 * Adapter descriptor for the Brave engine: identity, ranking hints, request
 * policy, supported features, and the search entry point.
 */
export const braveAdapter = {
  name: "brave",
  label: "Brave",
  priority: 90,
  tier: "secondary",
  requestPolicy: { retryAttempts: 0, minRequestIntervalMs: 250 },
  supports: {
    verticals: ["web", "news"],
    language: true,
    time_range: true,
    pageno: false,
    news: { pageno: false },
  },
  // Always reports the engine as available.
  isAvailable() {
    return true;
  },
  search: searchBrave,
};

export default searchBrave;
@@ -1,2 +0,0 @@
1
- export * from "./brave.impl.js";
2
- export { default } from "./brave.impl.js";
@@ -1,164 +0,0 @@
1
- import { ApiError } from "../../core/errors.js";
2
- import {
3
- fetchSearchText,
4
- isChallengeResponse,
5
- throwBlockedUpstreamError,
6
- } from "../engineRequest.js";
7
- import {
8
- ensureAbsoluteUrl,
9
- mapLanguage,
10
- mapTimeRange,
11
- resolvePageNumber,
12
- } from "../engineUtils.js";
13
- import { cleanText, parseHtml } from "../../core/html.js";
14
- import { normalizeResults } from "../ranking.js";
15
-
16
/**
 * DuckDuckGo region/locale codes, looked up through mapLanguage and sent as
 * the `kl` form field and cookie. Frozen so the shared table cannot be
 * mutated.
 */
const DUCKDUCKGO_LANGUAGE = Object.freeze({
  en: "us-en",
  "en-us": "us-en",
  "en-gb": "uk-en",
  zh: "cn-zh",
  "zh-cn": "cn-zh",
  "zh-tw": "tw-zh",
});

/**
 * DuckDuckGo date-filter codes, looked up through mapTimeRange and sent as
 * the `df` form field and cookie.
 */
const DUCKDUCKGO_TIME_RANGE = Object.freeze({
  day: "d",
  week: "w",
  month: "m",
  year: "y",
});

/**
 * Text patterns handed to isChallengeResponse to spot a bot-challenge page.
 * None of the patterns use the `g`/`y` flags, so `.test()` stays stateless.
 */
const DUCKDUCKGO_CHALLENGE_PATTERNS = Object.freeze([
  /anomaly\.js/i,
  /bots use DuckDuckGo too/i,
  /automated requests/i,
]);
37
-
38
/**
 * Decides whether a DuckDuckGo response body is a bot-challenge page.
 *
 * A body counts as a challenge when isChallengeResponse matches one of
 * DUCKDUCKGO_CHALLENGE_PATTERNS, or when it mentions "verify you are human"
 * and also contains a `<form` tag.
 *
 * @param {*} source - Response body; falsy values are treated as "".
 * @returns {*} Truthy when the body looks like a challenge page.
 */
function isDuckDuckGoChallengeResponse(source) {
  const body = String(source || "");

  const patternHit = isChallengeResponse(body, DUCKDUCKGO_CHALLENGE_PATTERNS);
  if (patternHit) {
    return patternHit;
  }

  const mentionsHumanCheck = /verify you are human/i.test(body);
  return mentionsHumanCheck && /<form\b/i.test(body);
}
46
-
47
/**
 * Hands DuckDuckGo's engine label and the "html" surface to
 * throwBlockedUpstreamError.
 */
function throwDuckDuckGoChallengeError() {
  const blockedDetails = { engine: "DuckDuckGo", surface: "html" };
  throwBlockedUpstreamError(blockedDetails);
}
53
-
54
/**
 * Resolves a DuckDuckGo result link to the URL it points at.
 *
 * When the link carries a `uddg` query parameter, that parameter's value is
 * returned as the destination; otherwise the absolutized link itself is
 * returned.
 *
 * @param {string} rawUrl - `href` taken from a result anchor.
 * @returns {string} Destination URL, or the absolutized input when no
 *   `uddg` parameter is present or the link cannot be parsed.
 */
function extractDuckDuckGoUrl(rawUrl) {
  const absoluteUrl = ensureAbsoluteUrl(rawUrl, "https://duckduckgo.com");

  try {
    // URLSearchParams.get() already percent-decodes the value. Decoding a
    // second time corrupts destinations that contain a literal "%XX"
    // sequence (e.g. "?q=100%25") and throws on a bare "%".
    const target = new URL(absoluteUrl).searchParams.get("uddg");
    return target || absoluteUrl;
  } catch {
    // Unparseable link: fall back to the absolutized input.
    return absoluteUrl;
  }
}
65
-
66
/**
 * Extracts results from a DuckDuckGo HTML results page.
 *
 * Every `.result` node with a link contributes one entry; nodes without a
 * link are skipped.
 *
 * @param {string} html - Raw HTML body returned by DuckDuckGo.
 * @returns {*} Result of normalizeResults over the collected
 *   `{ title, url, description }` entries.
 * @throws {ApiError} 502 UPSTREAM_PARSE_ERROR when nothing was extracted.
 */
export function parseDuckDuckGoResults(html) {
  if (isDuckDuckGoChallengeResponse(html)) {
    throwDuckDuckGoChallengeError();
  }

  const collected = [];

  for (const entry of parseHtml(html).querySelectorAll(".result")) {
    const anchor =
      entry.querySelector("a.result__a[href]") ||
      entry.querySelector("a[href]");

    if (anchor) {
      const snippet =
        entry.querySelector(".result__snippet") ||
        entry.querySelector(".result__body");

      const title = cleanText(anchor.innerHTML || anchor.text);
      const url = extractDuckDuckGoUrl(anchor.getAttribute("href"));
      const description = cleanText(
        snippet?.innerHTML || snippet?.text || ""
      );

      collected.push({ title, url, description });
    }
  }

  if (collected.length > 0) {
    return normalizeResults(collected);
  }

  throw new ApiError({
    status: 502,
    code: "UPSTREAM_PARSE_ERROR",
    category: "upstream",
    message: "DuckDuckGo parser could not find organic results",
  });
}
104
-
105
/**
 * Runs a DuckDuckGo HTML search via POST and parses the returned page.
 *
 * @param {object} params
 * @param {string} params.query - Search terms, sent as form field `q`.
 * @param {string} [params.language] - Mapped through DUCKDUCKGO_LANGUAGE
 *   (fallback "wt-wt") to the `kl` form field and cookie.
 * @param {string} [params.time_range] - Mapped through
 *   DUCKDUCKGO_TIME_RANGE to the `df` form field and cookie when a mapping
 *   exists.
 * @param {*} [params.pageno] - Resolved with resolvePageNumber; any value
 *   resolving above 0 is rejected.
 * @param {*} [params.signal] - Forwarded to fetchSearchText.
 * @param {*} [params.runtimeContext] - Forwarded to fetchSearchText.
 * @returns {Promise<*>} Parsed results from parseDuckDuckGoResults.
 * @throws {ApiError} 400 UNSUPPORTED_PARAMETER when a later page is asked for.
 */
async function searchDuckDuckGo(params) {
  const { query, language, time_range, pageno, signal, runtimeContext } =
    params;

  const pageIndex = resolvePageNumber(pageno);
  const region = mapLanguage(language, DUCKDUCKGO_LANGUAGE, "wt-wt");

  if (pageIndex > 0) {
    throw new ApiError({
      status: 400,
      code: "UNSUPPORTED_PARAMETER",
      category: "validation",
      message: "DuckDuckGo HTML pagination is not supported",
    });
  }

  const dateFilter = mapTimeRange(time_range, DUCKDUCKGO_TIME_RANGE);

  // The same region/date settings go into both the form body and cookies.
  const form = { q: query, kl: region };
  const cookies = { kl: region };
  if (dateFilter) {
    form.df = dateFilter;
    cookies.df = dateFilter;
  }

  const body = await fetchSearchText("https://html.duckduckgo.com/html/", {
    engine: "duckduckgo",
    engineLabel: "DuckDuckGo",
    signal,
    language,
    method: "POST",
    form,
    cookies,
    referrer: "https://html.duckduckgo.com/",
    origin: "https://html.duckduckgo.com",
    runtimeContext,
    blockedStatuses: [403, 429],
    isBlocked: isDuckDuckGoChallengeResponse,
    blockedSurface: "html",
  });

  return parseDuckDuckGoResults(body);
}
145
-
146
/**
 * Adapter descriptor for the DuckDuckGo engine: identity, ranking hints,
 * request policy, supported features, and the search entry point.
 */
export const duckDuckGoAdapter = {
  name: "duckduckgo",
  label: "DuckDuckGo",
  priority: 95,
  tier: "primary",
  requestPolicy: { retryAttempts: 0, minRequestIntervalMs: 150 },
  supports: { language: true, time_range: true, pageno: false },
  // Always reports the engine as available.
  isAvailable() {
    return true;
  },
  search: searchDuckDuckGo,
};

export default searchDuckDuckGo;
@@ -1,2 +0,0 @@
1
- export * from "./duckduckgo.impl.js";
2
- export { default } from "./duckduckgo.impl.js";
@@ -1,115 +0,0 @@
1
- import { ApiError } from "../../core/errors.js";
2
- import {
3
- fetchSearchText,
4
- isChallengeResponse,
5
- throwBlockedUpstreamError,
6
- } from "../engineRequest.js";
7
- import { resolvePageNumber } from "../engineUtils.js";
8
- import { cleanText, parseHtml } from "../../core/html.js";
9
- import { normalizeResults } from "../ranking.js";
10
-
11
/**
 * Markup patterns handed to isChallengeResponse to spot a captcha page.
 * Frozen so the shared list cannot be mutated; none of the patterns use the
 * `g`/`y` flags, so `.test()` stays stateless.
 */
const MOJEEK_CHALLENGE_PATTERNS = Object.freeze([
  /name=["']captcha["']/i,
  /id=["'][^"']*captcha[^"']*["']/i,
]);
15
-
16
/**
 * Decides whether a Mojeek response body is a bot-challenge page.
 *
 * A body counts as a challenge when isChallengeResponse matches one of
 * MOJEEK_CHALLENGE_PATTERNS, or when it mentions "verify you are human" /
 * "unusual traffic" and also contains a `<form` tag.
 *
 * @param {*} source - Response body; falsy values are treated as "".
 * @returns {*} Truthy when the body looks like a challenge page.
 */
function isMojeekChallengeResponse(source) {
  const body = String(source || "");

  const patternHit = isChallengeResponse(body, MOJEEK_CHALLENGE_PATTERNS);
  if (patternHit) {
    return patternHit;
  }

  const mentionsHumanCheck =
    /verify you are human/i.test(body) || /unusual traffic/i.test(body);

  return mentionsHumanCheck && /<form\b/i.test(body);
}
25
-
26
/**
 * Hands Mojeek's engine label and the "html" surface to
 * throwBlockedUpstreamError.
 */
function throwMojeekChallengeError() {
  const blockedDetails = { engine: "Mojeek", surface: "html" };
  throwBlockedUpstreamError(blockedDetails);
}
32
-
33
/**
 * Extracts results from a Mojeek HTML results page.
 *
 * Every `ul.results-standard li` item with a heading link contributes one
 * entry; items without one are skipped. The link's `href` is used as-is.
 *
 * @param {string} html - Raw HTML body returned by Mojeek.
 * @returns {*} Result of normalizeResults over the collected
 *   `{ title, url, description }` entries.
 * @throws {ApiError} 502 UPSTREAM_PARSE_ERROR when nothing was extracted.
 */
export function parseMojeekResults(html) {
  if (isMojeekChallengeResponse(html)) {
    throwMojeekChallengeError();
  }

  const collected = [];

  for (const item of parseHtml(html).querySelectorAll(
    "ul.results-standard li"
  )) {
    const anchor =
      item.querySelector("h2 a.title[href]") ||
      item.querySelector("h2 a[href]");

    if (anchor) {
      const blurb = item.querySelector("p.s");

      const title = cleanText(anchor.innerHTML || anchor.text);
      const url = anchor.getAttribute("href");
      const description = cleanText(blurb?.innerHTML || blurb?.text || "");

      collected.push({ title, url, description });
    }
  }

  if (collected.length > 0) {
    return normalizeResults(collected);
  }

  throw new ApiError({
    status: 502,
    code: "UPSTREAM_PARSE_ERROR",
    category: "upstream",
    message: "Mojeek parser could not find organic results",
  });
}
71
-
72
/**
 * Runs a Mojeek search and parses the returned HTML.
 *
 * @param {object} params
 * @param {string} params.query - Search terms, sent as `q`.
 * @param {string} [params.language] - Forwarded to fetchSearchText.
 * @param {*} [params.pageno] - Resolved with resolvePageNumber; when the
 *   resolved page is above 0 the `s` parameter is set to `page * 10 + 1`.
 * @param {*} [params.signal] - Forwarded to fetchSearchText.
 * @param {*} [params.runtimeContext] - Forwarded to fetchSearchText.
 * @returns {Promise<*>} Parsed results from parseMojeekResults.
 */
async function searchMojeek(params) {
  const { query, language, pageno, signal, runtimeContext } = params;

  const requestUrl = new URL("https://www.mojeek.com/search");
  const queryString = requestUrl.searchParams;
  queryString.set("q", query);

  const pageIndex = resolvePageNumber(pageno);
  if (pageIndex > 0) {
    const firstResult = pageIndex * 10 + 1;
    queryString.set("s", String(firstResult));
  }

  const fetchOptions = {
    engine: "mojeek",
    engineLabel: "Mojeek",
    signal,
    language,
    referrer: "https://www.mojeek.com/",
    runtimeContext,
    blockedStatuses: [403, 429],
    isBlocked: isMojeekChallengeResponse,
    blockedSurface: "html",
  };

  const body = await fetchSearchText(requestUrl.toString(), fetchOptions);
  return parseMojeekResults(body);
}
96
-
97
/**
 * Adapter descriptor for the Mojeek engine: identity, ranking hints,
 * request policy, supported features, and the search entry point.
 */
export const mojeekAdapter = {
  name: "mojeek",
  label: "Mojeek",
  priority: 80,
  tier: "secondary",
  requestPolicy: { retryAttempts: 0, minRequestIntervalMs: 250 },
  supports: { language: true, time_range: false, pageno: true },
  // Always reports the engine as available.
  isAvailable() {
    return true;
  },
  search: searchMojeek,
};

export default searchMojeek;
@@ -1,2 +0,0 @@
1
- export * from "./mojeek.impl.js";
2
- export { default } from "./mojeek.impl.js";
@@ -1,188 +0,0 @@
1
- import { ApiError } from "../../core/errors.js";
2
- import {
3
- fetchSearchText,
4
- isChallengeResponse,
5
- throwBlockedUpstreamError,
6
- } from "../engineRequest.js";
7
- import {
8
- ensureAbsoluteUrl,
9
- mapLanguage,
10
- resolvePageNumber,
11
- } from "../engineUtils.js";
12
- import { cleanText, parseHtml } from "../../core/html.js";
13
- import { normalizeResults } from "../ranking.js";
14
-
15
/**
 * Qwant locale codes, looked up through mapLanguage. The search code
 * lower-cases the value for the `locale` parameter and uses the part before
 * "_" for the `l` parameter. Frozen so the shared table cannot be mutated.
 */
const QWANT_LANGUAGE = Object.freeze({
  en: "en_US",
  "en-us": "en_US",
  "en-gb": "en_GB",
  zh: "zh_CN",
  "zh-cn": "zh_CN",
  "zh-tw": "zh_TW",
  fr: "fr_FR",
  de: "de_DE",
  es: "es_ES",
  it: "it_IT",
});

/**
 * Markup patterns handed to isChallengeResponse to spot a blocked or
 * captcha page. None of the patterns use the `g`/`y` flags, so `.test()`
 * stays stateless.
 */
const QWANT_CHALLENGE_PATTERNS = Object.freeze([
  /<title>\s*Service unavailable\s*<\/title>/i,
  /name=["']captcha["']/i,
  /id=["'][^"']*captcha[^"']*["']/i,
]);
33
-
34
/**
 * Decides whether a Qwant response body is a blocked/challenge page.
 *
 * A QWANT_CHALLENGE_PATTERNS match only counts when the body has no
 * `<section ... <article` sequence. Independently of that, a body that
 * mentions "verify you are human" / "unusual traffic" and contains a
 * `<form` tag counts as a challenge.
 *
 * @param {*} source - Response body; falsy values are treated as "".
 * @returns {boolean} True when the body looks like a challenge page.
 */
function isQwantChallengeResponse(source) {
  const body = String(source || "");
  const containsArticles = /<section\b[\s\S]*<article\b/i.test(body);

  const patternHit = isChallengeResponse(body, QWANT_CHALLENGE_PATTERNS);
  if (patternHit && !containsArticles) {
    return true;
  }

  const mentionsHumanCheck =
    /verify you are human/i.test(body) || /unusual traffic/i.test(body);

  return mentionsHumanCheck && /<form\b/i.test(body);
}
44
-
45
/**
 * Hands Qwant's engine label and the "html" surface to
 * throwBlockedUpstreamError.
 */
function throwQwantChallengeError() {
  const blockedDetails = { engine: "Qwant", surface: "html" };
  throwBlockedUpstreamError(blockedDetails);
}
51
-
52
/**
 * Turns a URL-ish string from a Qwant result into an absolute URL.
 *
 * - Empty input yields "".
 * - A bare host such as "example.com/path" (no scheme, not starting with
 *   "//") gets "https://" prepended.
 * - Everything else goes through ensureAbsoluteUrl with the Qwant base.
 *
 * @param {*} rawUrl - Candidate URL text; falsy values are treated as "".
 * @returns {string} Normalized URL, or "" for empty input.
 */
function normalizeQwantUrl(rawUrl) {
  const candidate = String(rawUrl || "").trim();

  if (candidate === "") {
    return "";
  }

  const hasSchemeOrAuthority =
    /^https?:\/\//i.test(candidate) || candidate.startsWith("//");
  const looksLikeBareHost = /^[\w.-]+\.[a-z]{2,}(?:[/:?#]|$)/i.test(
    candidate
  );

  if (!hasSchemeOrAuthority && looksLikeBareHost) {
    return `https://${candidate}`;
  }

  return ensureAbsoluteUrl(candidate, "https://www.qwant.com");
}
69
-
70
/**
 * Picks the URL for one Qwant result.
 *
 * The text of the first matching "visible URL" element is preferred; when
 * that text is empty, the result link's `href` is used instead.
 *
 * @param {*} node - Result element to search for a visible-URL element.
 * @param {*} linkNode - The result's anchor element.
 * @returns {string} URL produced by normalizeQwantUrl.
 */
function extractQwantResultUrl(node, linkNode) {
  const urlSelectors = ["span.url.partner", ".url.partner", ".url"];

  let shownUrlElement = null;
  for (const selector of urlSelectors) {
    shownUrlElement = node.querySelector(selector);
    if (shownUrlElement) {
      break;
    }
  }

  const shownUrl = cleanText(
    shownUrlElement?.innerHTML || shownUrlElement?.text || ""
  );

  if (shownUrl) {
    return normalizeQwantUrl(shownUrl);
  }
  return normalizeQwantUrl(linkNode.getAttribute("href"));
}
81
-
82
/**
 * Extracts results from a Qwant Lite HTML page.
 *
 * Every `section article` node contributes one entry, except nodes that
 * contain a `span.tooltip` element or have no link.
 *
 * @param {string} html - Raw HTML body returned by Qwant.
 * @returns {*} Result of normalizeResults over the collected
 *   `{ title, url, description }` entries.
 * @throws {ApiError} 502 UPSTREAM_PARSE_ERROR when the normalized list is
 *   empty.
 */
export function parseQwantResults(html) {
  if (isQwantChallengeResponse(html)) {
    throwQwantChallengeError();
  }

  const collected = [];

  for (const article of parseHtml(html).querySelectorAll("section article")) {
    const hasTooltip = Boolean(article.querySelector("span.tooltip"));
    const anchor = hasTooltip
      ? null
      : article.querySelector("h2 a[href]") || article.querySelector("a[href]");

    if (anchor) {
      const blurb = article.querySelector("p");

      const title = cleanText(anchor.innerHTML || anchor.text);
      const url = extractQwantResultUrl(article, anchor);
      const description = cleanText(blurb?.innerHTML || blurb?.text || "");

      collected.push({ title, url, description });
    }
  }

  // Emptiness is judged after normalization, not on the raw list.
  const normalized = normalizeResults(collected);
  if (normalized.length !== 0) {
    return normalized;
  }

  throw new ApiError({
    status: 502,
    code: "UPSTREAM_PARSE_ERROR",
    category: "upstream",
    message: "Qwant parser could not find organic results",
  });
}
124
-
125
/**
 * Runs a Qwant Lite search and parses the returned HTML.
 *
 * @param {object} params
 * @param {string} params.query - Search terms, sent as `q`.
 * @param {string} [params.language] - Mapped through QWANT_LANGUAGE
 *   (fallback "en_US") to the `locale` and `l` parameters.
 * @param {*} [params.time_range] - Any truthy value is rejected.
 * @param {*} [params.pageno] - Resolved with resolvePageNumber; values
 *   resolving above 4 are rejected, otherwise `p` is set to `page + 1`.
 * @param {*} [params.signal] - Forwarded to fetchSearchText.
 * @param {*} [params.runtimeContext] - Forwarded to fetchSearchText.
 * @returns {Promise<*>} Parsed results from parseQwantResults.
 * @throws {ApiError} 400 UNSUPPORTED_PARAMETER for time_range or a page
 *   beyond the fifth.
 */
async function searchQwant(params) {
  const { query, language, time_range, pageno, signal, runtimeContext } =
    params;

  // Builds the shared validation error for unsupported parameters.
  const unsupported = (message) =>
    new ApiError({
      status: 400,
      code: "UNSUPPORTED_PARAMETER",
      category: "validation",
      message,
    });

  if (time_range) {
    throw unsupported("Qwant Lite time_range filtering is not supported");
  }

  const pageIndex = resolvePageNumber(pageno);
  if (pageIndex > 4) {
    throw unsupported("Qwant Lite supports at most five result pages");
  }

  const qwantLocale = mapLanguage(language, QWANT_LANGUAGE, "en_US");
  const [languagePart] = qwantLocale.split("_");

  const requestUrl = new URL("https://lite.qwant.com/");
  const queryParams = [
    ["q", query],
    ["locale", qwantLocale.toLowerCase()],
    ["l", languagePart],
    ["s", "1"],
    ["p", String(pageIndex + 1)],
  ];
  for (const [key, value] of queryParams) {
    requestUrl.searchParams.set(key, value);
  }

  const body = await fetchSearchText(requestUrl.toString(), {
    engine: "qwant",
    engineLabel: "Qwant",
    signal,
    language,
    referrer: "https://www.qwant.com/",
    runtimeContext,
    blockedStatuses: [403, 429, 503],
    isBlocked: isQwantChallengeResponse,
    blockedSurface: "html",
  });

  return parseQwantResults(body);
}
169
-
170
/**
 * Adapter descriptor for the Qwant engine: identity, ranking hints, request
 * policy, supported features, and the search entry point.
 */
export const qwantAdapter = {
  name: "qwant",
  label: "Qwant",
  priority: 85,
  tier: "secondary",
  requestPolicy: { retryAttempts: 0, minRequestIntervalMs: 250 },
  supports: { language: true, time_range: false, pageno: true },
  // Always reports the engine as available.
  isAvailable() {
    return true;
  },
  search: searchQwant,
};

export default searchQwant;
@@ -1,2 +0,0 @@
1
- export * from "./qwant.impl.js";
2
- export { default } from "./qwant.impl.js";