mcp-researchpowerpack-http 3.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/README.md +124 -0
  2. package/dist/index.d.ts +3 -0
  3. package/dist/index.d.ts.map +1 -0
  4. package/dist/index.js +227 -0
  5. package/dist/index.js.map +7 -0
  6. package/dist/mcp-use.json +7 -0
  7. package/dist/src/clients/github.d.ts +83 -0
  8. package/dist/src/clients/github.d.ts.map +1 -0
  9. package/dist/src/clients/github.js +370 -0
  10. package/dist/src/clients/github.js.map +7 -0
  11. package/dist/src/clients/reddit.d.ts +60 -0
  12. package/dist/src/clients/reddit.d.ts.map +1 -0
  13. package/dist/src/clients/reddit.js +287 -0
  14. package/dist/src/clients/reddit.js.map +7 -0
  15. package/dist/src/clients/research.d.ts +67 -0
  16. package/dist/src/clients/research.d.ts.map +1 -0
  17. package/dist/src/clients/research.js +282 -0
  18. package/dist/src/clients/research.js.map +7 -0
  19. package/dist/src/clients/scraper.d.ts +72 -0
  20. package/dist/src/clients/scraper.d.ts.map +1 -0
  21. package/dist/src/clients/scraper.js +327 -0
  22. package/dist/src/clients/scraper.js.map +7 -0
  23. package/dist/src/clients/search.d.ts +57 -0
  24. package/dist/src/clients/search.d.ts.map +1 -0
  25. package/dist/src/clients/search.js +218 -0
  26. package/dist/src/clients/search.js.map +7 -0
  27. package/dist/src/config/index.d.ts +93 -0
  28. package/dist/src/config/index.d.ts.map +1 -0
  29. package/dist/src/config/index.js +218 -0
  30. package/dist/src/config/index.js.map +7 -0
  31. package/dist/src/schemas/deep-research.d.ts +40 -0
  32. package/dist/src/schemas/deep-research.d.ts.map +1 -0
  33. package/dist/src/schemas/deep-research.js +216 -0
  34. package/dist/src/schemas/deep-research.js.map +7 -0
  35. package/dist/src/schemas/github-score.d.ts +50 -0
  36. package/dist/src/schemas/github-score.d.ts.map +1 -0
  37. package/dist/src/schemas/github-score.js +58 -0
  38. package/dist/src/schemas/github-score.js.map +7 -0
  39. package/dist/src/schemas/scrape-links.d.ts +23 -0
  40. package/dist/src/schemas/scrape-links.d.ts.map +1 -0
  41. package/dist/src/schemas/scrape-links.js +32 -0
  42. package/dist/src/schemas/scrape-links.js.map +7 -0
  43. package/dist/src/schemas/web-search.d.ts +18 -0
  44. package/dist/src/schemas/web-search.d.ts.map +1 -0
  45. package/dist/src/schemas/web-search.js +28 -0
  46. package/dist/src/schemas/web-search.js.map +7 -0
  47. package/dist/src/scoring/github-quality.d.ts +142 -0
  48. package/dist/src/scoring/github-quality.d.ts.map +1 -0
  49. package/dist/src/scoring/github-quality.js +202 -0
  50. package/dist/src/scoring/github-quality.js.map +7 -0
  51. package/dist/src/services/file-attachment.d.ts +30 -0
  52. package/dist/src/services/file-attachment.d.ts.map +1 -0
  53. package/dist/src/services/file-attachment.js +205 -0
  54. package/dist/src/services/file-attachment.js.map +7 -0
  55. package/dist/src/services/llm-processor.d.ts +29 -0
  56. package/dist/src/services/llm-processor.d.ts.map +1 -0
  57. package/dist/src/services/llm-processor.js +206 -0
  58. package/dist/src/services/llm-processor.js.map +7 -0
  59. package/dist/src/services/markdown-cleaner.d.ts +8 -0
  60. package/dist/src/services/markdown-cleaner.d.ts.map +1 -0
  61. package/dist/src/services/markdown-cleaner.js +63 -0
  62. package/dist/src/services/markdown-cleaner.js.map +7 -0
  63. package/dist/src/tools/github-score.d.ts +12 -0
  64. package/dist/src/tools/github-score.d.ts.map +1 -0
  65. package/dist/src/tools/github-score.js +306 -0
  66. package/dist/src/tools/github-score.js.map +7 -0
  67. package/dist/src/tools/mcp-helpers.d.ts +27 -0
  68. package/dist/src/tools/mcp-helpers.d.ts.map +1 -0
  69. package/dist/src/tools/mcp-helpers.js +47 -0
  70. package/dist/src/tools/mcp-helpers.js.map +7 -0
  71. package/dist/src/tools/reddit.d.ts +54 -0
  72. package/dist/src/tools/reddit.d.ts.map +1 -0
  73. package/dist/src/tools/reddit.js +498 -0
  74. package/dist/src/tools/reddit.js.map +7 -0
  75. package/dist/src/tools/registry.d.ts +3 -0
  76. package/dist/src/tools/registry.d.ts.map +1 -0
  77. package/dist/src/tools/registry.js +17 -0
  78. package/dist/src/tools/registry.js.map +7 -0
  79. package/dist/src/tools/research.d.ts +14 -0
  80. package/dist/src/tools/research.d.ts.map +1 -0
  81. package/dist/src/tools/research.js +250 -0
  82. package/dist/src/tools/research.js.map +7 -0
  83. package/dist/src/tools/scrape.d.ts +14 -0
  84. package/dist/src/tools/scrape.d.ts.map +1 -0
  85. package/dist/src/tools/scrape.js +290 -0
  86. package/dist/src/tools/scrape.js.map +7 -0
  87. package/dist/src/tools/search.d.ts +10 -0
  88. package/dist/src/tools/search.d.ts.map +1 -0
  89. package/dist/src/tools/search.js +197 -0
  90. package/dist/src/tools/search.js.map +7 -0
  91. package/dist/src/tools/utils.d.ts +105 -0
  92. package/dist/src/tools/utils.d.ts.map +1 -0
  93. package/dist/src/tools/utils.js +96 -0
  94. package/dist/src/tools/utils.js.map +7 -0
  95. package/dist/src/utils/concurrency.d.ts +28 -0
  96. package/dist/src/utils/concurrency.d.ts.map +1 -0
  97. package/dist/src/utils/concurrency.js +62 -0
  98. package/dist/src/utils/concurrency.js.map +7 -0
  99. package/dist/src/utils/errors.d.ts +95 -0
  100. package/dist/src/utils/errors.d.ts.map +1 -0
  101. package/dist/src/utils/errors.js +289 -0
  102. package/dist/src/utils/errors.js.map +7 -0
  103. package/dist/src/utils/logger.d.ts +33 -0
  104. package/dist/src/utils/logger.d.ts.map +1 -0
  105. package/dist/src/utils/logger.js +41 -0
  106. package/dist/src/utils/logger.js.map +7 -0
  107. package/dist/src/utils/markdown-formatter.d.ts +5 -0
  108. package/dist/src/utils/markdown-formatter.d.ts.map +1 -0
  109. package/dist/src/utils/markdown-formatter.js +15 -0
  110. package/dist/src/utils/markdown-formatter.js.map +7 -0
  111. package/dist/src/utils/response.d.ts +83 -0
  112. package/dist/src/utils/response.d.ts.map +1 -0
  113. package/dist/src/utils/response.js +109 -0
  114. package/dist/src/utils/response.js.map +7 -0
  115. package/dist/src/utils/retry.d.ts +43 -0
  116. package/dist/src/utils/retry.d.ts.map +1 -0
  117. package/dist/src/utils/retry.js +37 -0
  118. package/dist/src/utils/retry.js.map +7 -0
  119. package/dist/src/utils/url-aggregator.d.ts +92 -0
  120. package/dist/src/utils/url-aggregator.d.ts.map +1 -0
  121. package/dist/src/utils/url-aggregator.js +357 -0
  122. package/dist/src/utils/url-aggregator.js.map +7 -0
  123. package/dist/src/version.d.ts +28 -0
  124. package/dist/src/version.d.ts.map +1 -0
  125. package/dist/src/version.js +32 -0
  126. package/dist/src/version.js.map +7 -0
  127. package/package.json +73 -0
@@ -0,0 +1,327 @@
1
+ import { parseEnv } from "../config/index.js";
2
+ import {
3
+ classifyError,
4
+ fetchWithTimeout,
5
+ sleep,
6
+ ErrorCode
7
+ } from "../utils/errors.js";
8
+ import { calculateBackoff } from "../utils/retry.js";
9
+ import { pMapSettled } from "../utils/concurrency.js";
10
+ import { mcpLog } from "../utils/logger.js";
11
// ── Constants ──

// Supported scraping modes: plain HTTP fetch, JS rendering, JS + geo-targeting.
const SCRAPE_MODES = ["basic", "javascript", "javascript_geo"];

// Credit cost charged per request, keyed by mode.
const CREDIT_COSTS = { basic: 1, javascript: 5, javascript_geo: 5 };

const DEFAULT_SCRAPE_CONCURRENCY = 10;
const SCRAPE_BATCH_SIZE = 30;
const MAX_RETRIES = 1;

/** Overall timeout for all fallback attempts on a single URL */
const FALLBACK_OVERALL_TIMEOUT_MS = 30000;

// Status codes that indicate we should retry (no credit consumed).
const RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504, 510]);

// Status codes that are permanent failures (don't retry).
const PERMANENT_FAILURE_CODES = new Set([400, 401, 403]);

/** Minimum stripped-text length to consider a scrape successful (filters out empty SPA shells) */
const MIN_USEFUL_CONTENT_LENGTH = 200;

/** Ordered fallback plan walked by scrapeWithFallback, cheapest mode first. */
const FALLBACK_ATTEMPTS = [
  { mode: "basic", description: "basic mode" },
  { mode: "javascript", description: "javascript rendering" },
  { mode: "javascript", country: "us", description: "javascript + US geo-targeting" }
];
25
+ class ScraperClient {
26
+ apiKey;
27
+ baseURL = "https://api.scrape.do";
28
+ constructor(apiKey) {
29
+ const env = parseEnv();
30
+ this.apiKey = apiKey || env.SCRAPER_API_KEY;
31
+ if (!this.apiKey) {
32
+ throw new Error("Web scraping capability is not configured. Please set up the required API credentials.");
33
+ }
34
+ }
35
+ /**
36
+ * Scrape a single URL with retry logic
37
+ * NEVER throws - always returns a ScrapeResponse (possibly with error)
38
+ */
39
+ async scrape(request, maxRetries = MAX_RETRIES) {
40
+ const { url, mode = "basic", timeout = 15, country } = request;
41
+ const credits = CREDIT_COSTS[mode] ?? 1;
42
+ try {
43
+ new URL(url);
44
+ } catch {
45
+ return {
46
+ content: `Invalid URL: ${url}`,
47
+ statusCode: 400,
48
+ credits: 0,
49
+ error: { code: ErrorCode.INVALID_INPUT, message: `Invalid URL: ${url}`, retryable: false }
50
+ };
51
+ }
52
+ const params = new URLSearchParams({
53
+ url,
54
+ token: this.apiKey,
55
+ timeout: String(timeout * 1e3)
56
+ });
57
+ if (mode === "javascript") {
58
+ params.append("render", "true");
59
+ }
60
+ if (country) {
61
+ params.append("geoCode", country.toUpperCase());
62
+ }
63
+ const apiUrl = `${this.baseURL}?${params.toString()}`;
64
+ let lastError;
65
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
66
+ try {
67
+ const timeoutMs = (timeout + 5) * 1e3;
68
+ const response = await fetchWithTimeout(apiUrl, {
69
+ method: "GET",
70
+ headers: { Accept: "text/html,application/json" },
71
+ timeoutMs
72
+ });
73
+ let content;
74
+ try {
75
+ content = await response.text();
76
+ } catch (readError) {
77
+ content = `Failed to read response: ${readError instanceof Error ? readError.message : String(readError)}`;
78
+ }
79
+ if (response.ok) {
80
+ return {
81
+ content,
82
+ statusCode: response.status,
83
+ credits,
84
+ headers: Object.fromEntries(response.headers.entries())
85
+ };
86
+ }
87
+ if (response.status === 404) {
88
+ return {
89
+ content: "404 - Page not found",
90
+ statusCode: 404,
91
+ credits
92
+ };
93
+ }
94
+ if (PERMANENT_FAILURE_CODES.has(response.status)) {
95
+ const errorMsg = response.status === 401 ? "No credits remaining or subscription suspended" : `Request failed with status ${response.status}`;
96
+ return {
97
+ content: `Error: ${errorMsg}`,
98
+ statusCode: response.status,
99
+ credits: 0,
100
+ error: {
101
+ code: response.status === 401 ? ErrorCode.AUTH_ERROR : ErrorCode.INVALID_INPUT,
102
+ message: errorMsg,
103
+ retryable: false,
104
+ statusCode: response.status
105
+ }
106
+ };
107
+ }
108
+ if (RETRYABLE_STATUS_CODES.has(response.status)) {
109
+ lastError = {
110
+ code: response.status === 429 ? ErrorCode.RATE_LIMITED : ErrorCode.SERVICE_UNAVAILABLE,
111
+ message: `Server returned ${response.status}`,
112
+ retryable: true,
113
+ statusCode: response.status
114
+ };
115
+ if (attempt < maxRetries - 1) {
116
+ const delayMs = calculateBackoff(attempt);
117
+ mcpLog("warning", `${response.status} on attempt ${attempt + 1}/${maxRetries}. Retrying in ${delayMs}ms`, "scraper");
118
+ await sleep(delayMs);
119
+ continue;
120
+ }
121
+ }
122
+ lastError = classifyError({ status: response.status, message: content });
123
+ if (attempt < maxRetries - 1 && lastError.retryable) {
124
+ const delayMs = calculateBackoff(attempt);
125
+ mcpLog("warning", `Status ${response.status}. Retrying in ${delayMs}ms`, "scraper");
126
+ await sleep(delayMs);
127
+ continue;
128
+ }
129
+ return {
130
+ content: `Error: ${lastError.message}`,
131
+ statusCode: response.status,
132
+ credits: 0,
133
+ error: lastError
134
+ };
135
+ } catch (error) {
136
+ lastError = classifyError(error);
137
+ if (!lastError.retryable) {
138
+ return {
139
+ content: `Error: ${lastError.message}`,
140
+ statusCode: lastError.statusCode || 500,
141
+ credits: 0,
142
+ error: lastError
143
+ };
144
+ }
145
+ if (attempt < maxRetries - 1) {
146
+ const delayMs = calculateBackoff(attempt);
147
+ mcpLog("warning", `${lastError.code}: ${lastError.message}. Retry ${attempt + 1}/${maxRetries} in ${delayMs}ms`, "scraper");
148
+ await sleep(delayMs);
149
+ continue;
150
+ }
151
+ }
152
+ }
153
+ return {
154
+ content: `Error: Failed after ${maxRetries} attempts. ${lastError?.message || "Unknown error"}`,
155
+ statusCode: lastError?.statusCode || 500,
156
+ credits: 0,
157
+ error: lastError || { code: ErrorCode.UNKNOWN_ERROR, message: "All retries exhausted", retryable: false }
158
+ };
159
+ }
160
+ /**
161
+ * Scrape with automatic fallback through different modes
162
+ * NEVER throws - always returns a ScrapeResponse
163
+ */
164
+ async scrapeWithFallback(url, options = {}) {
165
+ const attemptResults = [];
166
+ let lastResult = null;
167
+ const deadline = Date.now() + FALLBACK_OVERALL_TIMEOUT_MS;
168
+ for (const attempt of FALLBACK_ATTEMPTS) {
169
+ if (Date.now() >= deadline) {
170
+ mcpLog("warning", `Overall fallback timeout reached for ${url} after ${attemptResults.length} attempt(s)`, "scraper");
171
+ break;
172
+ }
173
+ const result = await this.tryFallbackAttempt(url, attempt, options);
174
+ if (result.done) {
175
+ if (attemptResults.length > 0) {
176
+ mcpLog("info", `Success with ${attempt.description} after ${attemptResults.length} fallback(s)`, "scraper");
177
+ }
178
+ return result.response;
179
+ }
180
+ lastResult = result.response;
181
+ attemptResults.push(`${attempt.description}: ${result.response.error?.message || result.response.statusCode}`);
182
+ mcpLog("warning", `Failed with ${attempt.description} (${result.response.statusCode}), trying next fallback...`, "scraper");
183
+ }
184
+ const errorMessage = `Failed after ${attemptResults.length} fallback attempt(s): ${attemptResults.join("; ")}`;
185
+ return {
186
+ content: `Error: ${errorMessage}`,
187
+ statusCode: lastResult?.statusCode || 500,
188
+ credits: 0,
189
+ error: {
190
+ code: ErrorCode.SERVICE_UNAVAILABLE,
191
+ message: errorMessage,
192
+ retryable: false
193
+ }
194
+ };
195
+ }
196
+ /**
197
+ * Execute a single fallback attempt and determine whether to continue.
198
+ * Returns { done: true } on success/terminal or { done: false } to try the next mode.
199
+ */
200
+ async tryFallbackAttempt(url, attempt, options) {
201
+ const result = await this.scrape({
202
+ url,
203
+ mode: attempt.mode,
204
+ timeout: options.timeout,
205
+ country: attempt.country
206
+ });
207
+ if (result.statusCode >= 200 && result.statusCode < 300 && !result.error) {
208
+ const strippedLength = result.content.replace(/<[^>]*>/g, "").trim().length;
209
+ if (strippedLength < MIN_USEFUL_CONTENT_LENGTH && attempt.mode === "basic") {
210
+ mcpLog("info", `Basic mode returned only ${strippedLength} chars of text for ${url} \u2014 trying JS rendering`, "scraper");
211
+ return { done: false, response: result };
212
+ }
213
+ return { done: true, response: result };
214
+ }
215
+ if (result.statusCode === 404) {
216
+ return { done: true, response: result };
217
+ }
218
+ if (result.statusCode === 502) {
219
+ mcpLog("warning", `502 Bad Gateway for ${url} \u2014 likely WAF/CDN block, skipping fallback modes`, "scraper");
220
+ return { done: true, response: {
221
+ ...result,
222
+ error: {
223
+ code: ErrorCode.SERVICE_UNAVAILABLE,
224
+ message: "Bad gateway \u2014 site is blocking automated access",
225
+ retryable: false
226
+ }
227
+ } };
228
+ }
229
+ if (result.error && !result.error.retryable) {
230
+ mcpLog("error", `Non-retryable error with ${attempt.description}: ${result.error.message}`, "scraper");
231
+ return { done: true, response: result };
232
+ }
233
+ return { done: false, response: result };
234
+ }
235
+ /**
236
+ * Scrape multiple URLs with batching
237
+ * NEVER throws - always returns results array
238
+ */
239
+ async scrapeMultiple(urls, options = {}) {
240
+ if (urls.length === 0) {
241
+ return [];
242
+ }
243
+ if (urls.length <= SCRAPE_BATCH_SIZE) {
244
+ return this.processBatch(urls, options);
245
+ }
246
+ const result = await this.batchScrape(urls, options);
247
+ return result.results;
248
+ }
249
+ /**
250
+ * Batch scrape with progress callback
251
+ * NEVER throws - uses Promise.allSettled internally
252
+ */
253
+ async batchScrape(urls, options = {}, onBatchComplete) {
254
+ const totalBatches = Math.ceil(urls.length / SCRAPE_BATCH_SIZE);
255
+ const allResults = [];
256
+ let rateLimitHits = 0;
257
+ mcpLog("info", `Starting batch processing: ${urls.length} URLs in ${totalBatches} batch(es)`, "scraper");
258
+ for (let batchNum = 0; batchNum < totalBatches; batchNum++) {
259
+ const startIdx = batchNum * SCRAPE_BATCH_SIZE;
260
+ const endIdx = Math.min(startIdx + SCRAPE_BATCH_SIZE, urls.length);
261
+ const batchUrls = urls.slice(startIdx, endIdx);
262
+ mcpLog("info", `Processing batch ${batchNum + 1}/${totalBatches} (${batchUrls.length} URLs)`, "scraper");
263
+ const batchResults = await pMapSettled(
264
+ batchUrls,
265
+ (url) => this.scrapeWithFallback(url, options),
266
+ DEFAULT_SCRAPE_CONCURRENCY
267
+ );
268
+ for (let i = 0; i < batchResults.length; i++) {
269
+ const result = batchResults[i];
270
+ if (!result) continue;
271
+ const url = batchUrls[i] ?? "";
272
+ if (result.status === "fulfilled") {
273
+ const scrapeResult = result.value;
274
+ allResults.push({ ...scrapeResult, url });
275
+ if (scrapeResult.error?.code === ErrorCode.RATE_LIMITED) {
276
+ rateLimitHits++;
277
+ }
278
+ } else {
279
+ const errorMsg = result.reason instanceof Error ? result.reason.message : String(result.reason);
280
+ mcpLog("error", `Unexpected rejection for ${url}: ${errorMsg}`, "scraper");
281
+ allResults.push({
282
+ url,
283
+ content: `Error: Unexpected failure - ${errorMsg}`,
284
+ statusCode: 500,
285
+ credits: 0,
286
+ error: classifyError(result.reason)
287
+ });
288
+ }
289
+ }
290
+ try {
291
+ onBatchComplete?.(batchNum + 1, totalBatches, allResults.length);
292
+ } catch (callbackError) {
293
+ mcpLog("error", `onBatchComplete callback error: ${callbackError}`, "scraper");
294
+ }
295
+ mcpLog("info", `Completed batch ${batchNum + 1}/${totalBatches} (${allResults.length}/${urls.length} total)`, "scraper");
296
+ if (batchNum < totalBatches - 1) {
297
+ const batchDelay = rateLimitHits > 0 ? 2e3 : 500;
298
+ await sleep(batchDelay);
299
+ }
300
+ }
301
+ return { results: allResults, batchesProcessed: totalBatches, totalAttempted: urls.length, rateLimitHits };
302
+ }
303
+ /**
304
+ * Process a single batch of URLs
305
+ * NEVER throws
306
+ */
307
+ async processBatch(urls, options) {
308
+ const results = await pMapSettled(urls, (url) => this.scrapeWithFallback(url, options), DEFAULT_SCRAPE_CONCURRENCY);
309
+ return results.map((result, index) => {
310
+ const url = urls[index] || "";
311
+ if (result.status === "fulfilled") {
312
+ return { ...result.value, url };
313
+ }
314
+ return {
315
+ url,
316
+ content: `Error: ${result.reason instanceof Error ? result.reason.message : String(result.reason)}`,
317
+ statusCode: 500,
318
+ credits: 0,
319
+ error: classifyError(result.reason)
320
+ };
321
+ });
322
+ }
323
+ }
324
+ export {
325
+ ScraperClient
326
+ };
327
+ //# sourceMappingURL=scraper.js.map
@@ -0,0 +1,7 @@
1
+ {
2
+ "version": 3,
3
+ "sources": ["../../../src/clients/scraper.ts"],
4
+ "sourcesContent": ["/**\n * Web Scraper Client\n * Generic interface for URL scraping with automatic fallback modes\n * Implements robust error handling that NEVER crashes\n */\n\nimport { parseEnv, SCRAPER } from '../config/index.js';\nimport {\n classifyError,\n fetchWithTimeout,\n sleep,\n ErrorCode,\n type StructuredError,\n} from '../utils/errors.js';\nimport { calculateBackoff } from '../utils/retry.js';\nimport { pMapSettled } from '../utils/concurrency.js';\nimport { mcpLog } from '../utils/logger.js';\n\n// \u2500\u2500 Constants \u2500\u2500\n\nconst SCRAPE_MODES = ['basic', 'javascript', 'javascript_geo'] as const;\ntype ScrapeMode = typeof SCRAPE_MODES[number];\n\nconst CREDIT_COSTS: Record<string, number> = { basic: 1, javascript: 5, javascript_geo: 5 } as const;\nconst DEFAULT_SCRAPE_CONCURRENCY = 10 as const;\nconst SCRAPE_BATCH_SIZE = 30 as const;\nconst MAX_RETRIES = 1 as const;\n/** Overall timeout for all fallback attempts on a single URL */\nconst FALLBACK_OVERALL_TIMEOUT_MS = 30_000 as const;\n\n// \u2500\u2500 Interfaces \u2500\u2500\n\ninterface ScrapeRequest {\n readonly url: string;\n readonly mode?: 'basic' | 'javascript';\n readonly timeout?: number;\n readonly country?: string;\n}\n\ninterface ScrapeResponse {\n readonly content: string;\n readonly statusCode: number;\n readonly credits: number;\n readonly headers?: Record<string, string>;\n readonly error?: StructuredError;\n}\n\ninterface BatchScrapeResult {\n readonly results: ReadonlyArray<ScrapeResponse & { readonly url: string }>;\n readonly batchesProcessed: number;\n readonly totalAttempted: number;\n readonly rateLimitHits: number;\n}\n\n// Status codes that indicate we should retry (no credit consumed)\nconst RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504, 510]);\n// Status codes that are permanent failures (don't retry)\nconst PERMANENT_FAILURE_CODES = new Set([400, 401, 403]);\n\n/** Minimum stripped-text length to consider a scrape successful (filters out empty SPA 
shells) */\nconst MIN_USEFUL_CONTENT_LENGTH = 200 as const;\n\n/** Fallback attempt descriptor used by scrapeWithFallback */\ninterface FallbackAttempt {\n readonly mode: 'basic' | 'javascript';\n readonly country?: string;\n readonly description: string;\n}\n\nconst FALLBACK_ATTEMPTS: readonly FallbackAttempt[] = [\n { mode: 'basic', description: 'basic mode' },\n { mode: 'javascript', description: 'javascript rendering' },\n { mode: 'javascript', country: 'us', description: 'javascript + US geo-targeting' },\n] as const;\n\nexport class ScraperClient {\n private apiKey: string;\n private baseURL = 'https://api.scrape.do';\n\n constructor(apiKey?: string) {\n const env = parseEnv();\n this.apiKey = apiKey || env.SCRAPER_API_KEY;\n\n if (!this.apiKey) {\n throw new Error('Web scraping capability is not configured. Please set up the required API credentials.');\n }\n }\n\n /**\n * Scrape a single URL with retry logic\n * NEVER throws - always returns a ScrapeResponse (possibly with error)\n */\n async scrape(request: ScrapeRequest, maxRetries = MAX_RETRIES): Promise<ScrapeResponse> {\n const { url, mode = 'basic', timeout = 15, country } = request;\n const credits = CREDIT_COSTS[mode] ?? 
1;\n\n // Validate URL first\n try {\n new URL(url);\n } catch {\n return {\n content: `Invalid URL: ${url}`,\n statusCode: 400,\n credits: 0,\n error: { code: ErrorCode.INVALID_INPUT, message: `Invalid URL: ${url}`, retryable: false },\n };\n }\n\n const params = new URLSearchParams({\n url: url,\n token: this.apiKey,\n timeout: String(timeout * 1000),\n });\n\n if (mode === 'javascript') {\n params.append('render', 'true');\n }\n\n if (country) {\n params.append('geoCode', country.toUpperCase());\n }\n\n const apiUrl = `${this.baseURL}?${params.toString()}`;\n let lastError: StructuredError | undefined;\n\n for (let attempt = 0; attempt < maxRetries; attempt++) {\n try {\n // Use AbortController for timeout\n const timeoutMs = (timeout + 5) * 1000; // Add 5s buffer over scrape timeout\n const response = await fetchWithTimeout(apiUrl, {\n method: 'GET',\n headers: { Accept: 'text/html,application/json' },\n timeoutMs,\n });\n\n // Safely read response body\n let content: string;\n try {\n content = await response.text();\n } catch (readError) {\n content = `Failed to read response: ${readError instanceof Error ? readError.message : String(readError)}`;\n }\n\n // SUCCESS: 2xx - Successful API call\n if (response.ok) {\n return {\n content,\n statusCode: response.status,\n credits,\n headers: Object.fromEntries(response.headers.entries()),\n };\n }\n\n // 404 - Target not found (permanent, but not an error for our purposes)\n if (response.status === 404) {\n return {\n content: '404 - Page not found',\n statusCode: 404,\n credits,\n };\n }\n\n // Permanent failures - don't retry\n if (PERMANENT_FAILURE_CODES.has(response.status)) {\n const errorMsg = response.status === 401\n ? 'No credits remaining or subscription suspended'\n : `Request failed with status ${response.status}`;\n return {\n content: `Error: ${errorMsg}`,\n statusCode: response.status,\n credits: 0,\n error: {\n code: response.status === 401 ? 
ErrorCode.AUTH_ERROR : ErrorCode.INVALID_INPUT,\n message: errorMsg,\n retryable: false,\n statusCode: response.status,\n },\n };\n }\n\n // Retryable status codes\n if (RETRYABLE_STATUS_CODES.has(response.status)) {\n lastError = {\n code: response.status === 429 ? ErrorCode.RATE_LIMITED : ErrorCode.SERVICE_UNAVAILABLE,\n message: `Server returned ${response.status}`,\n retryable: true,\n statusCode: response.status,\n };\n\n if (attempt < maxRetries - 1) {\n const delayMs = calculateBackoff(attempt);\n mcpLog('warning', `${response.status} on attempt ${attempt + 1}/${maxRetries}. Retrying in ${delayMs}ms`, 'scraper');\n await sleep(delayMs);\n continue;\n }\n }\n\n // Other non-success status - treat as retryable\n lastError = classifyError({ status: response.status, message: content });\n if (attempt < maxRetries - 1 && lastError.retryable) {\n const delayMs = calculateBackoff(attempt);\n mcpLog('warning', `Status ${response.status}. Retrying in ${delayMs}ms`, 'scraper');\n await sleep(delayMs);\n continue;\n }\n\n // Final attempt failed\n return {\n content: `Error: ${lastError.message}`,\n statusCode: response.status,\n credits: 0,\n error: lastError,\n };\n\n } catch (error) {\n lastError = classifyError(error);\n\n // Non-retryable errors - return immediately\n if (!lastError.retryable) {\n return {\n content: `Error: ${lastError.message}`,\n statusCode: lastError.statusCode || 500,\n credits: 0,\n error: lastError,\n };\n }\n\n // Retryable error - continue if attempts remaining\n if (attempt < maxRetries - 1) {\n const delayMs = calculateBackoff(attempt);\n mcpLog('warning', `${lastError.code}: ${lastError.message}. Retry ${attempt + 1}/${maxRetries} in ${delayMs}ms`, 'scraper');\n await sleep(delayMs);\n continue;\n }\n }\n }\n\n // All retries exhausted\n return {\n content: `Error: Failed after ${maxRetries} attempts. 
${lastError?.message || 'Unknown error'}`,\n statusCode: lastError?.statusCode || 500,\n credits: 0,\n error: lastError || { code: ErrorCode.UNKNOWN_ERROR, message: 'All retries exhausted', retryable: false },\n };\n }\n\n /**\n * Scrape with automatic fallback through different modes\n * NEVER throws - always returns a ScrapeResponse\n */\n async scrapeWithFallback(url: string, options: { timeout?: number } = {}): Promise<ScrapeResponse> {\n const attemptResults: string[] = [];\n let lastResult: ScrapeResponse | null = null;\n const deadline = Date.now() + FALLBACK_OVERALL_TIMEOUT_MS;\n\n for (const attempt of FALLBACK_ATTEMPTS) {\n // Check overall deadline before starting next fallback\n if (Date.now() >= deadline) {\n mcpLog('warning', `Overall fallback timeout reached for ${url} after ${attemptResults.length} attempt(s)`, 'scraper');\n break;\n }\n\n const result = await this.tryFallbackAttempt(url, attempt, options);\n\n if (result.done) {\n if (attemptResults.length > 0) {\n mcpLog('info', `Success with ${attempt.description} after ${attemptResults.length} fallback(s)`, 'scraper');\n }\n return result.response;\n }\n\n lastResult = result.response;\n attemptResults.push(`${attempt.description}: ${result.response.error?.message || result.response.statusCode}`);\n mcpLog('warning', `Failed with ${attempt.description} (${result.response.statusCode}), trying next fallback...`, 'scraper');\n }\n\n // All fallbacks exhausted or deadline reached\n const errorMessage = `Failed after ${attemptResults.length} fallback attempt(s): ${attemptResults.join('; ')}`;\n return {\n content: `Error: ${errorMessage}`,\n statusCode: lastResult?.statusCode || 500,\n credits: 0,\n error: {\n code: ErrorCode.SERVICE_UNAVAILABLE,\n message: errorMessage,\n retryable: false,\n },\n };\n }\n\n /**\n * Execute a single fallback attempt and determine whether to continue.\n * Returns { done: true } on success/terminal or { done: false } to try the next mode.\n */\n private async 
tryFallbackAttempt(\n url: string,\n attempt: FallbackAttempt,\n options: { timeout?: number },\n ): Promise<{ done: boolean; response: ScrapeResponse }> {\n const result = await this.scrape({\n url,\n mode: attempt.mode,\n timeout: options.timeout,\n country: attempt.country,\n });\n\n // Success \u2014 but verify content isn't an empty SPA shell\n if (result.statusCode >= 200 && result.statusCode < 300 && !result.error) {\n const strippedLength = result.content.replace(/<[^>]*>/g, '').trim().length;\n if (strippedLength < MIN_USEFUL_CONTENT_LENGTH && attempt.mode === 'basic') {\n mcpLog('info', `Basic mode returned only ${strippedLength} chars of text for ${url} \u2014 trying JS rendering`, 'scraper');\n return { done: false, response: result };\n }\n return { done: true, response: result };\n }\n\n // 404 is a valid response, not an error\n if (result.statusCode === 404) {\n return { done: true, response: result };\n }\n\n // 502 Bad Gateway \u2014 almost always a WAF/CDN block, not a transient issue.\n // Switching render mode won't bypass CDN protection, so fail fast.\n if (result.statusCode === 502) {\n mcpLog('warning', `502 Bad Gateway for ${url} \u2014 likely WAF/CDN block, skipping fallback modes`, 'scraper');\n return { done: true, response: {\n ...result,\n error: {\n code: ErrorCode.SERVICE_UNAVAILABLE,\n message: 'Bad gateway \u2014 site is blocking automated access',\n retryable: false,\n },\n }};\n }\n\n // Non-retryable errors - don't try other modes\n if (result.error && !result.error.retryable) {\n mcpLog('error', `Non-retryable error with ${attempt.description}: ${result.error.message}`, 'scraper');\n return { done: true, response: result };\n }\n\n return { done: false, response: result };\n }\n\n /**\n * Scrape multiple URLs with batching\n * NEVER throws - always returns results array\n */\n async scrapeMultiple(urls: string[], options: { timeout?: number } = {}): Promise<Array<ScrapeResponse & { url: string }>> {\n if (urls.length === 0) {\n 
return [];\n }\n\n if (urls.length <= SCRAPE_BATCH_SIZE) {\n return this.processBatch(urls, options);\n }\n\n const result = await this.batchScrape(urls, options);\n return result.results as Array<ScrapeResponse & { url: string }>;\n }\n\n /**\n * Batch scrape with progress callback\n * NEVER throws - uses Promise.allSettled internally\n */\n async batchScrape(\n urls: string[],\n options: { timeout?: number } = {},\n onBatchComplete?: (batchNum: number, totalBatches: number, processed: number) => void\n ): Promise<BatchScrapeResult> {\n const totalBatches = Math.ceil(urls.length / SCRAPE_BATCH_SIZE);\n const allResults: Array<ScrapeResponse & { url: string }> = [];\n let rateLimitHits = 0;\n\n mcpLog('info', `Starting batch processing: ${urls.length} URLs in ${totalBatches} batch(es)`, 'scraper');\n\n for (let batchNum = 0; batchNum < totalBatches; batchNum++) {\n const startIdx = batchNum * SCRAPE_BATCH_SIZE;\n const endIdx = Math.min(startIdx + SCRAPE_BATCH_SIZE, urls.length);\n const batchUrls = urls.slice(startIdx, endIdx);\n\n mcpLog('info', `Processing batch ${batchNum + 1}/${totalBatches} (${batchUrls.length} URLs)`, 'scraper');\n\n const batchResults = await pMapSettled(\n batchUrls,\n url => this.scrapeWithFallback(url, options),\n DEFAULT_SCRAPE_CONCURRENCY\n );\n\n for (let i = 0; i < batchResults.length; i++) {\n const result = batchResults[i];\n if (!result) continue;\n const url = batchUrls[i] ?? '';\n\n if (result.status === 'fulfilled') {\n const scrapeResult = result.value;\n allResults.push({ ...scrapeResult, url });\n\n // Track rate limits\n if (scrapeResult.error?.code === ErrorCode.RATE_LIMITED) {\n rateLimitHits++;\n }\n } else {\n // This shouldn't happen since scrapeWithFallback never throws,\n // but handle it gracefully just in case\n const errorMsg = result.reason instanceof Error ? 
result.reason.message : String(result.reason);\n mcpLog('error', `Unexpected rejection for ${url}: ${errorMsg}`, 'scraper');\n\n allResults.push({\n url,\n content: `Error: Unexpected failure - ${errorMsg}`,\n statusCode: 500,\n credits: 0,\n error: classifyError(result.reason),\n });\n }\n }\n\n // Safe callback invocation\n try {\n onBatchComplete?.(batchNum + 1, totalBatches, allResults.length);\n } catch (callbackError) {\n mcpLog('error', `onBatchComplete callback error: ${callbackError}`, 'scraper');\n }\n\n mcpLog('info', `Completed batch ${batchNum + 1}/${totalBatches} (${allResults.length}/${urls.length} total)`, 'scraper');\n\n // Adaptive delay between batches \u2014 back off harder under rate limiting\n if (batchNum < totalBatches - 1) {\n const batchDelay = rateLimitHits > 0 ? 2000 : 500;\n await sleep(batchDelay);\n }\n }\n\n return { results: allResults, batchesProcessed: totalBatches, totalAttempted: urls.length, rateLimitHits };\n }\n\n /**\n * Process a single batch of URLs\n * NEVER throws\n */\n private async processBatch(urls: string[], options: { timeout?: number }): Promise<Array<ScrapeResponse & { url: string }>> {\n const results = await pMapSettled(urls, url => this.scrapeWithFallback(url, options), DEFAULT_SCRAPE_CONCURRENCY);\n\n return results.map((result, index) => {\n const url = urls[index] || '';\n\n if (result.status === 'fulfilled') {\n return { ...result.value, url };\n }\n\n // Shouldn't happen, but handle gracefully\n return {\n url,\n content: `Error: ${result.reason instanceof Error ? result.reason.message : String(result.reason)}`,\n statusCode: 500,\n credits: 0,\n error: classifyError(result.reason),\n };\n });\n }\n}\n"],
5
+ "mappings": "AAMA,SAAS,gBAAyB;AAClC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,wBAAwB;AACjC,SAAS,mBAAmB;AAC5B,SAAS,cAAc;AAIvB,MAAM,eAAe,CAAC,SAAS,cAAc,gBAAgB;AAG7D,MAAM,eAAuC,EAAE,OAAO,GAAG,YAAY,GAAG,gBAAgB,EAAE;AAC1F,MAAM,6BAA6B;AACnC,MAAM,oBAAoB;AAC1B,MAAM,cAAc;AAEpB,MAAM,8BAA8B;AA2BpC,MAAM,yBAAyB,oBAAI,IAAI,CAAC,KAAK,KAAK,KAAK,KAAK,GAAG,CAAC;AAEhE,MAAM,0BAA0B,oBAAI,IAAI,CAAC,KAAK,KAAK,GAAG,CAAC;AAGvD,MAAM,4BAA4B;AASlC,MAAM,oBAAgD;AAAA,EACpD,EAAE,MAAM,SAAS,aAAa,aAAa;AAAA,EAC3C,EAAE,MAAM,cAAc,aAAa,uBAAuB;AAAA,EAC1D,EAAE,MAAM,cAAc,SAAS,MAAM,aAAa,gCAAgC;AACpF;AAEO,MAAM,cAAc;AAAA,EACjB;AAAA,EACA,UAAU;AAAA,EAElB,YAAY,QAAiB;AAC3B,UAAM,MAAM,SAAS;AACrB,SAAK,SAAS,UAAU,IAAI;AAE5B,QAAI,CAAC,KAAK,QAAQ;AAChB,YAAM,IAAI,MAAM,wFAAwF;AAAA,IAC1G;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,OAAO,SAAwB,aAAa,aAAsC;AACtF,UAAM,EAAE,KAAK,OAAO,SAAS,UAAU,IAAI,QAAQ,IAAI;AACvD,UAAM,UAAU,aAAa,IAAI,KAAK;AAGtC,QAAI;AACF,UAAI,IAAI,GAAG;AAAA,IACb,QAAQ;AACN,aAAO;AAAA,QACL,SAAS,gBAAgB,GAAG;AAAA,QAC5B,YAAY;AAAA,QACZ,SAAS;AAAA,QACT,OAAO,EAAE,MAAM,UAAU,eAAe,SAAS,gBAAgB,GAAG,IAAI,WAAW,MAAM;AAAA,MAC3F;AAAA,IACF;AAEA,UAAM,SAAS,IAAI,gBAAgB;AAAA,MACjC;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,SAAS,OAAO,UAAU,GAAI;AAAA,IAChC,CAAC;AAED,QAAI,SAAS,cAAc;AACzB,aAAO,OAAO,UAAU,MAAM;AAAA,IAChC;AAEA,QAAI,SAAS;AACX,aAAO,OAAO,WAAW,QAAQ,YAAY,CAAC;AAAA,IAChD;AAEA,UAAM,SAAS,GAAG,KAAK,OAAO,IAAI,OAAO,SAAS,CAAC;AACnD,QAAI;AAEJ,aAAS,UAAU,GAAG,UAAU,YAAY,WAAW;AACrD,UAAI;AAEF,cAAM,aAAa,UAAU,KAAK;AAClC,cAAM,WAAW,MAAM,iBAAiB,QAAQ;AAAA,UAC9C,QAAQ;AAAA,UACR,SAAS,EAAE,QAAQ,6BAA6B;AAAA,UAChD;AAAA,QACF,CAAC;AAGD,YAAI;AACJ,YAAI;AACF,oBAAU,MAAM,SAAS,KAAK;AAAA,QAChC,SAAS,WAAW;AAClB,oBAAU,4BAA4B,qBAAqB,QAAQ,UAAU,UAAU,OAAO,SAAS,CAAC;AAAA,QAC1G;AAGA,YAAI,SAAS,IAAI;AACf,iBAAO;AAAA,YACL;AAAA,YACA,YAAY,SAAS;AAAA,YACrB;AAAA,YACA,SAAS,OAAO,YAAY,SAAS,QAAQ,QAAQ,CAAC;AAAA,UACxD;AAAA,QACF;AAGA,YAAI,SAAS,WAAW,KAAK;AAC3B,iBAAO;AAAA,YACL,SAAS;AAAA,YACT,YAAY;AAAA,YACZ;AAAA,UACF;AAAA,QACF;AAGA,YAAI,wBAAwB,IAAI,SAAS,MAAM,GAAG;AAChD,gBAAM,WAAW,SAAS,W
AAW,MACjC,mDACA,8BAA8B,SAAS,MAAM;AACjD,iBAAO;AAAA,YACL,SAAS,UAAU,QAAQ;AAAA,YAC3B,YAAY,SAAS;AAAA,YACrB,SAAS;AAAA,YACT,OAAO;AAAA,cACL,MAAM,SAAS,WAAW,MAAM,UAAU,aAAa,UAAU;AAAA,cACjE,SAAS;AAAA,cACT,WAAW;AAAA,cACX,YAAY,SAAS;AAAA,YACvB;AAAA,UACF;AAAA,QACF;AAGA,YAAI,uBAAuB,IAAI,SAAS,MAAM,GAAG;AAC/C,sBAAY;AAAA,YACV,MAAM,SAAS,WAAW,MAAM,UAAU,eAAe,UAAU;AAAA,YACnE,SAAS,mBAAmB,SAAS,MAAM;AAAA,YAC3C,WAAW;AAAA,YACX,YAAY,SAAS;AAAA,UACvB;AAEA,cAAI,UAAU,aAAa,GAAG;AAC5B,kBAAM,UAAU,iBAAiB,OAAO;AACxC,mBAAO,WAAW,GAAG,SAAS,MAAM,eAAe,UAAU,CAAC,IAAI,UAAU,iBAAiB,OAAO,MAAM,SAAS;AACnH,kBAAM,MAAM,OAAO;AACnB;AAAA,UACF;AAAA,QACF;AAGA,oBAAY,cAAc,EAAE,QAAQ,SAAS,QAAQ,SAAS,QAAQ,CAAC;AACvE,YAAI,UAAU,aAAa,KAAK,UAAU,WAAW;AACnD,gBAAM,UAAU,iBAAiB,OAAO;AACxC,iBAAO,WAAW,UAAU,SAAS,MAAM,iBAAiB,OAAO,MAAM,SAAS;AAClF,gBAAM,MAAM,OAAO;AACnB;AAAA,QACF;AAGA,eAAO;AAAA,UACL,SAAS,UAAU,UAAU,OAAO;AAAA,UACpC,YAAY,SAAS;AAAA,UACrB,SAAS;AAAA,UACT,OAAO;AAAA,QACT;AAAA,MAEF,SAAS,OAAO;AACd,oBAAY,cAAc,KAAK;AAG/B,YAAI,CAAC,UAAU,WAAW;AACxB,iBAAO;AAAA,YACL,SAAS,UAAU,UAAU,OAAO;AAAA,YACpC,YAAY,UAAU,cAAc;AAAA,YACpC,SAAS;AAAA,YACT,OAAO;AAAA,UACT;AAAA,QACF;AAGA,YAAI,UAAU,aAAa,GAAG;AAC5B,gBAAM,UAAU,iBAAiB,OAAO;AACxC,iBAAO,WAAW,GAAG,UAAU,IAAI,KAAK,UAAU,OAAO,WAAW,UAAU,CAAC,IAAI,UAAU,OAAO,OAAO,MAAM,SAAS;AAC1H,gBAAM,MAAM,OAAO;AACnB;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAGA,WAAO;AAAA,MACL,SAAS,uBAAuB,UAAU,cAAc,WAAW,WAAW,eAAe;AAAA,MAC7F,YAAY,WAAW,cAAc;AAAA,MACrC,SAAS;AAAA,MACT,OAAO,aAAa,EAAE,MAAM,UAAU,eAAe,SAAS,yBAAyB,WAAW,MAAM;AAAA,IAC1G;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,mBAAmB,KAAa,UAAgC,CAAC,GAA4B;AACjG,UAAM,iBAA2B,CAAC;AAClC,QAAI,aAAoC;AACxC,UAAM,WAAW,KAAK,IAAI,IAAI;AAE9B,eAAW,WAAW,mBAAmB;AAEvC,UAAI,KAAK,IAAI,KAAK,UAAU;AAC1B,eAAO,WAAW,wCAAwC,GAAG,UAAU,eAAe,MAAM,eAAe,SAAS;AACpH;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,KAAK,mBAAmB,KAAK,SAAS,OAAO;AAElE,UAAI,OAAO,MAAM;AACf,YAAI,eAAe,SAAS,GAAG;AAC7B,iBAAO,QAAQ,gBAAgB,QAAQ,WAAW,UAAU,eAAe,MAAM,gBAAgB,SAAS;AAAA,QAC5G;AACA,eAAO,OAAO;AAAA,MAChB;AAEA,mBAAa,OAAO;AACpB,qBAAe,KAAK,GAAG,QAAQ,WAAW,KAAK,OAAO,SAAS,OA
AO,WAAW,OAAO,SAAS,UAAU,EAAE;AAC7G,aAAO,WAAW,eAAe,QAAQ,WAAW,KAAK,OAAO,SAAS,UAAU,8BAA8B,SAAS;AAAA,IAC5H;AAGA,UAAM,eAAe,gBAAgB,eAAe,MAAM,yBAAyB,eAAe,KAAK,IAAI,CAAC;AAC5G,WAAO;AAAA,MACL,SAAS,UAAU,YAAY;AAAA,MAC/B,YAAY,YAAY,cAAc;AAAA,MACtC,SAAS;AAAA,MACT,OAAO;AAAA,QACL,MAAM,UAAU;AAAA,QAChB,SAAS;AAAA,QACT,WAAW;AAAA,MACb;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAc,mBACZ,KACA,SACA,SACsD;AACtD,UAAM,SAAS,MAAM,KAAK,OAAO;AAAA,MAC/B;AAAA,MACA,MAAM,QAAQ;AAAA,MACd,SAAS,QAAQ;AAAA,MACjB,SAAS,QAAQ;AAAA,IACnB,CAAC;AAGD,QAAI,OAAO,cAAc,OAAO,OAAO,aAAa,OAAO,CAAC,OAAO,OAAO;AACxE,YAAM,iBAAiB,OAAO,QAAQ,QAAQ,YAAY,EAAE,EAAE,KAAK,EAAE;AACrE,UAAI,iBAAiB,6BAA6B,QAAQ,SAAS,SAAS;AAC1E,eAAO,QAAQ,4BAA4B,cAAc,sBAAsB,GAAG,+BAA0B,SAAS;AACrH,eAAO,EAAE,MAAM,OAAO,UAAU,OAAO;AAAA,MACzC;AACA,aAAO,EAAE,MAAM,MAAM,UAAU,OAAO;AAAA,IACxC;AAGA,QAAI,OAAO,eAAe,KAAK;AAC7B,aAAO,EAAE,MAAM,MAAM,UAAU,OAAO;AAAA,IACxC;AAIA,QAAI,OAAO,eAAe,KAAK;AAC7B,aAAO,WAAW,uBAAuB,GAAG,yDAAoD,SAAS;AACzG,aAAO,EAAE,MAAM,MAAM,UAAU;AAAA,QAC7B,GAAG;AAAA,QACH,OAAO;AAAA,UACL,MAAM,UAAU;AAAA,UAChB,SAAS;AAAA,UACT,WAAW;AAAA,QACb;AAAA,MACF,EAAC;AAAA,IACH;AAGA,QAAI,OAAO,SAAS,CAAC,OAAO,MAAM,WAAW;AAC3C,aAAO,SAAS,4BAA4B,QAAQ,WAAW,KAAK,OAAO,MAAM,OAAO,IAAI,SAAS;AACrG,aAAO,EAAE,MAAM,MAAM,UAAU,OAAO;AAAA,IACxC;AAEA,WAAO,EAAE,MAAM,OAAO,UAAU,OAAO;AAAA,EACzC;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,eAAe,MAAgB,UAAgC,CAAC,GAAqD;AACzH,QAAI,KAAK,WAAW,GAAG;AACrB,aAAO,CAAC;AAAA,IACV;AAEA,QAAI,KAAK,UAAU,mBAAmB;AACpC,aAAO,KAAK,aAAa,MAAM,OAAO;AAAA,IACxC;AAEA,UAAM,SAAS,MAAM,KAAK,YAAY,MAAM,OAAO;AACnD,WAAO,OAAO;AAAA,EAChB;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,YACJ,MACA,UAAgC,CAAC,GACjC,iBAC4B;AAC5B,UAAM,eAAe,KAAK,KAAK,KAAK,SAAS,iBAAiB;AAC9D,UAAM,aAAsD,CAAC;AAC7D,QAAI,gBAAgB;AAEpB,WAAO,QAAQ,8BAA8B,KAAK,MAAM,YAAY,YAAY,cAAc,SAAS;AAEvG,aAAS,WAAW,GAAG,WAAW,cAAc,YAAY;AAC1D,YAAM,WAAW,WAAW;AAC5B,YAAM,SAAS,KAAK,IAAI,WAAW,mBAAmB,KAAK,MAAM;AACjE,YAAM,YAAY,KAAK,MAAM,UAAU,MAAM;AAE7C,aAAO,QAAQ,oBAAoB,WAAW,CAAC,IAAI,YAAY,KAAK,UAAU,MAAM,UAAU,SAAS;AAEvG,YAAM,eAAe,MAAM;AAAA,QACzB;AAAA,QACA,SAA
O,KAAK,mBAAmB,KAAK,OAAO;AAAA,QAC3C;AAAA,MACF;AAEA,eAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,cAAM,SAAS,aAAa,CAAC;AAC7B,YAAI,CAAC,OAAQ;AACb,cAAM,MAAM,UAAU,CAAC,KAAK;AAE5B,YAAI,OAAO,WAAW,aAAa;AACjC,gBAAM,eAAe,OAAO;AAC5B,qBAAW,KAAK,EAAE,GAAG,cAAc,IAAI,CAAC;AAGxC,cAAI,aAAa,OAAO,SAAS,UAAU,cAAc;AACvD;AAAA,UACF;AAAA,QACF,OAAO;AAGL,gBAAM,WAAW,OAAO,kBAAkB,QAAQ,OAAO,OAAO,UAAU,OAAO,OAAO,MAAM;AAC9F,iBAAO,SAAS,4BAA4B,GAAG,KAAK,QAAQ,IAAI,SAAS;AAEzE,qBAAW,KAAK;AAAA,YACd;AAAA,YACA,SAAS,+BAA+B,QAAQ;AAAA,YAChD,YAAY;AAAA,YACZ,SAAS;AAAA,YACT,OAAO,cAAc,OAAO,MAAM;AAAA,UACpC,CAAC;AAAA,QACH;AAAA,MACF;AAGA,UAAI;AACF,0BAAkB,WAAW,GAAG,cAAc,WAAW,MAAM;AAAA,MACjE,SAAS,eAAe;AACtB,eAAO,SAAS,mCAAmC,aAAa,IAAI,SAAS;AAAA,MAC/E;AAEA,aAAO,QAAQ,mBAAmB,WAAW,CAAC,IAAI,YAAY,KAAK,WAAW,MAAM,IAAI,KAAK,MAAM,WAAW,SAAS;AAGvH,UAAI,WAAW,eAAe,GAAG;AAC/B,cAAM,aAAa,gBAAgB,IAAI,MAAO;AAC9C,cAAM,MAAM,UAAU;AAAA,MACxB;AAAA,IACF;AAEA,WAAO,EAAE,SAAS,YAAY,kBAAkB,cAAc,gBAAgB,KAAK,QAAQ,cAAc;AAAA,EAC3G;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAc,aAAa,MAAgB,SAAiF;AAC1H,UAAM,UAAU,MAAM,YAAY,MAAM,SAAO,KAAK,mBAAmB,KAAK,OAAO,GAAG,0BAA0B;AAEhH,WAAO,QAAQ,IAAI,CAAC,QAAQ,UAAU;AACpC,YAAM,MAAM,KAAK,KAAK,KAAK;AAE3B,UAAI,OAAO,WAAW,aAAa;AACjC,eAAO,EAAE,GAAG,OAAO,OAAO,IAAI;AAAA,MAChC;AAGA,aAAO;AAAA,QACL;AAAA,QACA,SAAS,UAAU,OAAO,kBAAkB,QAAQ,OAAO,OAAO,UAAU,OAAO,OAAO,MAAM,CAAC;AAAA,QACjG,YAAY;AAAA,QACZ,SAAS;AAAA,QACT,OAAO,cAAc,OAAO,MAAM;AAAA,MACpC;AAAA,IACF,CAAC;AAAA,EACH;AACF;",
6
+ "names": []
7
+ }
@@ -0,0 +1,57 @@
1
/**
 * Web Search Client
 * Generic interface for web search via Google (Serper implementation)
 * Implements robust error handling that NEVER crashes
 */
import { type StructuredError } from '../utils/errors.js';
/** One organic search hit as returned by the search backend. */
interface SearchResult {
    readonly title: string;
    readonly link: string;
    readonly snippet: string;
    readonly date?: string;
    readonly position: number;
}
/** All hits for a single keyword, plus "related searches" suggestions. */
export interface KeywordSearchResult {
    readonly keyword: string;
    readonly results: SearchResult[];
    readonly totalResults: number;
    readonly related: string[];
    readonly error?: StructuredError;
}
/** Aggregate outcome of a multi-keyword batch search. */
interface MultipleSearchResponse {
    readonly searches: KeywordSearchResult[];
    readonly totalKeywords: number;
    readonly executionTime: number;
    readonly error?: StructuredError;
}
/** A Reddit post found via a site-restricted Google search. */
export interface RedditSearchResult {
    readonly title: string;
    readonly url: string;
    readonly snippet: string;
    readonly date?: string;
}
export declare class SearchClient {
    private apiKey;
    constructor(apiKey?: string);
    /**
     * Check if error is retryable
     */
    private isRetryable;
    /**
     * Search multiple keywords in parallel
     * NEVER throws - always returns a valid response
     */
    searchMultiple(keywords: string[]): Promise<MultipleSearchResponse>;
    /**
     * Search Reddit via Google (adds site:reddit.com automatically)
     * NEVER throws - returns empty array on failure
     */
    searchReddit(query: string, dateAfter?: string): Promise<RedditSearchResult[]>;
    /**
     * Search Reddit with multiple queries (bounded concurrency)
     * NEVER throws - searchReddit never throws, pMap preserves order
     */
    searchRedditMultiple(queries: string[], dateAfter?: string): Promise<Map<string, RedditSearchResult[]>>;
}
export {};
//# sourceMappingURL=search.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../../src/clients/search.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EAKL,KAAK,eAAe,EACrB,MAAM,oBAAoB,CAAC;AAc5B,UAAU,YAAY;IACpB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC;IACjC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC;IAC3B,QAAQ,CAAC,KAAK,CAAC,EAAE,eAAe,CAAC;CAClC;AAED,UAAU,sBAAsB;IAC9B,QAAQ,CAAC,QAAQ,EAAE,mBAAmB,EAAE,CAAC;IACzC,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,KAAK,CAAC,EAAE,eAAe,CAAC;CAClC;AAED,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;CACxB;AAwHD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,CAAC,EAAE,MAAM;IAS3B;;OAEG;IACH,OAAO,CAAC,WAAW;IAUnB;;;OAGG;IACG,cAAc,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,sBAAsB,CAAC;IAkCzE;;;OAGG;IACG,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,EAAE,CAAC;IAuDpF;;;OAGG;IACG,oBAAoB,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,kBAAkB,EAAE,CAAC,CAAC;CAa9G"}
@@ -0,0 +1,218 @@
1
+ import { parseEnv } from "../config/index.js";
2
+ import {
3
+ classifyError,
4
+ fetchWithTimeout,
5
+ sleep,
6
+ ErrorCode
7
+ } from "../utils/errors.js";
8
+ import { calculateBackoff } from "../utils/retry.js";
9
+ import { pMap } from "../utils/concurrency.js";
10
+ import { mcpLog } from "../utils/logger.js";
11
+ const SERPER_API_URL = "https://google.serper.dev/search";
12
+ const DEFAULT_RESULTS_PER_KEYWORD = 10;
13
+ const MAX_SEARCH_CONCURRENCY = 8;
14
+ const MAX_RETRIES = 3;
15
+ const SEARCH_RETRY_CONFIG = {
16
+ maxRetries: MAX_RETRIES,
17
+ baseDelayMs: 1e3,
18
+ maxDelayMs: 1e4,
19
+ timeoutMs: 3e4
20
+ };
21
+ const RETRYABLE_SEARCH_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]);
22
+ const REDDIT_SITE_REGEX = /site:\s*reddit\.com/i;
23
+ const REDDIT_SUBREDDIT_SUFFIX_REGEX = / : r\/\w+$/;
24
+ const REDDIT_SUFFIX_REGEX = / - Reddit$/;
25
/**
 * Normalize raw Serper API responses into KeywordSearchResult shapes.
 *
 * Defensive by design: a malformed response yields an empty result for that
 * keyword instead of throwing, and missing fields fall back to safe defaults.
 *
 * @param {Array<object>} responses - Raw JSON bodies, one per keyword.
 * @param {string[]} keywords - Keywords in the same order as `responses`.
 * @returns {Array<{keyword: string, results: Array, totalResults: number, related: string[]}>}
 */
function parseSearchResponses(responses, keywords) {
  return responses.map((resp, index) => {
    try {
      const organic = resp.organic || [];
      const results = organic.map((item, idx) => ({
        title: item.title || "No title",
        link: item.link || "#",
        snippet: item.snippet || "",
        date: item.date,
        position: item.position || idx + 1
      }));
      const searchInfo = resp.searchInformation;
      // Serper reports totalResults as a comma-grouped string (e.g. "1,230,000").
      // Fix: guard against NaN from a malformed value instead of propagating it;
      // fall back to the number of results we actually parsed.
      let totalResults = results.length;
      if (searchInfo?.totalResults) {
        const parsed = Number.parseInt(String(searchInfo.totalResults).replace(/,/g, ""), 10);
        if (!Number.isNaN(parsed)) {
          totalResults = parsed;
        }
      }
      const relatedSearches = resp.relatedSearches || [];
      const related = relatedSearches.map((r) => r.query || "");
      return { keyword: keywords[index] || "", results, totalResults, related };
    } catch {
      // Never let one bad response break the whole batch.
      return { keyword: keywords[index] || "", results: [], totalResults: 0, related: [] };
    }
  });
}
46
/**
 * POST `body` to the Serper search endpoint with retry + exponential backoff.
 *
 * Never throws: every outcome is surfaced as `{ data }` on success or
 * `{ data: undefined, error }` on failure (a structured error object).
 *
 * @param apiKey - Serper API key, sent as the X-API-KEY header.
 * @param body - JSON-serializable payload (single query object or batch array).
 * @param isRetryable - Callback deciding whether a status code or thrown error warrants retry.
 */
async function executeSearchWithRetry(apiKey, body, isRetryable) {
  let lastError;
  // attempt 0 is the initial try; up to maxRetries additional attempts follow.
  for (let attempt = 0; attempt <= SEARCH_RETRY_CONFIG.maxRetries; attempt++) {
    try {
      if (attempt > 0) {
        mcpLog("warning", `Retry attempt ${attempt}/${SEARCH_RETRY_CONFIG.maxRetries}`, "search");
      }
      const response = await fetchWithTimeout(SERPER_API_URL, {
        method: "POST",
        headers: {
          "X-API-KEY": apiKey,
          "Content-Type": "application/json"
        },
        body: JSON.stringify(body),
        timeoutMs: SEARCH_RETRY_CONFIG.timeoutMs
      });
      if (!response.ok) {
        // Read the error body best-effort; an unreadable body must not throw here.
        const errorText = await response.text().catch(() => "");
        lastError = classifyError({ status: response.status, message: errorText });
        if (isRetryable(response.status) && attempt < SEARCH_RETRY_CONFIG.maxRetries) {
          const delayMs = calculateBackoff(attempt, SEARCH_RETRY_CONFIG.baseDelayMs, SEARCH_RETRY_CONFIG.maxDelayMs);
          mcpLog("warning", `API returned ${response.status}, retrying in ${delayMs}ms...`, "search");
          await sleep(delayMs);
          continue;
        }
        // Non-retryable status, or retry budget exhausted.
        return { data: void 0, error: lastError };
      }
      try {
        const data = await response.json();
        return { data };
      } catch {
        // A 2xx with an unparseable body is not retried: the server is
        // reachable, so retrying the same request is unlikely to help.
        return {
          data: void 0,
          error: { code: ErrorCode.PARSE_ERROR, message: "Failed to parse search response", retryable: false }
        };
      }
    } catch (error) {
      // Network-level failure (timeout, DNS, connection reset, ...).
      lastError = classifyError(error);
      if (isRetryable(void 0, error) && attempt < SEARCH_RETRY_CONFIG.maxRetries) {
        const delayMs = calculateBackoff(attempt, SEARCH_RETRY_CONFIG.baseDelayMs, SEARCH_RETRY_CONFIG.maxDelayMs);
        mcpLog("warning", `${lastError.code}: ${lastError.message}, retrying in ${delayMs}ms...`, "search");
        await sleep(delayMs);
        continue;
      }
      return { data: void 0, error: lastError };
    }
  }
  // Defensive fallback — every loop path returns, so this should be unreachable.
  return {
    data: void 0,
    error: lastError || { code: ErrorCode.UNKNOWN_ERROR, message: "Search failed", retryable: false }
  };
}
98
/**
 * Generic web-search client backed by the Serper Google Search API.
 *
 * Public search methods follow a "never throws" contract: failures are
 * reported via structured error fields or empty results, never exceptions.
 */
class SearchClient {
  // Serper API key; guaranteed non-empty after construction.
  apiKey;
  /**
   * @param apiKey - Optional explicit key; falls back to the SEARCH_API_KEY env var.
   * @throws Error when no key is available from either source — the one place
   *         this class throws, since a credential-less client can never succeed.
   */
  constructor(apiKey) {
    const env = parseEnv();
    this.apiKey = apiKey || env.SEARCH_API_KEY || "";
    if (!this.apiKey) {
      throw new Error("Web search capability is not configured. Please set up the required API credentials.");
    }
  }
  /**
   * Check if error is retryable.
   * True when the HTTP status is in RETRYABLE_SEARCH_CODES, or when the thrown
   * error's message suggests a transient condition (timeout / rate limit / connection).
   */
  isRetryable(status, error) {
    if (status && RETRYABLE_SEARCH_CODES.has(status)) return true;
    if (error == null) return false;
    const message = typeof error === "object" && "message" in error && typeof error.message === "string" ? error.message.toLowerCase() : "";
    return message.includes("timeout") || message.includes("rate limit") || message.includes("connection");
  }
  /**
   * Search multiple keywords in parallel (single batched API call).
   * NEVER throws - always returns a valid response object; failures are
   * reported via the `error` field with an empty `searches` array.
   */
  async searchMultiple(keywords) {
    const startTime = Date.now();
    if (keywords.length === 0) {
      return {
        searches: [],
        totalKeywords: 0,
        executionTime: 0,
        error: { code: ErrorCode.INVALID_INPUT, message: "No keywords provided", retryable: false }
      };
    }
    // Serper accepts an array of query objects as a batch request.
    const searchQueries = keywords.map((keyword) => ({ q: keyword }));
    const { data, error } = await executeSearchWithRetry(
      this.apiKey,
      searchQueries,
      (status, err) => this.isRetryable(status, err)
    );
    if (error || data === void 0) {
      return {
        searches: [],
        totalKeywords: keywords.length,
        executionTime: Date.now() - startTime,
        error
      };
    }
    // A single-keyword request may come back as one object rather than an array.
    const responses = Array.isArray(data) ? data : [data];
    const searches = parseSearchResponses(responses, keywords);
    return { searches, totalKeywords: keywords.length, executionTime: Date.now() - startTime };
  }
  /**
   * Search Reddit via Google (adds site:reddit.com automatically).
   * NEVER throws - returns empty array on failure.
   */
  async searchReddit(query, dateAfter) {
    if (!query?.trim()) {
      return [];
    }
    // Strip any caller-supplied site: operator, then append exactly one.
    let q = query.replace(REDDIT_SITE_REGEX, "").trim() + " site:reddit.com";
    if (dateAfter) {
      // Google's after: operator restricts results to newer posts.
      q += ` after:${dateAfter}`;
    }
    for (let attempt = 0; attempt <= SEARCH_RETRY_CONFIG.maxRetries; attempt++) {
      try {
        const res = await fetchWithTimeout(SERPER_API_URL, {
          method: "POST",
          headers: { "X-API-KEY": this.apiKey, "Content-Type": "application/json" },
          body: JSON.stringify({ q, num: DEFAULT_RESULTS_PER_KEYWORD }),
          timeoutMs: SEARCH_RETRY_CONFIG.timeoutMs
        });
        if (!res.ok) {
          if (this.isRetryable(res.status) && attempt < SEARCH_RETRY_CONFIG.maxRetries) {
            const delayMs = calculateBackoff(attempt, SEARCH_RETRY_CONFIG.baseDelayMs, SEARCH_RETRY_CONFIG.maxDelayMs);
            mcpLog("warning", `Reddit search ${res.status}, retrying in ${delayMs}ms...`, "search");
            await sleep(delayMs);
            continue;
          }
          mcpLog("error", `Reddit search failed with status ${res.status}`, "search");
          return [];
        }
        const data = await res.json();
        // Clean Google-added suffixes from titles before returning.
        return (data.organic || []).map((r) => ({
          title: (r.title || "").replace(REDDIT_SUBREDDIT_SUFFIX_REGEX, "").replace(REDDIT_SUFFIX_REGEX, ""),
          url: r.link || "",
          snippet: r.snippet || "",
          date: r.date
        }));
      } catch (error) {
        // Network or JSON-parse failure; retry only if it looks transient.
        const err = classifyError(error);
        if (this.isRetryable(void 0, error) && attempt < SEARCH_RETRY_CONFIG.maxRetries) {
          const delayMs = calculateBackoff(attempt, SEARCH_RETRY_CONFIG.baseDelayMs, SEARCH_RETRY_CONFIG.maxDelayMs);
          mcpLog("warning", `Reddit search ${err.code}, retrying in ${delayMs}ms...`, "search");
          await sleep(delayMs);
          continue;
        }
        mcpLog("error", `Reddit search failed: ${err.message}`, "search");
        return [];
      }
    }
    // Retry budget exhausted without a successful response.
    return [];
  }
  /**
   * Search Reddit with multiple queries (bounded concurrency).
   * NEVER throws - searchReddit never throws, pMap preserves order,
   * so results map back to queries by index.
   */
  async searchRedditMultiple(queries, dateAfter) {
    if (queries.length === 0) {
      return /* @__PURE__ */ new Map();
    }
    const results = await pMap(
      queries,
      (q) => this.searchReddit(q, dateAfter),
      MAX_SEARCH_CONCURRENCY
    );
    return new Map(queries.map((q, i) => [q, results[i] || []]));
  }
}
215
// Public module surface: only the client class is exported; the result
// interfaces live in the accompanying .d.ts file.
export {
  SearchClient
};
//# sourceMappingURL=search.js.map