mcp-researchpowerpack-http 3.11.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-use.json +2 -2
- package/dist/src/clients/reddit.js +1 -1
- package/dist/src/clients/reddit.js.map +2 -2
- package/dist/src/config/index.js +3 -4
- package/dist/src/config/index.js.map +2 -2
- package/dist/src/schemas/reddit.js +13 -21
- package/dist/src/schemas/reddit.js.map +2 -2
- package/dist/src/schemas/scrape-links.js +7 -11
- package/dist/src/schemas/scrape-links.js.map +2 -2
- package/dist/src/schemas/web-search.js +10 -10
- package/dist/src/schemas/web-search.js.map +2 -2
- package/dist/src/services/llm-processor.js +79 -0
- package/dist/src/services/llm-processor.js.map +2 -2
- package/dist/src/tools/reddit.js +58 -92
- package/dist/src/tools/reddit.js.map +2 -2
- package/dist/src/tools/scrape.js +11 -9
- package/dist/src/tools/scrape.js.map +2 -2
- package/dist/src/tools/search.js +123 -23
- package/dist/src/tools/search.js.map +2 -2
- package/package.json +1 -1
package/dist/mcp-use.json
CHANGED
|
@@ -13,7 +13,7 @@ import { mcpLog } from "../utils/logger.js";
|
|
|
13
13
|
const REDDIT_TOKEN_URL = "https://www.reddit.com/api/v1/access_token";
|
|
14
14
|
const REDDIT_API_BASE = "https://oauth.reddit.com";
|
|
15
15
|
const TOKEN_EXPIRY_MS = 55e3;
|
|
16
|
-
const FETCH_LIMIT = REDDIT.FETCH_LIMIT_PER_POST;
|
|
16
|
+
const FETCH_LIMIT = 500;
|
|
17
17
|
let cachedToken = null;
|
|
18
18
|
let cachedTokenExpiry = 0;
|
|
19
19
|
const DEBUG_TOKEN_CACHE = process.env.DEBUG_REDDIT === "true";
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/clients/reddit.ts"],
|
|
4
|
-
"sourcesContent": ["/**\n * Reddit OAuth API Client\n * Fetches posts and comments sorted by score (most upvoted first)\n * Implements robust error handling that NEVER crashes\n */\n\nimport { Logger } from 'mcp-use';\n\nimport { REDDIT, CONCURRENCY } from '../config/index.js';\nimport { USER_AGENT_VERSION } from '../version.js';\nimport { calculateBackoff } from '../utils/retry.js';\nimport {\n classifyError,\n fetchWithTimeout,\n sleep,\n ErrorCode,\n type StructuredError,\n} from '../utils/errors.js';\nimport { pMap, pMapSettled } from '../utils/concurrency.js';\nimport { mcpLog } from '../utils/logger.js';\n\n// \u2500\u2500 Constants \u2500\u2500\n\nconst REDDIT_TOKEN_URL = 'https://www.reddit.com/api/v1/access_token' as const;\nconst REDDIT_API_BASE = 'https://oauth.reddit.com' as const;\nconst TOKEN_EXPIRY_MS = 55_000 as const; // 55 second expiry (conservative)\n\n// \u2500\u2500 Data Interfaces \u2500\u2500\n\ninterface Post {\n readonly title: string;\n readonly author: string;\n readonly subreddit: string;\n readonly body: string;\n readonly score: number;\n readonly commentCount: number;\n readonly url: string;\n readonly created: Date;\n readonly flair?: string;\n readonly isNsfw: boolean;\n readonly isPinned: boolean;\n}\n\nexport interface Comment {\n readonly author: string;\n readonly body: string;\n readonly score: number;\n readonly depth: number;\n readonly isOP: boolean;\n}\n\nexport interface PostResult {\n readonly post: Post;\n readonly comments: Comment[];\n readonly actualComments: number;\n}\n\ninterface BatchPostResult {\n readonly results: Map<string, PostResult | Error>;\n readonly batchesProcessed: number;\n readonly totalPosts: number;\n readonly rateLimitHits: number;\n}\n\n/** Reddit API \"Listing\" wrapper */\ninterface RedditListing<T> {\n readonly kind: string;\n readonly data: {\n readonly children: ReadonlyArray<{ readonly kind: string; readonly data: T }>;\n readonly after?: string;\n readonly before?: string;\n };\n}\n\n/** 
Reddit post data from API */\ninterface RedditPostData {\n readonly title: string;\n readonly selftext: string;\n readonly selftext_html?: string;\n readonly author: string;\n readonly subreddit: string;\n readonly score: number;\n readonly upvote_ratio: number;\n readonly num_comments: number;\n readonly created_utc: number;\n readonly url: string;\n readonly permalink: string;\n readonly is_self: boolean;\n readonly over_18: boolean;\n readonly stickied: boolean;\n readonly link_flair_text?: string;\n readonly [key: string]: unknown;\n}\n\n/** Reddit comment data from API */\ninterface RedditCommentData {\n readonly body?: string;\n readonly author?: string;\n readonly score?: number;\n readonly created_utc?: number;\n readonly replies?: RedditListing<RedditCommentData> | string;\n readonly [key: string]: unknown;\n}\n\ntype RedditPostResponse = [RedditListing<RedditPostData>, RedditListing<RedditCommentData>];\n\n/** Max comments to fetch per post from Reddit API */\nconst FETCH_LIMIT = REDDIT.FETCH_LIMIT_PER_POST;\n\n// ============================================================================\n// Module-Level Token Cache (shared across all RedditClient instances)\n// ============================================================================\nlet cachedToken: string | null = null;\nlet cachedTokenExpiry = 0;\n\n// Token cache logging only when DEBUG env is set\nconst DEBUG_TOKEN_CACHE = process.env.DEBUG_REDDIT === 'true';\nconst clientLogger = Logger.get('reddit-client');\n\n// Pending auth promise for deduplicating concurrent auth calls\nlet pendingAuthPromise: Promise<string | null> | null = null;\n\n// \u2500\u2500 Decomposed Helpers \u2500\u2500\n\n/**\n * Fetch a Reddit post's JSON from the API\n */\nasync function fetchRedditJson(\n sub: string,\n id: string,\n token: string,\n userAgent: string,\n): Promise<RedditPostResponse> {\n const limit = Math.min(FETCH_LIMIT, 500);\n const apiUrl = 
`${REDDIT_API_BASE}/r/${sub}/comments/${id}?sort=top&limit=${limit}&depth=10&raw_json=1`;\n\n const res = await fetchWithTimeout(apiUrl, {\n headers: {\n 'Authorization': `Bearer ${token}`,\n 'User-Agent': userAgent,\n },\n timeoutMs: 30000,\n });\n\n if (res.status === 429) {\n const err = new Error('Rate limited by Reddit API');\n (err as Error & { status: number }).status = 429;\n throw err;\n }\n\n if (res.status === 404) {\n throw new Error(`Post not found: /r/${sub}/comments/${id}`);\n }\n\n if (!res.ok) {\n const err = new Error(`Reddit API error: ${res.status}`);\n (err as Error & { status: number }).status = res.status;\n throw err;\n }\n\n try {\n return await res.json() as RedditPostResponse;\n } catch {\n throw new Error('Failed to parse Reddit API response');\n }\n}\n\n/**\n * Extract structured post data from a Reddit listing\n */\nfunction parsePostData(\n postListing: RedditListing<RedditPostData>,\n sub: string,\n): Post {\n const p = postListing?.data?.children?.[0]?.data;\n if (!p) {\n throw new Error(`Post data not found in response for /r/${sub}`);\n }\n\n return {\n title: p.title || 'Untitled',\n author: p.author || '[deleted]',\n subreddit: p.subreddit || sub,\n body: formatBody(p),\n score: p.score || 0,\n commentCount: p.num_comments || 0,\n url: `https://reddit.com${p.permalink || ''}`,\n created: new Date((p.created_utc || 0) * 1000),\n flair: p.link_flair_text || undefined,\n isNsfw: p.over_18 || false,\n isPinned: p.stickied || false,\n };\n}\n\nfunction formatBody(p: RedditPostData): string {\n if (p.selftext?.trim()) return p.selftext;\n if (p.is_self) return '';\n if (p.url) return `**Link:** ${p.url}`;\n return '';\n}\n\n/** Safety cap on comment tree recursion depth */\nconst MAX_COMMENT_DEPTH = 15 as const;\n\n/**\n * Extract and sort comments from a Reddit comment listing\n */\nfunction parseCommentTree(\n commentListing: RedditListing<RedditCommentData>,\n opAuthor: string,\n): Comment[] {\n const result: Comment[] = [];\n\n 
const extract = (items: ReadonlyArray<{ readonly kind: string; readonly data: RedditCommentData }>, depth = 0): void => {\n if (depth > MAX_COMMENT_DEPTH) return;\n const sorted = [...items].sort((a, b) => (b.data?.score || 0) - (a.data?.score || 0));\n\n for (const c of sorted) {\n if (c.kind !== 't1' || !c.data?.author || c.data.author === '[deleted]') continue;\n\n result.push({\n author: c.data.author,\n body: c.data.body || '',\n score: c.data.score || 0,\n depth,\n isOP: c.data.author === opAuthor,\n });\n\n if (typeof c.data.replies === 'object' && c.data.replies?.data?.children) {\n extract(c.data.replies.data.children, depth + 1);\n }\n }\n };\n\n extract(commentListing?.data?.children || []);\n return result;\n}\n\n// \u2500\u2500 Batch Helpers \u2500\u2500\n\n/**\n * Process a single batch of Reddit URLs, returning results keyed by URL\n */\nasync function processBatch(\n client: RedditClient,\n batchUrls: string[],\n): Promise<{ results: Map<string, PostResult | Error>; rateLimitHits: number }> {\n const results = new Map<string, PostResult | Error>();\n let rateLimitHits = 0;\n\n const batchResults = await pMapSettled(\n batchUrls,\n url => client.getPost(url),\n CONCURRENCY.REDDIT,\n );\n\n for (let i = 0; i < batchResults.length; i++) {\n const result = batchResults[i];\n if (!result) continue;\n const url = batchUrls[i] ?? 
'';\n\n if (result.status === 'fulfilled') {\n results.set(url, result.value);\n } else {\n const errorMsg = result.reason?.message || String(result.reason);\n if (errorMsg.includes('429') || errorMsg.includes('rate')) rateLimitHits++;\n results.set(url, new Error(errorMsg));\n }\n }\n\n return { results, rateLimitHits };\n}\n\n// \u2500\u2500 RedditClient Class \u2500\u2500\n\nexport class RedditClient {\n private userAgent = `script:${USER_AGENT_VERSION} (by /u/research-powerpack)`;\n\n constructor(private clientId: string, private clientSecret: string) {}\n\n /**\n * Authenticate with Reddit API with retry logic\n * Uses module-level token cache and promise deduplication to prevent\n * concurrent auth calls from firing multiple token requests\n * Returns null on failure instead of throwing\n */\n private async auth(): Promise<string | null> {\n if (cachedToken && Date.now() < cachedTokenExpiry - TOKEN_EXPIRY_MS) {\n if (DEBUG_TOKEN_CACHE) clientLogger.debug('Token cache HIT');\n return cachedToken;\n }\n\n if (pendingAuthPromise) {\n if (DEBUG_TOKEN_CACHE) clientLogger.debug('Auth already in flight, awaiting...');\n return pendingAuthPromise;\n }\n\n pendingAuthPromise = this.performAuth();\n try {\n return await pendingAuthPromise;\n } finally {\n pendingAuthPromise = null;\n }\n }\n\n private async performAuth(): Promise<string | null> {\n if (DEBUG_TOKEN_CACHE) clientLogger.debug('Token cache MISS - authenticating');\n\n const credentials = Buffer.from(`${this.clientId}:${this.clientSecret}`).toString('base64');\n\n for (let attempt = 0; attempt < 3; attempt++) {\n try {\n const res = await fetchWithTimeout(REDDIT_TOKEN_URL, {\n method: 'POST',\n headers: {\n 'Authorization': `Basic ${credentials}`,\n 'Content-Type': 'application/x-www-form-urlencoded',\n 'User-Agent': this.userAgent,\n },\n body: 'grant_type=client_credentials',\n timeoutMs: 15000,\n });\n\n if (!res.ok) {\n const text = await res.text().catch(() => '');\n mcpLog('error', `Auth failed 
(${res.status}): ${text}`, 'reddit');\n\n if (res.status === 401 || res.status === 403) {\n cachedToken = null;\n cachedTokenExpiry = 0;\n return null;\n }\n\n if (res.status >= 500 && attempt < 2) {\n await sleep(calculateBackoff(attempt));\n continue;\n }\n\n return null;\n }\n\n const data = await res.json() as { access_token?: string; expires_in?: number };\n if (!data.access_token) {\n mcpLog('error', 'Auth response missing access_token', 'reddit');\n return null;\n }\n\n cachedToken = data.access_token;\n cachedTokenExpiry = Date.now() + (data.expires_in || 3600) * 1000;\n return cachedToken;\n\n } catch (error) {\n const err = classifyError(error);\n mcpLog('error', `Auth error (attempt ${attempt + 1}): ${err.message}`, 'reddit');\n\n if (err.code === ErrorCode.AUTH_ERROR) {\n cachedToken = null;\n cachedTokenExpiry = 0;\n }\n\n if (attempt < 2 && err.retryable) {\n await sleep(calculateBackoff(attempt));\n continue;\n }\n\n return null;\n }\n }\n\n return null;\n }\n\n private parseUrl(url: string): { sub: string; id: string } | null {\n const m = url.match(/reddit\\.com\\/r\\/([^\\/]+)\\/comments\\/([a-z0-9]+)/i);\n return m ? { sub: m[1]!, id: m[2]! 
} : null;\n }\n\n /**\n * Get a single Reddit post with comments\n * Returns PostResult or throws Error (for use with Promise.allSettled)\n */\n async getPost(url: string): Promise<PostResult> {\n const parsed = this.parseUrl(url);\n if (!parsed) {\n throw new Error(`Invalid Reddit URL format: ${url}`);\n }\n\n const token = await this.auth();\n if (!token) {\n throw new Error('Reddit authentication failed - check credentials');\n }\n\n let lastError: StructuredError | null = null;\n\n for (let attempt = 0; attempt < REDDIT.RETRY_COUNT; attempt++) {\n try {\n const data = await fetchRedditJson(parsed.sub, parsed.id, token, this.userAgent);\n const [postListing, commentListing] = data;\n\n const post = parsePostData(postListing, parsed.sub);\n const comments = parseCommentTree(commentListing, post.author);\n\n return { post, comments, actualComments: post.commentCount };\n\n } catch (error) {\n lastError = classifyError(error);\n\n // Rate limited \u2014 always retry with backoff\n const status = (error as Error & { status?: number }).status;\n if (status === 429) {\n const delay = REDDIT.RETRY_DELAYS[attempt] || 32000;\n mcpLog('warning', `Rate limited. Retry ${attempt + 1}/${REDDIT.RETRY_COUNT} after ${delay}ms`, 'reddit');\n await sleep(delay);\n continue;\n }\n\n if (!lastError.retryable) {\n throw error instanceof Error ? error : new Error(lastError.message);\n }\n\n if (attempt < REDDIT.RETRY_COUNT - 1) {\n const delay = REDDIT.RETRY_DELAYS[attempt] || 2000;\n mcpLog('warning', `${lastError.code}: ${lastError.message}. 
Retry ${attempt + 1}/${REDDIT.RETRY_COUNT}`, 'reddit');\n await sleep(delay);\n }\n }\n }\n\n throw new Error(lastError?.message || 'Failed to fetch Reddit post after retries');\n }\n\n async getPosts(urls: string[]): Promise<Map<string, PostResult | Error>> {\n if (urls.length <= REDDIT.BATCH_SIZE) {\n const results = await pMap(\n urls,\n u => this.getPost(u).catch(e => e as Error),\n CONCURRENCY.REDDIT,\n );\n return new Map(urls.map((u, i) => [u, results[i]!]));\n }\n return (await this.batchGetPosts(urls)).results;\n }\n\n async batchGetPosts(\n urls: string[],\n fetchComments = true,\n onBatchComplete?: (batchNum: number, totalBatches: number, processed: number) => void,\n ): Promise<BatchPostResult> {\n const allResults = new Map<string, PostResult | Error>();\n let rateLimitHits = 0;\n\n const totalBatches = Math.ceil(urls.length / REDDIT.BATCH_SIZE);\n mcpLog('info', `Fetching ${urls.length} posts in ${totalBatches} batch(es), up to ${FETCH_LIMIT} comments/post`, 'reddit');\n\n for (let batchNum = 0; batchNum < totalBatches; batchNum++) {\n const startIdx = batchNum * REDDIT.BATCH_SIZE;\n const batchUrls = urls.slice(startIdx, startIdx + REDDIT.BATCH_SIZE);\n\n mcpLog('info', `Batch ${batchNum + 1}/${totalBatches} (${batchUrls.length} posts)`, 'reddit');\n\n const batchResult = await processBatch(this, batchUrls);\n for (const [url, result] of batchResult.results) {\n allResults.set(url, result);\n }\n rateLimitHits += batchResult.rateLimitHits;\n\n try {\n onBatchComplete?.(batchNum + 1, totalBatches, allResults.size);\n } catch (callbackError) {\n mcpLog('error', `onBatchComplete callback error: ${callbackError}`, 'reddit');\n }\n\n mcpLog('info', `Batch ${batchNum + 1} complete (${allResults.size}/${urls.length})`, 'reddit');\n\n if (batchNum < totalBatches - 1) {\n await sleep(500);\n }\n }\n\n return { results: allResults, batchesProcessed: totalBatches, totalPosts: urls.length, rateLimitHits };\n }\n}\n"],
|
|
5
|
-
"mappings": "AAMA,SAAS,cAAc;AAEvB,SAAS,QAAQ,mBAAmB;AACpC,SAAS,0BAA0B;AACnC,SAAS,wBAAwB;AACjC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,MAAM,mBAAmB;AAClC,SAAS,cAAc;AAIvB,MAAM,mBAAmB;AACzB,MAAM,kBAAkB;AACxB,MAAM,kBAAkB;
|
|
4
|
+
"sourcesContent": ["/**\n * Reddit OAuth API Client\n * Fetches posts and comments sorted by score (most upvoted first)\n * Implements robust error handling that NEVER crashes\n */\n\nimport { Logger } from 'mcp-use';\n\nimport { REDDIT, CONCURRENCY } from '../config/index.js';\nimport { USER_AGENT_VERSION } from '../version.js';\nimport { calculateBackoff } from '../utils/retry.js';\nimport {\n classifyError,\n fetchWithTimeout,\n sleep,\n ErrorCode,\n type StructuredError,\n} from '../utils/errors.js';\nimport { pMap, pMapSettled } from '../utils/concurrency.js';\nimport { mcpLog } from '../utils/logger.js';\n\n// \u2500\u2500 Constants \u2500\u2500\n\nconst REDDIT_TOKEN_URL = 'https://www.reddit.com/api/v1/access_token' as const;\nconst REDDIT_API_BASE = 'https://oauth.reddit.com' as const;\nconst TOKEN_EXPIRY_MS = 55_000 as const; // 55 second expiry (conservative)\n\n// \u2500\u2500 Data Interfaces \u2500\u2500\n\ninterface Post {\n readonly title: string;\n readonly author: string;\n readonly subreddit: string;\n readonly body: string;\n readonly score: number;\n readonly commentCount: number;\n readonly url: string;\n readonly created: Date;\n readonly flair?: string;\n readonly isNsfw: boolean;\n readonly isPinned: boolean;\n}\n\nexport interface Comment {\n readonly author: string;\n readonly body: string;\n readonly score: number;\n readonly depth: number;\n readonly isOP: boolean;\n}\n\nexport interface PostResult {\n readonly post: Post;\n readonly comments: Comment[];\n readonly actualComments: number;\n}\n\ninterface BatchPostResult {\n readonly results: Map<string, PostResult | Error>;\n readonly batchesProcessed: number;\n readonly totalPosts: number;\n readonly rateLimitHits: number;\n}\n\n/** Reddit API \"Listing\" wrapper */\ninterface RedditListing<T> {\n readonly kind: string;\n readonly data: {\n readonly children: ReadonlyArray<{ readonly kind: string; readonly data: T }>;\n readonly after?: string;\n readonly before?: string;\n };\n}\n\n/** 
Reddit post data from API */\ninterface RedditPostData {\n readonly title: string;\n readonly selftext: string;\n readonly selftext_html?: string;\n readonly author: string;\n readonly subreddit: string;\n readonly score: number;\n readonly upvote_ratio: number;\n readonly num_comments: number;\n readonly created_utc: number;\n readonly url: string;\n readonly permalink: string;\n readonly is_self: boolean;\n readonly over_18: boolean;\n readonly stickied: boolean;\n readonly link_flair_text?: string;\n readonly [key: string]: unknown;\n}\n\n/** Reddit comment data from API */\ninterface RedditCommentData {\n readonly body?: string;\n readonly author?: string;\n readonly score?: number;\n readonly created_utc?: number;\n readonly replies?: RedditListing<RedditCommentData> | string;\n readonly [key: string]: unknown;\n}\n\ntype RedditPostResponse = [RedditListing<RedditPostData>, RedditListing<RedditCommentData>];\n\n/** Max comments to fetch per post from Reddit API */\n/** Reddit API caps at 500 comments per request */\nconst FETCH_LIMIT = 500;\n\n// ============================================================================\n// Module-Level Token Cache (shared across all RedditClient instances)\n// ============================================================================\nlet cachedToken: string | null = null;\nlet cachedTokenExpiry = 0;\n\n// Token cache logging only when DEBUG env is set\nconst DEBUG_TOKEN_CACHE = process.env.DEBUG_REDDIT === 'true';\nconst clientLogger = Logger.get('reddit-client');\n\n// Pending auth promise for deduplicating concurrent auth calls\nlet pendingAuthPromise: Promise<string | null> | null = null;\n\n// \u2500\u2500 Decomposed Helpers \u2500\u2500\n\n/**\n * Fetch a Reddit post's JSON from the API\n */\nasync function fetchRedditJson(\n sub: string,\n id: string,\n token: string,\n userAgent: string,\n): Promise<RedditPostResponse> {\n const limit = Math.min(FETCH_LIMIT, 500);\n const apiUrl = 
`${REDDIT_API_BASE}/r/${sub}/comments/${id}?sort=top&limit=${limit}&depth=10&raw_json=1`;\n\n const res = await fetchWithTimeout(apiUrl, {\n headers: {\n 'Authorization': `Bearer ${token}`,\n 'User-Agent': userAgent,\n },\n timeoutMs: 30000,\n });\n\n if (res.status === 429) {\n const err = new Error('Rate limited by Reddit API');\n (err as Error & { status: number }).status = 429;\n throw err;\n }\n\n if (res.status === 404) {\n throw new Error(`Post not found: /r/${sub}/comments/${id}`);\n }\n\n if (!res.ok) {\n const err = new Error(`Reddit API error: ${res.status}`);\n (err as Error & { status: number }).status = res.status;\n throw err;\n }\n\n try {\n return await res.json() as RedditPostResponse;\n } catch {\n throw new Error('Failed to parse Reddit API response');\n }\n}\n\n/**\n * Extract structured post data from a Reddit listing\n */\nfunction parsePostData(\n postListing: RedditListing<RedditPostData>,\n sub: string,\n): Post {\n const p = postListing?.data?.children?.[0]?.data;\n if (!p) {\n throw new Error(`Post data not found in response for /r/${sub}`);\n }\n\n return {\n title: p.title || 'Untitled',\n author: p.author || '[deleted]',\n subreddit: p.subreddit || sub,\n body: formatBody(p),\n score: p.score || 0,\n commentCount: p.num_comments || 0,\n url: `https://reddit.com${p.permalink || ''}`,\n created: new Date((p.created_utc || 0) * 1000),\n flair: p.link_flair_text || undefined,\n isNsfw: p.over_18 || false,\n isPinned: p.stickied || false,\n };\n}\n\nfunction formatBody(p: RedditPostData): string {\n if (p.selftext?.trim()) return p.selftext;\n if (p.is_self) return '';\n if (p.url) return `**Link:** ${p.url}`;\n return '';\n}\n\n/** Safety cap on comment tree recursion depth */\nconst MAX_COMMENT_DEPTH = 15 as const;\n\n/**\n * Extract and sort comments from a Reddit comment listing\n */\nfunction parseCommentTree(\n commentListing: RedditListing<RedditCommentData>,\n opAuthor: string,\n): Comment[] {\n const result: Comment[] = [];\n\n 
const extract = (items: ReadonlyArray<{ readonly kind: string; readonly data: RedditCommentData }>, depth = 0): void => {\n if (depth > MAX_COMMENT_DEPTH) return;\n const sorted = [...items].sort((a, b) => (b.data?.score || 0) - (a.data?.score || 0));\n\n for (const c of sorted) {\n if (c.kind !== 't1' || !c.data?.author || c.data.author === '[deleted]') continue;\n\n result.push({\n author: c.data.author,\n body: c.data.body || '',\n score: c.data.score || 0,\n depth,\n isOP: c.data.author === opAuthor,\n });\n\n if (typeof c.data.replies === 'object' && c.data.replies?.data?.children) {\n extract(c.data.replies.data.children, depth + 1);\n }\n }\n };\n\n extract(commentListing?.data?.children || []);\n return result;\n}\n\n// \u2500\u2500 Batch Helpers \u2500\u2500\n\n/**\n * Process a single batch of Reddit URLs, returning results keyed by URL\n */\nasync function processBatch(\n client: RedditClient,\n batchUrls: string[],\n): Promise<{ results: Map<string, PostResult | Error>; rateLimitHits: number }> {\n const results = new Map<string, PostResult | Error>();\n let rateLimitHits = 0;\n\n const batchResults = await pMapSettled(\n batchUrls,\n url => client.getPost(url),\n CONCURRENCY.REDDIT,\n );\n\n for (let i = 0; i < batchResults.length; i++) {\n const result = batchResults[i];\n if (!result) continue;\n const url = batchUrls[i] ?? 
'';\n\n if (result.status === 'fulfilled') {\n results.set(url, result.value);\n } else {\n const errorMsg = result.reason?.message || String(result.reason);\n if (errorMsg.includes('429') || errorMsg.includes('rate')) rateLimitHits++;\n results.set(url, new Error(errorMsg));\n }\n }\n\n return { results, rateLimitHits };\n}\n\n// \u2500\u2500 RedditClient Class \u2500\u2500\n\nexport class RedditClient {\n private userAgent = `script:${USER_AGENT_VERSION} (by /u/research-powerpack)`;\n\n constructor(private clientId: string, private clientSecret: string) {}\n\n /**\n * Authenticate with Reddit API with retry logic\n * Uses module-level token cache and promise deduplication to prevent\n * concurrent auth calls from firing multiple token requests\n * Returns null on failure instead of throwing\n */\n private async auth(): Promise<string | null> {\n if (cachedToken && Date.now() < cachedTokenExpiry - TOKEN_EXPIRY_MS) {\n if (DEBUG_TOKEN_CACHE) clientLogger.debug('Token cache HIT');\n return cachedToken;\n }\n\n if (pendingAuthPromise) {\n if (DEBUG_TOKEN_CACHE) clientLogger.debug('Auth already in flight, awaiting...');\n return pendingAuthPromise;\n }\n\n pendingAuthPromise = this.performAuth();\n try {\n return await pendingAuthPromise;\n } finally {\n pendingAuthPromise = null;\n }\n }\n\n private async performAuth(): Promise<string | null> {\n if (DEBUG_TOKEN_CACHE) clientLogger.debug('Token cache MISS - authenticating');\n\n const credentials = Buffer.from(`${this.clientId}:${this.clientSecret}`).toString('base64');\n\n for (let attempt = 0; attempt < 3; attempt++) {\n try {\n const res = await fetchWithTimeout(REDDIT_TOKEN_URL, {\n method: 'POST',\n headers: {\n 'Authorization': `Basic ${credentials}`,\n 'Content-Type': 'application/x-www-form-urlencoded',\n 'User-Agent': this.userAgent,\n },\n body: 'grant_type=client_credentials',\n timeoutMs: 15000,\n });\n\n if (!res.ok) {\n const text = await res.text().catch(() => '');\n mcpLog('error', `Auth failed 
(${res.status}): ${text}`, 'reddit');\n\n if (res.status === 401 || res.status === 403) {\n cachedToken = null;\n cachedTokenExpiry = 0;\n return null;\n }\n\n if (res.status >= 500 && attempt < 2) {\n await sleep(calculateBackoff(attempt));\n continue;\n }\n\n return null;\n }\n\n const data = await res.json() as { access_token?: string; expires_in?: number };\n if (!data.access_token) {\n mcpLog('error', 'Auth response missing access_token', 'reddit');\n return null;\n }\n\n cachedToken = data.access_token;\n cachedTokenExpiry = Date.now() + (data.expires_in || 3600) * 1000;\n return cachedToken;\n\n } catch (error) {\n const err = classifyError(error);\n mcpLog('error', `Auth error (attempt ${attempt + 1}): ${err.message}`, 'reddit');\n\n if (err.code === ErrorCode.AUTH_ERROR) {\n cachedToken = null;\n cachedTokenExpiry = 0;\n }\n\n if (attempt < 2 && err.retryable) {\n await sleep(calculateBackoff(attempt));\n continue;\n }\n\n return null;\n }\n }\n\n return null;\n }\n\n private parseUrl(url: string): { sub: string; id: string } | null {\n const m = url.match(/reddit\\.com\\/r\\/([^\\/]+)\\/comments\\/([a-z0-9]+)/i);\n return m ? { sub: m[1]!, id: m[2]! 
} : null;\n }\n\n /**\n * Get a single Reddit post with comments\n * Returns PostResult or throws Error (for use with Promise.allSettled)\n */\n async getPost(url: string): Promise<PostResult> {\n const parsed = this.parseUrl(url);\n if (!parsed) {\n throw new Error(`Invalid Reddit URL format: ${url}`);\n }\n\n const token = await this.auth();\n if (!token) {\n throw new Error('Reddit authentication failed - check credentials');\n }\n\n let lastError: StructuredError | null = null;\n\n for (let attempt = 0; attempt < REDDIT.RETRY_COUNT; attempt++) {\n try {\n const data = await fetchRedditJson(parsed.sub, parsed.id, token, this.userAgent);\n const [postListing, commentListing] = data;\n\n const post = parsePostData(postListing, parsed.sub);\n const comments = parseCommentTree(commentListing, post.author);\n\n return { post, comments, actualComments: post.commentCount };\n\n } catch (error) {\n lastError = classifyError(error);\n\n // Rate limited \u2014 always retry with backoff\n const status = (error as Error & { status?: number }).status;\n if (status === 429) {\n const delay = REDDIT.RETRY_DELAYS[attempt] || 32000;\n mcpLog('warning', `Rate limited. Retry ${attempt + 1}/${REDDIT.RETRY_COUNT} after ${delay}ms`, 'reddit');\n await sleep(delay);\n continue;\n }\n\n if (!lastError.retryable) {\n throw error instanceof Error ? error : new Error(lastError.message);\n }\n\n if (attempt < REDDIT.RETRY_COUNT - 1) {\n const delay = REDDIT.RETRY_DELAYS[attempt] || 2000;\n mcpLog('warning', `${lastError.code}: ${lastError.message}. 
Retry ${attempt + 1}/${REDDIT.RETRY_COUNT}`, 'reddit');\n await sleep(delay);\n }\n }\n }\n\n throw new Error(lastError?.message || 'Failed to fetch Reddit post after retries');\n }\n\n async getPosts(urls: string[]): Promise<Map<string, PostResult | Error>> {\n if (urls.length <= REDDIT.BATCH_SIZE) {\n const results = await pMap(\n urls,\n u => this.getPost(u).catch(e => e as Error),\n CONCURRENCY.REDDIT,\n );\n return new Map(urls.map((u, i) => [u, results[i]!]));\n }\n return (await this.batchGetPosts(urls)).results;\n }\n\n async batchGetPosts(\n urls: string[],\n fetchComments = true,\n onBatchComplete?: (batchNum: number, totalBatches: number, processed: number) => void,\n ): Promise<BatchPostResult> {\n const allResults = new Map<string, PostResult | Error>();\n let rateLimitHits = 0;\n\n const totalBatches = Math.ceil(urls.length / REDDIT.BATCH_SIZE);\n mcpLog('info', `Fetching ${urls.length} posts in ${totalBatches} batch(es), up to ${FETCH_LIMIT} comments/post`, 'reddit');\n\n for (let batchNum = 0; batchNum < totalBatches; batchNum++) {\n const startIdx = batchNum * REDDIT.BATCH_SIZE;\n const batchUrls = urls.slice(startIdx, startIdx + REDDIT.BATCH_SIZE);\n\n mcpLog('info', `Batch ${batchNum + 1}/${totalBatches} (${batchUrls.length} posts)`, 'reddit');\n\n const batchResult = await processBatch(this, batchUrls);\n for (const [url, result] of batchResult.results) {\n allResults.set(url, result);\n }\n rateLimitHits += batchResult.rateLimitHits;\n\n try {\n onBatchComplete?.(batchNum + 1, totalBatches, allResults.size);\n } catch (callbackError) {\n mcpLog('error', `onBatchComplete callback error: ${callbackError}`, 'reddit');\n }\n\n mcpLog('info', `Batch ${batchNum + 1} complete (${allResults.size}/${urls.length})`, 'reddit');\n\n if (batchNum < totalBatches - 1) {\n await sleep(500);\n }\n }\n\n return { results: allResults, batchesProcessed: totalBatches, totalPosts: urls.length, rateLimitHits };\n }\n}\n"],
|
|
5
|
+
"mappings": "AAMA,SAAS,cAAc;AAEvB,SAAS,QAAQ,mBAAmB;AACpC,SAAS,0BAA0B;AACnC,SAAS,wBAAwB;AACjC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,MAAM,mBAAmB;AAClC,SAAS,cAAc;AAIvB,MAAM,mBAAmB;AACzB,MAAM,kBAAkB;AACxB,MAAM,kBAAkB;AAmFxB,MAAM,cAAc;AAKpB,IAAI,cAA6B;AACjC,IAAI,oBAAoB;AAGxB,MAAM,oBAAoB,QAAQ,IAAI,iBAAiB;AACvD,MAAM,eAAe,OAAO,IAAI,eAAe;AAG/C,IAAI,qBAAoD;AAOxD,eAAe,gBACb,KACA,IACA,OACA,WAC6B;AAC7B,QAAM,QAAQ,KAAK,IAAI,aAAa,GAAG;AACvC,QAAM,SAAS,GAAG,eAAe,MAAM,GAAG,aAAa,EAAE,mBAAmB,KAAK;AAEjF,QAAM,MAAM,MAAM,iBAAiB,QAAQ;AAAA,IACzC,SAAS;AAAA,MACP,iBAAiB,UAAU,KAAK;AAAA,MAChC,cAAc;AAAA,IAChB;AAAA,IACA,WAAW;AAAA,EACb,CAAC;AAED,MAAI,IAAI,WAAW,KAAK;AACtB,UAAM,MAAM,IAAI,MAAM,4BAA4B;AAClD,IAAC,IAAmC,SAAS;AAC7C,UAAM;AAAA,EACR;AAEA,MAAI,IAAI,WAAW,KAAK;AACtB,UAAM,IAAI,MAAM,sBAAsB,GAAG,aAAa,EAAE,EAAE;AAAA,EAC5D;AAEA,MAAI,CAAC,IAAI,IAAI;AACX,UAAM,MAAM,IAAI,MAAM,qBAAqB,IAAI,MAAM,EAAE;AACvD,IAAC,IAAmC,SAAS,IAAI;AACjD,UAAM;AAAA,EACR;AAEA,MAAI;AACF,WAAO,MAAM,IAAI,KAAK;AAAA,EACxB,QAAQ;AACN,UAAM,IAAI,MAAM,qCAAqC;AAAA,EACvD;AACF;AAKA,SAAS,cACP,aACA,KACM;AACN,QAAM,IAAI,aAAa,MAAM,WAAW,CAAC,GAAG;AAC5C,MAAI,CAAC,GAAG;AACN,UAAM,IAAI,MAAM,0CAA0C,GAAG,EAAE;AAAA,EACjE;AAEA,SAAO;AAAA,IACL,OAAO,EAAE,SAAS;AAAA,IAClB,QAAQ,EAAE,UAAU;AAAA,IACpB,WAAW,EAAE,aAAa;AAAA,IAC1B,MAAM,WAAW,CAAC;AAAA,IAClB,OAAO,EAAE,SAAS;AAAA,IAClB,cAAc,EAAE,gBAAgB;AAAA,IAChC,KAAK,qBAAqB,EAAE,aAAa,EAAE;AAAA,IAC3C,SAAS,IAAI,MAAM,EAAE,eAAe,KAAK,GAAI;AAAA,IAC7C,OAAO,EAAE,mBAAmB;AAAA,IAC5B,QAAQ,EAAE,WAAW;AAAA,IACrB,UAAU,EAAE,YAAY;AAAA,EAC1B;AACF;AAEA,SAAS,WAAW,GAA2B;AAC7C,MAAI,EAAE,UAAU,KAAK,EAAG,QAAO,EAAE;AACjC,MAAI,EAAE,QAAS,QAAO;AACtB,MAAI,EAAE,IAAK,QAAO,aAAa,EAAE,GAAG;AACpC,SAAO;AACT;AAGA,MAAM,oBAAoB;AAK1B,SAAS,iBACP,gBACA,UACW;AACX,QAAM,SAAoB,CAAC;AAE3B,QAAM,UAAU,CAAC,OAAmF,QAAQ,MAAY;AACtH,QAAI,QAAQ,kBAAmB;AAC/B,UAAM,SAAS,CAAC,GAAG,KAAK,EAAE,KAAK,CAAC,GAAG,OAAO,EAAE,MAAM,SAAS,MAAM,EAAE,MAAM,SAAS,EAAE;AAEpF,eAAW,KAAK,QAAQ;AACtB,UAAI,EAAE,SAAS,QAAQ,CAAC,EAAE,MAAM,UAAU,EAAE,KAAK,WAAW,YAAa;AAEzE,aAAO,KAAK;AAAA,QACV,QAAQ
,EAAE,KAAK;AAAA,QACf,MAAM,EAAE,KAAK,QAAQ;AAAA,QACrB,OAAO,EAAE,KAAK,SAAS;AAAA,QACvB;AAAA,QACA,MAAM,EAAE,KAAK,WAAW;AAAA,MAC1B,CAAC;AAED,UAAI,OAAO,EAAE,KAAK,YAAY,YAAY,EAAE,KAAK,SAAS,MAAM,UAAU;AACxE,gBAAQ,EAAE,KAAK,QAAQ,KAAK,UAAU,QAAQ,CAAC;AAAA,MACjD;AAAA,IACF;AAAA,EACF;AAEA,UAAQ,gBAAgB,MAAM,YAAY,CAAC,CAAC;AAC5C,SAAO;AACT;AAOA,eAAe,aACb,QACA,WAC8E;AAC9E,QAAM,UAAU,oBAAI,IAAgC;AACpD,MAAI,gBAAgB;AAEpB,QAAM,eAAe,MAAM;AAAA,IACzB;AAAA,IACA,SAAO,OAAO,QAAQ,GAAG;AAAA,IACzB,YAAY;AAAA,EACd;AAEA,WAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,UAAM,SAAS,aAAa,CAAC;AAC7B,QAAI,CAAC,OAAQ;AACb,UAAM,MAAM,UAAU,CAAC,KAAK;AAE5B,QAAI,OAAO,WAAW,aAAa;AACjC,cAAQ,IAAI,KAAK,OAAO,KAAK;AAAA,IAC/B,OAAO;AACL,YAAM,WAAW,OAAO,QAAQ,WAAW,OAAO,OAAO,MAAM;AAC/D,UAAI,SAAS,SAAS,KAAK,KAAK,SAAS,SAAS,MAAM,EAAG;AAC3D,cAAQ,IAAI,KAAK,IAAI,MAAM,QAAQ,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,cAAc;AAClC;AAIO,MAAM,aAAa;AAAA,EAGxB,YAAoB,UAA0B,cAAsB;AAAhD;AAA0B;AAAA,EAAuB;AAAA,EAF7D,YAAY,UAAU,kBAAkB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUhD,MAAc,OAA+B;AAC3C,QAAI,eAAe,KAAK,IAAI,IAAI,oBAAoB,iBAAiB;AACnE,UAAI,kBAAmB,cAAa,MAAM,iBAAiB;AAC3D,aAAO;AAAA,IACT;AAEA,QAAI,oBAAoB;AACtB,UAAI,kBAAmB,cAAa,MAAM,qCAAqC;AAC/E,aAAO;AAAA,IACT;AAEA,yBAAqB,KAAK,YAAY;AACtC,QAAI;AACF,aAAO,MAAM;AAAA,IACf,UAAE;AACA,2BAAqB;AAAA,IACvB;AAAA,EACF;AAAA,EAEA,MAAc,cAAsC;AAClD,QAAI,kBAAmB,cAAa,MAAM,mCAAmC;AAE7E,UAAM,cAAc,OAAO,KAAK,GAAG,KAAK,QAAQ,IAAI,KAAK,YAAY,EAAE,EAAE,SAAS,QAAQ;AAE1F,aAAS,UAAU,GAAG,UAAU,GAAG,WAAW;AAC5C,UAAI;AACF,cAAM,MAAM,MAAM,iBAAiB,kBAAkB;AAAA,UACnD,QAAQ;AAAA,UACR,SAAS;AAAA,YACP,iBAAiB,SAAS,WAAW;AAAA,YACrC,gBAAgB;AAAA,YAChB,cAAc,KAAK;AAAA,UACrB;AAAA,UACA,MAAM;AAAA,UACN,WAAW;AAAA,QACb,CAAC;AAED,YAAI,CAAC,IAAI,IAAI;AACX,gBAAM,OAAO,MAAM,IAAI,KAAK,EAAE,MAAM,MAAM,EAAE;AAC5C,iBAAO,SAAS,gBAAgB,IAAI,MAAM,MAAM,IAAI,IAAI,QAAQ;AAEhE,cAAI,IAAI,WAAW,OAAO,IAAI,WAAW,KAAK;AAC5C,0BAAc;AACd,gCAAoB;AACpB,mBAAO;AAAA,UACT;AAEA,cAAI,IAAI,UAAU,OAAO,UAAU,GAAG;AACpC,kBAAM,MAAM,iBAAiB,OAAO,CAAC;AACrC;AAAA,UACF;AAEA,iBAAO;AAAA,QACT;AAEA,cAAM,OAAO,MAAM,IAAI,KAAK;AAC5B
,YAAI,CAAC,KAAK,cAAc;AACtB,iBAAO,SAAS,sCAAsC,QAAQ;AAC9D,iBAAO;AAAA,QACT;AAEA,sBAAc,KAAK;AACnB,4BAAoB,KAAK,IAAI,KAAK,KAAK,cAAc,QAAQ;AAC7D,eAAO;AAAA,MAET,SAAS,OAAO;AACd,cAAM,MAAM,cAAc,KAAK;AAC/B,eAAO,SAAS,uBAAuB,UAAU,CAAC,MAAM,IAAI,OAAO,IAAI,QAAQ;AAE/E,YAAI,IAAI,SAAS,UAAU,YAAY;AACrC,wBAAc;AACd,8BAAoB;AAAA,QACtB;AAEA,YAAI,UAAU,KAAK,IAAI,WAAW;AAChC,gBAAM,MAAM,iBAAiB,OAAO,CAAC;AACrC;AAAA,QACF;AAEA,eAAO;AAAA,MACT;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAAA,EAEQ,SAAS,KAAiD;AAChE,UAAM,IAAI,IAAI,MAAM,kDAAkD;AACtE,WAAO,IAAI,EAAE,KAAK,EAAE,CAAC,GAAI,IAAI,EAAE,CAAC,EAAG,IAAI;AAAA,EACzC;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,QAAQ,KAAkC;AAC9C,UAAM,SAAS,KAAK,SAAS,GAAG;AAChC,QAAI,CAAC,QAAQ;AACX,YAAM,IAAI,MAAM,8BAA8B,GAAG,EAAE;AAAA,IACrD;AAEA,UAAM,QAAQ,MAAM,KAAK,KAAK;AAC9B,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,kDAAkD;AAAA,IACpE;AAEA,QAAI,YAAoC;AAExC,aAAS,UAAU,GAAG,UAAU,OAAO,aAAa,WAAW;AAC7D,UAAI;AACF,cAAM,OAAO,MAAM,gBAAgB,OAAO,KAAK,OAAO,IAAI,OAAO,KAAK,SAAS;AAC/E,cAAM,CAAC,aAAa,cAAc,IAAI;AAEtC,cAAM,OAAO,cAAc,aAAa,OAAO,GAAG;AAClD,cAAM,WAAW,iBAAiB,gBAAgB,KAAK,MAAM;AAE7D,eAAO,EAAE,MAAM,UAAU,gBAAgB,KAAK,aAAa;AAAA,MAE7D,SAAS,OAAO;AACd,oBAAY,cAAc,KAAK;AAG/B,cAAM,SAAU,MAAsC;AACtD,YAAI,WAAW,KAAK;AAClB,gBAAM,QAAQ,OAAO,aAAa,OAAO,KAAK;AAC9C,iBAAO,WAAW,uBAAuB,UAAU,CAAC,IAAI,OAAO,WAAW,UAAU,KAAK,MAAM,QAAQ;AACvG,gBAAM,MAAM,KAAK;AACjB;AAAA,QACF;AAEA,YAAI,CAAC,UAAU,WAAW;AACxB,gBAAM,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,UAAU,OAAO;AAAA,QACpE;AAEA,YAAI,UAAU,OAAO,cAAc,GAAG;AACpC,gBAAM,QAAQ,OAAO,aAAa,OAAO,KAAK;AAC9C,iBAAO,WAAW,GAAG,UAAU,IAAI,KAAK,UAAU,OAAO,WAAW,UAAU,CAAC,IAAI,OAAO,WAAW,IAAI,QAAQ;AACjH,gBAAM,MAAM,KAAK;AAAA,QACnB;AAAA,MACF;AAAA,IACF;AAEA,UAAM,IAAI,MAAM,WAAW,WAAW,2CAA2C;AAAA,EACnF;AAAA,EAEA,MAAM,SAAS,MAA0D;AACvE,QAAI,KAAK,UAAU,OAAO,YAAY;AACpC,YAAM,UAAU,MAAM;AAAA,QACpB;AAAA,QACA,OAAK,KAAK,QAAQ,CAAC,EAAE,MAAM,OAAK,CAAU;AAAA,QAC1C,YAAY;AAAA,MACd;AACA,aAAO,IAAI,IAAI,KAAK,IAAI,CAAC,GAAG,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAE,CAAC,CAAC;AAAA,IACrD;AACA,YAAQ,MAAM,KAAK,cAAc,IAAI,GAAG;AAAA,EAC1C;AAAA,EAEA,MAAM,cACJ,MACA,gBAAgB,MAChB,iBAC
0B;AAC1B,UAAM,aAAa,oBAAI,IAAgC;AACvD,QAAI,gBAAgB;AAEpB,UAAM,eAAe,KAAK,KAAK,KAAK,SAAS,OAAO,UAAU;AAC9D,WAAO,QAAQ,YAAY,KAAK,MAAM,aAAa,YAAY,qBAAqB,WAAW,kBAAkB,QAAQ;AAEzH,aAAS,WAAW,GAAG,WAAW,cAAc,YAAY;AAC1D,YAAM,WAAW,WAAW,OAAO;AACnC,YAAM,YAAY,KAAK,MAAM,UAAU,WAAW,OAAO,UAAU;AAEnE,aAAO,QAAQ,SAAS,WAAW,CAAC,IAAI,YAAY,KAAK,UAAU,MAAM,WAAW,QAAQ;AAE5F,YAAM,cAAc,MAAM,aAAa,MAAM,SAAS;AACtD,iBAAW,CAAC,KAAK,MAAM,KAAK,YAAY,SAAS;AAC/C,mBAAW,IAAI,KAAK,MAAM;AAAA,MAC5B;AACA,uBAAiB,YAAY;AAE7B,UAAI;AACF,0BAAkB,WAAW,GAAG,cAAc,WAAW,IAAI;AAAA,MAC/D,SAAS,eAAe;AACtB,eAAO,SAAS,mCAAmC,aAAa,IAAI,QAAQ;AAAA,MAC9E;AAEA,aAAO,QAAQ,SAAS,WAAW,CAAC,cAAc,WAAW,IAAI,IAAI,KAAK,MAAM,KAAK,QAAQ;AAE7F,UAAI,WAAW,eAAe,GAAG;AAC/B,cAAM,MAAM,GAAG;AAAA,MACjB;AAAA,IACF;AAEA,WAAO,EAAE,SAAS,YAAY,kBAAkB,cAAc,YAAY,KAAK,QAAQ,cAAc;AAAA,EACvG;AACF;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/dist/src/config/index.js
CHANGED
|
@@ -102,9 +102,8 @@ const SCRAPER = {
|
|
|
102
102
|
};
|
|
103
103
|
const REDDIT = {
|
|
104
104
|
BATCH_SIZE: 10,
|
|
105
|
-
MAX_WORDS_PER_POST:
|
|
106
|
-
MAX_WORDS_TOTAL:
|
|
107
|
-
FETCH_LIMIT_PER_POST: 500,
|
|
105
|
+
MAX_WORDS_PER_POST: 5e4,
|
|
106
|
+
MAX_WORDS_TOTAL: 5e5,
|
|
108
107
|
MIN_POSTS: 1,
|
|
109
108
|
MAX_POSTS: 50,
|
|
110
109
|
RETRY_COUNT: 5,
|
|
@@ -146,7 +145,7 @@ let cachedLlmExtraction = null;
|
|
|
146
145
|
function getLlmExtraction() {
|
|
147
146
|
if (cachedLlmExtraction) return cachedLlmExtraction;
|
|
148
147
|
cachedLlmExtraction = {
|
|
149
|
-
MODEL: process.env.LLM_EXTRACTION_MODEL || "
|
|
148
|
+
MODEL: process.env.LLM_EXTRACTION_MODEL || "gpt-5.4-mini",
|
|
150
149
|
BASE_URL: process.env.LLM_EXTRACTION_BASE_URL || process.env.OPENROUTER_BASE_URL || "https://openrouter.ai/api/v1",
|
|
151
150
|
API_KEY: process.env.LLM_EXTRACTION_API_KEY || process.env.OPENROUTER_API_KEY || "",
|
|
152
151
|
MAX_TOKENS: 8e3,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/config/index.ts"],
|
|
4
|
-
"sourcesContent": ["/**\n * Consolidated configuration\n * All environment variables, constants, and LLM config in one place\n */\n\nimport { Logger } from 'mcp-use';\n\nimport { VERSION, PACKAGE_NAME, PACKAGE_DESCRIPTION } from '../version.js';\n\n// Import version utilities (not re-exported - use directly from version.ts if needed externally)\n\n// ============================================================================\n// Safe Integer Parsing Helper\n// ============================================================================\n\n/**\n * Safely parse an integer from environment variable with bounds checking\n * @param value - The string value to parse (from process.env)\n * @param defaultVal - Default value if parsing fails or value is undefined\n * @param min - Minimum allowed value (clamped if below)\n * @param max - Maximum allowed value (clamped if above)\n * @returns Parsed integer within bounds, or default value\n */\nfunction safeParseInt(\n value: string | undefined,\n defaultVal: number,\n min: number,\n max: number\n): number {\n const logger = Logger.get('config');\n\n if (!value) {\n return defaultVal;\n }\n \n const parsed = parseInt(value, 10);\n \n if (isNaN(parsed)) {\n logger.warn(`Invalid number \"${value}\", using default ${defaultVal}`);\n return defaultVal;\n }\n \n if (parsed < min) {\n logger.warn(`Value ${parsed} below minimum ${min}, clamping to ${min}`);\n return min;\n }\n \n if (parsed > max) {\n logger.warn(`Value ${parsed} above maximum ${max}, clamping to ${max}`);\n return max;\n }\n \n return parsed;\n}\n\n// ============================================================================\n// Reasoning Effort Validation\n// ============================================================================\n\nconst VALID_REASONING_EFFORTS = ['low', 'medium', 'high'] as const;\ntype ReasoningEffort = typeof VALID_REASONING_EFFORTS[number];\n\nfunction parseReasoningEffort(value: string | undefined): ReasoningEffort {\n if (value && 
VALID_REASONING_EFFORTS.includes(value as ReasoningEffort)) {\n return value as ReasoningEffort;\n }\n return 'high';\n}\n\n// ============================================================================\n// Environment Parsing\n// ============================================================================\n\ninterface EnvConfig {\n SCRAPER_API_KEY: string;\n SEARCH_API_KEY: string | undefined;\n REDDIT_CLIENT_ID: string | undefined;\n REDDIT_CLIENT_SECRET: string | undefined;\n}\n\nlet cachedEnv: EnvConfig | null = null;\n\nexport function resetEnvCache(): void {\n cachedEnv = null;\n cachedResearch = null;\n cachedLlmExtraction = null;\n}\n\nexport function parseEnv(): EnvConfig {\n if (cachedEnv) return cachedEnv;\n cachedEnv = {\n SCRAPER_API_KEY: process.env.SCRAPEDO_API_KEY || '',\n SEARCH_API_KEY: process.env.SERPER_API_KEY || undefined,\n REDDIT_CLIENT_ID: process.env.REDDIT_CLIENT_ID || undefined,\n REDDIT_CLIENT_SECRET: process.env.REDDIT_CLIENT_SECRET || undefined,\n };\n return cachedEnv;\n}\n\n// ============================================================================\n// Research API Configuration\n// ============================================================================\n\ninterface ResearchConfig {\n readonly BASE_URL: string;\n readonly MODEL: string;\n readonly FALLBACK_MODEL: string;\n readonly API_KEY: string;\n readonly TIMEOUT_MS: number;\n readonly REASONING_EFFORT: 'low' | 'medium' | 'high';\n readonly MAX_URLS: number;\n}\n\nlet cachedResearch: ResearchConfig | null = null;\n\nfunction getResearch(): ResearchConfig {\n if (cachedResearch) return cachedResearch;\n cachedResearch = {\n BASE_URL: process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1',\n MODEL: process.env.RESEARCH_MODEL || 'x-ai/grok-4-fast',\n FALLBACK_MODEL: process.env.RESEARCH_FALLBACK_MODEL || 'google/gemini-2.5-flash',\n API_KEY: process.env.OPENROUTER_API_KEY || '',\n TIMEOUT_MS: safeParseInt(process.env.API_TIMEOUT_MS, 1800000, 1000, 3600000),\n 
REASONING_EFFORT: parseReasoningEffort(process.env.DEFAULT_REASONING_EFFORT),\n MAX_URLS: safeParseInt(process.env.DEFAULT_MAX_URLS, 100, 10, 200),\n };\n return cachedResearch;\n}\n\n// Lazy proxy so existing code using RESEARCH.X still works\nexport const RESEARCH: ResearchConfig = new Proxy({} as ResearchConfig, {\n get(_target, prop: string) {\n return getResearch()[prop as keyof ResearchConfig];\n },\n});\n\n// ============================================================================\n// MCP Server Configuration\n// ============================================================================\n\n// Version is now automatically read from package.json via version.ts\n// No need to manually update version strings anymore!\nexport const SERVER = {\n NAME: PACKAGE_NAME,\n VERSION: VERSION,\n DESCRIPTION: PACKAGE_DESCRIPTION,\n} as const;\n\n// ============================================================================\n// Capability Detection (which features are available based on ENV)\n// ============================================================================\n\nexport interface Capabilities {\n reddit: boolean; // REDDIT_CLIENT_ID + REDDIT_CLIENT_SECRET\n search: boolean; // SERPER_API_KEY\n scraping: boolean; // SCRAPEDO_API_KEY\n llmExtraction: boolean; // LLM_EXTRACTION_API_KEY or OPENROUTER_API_KEY\n}\n\nexport function getCapabilities(): Capabilities {\n const env = parseEnv();\n return {\n reddit: !!(env.REDDIT_CLIENT_ID && env.REDDIT_CLIENT_SECRET),\n search: !!env.SEARCH_API_KEY,\n scraping: !!env.SCRAPER_API_KEY,\n llmExtraction: !!LLM_EXTRACTION.API_KEY,\n };\n}\n\nexport function getMissingEnvMessage(capability: keyof Capabilities): string {\n const messages: Record<keyof Capabilities, string> = {\n reddit: '\u274C **Reddit tools unavailable.** Set `REDDIT_CLIENT_ID` and `REDDIT_CLIENT_SECRET` to enable `get-reddit-post`.\\n\\n\uD83D\uDC49 Create a Reddit app at: https://www.reddit.com/prefs/apps (select \"script\" type)',\n search: '\u274C 
**Search unavailable.** Set `SERPER_API_KEY` to enable `web-search` and `search-reddit`.\\n\\n\uD83D\uDC49 Get your free API key at: https://serper.dev (2,500 free queries)',\n scraping: '\u274C **Web scraping unavailable.** Set `SCRAPEDO_API_KEY` to enable `scrape-links`.\\n\\n\uD83D\uDC49 Sign up at: https://scrape.do (1,000 free credits)',\n llmExtraction: '\u26A0\uFE0F **AI extraction disabled.** The `use_llm` and `what_to_extract` features require `LLM_EXTRACTION_API_KEY` or `OPENROUTER_API_KEY`.\\n\\nScraping will work but without intelligent content filtering.',\n };\n return messages[capability];\n}\n\n// ============================================================================\n// Scraper Configuration (Scrape.do implementation)\n// ============================================================================\n\n// ============================================================================\n// Concurrency Limits \u2014 tuned for 2-core deployments\n//\n// Bottleneck analysis (Node.js single-threaded event loop):\n// SEARCH: Pure I/O + tiny JSON parse (~5KB). High concurrency safe.\n// SCRAPER: I/O + Turndown HTML\u2192MD conversion (20-50ms/page, synchronous).\n// Too many concurrent = burst of responses blocks event loop.\n// 20 concurrent \u00D7 30ms avg = 600ms worst-case event loop stall.\n// REDDIT: I/O + moderate JSON. Reddit rate-limits at ~60 req/min.\n// LLM: I/O-only locally, but remote inference uses multiple cores per\n// request. Default 10 keeps remote server responsive.\n// ============================================================================\n\nexport const CONCURRENCY = {\n /** Serper API \u2014 tiny JSON responses, pure I/O, no CPU cost */\n SEARCH: 30,\n /** Scrape.do \u2014 HTML responses (50-500KB), Turndown conversion is CPU work */\n SCRAPER: 20,\n /** Reddit API \u2014 moderate payloads, aggressive rate limiting (60 req/min) */\n REDDIT: 10,\n /** LLM extraction \u2014 remote inference bottleneck. 
Tune via LLM_CONCURRENCY env */\n LLM_EXTRACTION: safeParseInt(process.env.LLM_CONCURRENCY, 10, 1, 50),\n} as const;\n\nexport const SCRAPER = {\n BATCH_SIZE: 30,\n EXTRACTION_PREFIX: 'Extract from document only \u2014 never hallucinate or add external knowledge.',\n EXTRACTION_SUFFIX: 'First line = content, not preamble. No confirmation messages.',\n} as const;\n\n// ============================================================================\n// Research Compression Prefix/Suffix\n// ============================================================================\n\n// ============================================================================\n// Reddit Configuration\n// ============================================================================\n\nexport const REDDIT = {\n BATCH_SIZE: 10,\n MAX_WORDS_PER_POST: 20_000,\n MAX_WORDS_TOTAL: 100_000,\n FETCH_LIMIT_PER_POST: 500,\n MIN_POSTS: 1,\n MAX_POSTS: 50,\n RETRY_COUNT: 5,\n RETRY_DELAYS: [2000, 4000, 8000, 16000, 32000] as const,\n EXTRACTION_SUFFIX: `\n---\n\n\u26A0\uFE0F IMPORTANT: Extract and synthesize the key insights, opinions, and recommendations from these Reddit discussions. Focus on:\n- Common themes and consensus across posts\n- Specific recommendations with context\n- Contrasting viewpoints and debates\n- Real-world experiences and lessons learned\n- Technical details and implementation tips\n\nBe comprehensive but concise. 
Prioritize actionable insights.\n\n---`,\n} as const;\n\n// ============================================================================\n// CTR Weights for URL Ranking (inspired from CTR research)\n// ============================================================================\n\nexport const CTR_WEIGHTS: Record<number, number> = {\n 1: 100.00,\n 2: 60.00,\n 3: 48.89,\n 4: 33.33,\n 5: 28.89,\n 6: 26.44,\n 7: 24.44,\n 8: 17.78,\n 9: 13.33,\n 10: 12.56,\n} as const;\n\n// ============================================================================\n// LLM Extraction Model (uses OPENROUTER for scrape-links AI extraction)\n// ============================================================================\n\ntype LlmReasoningEffort = ReasoningEffort | 'none';\n\nfunction parseLlmReasoningEffort(value: string | undefined): LlmReasoningEffort {\n if (value === 'none') return 'none';\n if (value && VALID_REASONING_EFFORTS.includes(value as ReasoningEffort)) {\n return value as ReasoningEffort;\n }\n return 'low';\n}\n\ninterface LlmExtractionConfig {\n readonly MODEL: string;\n readonly BASE_URL: string;\n readonly API_KEY: string;\n readonly MAX_TOKENS: number;\n readonly REASONING_EFFORT: LlmReasoningEffort;\n}\n\nlet cachedLlmExtraction: LlmExtractionConfig | null = null;\n\nfunction getLlmExtraction(): LlmExtractionConfig {\n if (cachedLlmExtraction) return cachedLlmExtraction;\n cachedLlmExtraction = {\n MODEL: process.env.LLM_EXTRACTION_MODEL || 'openai/gpt-oss-120b:nitro',\n BASE_URL: process.env.LLM_EXTRACTION_BASE_URL || process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1',\n API_KEY: process.env.LLM_EXTRACTION_API_KEY || process.env.OPENROUTER_API_KEY || '',\n MAX_TOKENS: 8000,\n REASONING_EFFORT: parseLlmReasoningEffort(process.env.LLM_EXTRACTION_REASONING),\n };\n return cachedLlmExtraction;\n}\n\nexport const LLM_EXTRACTION: LlmExtractionConfig = new Proxy({} as LlmExtractionConfig, {\n get(_target, prop: string) {\n return getLlmExtraction()[prop 
as keyof LlmExtractionConfig];\n },\n});\n\n"],
|
|
5
|
-
"mappings": "AAKA,SAAS,cAAc;AAEvB,SAAS,SAAS,cAAc,2BAA2B;AAgB3D,SAAS,aACP,OACA,YACA,KACA,KACQ;AACR,QAAM,SAAS,OAAO,IAAI,QAAQ;AAElC,MAAI,CAAC,OAAO;AACV,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,SAAS,OAAO,EAAE;AAEjC,MAAI,MAAM,MAAM,GAAG;AACjB,WAAO,KAAK,mBAAmB,KAAK,oBAAoB,UAAU,EAAE;AACpE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAMA,MAAM,0BAA0B,CAAC,OAAO,UAAU,MAAM;AAGxD,SAAS,qBAAqB,OAA4C;AACxE,MAAI,SAAS,wBAAwB,SAAS,KAAwB,GAAG;AACvE,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAaA,IAAI,YAA8B;AAE3B,SAAS,gBAAsB;AACpC,cAAY;AACZ,mBAAiB;AACjB,wBAAsB;AACxB;AAEO,SAAS,WAAsB;AACpC,MAAI,UAAW,QAAO;AACtB,cAAY;AAAA,IACV,iBAAiB,QAAQ,IAAI,oBAAoB;AAAA,IACjD,gBAAgB,QAAQ,IAAI,kBAAkB;AAAA,IAC9C,kBAAkB,QAAQ,IAAI,oBAAoB;AAAA,IAClD,sBAAsB,QAAQ,IAAI,wBAAwB;AAAA,EAC5D;AACA,SAAO;AACT;AAgBA,IAAI,iBAAwC;AAE5C,SAAS,cAA8B;AACrC,MAAI,eAAgB,QAAO;AAC3B,mBAAiB;AAAA,IACf,UAAU,QAAQ,IAAI,uBAAuB;AAAA,IAC7C,OAAO,QAAQ,IAAI,kBAAkB;AAAA,IACrC,gBAAgB,QAAQ,IAAI,2BAA2B;AAAA,IACvD,SAAS,QAAQ,IAAI,sBAAsB;AAAA,IAC3C,YAAY,aAAa,QAAQ,IAAI,gBAAgB,MAAS,KAAM,IAAO;AAAA,IAC3E,kBAAkB,qBAAqB,QAAQ,IAAI,wBAAwB;AAAA,IAC3E,UAAU,aAAa,QAAQ,IAAI,kBAAkB,KAAK,IAAI,GAAG;AAAA,EACnE;AACA,SAAO;AACT;AAGO,MAAM,WAA2B,IAAI,MAAM,CAAC,GAAqB;AAAA,EACtE,IAAI,SAAS,MAAc;AACzB,WAAO,YAAY,EAAE,IAA4B;AAAA,EACnD;AACF,CAAC;AAQM,MAAM,SAAS;AAAA,EACpB,MAAM;AAAA,EACN;AAAA,EACA,aAAa;AACf;AAaO,SAAS,kBAAgC;AAC9C,QAAM,MAAM,SAAS;AACrB,SAAO;AAAA,IACL,QAAQ,CAAC,EAAE,IAAI,oBAAoB,IAAI;AAAA,IACvC,QAAQ,CAAC,CAAC,IAAI;AAAA,IACd,UAAU,CAAC,CAAC,IAAI;AAAA,IAChB,eAAe,CAAC,CAAC,eAAe;AAAA,EAClC;AACF;AAEO,SAAS,qBAAqB,YAAwC;AAC3E,QAAM,WAA+C;AAAA,IACnD,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,UAAU;AAAA,IACV,eAAe;AAAA,EACjB;AACA,SAAO,SAAS,UAAU;AAC5B;AAmBO,MAAM,cAAc;AAAA;AAAA,EAEzB,QAAQ;AAAA;AAAA,EAER,SAAS;AAAA;AAAA,EAET,QAAQ;AAAA;AAAA,EAER,gBAAgB,aAAa,QAAQ,IAAI,iBAAiB,IAAI,GAAG,EAAE;AACrE;AAEO,MAAM,UAAU;AAAA,EACrB,YAAY;AAAA,EACZ,mBAAmB;AAAA,E
ACnB,mBAAmB;AACrB;AAUO,MAAM,SAAS;AAAA,EACpB,YAAY;AAAA,EACZ,oBAAoB;AAAA,EACpB,iBAAiB;AAAA,EACjB,
|
|
4
|
+
"sourcesContent": ["/**\n * Consolidated configuration\n * All environment variables, constants, and LLM config in one place\n */\n\nimport { Logger } from 'mcp-use';\n\nimport { VERSION, PACKAGE_NAME, PACKAGE_DESCRIPTION } from '../version.js';\n\n// Import version utilities (not re-exported - use directly from version.ts if needed externally)\n\n// ============================================================================\n// Safe Integer Parsing Helper\n// ============================================================================\n\n/**\n * Safely parse an integer from environment variable with bounds checking\n * @param value - The string value to parse (from process.env)\n * @param defaultVal - Default value if parsing fails or value is undefined\n * @param min - Minimum allowed value (clamped if below)\n * @param max - Maximum allowed value (clamped if above)\n * @returns Parsed integer within bounds, or default value\n */\nfunction safeParseInt(\n value: string | undefined,\n defaultVal: number,\n min: number,\n max: number\n): number {\n const logger = Logger.get('config');\n\n if (!value) {\n return defaultVal;\n }\n \n const parsed = parseInt(value, 10);\n \n if (isNaN(parsed)) {\n logger.warn(`Invalid number \"${value}\", using default ${defaultVal}`);\n return defaultVal;\n }\n \n if (parsed < min) {\n logger.warn(`Value ${parsed} below minimum ${min}, clamping to ${min}`);\n return min;\n }\n \n if (parsed > max) {\n logger.warn(`Value ${parsed} above maximum ${max}, clamping to ${max}`);\n return max;\n }\n \n return parsed;\n}\n\n// ============================================================================\n// Reasoning Effort Validation\n// ============================================================================\n\nconst VALID_REASONING_EFFORTS = ['low', 'medium', 'high'] as const;\ntype ReasoningEffort = typeof VALID_REASONING_EFFORTS[number];\n\nfunction parseReasoningEffort(value: string | undefined): ReasoningEffort {\n if (value && 
VALID_REASONING_EFFORTS.includes(value as ReasoningEffort)) {\n return value as ReasoningEffort;\n }\n return 'high';\n}\n\n// ============================================================================\n// Environment Parsing\n// ============================================================================\n\ninterface EnvConfig {\n SCRAPER_API_KEY: string;\n SEARCH_API_KEY: string | undefined;\n REDDIT_CLIENT_ID: string | undefined;\n REDDIT_CLIENT_SECRET: string | undefined;\n}\n\nlet cachedEnv: EnvConfig | null = null;\n\nexport function resetEnvCache(): void {\n cachedEnv = null;\n cachedResearch = null;\n cachedLlmExtraction = null;\n}\n\nexport function parseEnv(): EnvConfig {\n if (cachedEnv) return cachedEnv;\n cachedEnv = {\n SCRAPER_API_KEY: process.env.SCRAPEDO_API_KEY || '',\n SEARCH_API_KEY: process.env.SERPER_API_KEY || undefined,\n REDDIT_CLIENT_ID: process.env.REDDIT_CLIENT_ID || undefined,\n REDDIT_CLIENT_SECRET: process.env.REDDIT_CLIENT_SECRET || undefined,\n };\n return cachedEnv;\n}\n\n// ============================================================================\n// Research API Configuration\n// ============================================================================\n\ninterface ResearchConfig {\n readonly BASE_URL: string;\n readonly MODEL: string;\n readonly FALLBACK_MODEL: string;\n readonly API_KEY: string;\n readonly TIMEOUT_MS: number;\n readonly REASONING_EFFORT: 'low' | 'medium' | 'high';\n readonly MAX_URLS: number;\n}\n\nlet cachedResearch: ResearchConfig | null = null;\n\nfunction getResearch(): ResearchConfig {\n if (cachedResearch) return cachedResearch;\n cachedResearch = {\n BASE_URL: process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1',\n MODEL: process.env.RESEARCH_MODEL || 'x-ai/grok-4-fast',\n FALLBACK_MODEL: process.env.RESEARCH_FALLBACK_MODEL || 'google/gemini-2.5-flash',\n API_KEY: process.env.OPENROUTER_API_KEY || '',\n TIMEOUT_MS: safeParseInt(process.env.API_TIMEOUT_MS, 1800000, 1000, 3600000),\n 
REASONING_EFFORT: parseReasoningEffort(process.env.DEFAULT_REASONING_EFFORT),\n MAX_URLS: safeParseInt(process.env.DEFAULT_MAX_URLS, 100, 10, 200),\n };\n return cachedResearch;\n}\n\n// Lazy proxy so existing code using RESEARCH.X still works\nexport const RESEARCH: ResearchConfig = new Proxy({} as ResearchConfig, {\n get(_target, prop: string) {\n return getResearch()[prop as keyof ResearchConfig];\n },\n});\n\n// ============================================================================\n// MCP Server Configuration\n// ============================================================================\n\n// Version is now automatically read from package.json via version.ts\n// No need to manually update version strings anymore!\nexport const SERVER = {\n NAME: PACKAGE_NAME,\n VERSION: VERSION,\n DESCRIPTION: PACKAGE_DESCRIPTION,\n} as const;\n\n// ============================================================================\n// Capability Detection (which features are available based on ENV)\n// ============================================================================\n\nexport interface Capabilities {\n reddit: boolean; // REDDIT_CLIENT_ID + REDDIT_CLIENT_SECRET\n search: boolean; // SERPER_API_KEY\n scraping: boolean; // SCRAPEDO_API_KEY\n llmExtraction: boolean; // LLM_EXTRACTION_API_KEY or OPENROUTER_API_KEY\n}\n\nexport function getCapabilities(): Capabilities {\n const env = parseEnv();\n return {\n reddit: !!(env.REDDIT_CLIENT_ID && env.REDDIT_CLIENT_SECRET),\n search: !!env.SEARCH_API_KEY,\n scraping: !!env.SCRAPER_API_KEY,\n llmExtraction: !!LLM_EXTRACTION.API_KEY,\n };\n}\n\nexport function getMissingEnvMessage(capability: keyof Capabilities): string {\n const messages: Record<keyof Capabilities, string> = {\n reddit: '\u274C **Reddit tools unavailable.** Set `REDDIT_CLIENT_ID` and `REDDIT_CLIENT_SECRET` to enable `get-reddit-post`.\\n\\n\uD83D\uDC49 Create a Reddit app at: https://www.reddit.com/prefs/apps (select \"script\" type)',\n search: '\u274C 
**Search unavailable.** Set `SERPER_API_KEY` to enable `web-search` and `search-reddit`.\\n\\n\uD83D\uDC49 Get your free API key at: https://serper.dev (2,500 free queries)',\n scraping: '\u274C **Web scraping unavailable.** Set `SCRAPEDO_API_KEY` to enable `scrape-links`.\\n\\n\uD83D\uDC49 Sign up at: https://scrape.do (1,000 free credits)',\n llmExtraction: '\u26A0\uFE0F **AI extraction disabled.** The `use_llm` and `what_to_extract` features require `LLM_EXTRACTION_API_KEY` or `OPENROUTER_API_KEY`.\\n\\nScraping will work but without intelligent content filtering.',\n };\n return messages[capability];\n}\n\n// ============================================================================\n// Scraper Configuration (Scrape.do implementation)\n// ============================================================================\n\n// ============================================================================\n// Concurrency Limits \u2014 tuned for 2-core deployments\n//\n// Bottleneck analysis (Node.js single-threaded event loop):\n// SEARCH: Pure I/O + tiny JSON parse (~5KB). High concurrency safe.\n// SCRAPER: I/O + Turndown HTML\u2192MD conversion (20-50ms/page, synchronous).\n// Too many concurrent = burst of responses blocks event loop.\n// 20 concurrent \u00D7 30ms avg = 600ms worst-case event loop stall.\n// REDDIT: I/O + moderate JSON. Reddit rate-limits at ~60 req/min.\n// LLM: I/O-only locally, but remote inference uses multiple cores per\n// request. Default 10 keeps remote server responsive.\n// ============================================================================\n\nexport const CONCURRENCY = {\n /** Serper API \u2014 tiny JSON responses, pure I/O, no CPU cost */\n SEARCH: 30,\n /** Scrape.do \u2014 HTML responses (50-500KB), Turndown conversion is CPU work */\n SCRAPER: 20,\n /** Reddit API \u2014 moderate payloads, aggressive rate limiting (60 req/min) */\n REDDIT: 10,\n /** LLM extraction \u2014 remote inference bottleneck. 
Tune via LLM_CONCURRENCY env */\n LLM_EXTRACTION: safeParseInt(process.env.LLM_CONCURRENCY, 10, 1, 50),\n} as const;\n\nexport const SCRAPER = {\n BATCH_SIZE: 30,\n EXTRACTION_PREFIX: 'Extract from document only \u2014 never hallucinate or add external knowledge.',\n EXTRACTION_SUFFIX: 'First line = content, not preamble. No confirmation messages.',\n} as const;\n\n// ============================================================================\n// Research Compression Prefix/Suffix\n// ============================================================================\n\n// ============================================================================\n// Reddit Configuration\n// ============================================================================\n\nexport const REDDIT = {\n BATCH_SIZE: 10,\n MAX_WORDS_PER_POST: 50_000,\n MAX_WORDS_TOTAL: 500_000,\n MIN_POSTS: 1,\n MAX_POSTS: 50,\n RETRY_COUNT: 5,\n RETRY_DELAYS: [2000, 4000, 8000, 16000, 32000] as const,\n EXTRACTION_SUFFIX: `\n---\n\n\u26A0\uFE0F IMPORTANT: Extract and synthesize the key insights, opinions, and recommendations from these Reddit discussions. Focus on:\n- Common themes and consensus across posts\n- Specific recommendations with context\n- Contrasting viewpoints and debates\n- Real-world experiences and lessons learned\n- Technical details and implementation tips\n\nBe comprehensive but concise. 
Prioritize actionable insights.\n\n---`,\n} as const;\n\n// ============================================================================\n// CTR Weights for URL Ranking (inspired from CTR research)\n// ============================================================================\n\nexport const CTR_WEIGHTS: Record<number, number> = {\n 1: 100.00,\n 2: 60.00,\n 3: 48.89,\n 4: 33.33,\n 5: 28.89,\n 6: 26.44,\n 7: 24.44,\n 8: 17.78,\n 9: 13.33,\n 10: 12.56,\n} as const;\n\n// ============================================================================\n// LLM Extraction Model (uses OPENROUTER for scrape-links AI extraction)\n// ============================================================================\n\ntype LlmReasoningEffort = ReasoningEffort | 'none';\n\nfunction parseLlmReasoningEffort(value: string | undefined): LlmReasoningEffort {\n if (value === 'none') return 'none';\n if (value && VALID_REASONING_EFFORTS.includes(value as ReasoningEffort)) {\n return value as ReasoningEffort;\n }\n return 'low';\n}\n\ninterface LlmExtractionConfig {\n readonly MODEL: string;\n readonly BASE_URL: string;\n readonly API_KEY: string;\n readonly MAX_TOKENS: number;\n readonly REASONING_EFFORT: LlmReasoningEffort;\n}\n\nlet cachedLlmExtraction: LlmExtractionConfig | null = null;\n\nfunction getLlmExtraction(): LlmExtractionConfig {\n if (cachedLlmExtraction) return cachedLlmExtraction;\n cachedLlmExtraction = {\n MODEL: process.env.LLM_EXTRACTION_MODEL || 'gpt-5.4-mini',\n BASE_URL: process.env.LLM_EXTRACTION_BASE_URL || process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1',\n API_KEY: process.env.LLM_EXTRACTION_API_KEY || process.env.OPENROUTER_API_KEY || '',\n MAX_TOKENS: 8000,\n REASONING_EFFORT: parseLlmReasoningEffort(process.env.LLM_EXTRACTION_REASONING),\n };\n return cachedLlmExtraction;\n}\n\nexport const LLM_EXTRACTION: LlmExtractionConfig = new Proxy({} as LlmExtractionConfig, {\n get(_target, prop: string) {\n return getLlmExtraction()[prop as keyof 
LlmExtractionConfig];\n },\n});\n\n"],
|
|
5
|
+
"mappings": "AAKA,SAAS,cAAc;AAEvB,SAAS,SAAS,cAAc,2BAA2B;AAgB3D,SAAS,aACP,OACA,YACA,KACA,KACQ;AACR,QAAM,SAAS,OAAO,IAAI,QAAQ;AAElC,MAAI,CAAC,OAAO;AACV,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,SAAS,OAAO,EAAE;AAEjC,MAAI,MAAM,MAAM,GAAG;AACjB,WAAO,KAAK,mBAAmB,KAAK,oBAAoB,UAAU,EAAE;AACpE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAMA,MAAM,0BAA0B,CAAC,OAAO,UAAU,MAAM;AAGxD,SAAS,qBAAqB,OAA4C;AACxE,MAAI,SAAS,wBAAwB,SAAS,KAAwB,GAAG;AACvE,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAaA,IAAI,YAA8B;AAE3B,SAAS,gBAAsB;AACpC,cAAY;AACZ,mBAAiB;AACjB,wBAAsB;AACxB;AAEO,SAAS,WAAsB;AACpC,MAAI,UAAW,QAAO;AACtB,cAAY;AAAA,IACV,iBAAiB,QAAQ,IAAI,oBAAoB;AAAA,IACjD,gBAAgB,QAAQ,IAAI,kBAAkB;AAAA,IAC9C,kBAAkB,QAAQ,IAAI,oBAAoB;AAAA,IAClD,sBAAsB,QAAQ,IAAI,wBAAwB;AAAA,EAC5D;AACA,SAAO;AACT;AAgBA,IAAI,iBAAwC;AAE5C,SAAS,cAA8B;AACrC,MAAI,eAAgB,QAAO;AAC3B,mBAAiB;AAAA,IACf,UAAU,QAAQ,IAAI,uBAAuB;AAAA,IAC7C,OAAO,QAAQ,IAAI,kBAAkB;AAAA,IACrC,gBAAgB,QAAQ,IAAI,2BAA2B;AAAA,IACvD,SAAS,QAAQ,IAAI,sBAAsB;AAAA,IAC3C,YAAY,aAAa,QAAQ,IAAI,gBAAgB,MAAS,KAAM,IAAO;AAAA,IAC3E,kBAAkB,qBAAqB,QAAQ,IAAI,wBAAwB;AAAA,IAC3E,UAAU,aAAa,QAAQ,IAAI,kBAAkB,KAAK,IAAI,GAAG;AAAA,EACnE;AACA,SAAO;AACT;AAGO,MAAM,WAA2B,IAAI,MAAM,CAAC,GAAqB;AAAA,EACtE,IAAI,SAAS,MAAc;AACzB,WAAO,YAAY,EAAE,IAA4B;AAAA,EACnD;AACF,CAAC;AAQM,MAAM,SAAS;AAAA,EACpB,MAAM;AAAA,EACN;AAAA,EACA,aAAa;AACf;AAaO,SAAS,kBAAgC;AAC9C,QAAM,MAAM,SAAS;AACrB,SAAO;AAAA,IACL,QAAQ,CAAC,EAAE,IAAI,oBAAoB,IAAI;AAAA,IACvC,QAAQ,CAAC,CAAC,IAAI;AAAA,IACd,UAAU,CAAC,CAAC,IAAI;AAAA,IAChB,eAAe,CAAC,CAAC,eAAe;AAAA,EAClC;AACF;AAEO,SAAS,qBAAqB,YAAwC;AAC3E,QAAM,WAA+C;AAAA,IACnD,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,UAAU;AAAA,IACV,eAAe;AAAA,EACjB;AACA,SAAO,SAAS,UAAU;AAC5B;AAmBO,MAAM,cAAc;AAAA;AAAA,EAEzB,QAAQ;AAAA;AAAA,EAER,SAAS;AAAA;AAAA,EAET,QAAQ;AAAA;AAAA,EAER,gBAAgB,aAAa,QAAQ,IAAI,iBAAiB,IAAI,GAAG,EAAE;AACrE;AAEO,MAAM,UAAU;AAAA,EACrB,YAAY;AAAA,EACZ,mBAAmB;AAAA,E
ACnB,mBAAmB;AACrB;AAUO,MAAM,SAAS;AAAA,EACpB,YAAY;AAAA,EACZ,oBAAoB;AAAA,EACpB,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,WAAW;AAAA,EACX,aAAa;AAAA,EACb,cAAc,CAAC,KAAM,KAAM,KAAM,MAAO,IAAK;AAAA,EAC7C,mBAAmB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAarB;AAMO,MAAM,cAAsC;AAAA,EACjD,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,IAAI;AACN;AAQA,SAAS,wBAAwB,OAA+C;AAC9E,MAAI,UAAU,OAAQ,QAAO;AAC7B,MAAI,SAAS,wBAAwB,SAAS,KAAwB,GAAG;AACvE,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAUA,IAAI,sBAAkD;AAEtD,SAAS,mBAAwC;AAC/C,MAAI,oBAAqB,QAAO;AAChC,wBAAsB;AAAA,IACpB,OAAO,QAAQ,IAAI,wBAAwB;AAAA,IAC3C,UAAU,QAAQ,IAAI,2BAA2B,QAAQ,IAAI,uBAAuB;AAAA,IACpF,SAAS,QAAQ,IAAI,0BAA0B,QAAQ,IAAI,sBAAsB;AAAA,IACjF,YAAY;AAAA,IACZ,kBAAkB,wBAAwB,QAAQ,IAAI,wBAAwB;AAAA,EAChF;AACA,SAAO;AACT;AAEO,MAAM,iBAAsC,IAAI,MAAM,CAAC,GAA0B;AAAA,EACtF,IAAI,SAAS,MAAc;AACzB,WAAO,iBAAiB,EAAE,IAAiC;AAAA,EAC7D;AACF,CAAC;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
|
@@ -1,43 +1,35 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
const searchRedditParamsSchema = z.object({
|
|
3
3
|
queries: z.array(
|
|
4
|
-
z.string().min(1, { message: "search-reddit: Query cannot be empty" }).describe('A
|
|
5
|
-
).min(1, { message: "search-reddit: At least 1 query is required" }).max(50, { message: "search-reddit: Maximum 50 queries allowed" }).describe(
|
|
6
|
-
'Array of 1\u201350 Reddit search queries. RECOMMENDED: 3\u20137 for solid consensus ranking (results are aggregated across queries and URLs appearing in multiple searches are flagged as high-confidence). Each query should target a different angle: direct topic, "best of" lists, comparisons, pain points, subreddit-specific (e.g., "r/programming topic"), or year-specific. Single-query lookups work but produce no consensus signal. More queries = better signal-to-noise.'
|
|
7
|
-
),
|
|
8
|
-
date_after: z.string().optional().describe("Optional lower date bound in YYYY-MM-DD format.")
|
|
4
|
+
z.string().min(1, { message: "search-reddit: Query cannot be empty" }).describe('A Reddit search query. Do NOT add "site:reddit.com" \u2014 it is appended automatically.')
|
|
5
|
+
).min(1, { message: "search-reddit: At least 1 query is required" }).max(50, { message: "search-reddit: Maximum 50 queries allowed" }).describe('Array of 1-50 search queries. Each query gets "site:reddit.com" appended and is sent to Serper as a standard Google search. Returns a flat list of deduplicated Reddit URLs. Use with get-reddit-post to fetch full content and extract insights.')
|
|
9
6
|
}).strict();
|
|
10
7
|
const getRedditPostParamsSchema = z.object({
|
|
11
8
|
urls: z.array(
|
|
12
|
-
z.string().url({ message: "get-reddit-post: Each URL must be valid" }).describe('A full Reddit post URL (e.g., "https://www.reddit.com/r/subreddit/comments/id/title/").
|
|
13
|
-
).min(1, { message: "get-reddit-post: At least 1 Reddit post URL is required" }).max(50, { message: "get-reddit-post: Maximum 50 Reddit post URLs allowed" }).describe("Array of 1
|
|
14
|
-
fetch_comments: z.boolean().default(true).describe("Fetch threaded comment trees for each post. Defaults to true. Comments include author, score, OP markers, and nested replies
|
|
15
|
-
|
|
16
|
-
what_to_extract: z.string().max(1e3, { message: "get-reddit-post: what_to_extract is too long" }).optional().describe("Optional extraction instructions used only when use_llm=true.")
|
|
9
|
+
z.string().url({ message: "get-reddit-post: Each URL must be valid" }).describe('A full Reddit post URL (e.g., "https://www.reddit.com/r/subreddit/comments/id/title/").')
|
|
10
|
+
).min(1, { message: "get-reddit-post: At least 1 Reddit post URL is required" }).max(50, { message: "get-reddit-post: Maximum 50 Reddit post URLs allowed" }).describe("Array of 1-50 Reddit post URLs. Each post is fetched with full comment trees, then the LLM extracts insights per what_to_extract. Best used after search-reddit."),
|
|
11
|
+
fetch_comments: z.boolean().default(true).describe("Fetch threaded comment trees for each post. Defaults to true. Comments include author, score, OP markers, and nested replies. Set false only when you need post titles/selftext without community discussion."),
|
|
12
|
+
what_to_extract: z.string({ error: "get-reddit-post: what_to_extract is required" }).min(5, { message: "get-reddit-post: what_to_extract must be at least 5 characters" }).max(1e3, { message: "get-reddit-post: what_to_extract is too long (max 1000 characters)" }).describe('REQUIRED. Extraction instructions for the LLM. Describes what insights, opinions, or data to pull from each post and its comments. Use pipe separators for multiple targets: "Extract recommendations | pain points | consensus on best practices | specific tools mentioned".')
|
|
17
13
|
}).strict();
|
|
18
14
|
const searchRedditOutputSchema = z.object({
|
|
19
|
-
content: z.string().describe("
|
|
15
|
+
content: z.string().describe("Newline-separated list of unique Reddit URLs discovered across all queries."),
|
|
20
16
|
metadata: z.object({
|
|
21
|
-
query_count: z.number().int().nonnegative().describe("Number of
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
}).strict().describe("Structured metadata about the Reddit search batch.")
|
|
17
|
+
query_count: z.number().int().nonnegative().describe("Number of queries executed."),
|
|
18
|
+
total_urls: z.number().int().nonnegative().describe("Total unique Reddit URLs returned.")
|
|
19
|
+
}).strict().describe("Metadata about the Reddit URL search.")
|
|
25
20
|
}).strict();
|
|
26
21
|
const getRedditPostOutputSchema = z.object({
|
|
27
|
-
content: z.string().describe("
|
|
22
|
+
content: z.string().describe("LLM-synthesized extraction from Reddit posts and comments, structured per what_to_extract instructions."),
|
|
28
23
|
metadata: z.object({
|
|
29
24
|
total_urls: z.number().int().nonnegative().describe("Total number of Reddit post URLs processed."),
|
|
30
25
|
successful: z.number().int().nonnegative().describe("Number of posts fetched successfully."),
|
|
31
26
|
failed: z.number().int().nonnegative().describe("Number of post fetches that failed."),
|
|
32
27
|
fetch_comments: z.boolean().describe("Whether comments were fetched for each post."),
|
|
33
|
-
max_words_per_post: z.number().int().nonnegative().describe("Word budget per post for comment output."),
|
|
34
28
|
total_words_used: z.number().int().nonnegative().describe("Total words used across all posts."),
|
|
35
|
-
|
|
36
|
-
llm_available: z.boolean().describe("Whether LLM extraction was actually available at runtime."),
|
|
37
|
-
llm_failures: z.number().int().nonnegative().describe("Count of posts where optional LLM extraction failed or was skipped."),
|
|
29
|
+
llm_failures: z.number().int().nonnegative().describe("Count of posts where LLM extraction failed (raw content returned instead)."),
|
|
38
30
|
total_batches: z.number().int().nonnegative().describe("Number of Reddit API batches executed."),
|
|
39
31
|
rate_limit_hits: z.number().int().nonnegative().describe("Observed Reddit API rate-limit retries during the batch.")
|
|
40
|
-
}).strict().describe("
|
|
32
|
+
}).strict().describe("Metadata about the Reddit post fetch and extraction.")
|
|
41
33
|
}).strict();
|
|
42
34
|
export {
|
|
43
35
|
getRedditPostOutputSchema,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/schemas/reddit.ts"],
|
|
4
|
-
"sourcesContent": ["import { z } from 'zod';\n\n// ============================================================================\n// search-reddit \u2014 input schema\n// ============================================================================\n\nexport const searchRedditParamsSchema = z.object({\n queries: z\n .array(\n z\n .string()\n .min(1, { message: 'search-reddit: Query cannot be empty' })\n .describe('A
|
|
5
|
-
"mappings": "AAAA,SAAS,SAAS;AAMX,MAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,SAAS,EACN;AAAA,IACC,EACG,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,uCAAuC,CAAC,EAC1D,SAAS,
|
|
4
|
+
"sourcesContent": ["import { z } from 'zod';\n\n// ============================================================================\n// search-reddit \u2014 input schema\n// ============================================================================\n\nexport const searchRedditParamsSchema = z.object({\n queries: z\n .array(\n z\n .string()\n .min(1, { message: 'search-reddit: Query cannot be empty' })\n .describe('A Reddit search query. Do NOT add \"site:reddit.com\" \u2014 it is appended automatically.'),\n )\n .min(1, { message: 'search-reddit: At least 1 query is required' })\n .max(50, { message: 'search-reddit: Maximum 50 queries allowed' })\n .describe('Array of 1-50 search queries. Each query gets \"site:reddit.com\" appended and is sent to Serper as a standard Google search. Returns a flat list of deduplicated Reddit URLs. Use with get-reddit-post to fetch full content and extract insights.'),\n}).strict();\n\nexport type SearchRedditParams = z.infer<typeof searchRedditParamsSchema>;\n\n// ============================================================================\n// get-reddit-post \u2014 input schema\n// ============================================================================\n\nexport const getRedditPostParamsSchema = z.object({\n urls: z\n .array(\n z\n .string()\n .url({ message: 'get-reddit-post: Each URL must be valid' })\n .describe('A full Reddit post URL (e.g., \"https://www.reddit.com/r/subreddit/comments/id/title/\").'),\n )\n .min(1, { message: 'get-reddit-post: At least 1 Reddit post URL is required' })\n .max(50, { message: 'get-reddit-post: Maximum 50 Reddit post URLs allowed' })\n .describe('Array of 1-50 Reddit post URLs. Each post is fetched with full comment trees, then the LLM extracts insights per what_to_extract. Best used after search-reddit.'),\n fetch_comments: z\n .boolean()\n .default(true)\n .describe('Fetch threaded comment trees for each post. Defaults to true. 
Comments include author, score, OP markers, and nested replies. Set false only when you need post titles/selftext without community discussion.'),\n what_to_extract: z\n .string({ error: 'get-reddit-post: what_to_extract is required' })\n .min(5, { message: 'get-reddit-post: what_to_extract must be at least 5 characters' })\n .max(1000, { message: 'get-reddit-post: what_to_extract is too long (max 1000 characters)' })\n .describe('REQUIRED. Extraction instructions for the LLM. Describes what insights, opinions, or data to pull from each post and its comments. Use pipe separators for multiple targets: \"Extract recommendations | pain points | consensus on best practices | specific tools mentioned\".'),\n}).strict();\n\nexport type GetRedditPostParams = z.infer<typeof getRedditPostParamsSchema>;\n\n// ============================================================================\n// search-reddit \u2014 output schema\n// ============================================================================\n\nexport const searchRedditOutputSchema = z.object({\n content: z\n .string()\n .describe('Newline-separated list of unique Reddit URLs discovered across all queries.'),\n metadata: z.object({\n query_count: z\n .number()\n .int()\n .nonnegative()\n .describe('Number of queries executed.'),\n total_urls: z\n .number()\n .int()\n .nonnegative()\n .describe('Total unique Reddit URLs returned.'),\n }).strict().describe('Metadata about the Reddit URL search.'),\n}).strict();\n\nexport type SearchRedditOutput = z.infer<typeof searchRedditOutputSchema>;\n\n// ============================================================================\n// get-reddit-post \u2014 output schema\n// ============================================================================\n\nexport const getRedditPostOutputSchema = z.object({\n content: z\n .string()\n .describe('LLM-synthesized extraction from Reddit posts and comments, structured per what_to_extract instructions.'),\n metadata: z.object({\n 
total_urls: z\n .number()\n .int()\n .nonnegative()\n .describe('Total number of Reddit post URLs processed.'),\n successful: z\n .number()\n .int()\n .nonnegative()\n .describe('Number of posts fetched successfully.'),\n failed: z\n .number()\n .int()\n .nonnegative()\n .describe('Number of post fetches that failed.'),\n fetch_comments: z\n .boolean()\n .describe('Whether comments were fetched for each post.'),\n total_words_used: z\n .number()\n .int()\n .nonnegative()\n .describe('Total words used across all posts.'),\n llm_failures: z\n .number()\n .int()\n .nonnegative()\n .describe('Count of posts where LLM extraction failed (raw content returned instead).'),\n total_batches: z\n .number()\n .int()\n .nonnegative()\n .describe('Number of Reddit API batches executed.'),\n rate_limit_hits: z\n .number()\n .int()\n .nonnegative()\n .describe('Observed Reddit API rate-limit retries during the batch.'),\n }).strict().describe('Metadata about the Reddit post fetch and extraction.'),\n}).strict();\n\nexport type GetRedditPostOutput = z.infer<typeof getRedditPostOutputSchema>;\n"],
|
|
5
|
+
"mappings": "AAAA,SAAS,SAAS;AAMX,MAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,SAAS,EACN;AAAA,IACC,EACG,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,uCAAuC,CAAC,EAC1D,SAAS,0FAAqF;AAAA,EACnG,EACC,IAAI,GAAG,EAAE,SAAS,8CAA8C,CAAC,EACjE,IAAI,IAAI,EAAE,SAAS,4CAA4C,CAAC,EAChE,SAAS,mPAAmP;AACjQ,CAAC,EAAE,OAAO;AAQH,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,MAAM,EACH;AAAA,IACC,EACG,OAAO,EACP,IAAI,EAAE,SAAS,0CAA0C,CAAC,EAC1D,SAAS,yFAAyF;AAAA,EACvG,EACC,IAAI,GAAG,EAAE,SAAS,0DAA0D,CAAC,EAC7E,IAAI,IAAI,EAAE,SAAS,uDAAuD,CAAC,EAC3E,SAAS,kKAAkK;AAAA,EAC9K,gBAAgB,EACb,QAAQ,EACR,QAAQ,IAAI,EACZ,SAAS,+MAA+M;AAAA,EAC3N,iBAAiB,EACd,OAAO,EAAE,OAAO,+CAA+C,CAAC,EAChE,IAAI,GAAG,EAAE,SAAS,iEAAiE,CAAC,EACpF,IAAI,KAAM,EAAE,SAAS,qEAAqE,CAAC,EAC3F,SAAS,gRAAgR;AAC9R,CAAC,EAAE,OAAO;AAQH,MAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,SAAS,EACN,OAAO,EACP,SAAS,6EAA6E;AAAA,EACzF,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EACV,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,6BAA6B;AAAA,IACzC,YAAY,EACT,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,oCAAoC;AAAA,EAClD,CAAC,EAAE,OAAO,EAAE,SAAS,uCAAuC;AAC9D,CAAC,EAAE,OAAO;AAQH,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,SAAS,EACN,OAAO,EACP,SAAS,yGAAyG;AAAA,EACrH,UAAU,EAAE,OAAO;AAAA,IACjB,YAAY,EACT,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,6CAA6C;AAAA,IACzD,YAAY,EACT,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,uCAAuC;AAAA,IACnD,QAAQ,EACL,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,qCAAqC;AAAA,IACjD,gBAAgB,EACb,QAAQ,EACR,SAAS,8CAA8C;AAAA,IAC1D,kBAAkB,EACf,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,oCAAoC;AAAA,IAChD,cAAc,EACX,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,4EAA4E;AAAA,IACxF,eAAe,EACZ,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,wCAAwC;AAAA,IACpD,iBAAiB,EACd,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,0DAA0D;AAAA,EACxE,CAAC,EAAE,OAAO,EAAE,SAAS,sDAAsD;AAC7E,CAAC,EAAE,OAAO;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
|
@@ -3,17 +3,13 @@ const urlSchema = z.string({ error: "scrape-links: URL is required" }).url({ mes
|
|
|
3
3
|
(url) => url.startsWith("http://") || url.startsWith("https://"),
|
|
4
4
|
{ message: "scrape-links: URL must use http:// or https:// protocol" }
|
|
5
5
|
).describe("A fully-qualified HTTP or HTTPS URL to fetch and extract content from.");
|
|
6
|
-
const
|
|
7
|
-
urls: z.array(urlSchema, {
|
|
8
|
-
|
|
9
|
-
}).min(
|
|
10
|
-
|
|
11
|
-
use_llm: z.boolean({ error: "scrape-links: use_llm must be a boolean" }).default(true).describe("AI extraction enabled by default (requires OPENROUTER_API_KEY). Auto-filters nav/ads/footers, extracts ONLY what you specify. Set false only for raw HTML debugging."),
|
|
12
|
-
what_to_extract: z.string().max(1e3, { message: "scrape-links: Extraction instructions too long (max 1000 characters)" }).optional().describe('Extraction instructions for AI. Will be wrapped with compression prefix+suffix automatically. Formula: "Extract [target1] | [target2] | [target3] with focus on [aspect1], [aspect2]". Min 3 targets with | separator. Be specific (pricing tiers not pricing). Aim 5-10 targets.')
|
|
13
|
-
};
|
|
14
|
-
const scrapeLinksParamsSchema = z.object(scrapeLinksParamsShape).strict();
|
|
6
|
+
const scrapeLinksParamsSchema = z.object({
|
|
7
|
+
urls: z.array(urlSchema, { error: "scrape-links: URLs must be an array" }).min(1, { message: "scrape-links: At least 1 URL is required" }).max(50, { message: "scrape-links: Maximum 50 URLs allowed per request" }).describe("URLs to scrape (1-50). Token budget (32K) is split across URLs: 3 URLs get ~10K tokens each (deep), 10 get ~3K (balanced), 50 get ~640 (scan). Each page is scraped, cleaned, and processed by the LLM per what_to_extract."),
|
|
8
|
+
timeout: z.number({ error: "scrape-links: Timeout must be a number" }).min(5, { message: "scrape-links: Timeout must be at least 5 seconds" }).max(120, { message: "scrape-links: Timeout cannot exceed 120 seconds" }).default(30).describe("Timeout in seconds for each URL."),
|
|
9
|
+
what_to_extract: z.string({ error: "scrape-links: what_to_extract is required" }).min(5, { message: "scrape-links: what_to_extract must be at least 5 characters" }).max(1e3, { message: "scrape-links: Extraction instructions too long (max 1000 characters)" }).describe('REQUIRED. Extraction instructions for the LLM. The LLM processes each scraped page and extracts ONLY what you specify. Formula: "Extract [target1] | [target2] | [target3] with focus on [aspect]". Be specific: "pricing tiers | monthly vs annual cost | free tier limits" not just "pricing".')
|
|
10
|
+
}).strict();
|
|
15
11
|
const scrapeLinksOutputSchema = z.object({
|
|
16
|
-
content: z.string().describe("
|
|
12
|
+
content: z.string().describe("LLM-extracted content from scraped pages, structured per what_to_extract instructions."),
|
|
17
13
|
metadata: z.object({
|
|
18
14
|
total_urls: z.number().int().nonnegative().describe("Total number of input URLs processed."),
|
|
19
15
|
successful: z.number().int().nonnegative().describe("Number of URLs that were fetched successfully."),
|
|
@@ -23,7 +19,7 @@ const scrapeLinksOutputSchema = z.object({
|
|
|
23
19
|
tokens_per_url: z.number().int().nonnegative().optional().describe("Allocated LLM token budget per successfully scraped URL."),
|
|
24
20
|
total_token_budget: z.number().int().nonnegative().optional().describe("Overall token budget available for extraction."),
|
|
25
21
|
batches_processed: z.number().int().nonnegative().optional().describe("Number of scrape batches executed.")
|
|
26
|
-
}).strict().describe("Structured metadata about the scrape batch.")
|
|
22
|
+
}).strict().describe("Structured metadata about the scrape and extraction batch.")
|
|
27
23
|
}).strict();
|
|
28
24
|
export {
|
|
29
25
|
scrapeLinksOutputSchema,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/schemas/scrape-links.ts"],
|
|
4
|
-
"sourcesContent": ["import { z } from 'zod';\n\n// URL schema with protocol validation\nconst urlSchema = z\n .string({ error: 'scrape-links: URL is required' })\n .url({ message: 'scrape-links: Invalid URL format' })\n .refine(\n url => url.startsWith('http://') || url.startsWith('https://'),\n { message: 'scrape-links: URL must use http:// or https:// protocol' }\n )\n .describe('A fully-qualified HTTP or HTTPS URL to fetch and extract content from.');\n\n// Input schema for scrape-links tool\
|
|
5
|
-
"mappings": "AAAA,SAAS,SAAS;AAGlB,MAAM,YAAY,EACf,OAAO,EAAE,OAAO,gCAAgC,CAAC,EACjD,IAAI,EAAE,SAAS,mCAAmC,CAAC,EACnD;AAAA,EACC,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU;AAAA,EAC7D,EAAE,SAAS,0DAA0D;AACvE,EACC,SAAS,wEAAwE;
|
|
4
|
+
"sourcesContent": ["import { z } from 'zod';\n\n// URL schema with protocol validation\nconst urlSchema = z\n .string({ error: 'scrape-links: URL is required' })\n .url({ message: 'scrape-links: Invalid URL format' })\n .refine(\n url => url.startsWith('http://') || url.startsWith('https://'),\n { message: 'scrape-links: URL must use http:// or https:// protocol' }\n )\n .describe('A fully-qualified HTTP or HTTPS URL to fetch and extract content from.');\n\n// Input schema for scrape-links tool\nexport const scrapeLinksParamsSchema = z.object({\n urls: z\n .array(urlSchema, { error: 'scrape-links: URLs must be an array' })\n .min(1, { message: 'scrape-links: At least 1 URL is required' })\n .max(50, { message: 'scrape-links: Maximum 50 URLs allowed per request' })\n .describe('URLs to scrape (1-50). Token budget (32K) is split across URLs: 3 URLs get ~10K tokens each (deep), 10 get ~3K (balanced), 50 get ~640 (scan). Each page is scraped, cleaned, and processed by the LLM per what_to_extract.'),\n timeout: z\n .number({ error: 'scrape-links: Timeout must be a number' })\n .min(5, { message: 'scrape-links: Timeout must be at least 5 seconds' })\n .max(120, { message: 'scrape-links: Timeout cannot exceed 120 seconds' })\n .default(30)\n .describe('Timeout in seconds for each URL.'),\n what_to_extract: z\n .string({ error: 'scrape-links: what_to_extract is required' })\n .min(5, { message: 'scrape-links: what_to_extract must be at least 5 characters' })\n .max(1000, { message: 'scrape-links: Extraction instructions too long (max 1000 characters)' })\n .describe('REQUIRED. Extraction instructions for the LLM. The LLM processes each scraped page and extracts ONLY what you specify. Formula: \"Extract [target1] | [target2] | [target3] with focus on [aspect]\". 
Be specific: \"pricing tiers | monthly vs annual cost | free tier limits\" not just \"pricing\".'),\n}).strict();\n\nexport type ScrapeLinksParams = z.infer<typeof scrapeLinksParamsSchema>;\n\nexport const scrapeLinksOutputSchema = z.object({\n content: z\n .string()\n .describe('LLM-extracted content from scraped pages, structured per what_to_extract instructions.'),\n metadata: z.object({\n total_urls: z\n .number()\n .int()\n .nonnegative()\n .describe('Total number of input URLs processed.'),\n successful: z\n .number()\n .int()\n .nonnegative()\n .describe('Number of URLs that were fetched successfully.'),\n failed: z\n .number()\n .int()\n .nonnegative()\n .describe('Number of URLs that failed validation or scraping.'),\n total_credits: z\n .number()\n .int()\n .nonnegative()\n .describe('Total external scraping credits consumed.'),\n execution_time_ms: z\n .number()\n .int()\n .nonnegative()\n .describe('Elapsed execution time in milliseconds.'),\n tokens_per_url: z\n .number()\n .int()\n .nonnegative()\n .optional()\n .describe('Allocated LLM token budget per successfully scraped URL.'),\n total_token_budget: z\n .number()\n .int()\n .nonnegative()\n .optional()\n .describe('Overall token budget available for extraction.'),\n batches_processed: z\n .number()\n .int()\n .nonnegative()\n .optional()\n .describe('Number of scrape batches executed.'),\n }).strict().describe('Structured metadata about the scrape and extraction batch.'),\n}).strict();\n\nexport type ScrapeLinksOutput = z.infer<typeof scrapeLinksOutputSchema>;\n"],
|
|
5
|
+
"mappings": "AAAA,SAAS,SAAS;AAGlB,MAAM,YAAY,EACf,OAAO,EAAE,OAAO,gCAAgC,CAAC,EACjD,IAAI,EAAE,SAAS,mCAAmC,CAAC,EACnD;AAAA,EACC,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU;AAAA,EAC7D,EAAE,SAAS,0DAA0D;AACvE,EACC,SAAS,wEAAwE;AAG7E,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,MAAM,EACH,MAAM,WAAW,EAAE,OAAO,sCAAsC,CAAC,EACjE,IAAI,GAAG,EAAE,SAAS,2CAA2C,CAAC,EAC9D,IAAI,IAAI,EAAE,SAAS,oDAAoD,CAAC,EACxE,SAAS,6NAA6N;AAAA,EACzO,SAAS,EACN,OAAO,EAAE,OAAO,yCAAyC,CAAC,EAC1D,IAAI,GAAG,EAAE,SAAS,mDAAmD,CAAC,EACtE,IAAI,KAAK,EAAE,SAAS,kDAAkD,CAAC,EACvE,QAAQ,EAAE,EACV,SAAS,kCAAkC;AAAA,EAC9C,iBAAiB,EACd,OAAO,EAAE,OAAO,4CAA4C,CAAC,EAC7D,IAAI,GAAG,EAAE,SAAS,8DAA8D,CAAC,EACjF,IAAI,KAAM,EAAE,SAAS,uEAAuE,CAAC,EAC7F,SAAS,kSAAkS;AAChT,CAAC,EAAE,OAAO;AAIH,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,SAAS,EACN,OAAO,EACP,SAAS,wFAAwF;AAAA,EACpG,UAAU,EAAE,OAAO;AAAA,IACjB,YAAY,EACT,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,uCAAuC;AAAA,IACnD,YAAY,EACT,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,gDAAgD;AAAA,IAC5D,QAAQ,EACL,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,oDAAoD;AAAA,IAChE,eAAe,EACZ,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,2CAA2C;AAAA,IACvD,mBAAmB,EAChB,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,yCAAyC;AAAA,IACrD,gBAAgB,EACb,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,EACT,SAAS,0DAA0D;AAAA,IACtE,oBAAoB,EACjB,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,EACT,SAAS,gDAAgD;AAAA,IAC5D,mBAAmB,EAChB,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,EACT,SAAS,oCAAoC;AAAA,EAClD,CAAC,EAAE,OAAO,EAAE,SAAS,4DAA4D;AACnF,CAAC,EAAE,OAAO;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
|
@@ -2,23 +2,23 @@ import { z } from "zod";
|
|
|
2
2
|
const keywordSchema = z.string({ error: "web-search: Keyword is required" }).min(1, { message: "web-search: Keyword cannot be empty" }).max(500, { message: "web-search: Keyword too long (max 500 characters)" }).refine(
|
|
3
3
|
(k) => k.trim().length > 0,
|
|
4
4
|
{ message: "web-search: Keyword cannot be whitespace only" }
|
|
5
|
-
).describe('A single Google search query (1
|
|
6
|
-
const
|
|
7
|
-
error: "web-search: Keywords must be an array"
|
|
8
|
-
}).min(
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
};
|
|
12
|
-
const webSearchParamsSchema = z.object(webSearchParamsShape).strict();
|
|
5
|
+
).describe('A single Google search query (1-500 chars). Each keyword runs as a separate parallel search. Use varied angles: direct topic, comparisons, "best of" lists, year-specific, site-specific (e.g., "site:github.com topic").');
|
|
6
|
+
const webSearchParamsSchema = z.object({
|
|
7
|
+
keywords: z.array(keywordSchema, { error: "web-search: Keywords must be an array" }).min(1, { message: "web-search: At least 1 keyword required" }).max(100, { message: "web-search: Maximum 100 keywords allowed per request" }).describe("Array of 1-100 search keywords. Each runs as a separate Google search in parallel. Results are aggregated, deduplicated, and ranked by CTR-weighted consensus. RECOMMENDED: 3-7 keywords for solid consensus, up to 20 for thorough coverage."),
|
|
8
|
+
objective: z.string({ error: "web-search: objective is required" }).min(5, { message: "web-search: objective must be at least 5 characters" }).max(500, { message: "web-search: objective too long (max 500 characters)" }).describe('REQUIRED. Describes what you are looking for. An LLM classifies each search result into 3 relevance tiers (highly relevant, maybe relevant, other) using only titles, snippets, and site names \u2014 no URLs are fetched. Be specific: "open-source MCP server implementations in TypeScript" not "MCP servers". Also generates a synthesis paragraph summarizing key findings.'),
|
|
9
|
+
raw: z.boolean({ error: "web-search: raw must be a boolean" }).default(false).describe("When true, skip LLM classification and return the traditional CTR-weighted consensus-ranked URL list. Use when you need raw context or the LLM endpoint is unavailable. Default: false (LLM classification enabled).")
|
|
10
|
+
}).strict();
|
|
13
11
|
const webSearchOutputSchema = z.object({
|
|
14
|
-
content: z.string().describe("
|
|
12
|
+
content: z.string().describe("Markdown report. With LLM: 3-tier table (highly relevant / maybe relevant / other) with synthesis. With raw=true: traditional CTR-ranked list."),
|
|
15
13
|
metadata: z.object({
|
|
16
14
|
total_keywords: z.number().int().nonnegative().describe("Total number of keyword queries executed."),
|
|
17
|
-
total_results: z.number().int().nonnegative().describe("Total
|
|
15
|
+
total_results: z.number().int().nonnegative().describe("Total unique URLs found across all searches."),
|
|
18
16
|
execution_time_ms: z.number().int().nonnegative().describe("Elapsed execution time in milliseconds."),
|
|
19
17
|
total_unique_urls: z.number().int().nonnegative().optional().describe("Unique URL count observed across all searches."),
|
|
20
18
|
consensus_url_count: z.number().int().nonnegative().optional().describe("Count of URLs that met the consensus threshold."),
|
|
21
19
|
frequency_threshold: z.number().int().nonnegative().optional().describe("Minimum frequency required for a URL to be considered consensus."),
|
|
20
|
+
llm_classified: z.boolean().describe("Whether LLM classification was applied to the results."),
|
|
21
|
+
llm_error: z.string().optional().describe("LLM classification error message if classification failed and fell back to raw output."),
|
|
22
22
|
coverage_summary: z.array(z.object({
|
|
23
23
|
keyword: z.string().describe("The search keyword."),
|
|
24
24
|
result_count: z.number().int().nonnegative().describe("Number of results returned for this keyword."),
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/schemas/web-search.ts"],
|
|
4
|
-
"sourcesContent": ["import { z } from 'zod';\n\n// Keyword schema with validation\nconst keywordSchema = z\n .string({ error: 'web-search: Keyword is required' })\n .min(1, { message: 'web-search: Keyword cannot be empty' })\n .max(500, { message: 'web-search: Keyword too long (max 500 characters)' })\n .refine(\n k => k.trim().length > 0,\n { message: 'web-search: Keyword cannot be whitespace only' }\n )\n .describe('A single Google search query (1
|
|
5
|
-
"mappings": "AAAA,SAAS,SAAS;AAGlB,MAAM,gBAAgB,EACnB,OAAO,EAAE,OAAO,kCAAkC,CAAC,EACnD,IAAI,GAAG,EAAE,SAAS,sCAAsC,CAAC,EACzD,IAAI,KAAK,EAAE,SAAS,oDAAoD,CAAC,EACzE;AAAA,EACC,OAAK,EAAE,KAAK,EAAE,SAAS;AAAA,EACvB,EAAE,SAAS,gDAAgD;AAC7D,EACC,SAAS,
|
|
4
|
+
"sourcesContent": ["import { z } from 'zod';\n\n// Keyword schema with validation\nconst keywordSchema = z\n .string({ error: 'web-search: Keyword is required' })\n .min(1, { message: 'web-search: Keyword cannot be empty' })\n .max(500, { message: 'web-search: Keyword too long (max 500 characters)' })\n .refine(\n k => k.trim().length > 0,\n { message: 'web-search: Keyword cannot be whitespace only' }\n )\n .describe('A single Google search query (1-500 chars). Each keyword runs as a separate parallel search. Use varied angles: direct topic, comparisons, \"best of\" lists, year-specific, site-specific (e.g., \"site:github.com topic\").');\n\n// Input schema for web-search tool\nexport const webSearchParamsSchema = z.object({\n keywords: z\n .array(keywordSchema, { error: 'web-search: Keywords must be an array' })\n .min(1, { message: 'web-search: At least 1 keyword required' })\n .max(100, { message: 'web-search: Maximum 100 keywords allowed per request' })\n .describe('Array of 1-100 search keywords. Each runs as a separate Google search in parallel. Results are aggregated, deduplicated, and ranked by CTR-weighted consensus. RECOMMENDED: 3-7 keywords for solid consensus, up to 20 for thorough coverage.'),\n objective: z\n .string({ error: 'web-search: objective is required' })\n .min(5, { message: 'web-search: objective must be at least 5 characters' })\n .max(500, { message: 'web-search: objective too long (max 500 characters)' })\n .describe('REQUIRED. Describes what you are looking for. An LLM classifies each search result into 3 relevance tiers (highly relevant, maybe relevant, other) using only titles, snippets, and site names \u2014 no URLs are fetched. Be specific: \"open-source MCP server implementations in TypeScript\" not \"MCP servers\". 
Also generates a synthesis paragraph summarizing key findings.'),\n raw: z\n .boolean({ error: 'web-search: raw must be a boolean' })\n .default(false)\n .describe('When true, skip LLM classification and return the traditional CTR-weighted consensus-ranked URL list. Use when you need raw context or the LLM endpoint is unavailable. Default: false (LLM classification enabled).'),\n}).strict();\n\nexport type WebSearchParams = z.infer<typeof webSearchParamsSchema>;\n\nexport const webSearchOutputSchema = z.object({\n content: z\n .string()\n .describe('Markdown report. With LLM: 3-tier table (highly relevant / maybe relevant / other) with synthesis. With raw=true: traditional CTR-ranked list.'),\n metadata: z.object({\n total_keywords: z\n .number()\n .int()\n .nonnegative()\n .describe('Total number of keyword queries executed.'),\n total_results: z\n .number()\n .int()\n .nonnegative()\n .describe('Total unique URLs found across all searches.'),\n execution_time_ms: z\n .number()\n .int()\n .nonnegative()\n .describe('Elapsed execution time in milliseconds.'),\n total_unique_urls: z\n .number()\n .int()\n .nonnegative()\n .optional()\n .describe('Unique URL count observed across all searches.'),\n consensus_url_count: z\n .number()\n .int()\n .nonnegative()\n .optional()\n .describe('Count of URLs that met the consensus threshold.'),\n frequency_threshold: z\n .number()\n .int()\n .nonnegative()\n .optional()\n .describe('Minimum frequency required for a URL to be considered consensus.'),\n llm_classified: z\n .boolean()\n .describe('Whether LLM classification was applied to the results.'),\n llm_error: z\n .string()\n .optional()\n .describe('LLM classification error message if classification failed and fell back to raw output.'),\n coverage_summary: z\n .array(z.object({\n keyword: z.string().describe('The search keyword.'),\n result_count: z.number().int().nonnegative().describe('Number of results returned for this keyword.'),\n top_url: 
z.string().optional().describe('Domain of the top-ranked result for this keyword.'),\n }))\n .optional()\n .describe('Per-keyword result counts and top URLs for coverage analysis.'),\n low_yield_keywords: z\n .array(z.string())\n .optional()\n .describe('Keywords that produced 0-1 results.'),\n }).strict().describe('Structured metadata about the completed web search batch.'),\n}).strict();\n\nexport type WebSearchOutput = z.infer<typeof webSearchOutputSchema>;\n"],
|
|
5
|
+
"mappings": "AAAA,SAAS,SAAS;AAGlB,MAAM,gBAAgB,EACnB,OAAO,EAAE,OAAO,kCAAkC,CAAC,EACnD,IAAI,GAAG,EAAE,SAAS,sCAAsC,CAAC,EACzD,IAAI,KAAK,EAAE,SAAS,oDAAoD,CAAC,EACzE;AAAA,EACC,OAAK,EAAE,KAAK,EAAE,SAAS;AAAA,EACvB,EAAE,SAAS,gDAAgD;AAC7D,EACC,SAAS,2NAA2N;AAGhO,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,UAAU,EACP,MAAM,eAAe,EAAE,OAAO,wCAAwC,CAAC,EACvE,IAAI,GAAG,EAAE,SAAS,0CAA0C,CAAC,EAC7D,IAAI,KAAK,EAAE,SAAS,uDAAuD,CAAC,EAC5E,SAAS,+OAA+O;AAAA,EAC3P,WAAW,EACR,OAAO,EAAE,OAAO,oCAAoC,CAAC,EACrD,IAAI,GAAG,EAAE,SAAS,sDAAsD,CAAC,EACzE,IAAI,KAAK,EAAE,SAAS,sDAAsD,CAAC,EAC3E,SAAS,kXAA6W;AAAA,EACzX,KAAK,EACF,QAAQ,EAAE,OAAO,oCAAoC,CAAC,EACtD,QAAQ,KAAK,EACb,SAAS,sNAAsN;AACpO,CAAC,EAAE,OAAO;AAIH,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN,OAAO,EACP,SAAS,gJAAgJ;AAAA,EAC5J,UAAU,EAAE,OAAO;AAAA,IACjB,gBAAgB,EACb,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,2CAA2C;AAAA,IACvD,eAAe,EACZ,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,8CAA8C;AAAA,IAC1D,mBAAmB,EAChB,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,yCAAyC;AAAA,IACrD,mBAAmB,EAChB,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,EACT,SAAS,gDAAgD;AAAA,IAC5D,qBAAqB,EAClB,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,EACT,SAAS,iDAAiD;AAAA,IAC7D,qBAAqB,EAClB,OAAO,EACP,IAAI,EACJ,YAAY,EACZ,SAAS,EACT,SAAS,kEAAkE;AAAA,IAC9E,gBAAgB,EACb,QAAQ,EACR,SAAS,wDAAwD;AAAA,IACpE,WAAW,EACR,OAAO,EACP,SAAS,EACT,SAAS,wFAAwF;AAAA,IACpG,kBAAkB,EACf,MAAM,EAAE,OAAO;AAAA,MACd,SAAS,EAAE,OAAO,EAAE,SAAS,qBAAqB;AAAA,MAClD,cAAc,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,8CAA8C;AAAA,MACpG,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,mDAAmD;AAAA,IAC7F,CAAC,CAAC,EACD,SAAS,EACT,SAAS,+DAA+D;AAAA,IAC3E,oBAAoB,EACjB,MAAM,EAAE,OAAO,CAAC,EAChB,SAAS,EACT,SAAS,qCAAqC;AAAA,EACnD,CAAC,EAAE,OAAO,EAAE,SAAS,2DAA2D;AAClF,CAAC,EAAE,OAAO;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|