mcp-researchpowerpack-http 3.11.1 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-use.json +2 -2
- package/dist/src/clients/reddit.js +1 -1
- package/dist/src/clients/reddit.js.map +2 -2
- package/dist/src/config/index.js +3 -4
- package/dist/src/config/index.js.map +2 -2
- package/dist/src/schemas/reddit.js +20 -31
- package/dist/src/schemas/reddit.js.map +2 -2
- package/dist/src/schemas/scrape-links.js +14 -22
- package/dist/src/schemas/scrape-links.js.map +2 -2
- package/dist/src/schemas/web-search.js +20 -24
- package/dist/src/schemas/web-search.js.map +2 -2
- package/dist/src/services/llm-processor.js +79 -0
- package/dist/src/services/llm-processor.js.map +2 -2
- package/dist/src/tools/reddit.js +76 -181
- package/dist/src/tools/reddit.js.map +2 -2
- package/dist/src/tools/scrape.js +14 -15
- package/dist/src/tools/scrape.js.map +2 -2
- package/dist/src/tools/search.js +129 -31
- package/dist/src/tools/search.js.map +2 -2
- package/package.json +1 -1
package/dist/mcp-use.json
CHANGED
|
@@ -13,7 +13,7 @@ import { mcpLog } from "../utils/logger.js";
|
|
|
13
13
|
const REDDIT_TOKEN_URL = "https://www.reddit.com/api/v1/access_token";
|
|
14
14
|
const REDDIT_API_BASE = "https://oauth.reddit.com";
|
|
15
15
|
const TOKEN_EXPIRY_MS = 55e3;
|
|
16
|
-
const FETCH_LIMIT = REDDIT.FETCH_LIMIT_PER_POST;
|
|
16
|
+
const FETCH_LIMIT = 500;
|
|
17
17
|
let cachedToken = null;
|
|
18
18
|
let cachedTokenExpiry = 0;
|
|
19
19
|
const DEBUG_TOKEN_CACHE = process.env.DEBUG_REDDIT === "true";
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/clients/reddit.ts"],
|
|
4
|
-
"sourcesContent": ["/**\n * Reddit OAuth API Client\n * Fetches posts and comments sorted by score (most upvoted first)\n * Implements robust error handling that NEVER crashes\n */\n\nimport { Logger } from 'mcp-use';\n\nimport { REDDIT, CONCURRENCY } from '../config/index.js';\nimport { USER_AGENT_VERSION } from '../version.js';\nimport { calculateBackoff } from '../utils/retry.js';\nimport {\n classifyError,\n fetchWithTimeout,\n sleep,\n ErrorCode,\n type StructuredError,\n} from '../utils/errors.js';\nimport { pMap, pMapSettled } from '../utils/concurrency.js';\nimport { mcpLog } from '../utils/logger.js';\n\n// \u2500\u2500 Constants \u2500\u2500\n\nconst REDDIT_TOKEN_URL = 'https://www.reddit.com/api/v1/access_token' as const;\nconst REDDIT_API_BASE = 'https://oauth.reddit.com' as const;\nconst TOKEN_EXPIRY_MS = 55_000 as const; // 55 second expiry (conservative)\n\n// \u2500\u2500 Data Interfaces \u2500\u2500\n\ninterface Post {\n readonly title: string;\n readonly author: string;\n readonly subreddit: string;\n readonly body: string;\n readonly score: number;\n readonly commentCount: number;\n readonly url: string;\n readonly created: Date;\n readonly flair?: string;\n readonly isNsfw: boolean;\n readonly isPinned: boolean;\n}\n\nexport interface Comment {\n readonly author: string;\n readonly body: string;\n readonly score: number;\n readonly depth: number;\n readonly isOP: boolean;\n}\n\nexport interface PostResult {\n readonly post: Post;\n readonly comments: Comment[];\n readonly actualComments: number;\n}\n\ninterface BatchPostResult {\n readonly results: Map<string, PostResult | Error>;\n readonly batchesProcessed: number;\n readonly totalPosts: number;\n readonly rateLimitHits: number;\n}\n\n/** Reddit API \"Listing\" wrapper */\ninterface RedditListing<T> {\n readonly kind: string;\n readonly data: {\n readonly children: ReadonlyArray<{ readonly kind: string; readonly data: T }>;\n readonly after?: string;\n readonly before?: string;\n };\n}\n\n/** 
Reddit post data from API */\ninterface RedditPostData {\n readonly title: string;\n readonly selftext: string;\n readonly selftext_html?: string;\n readonly author: string;\n readonly subreddit: string;\n readonly score: number;\n readonly upvote_ratio: number;\n readonly num_comments: number;\n readonly created_utc: number;\n readonly url: string;\n readonly permalink: string;\n readonly is_self: boolean;\n readonly over_18: boolean;\n readonly stickied: boolean;\n readonly link_flair_text?: string;\n readonly [key: string]: unknown;\n}\n\n/** Reddit comment data from API */\ninterface RedditCommentData {\n readonly body?: string;\n readonly author?: string;\n readonly score?: number;\n readonly created_utc?: number;\n readonly replies?: RedditListing<RedditCommentData> | string;\n readonly [key: string]: unknown;\n}\n\ntype RedditPostResponse = [RedditListing<RedditPostData>, RedditListing<RedditCommentData>];\n\n/** Max comments to fetch per post from Reddit API */\nconst FETCH_LIMIT = REDDIT.FETCH_LIMIT_PER_POST;\n\n// ============================================================================\n// Module-Level Token Cache (shared across all RedditClient instances)\n// ============================================================================\nlet cachedToken: string | null = null;\nlet cachedTokenExpiry = 0;\n\n// Token cache logging only when DEBUG env is set\nconst DEBUG_TOKEN_CACHE = process.env.DEBUG_REDDIT === 'true';\nconst clientLogger = Logger.get('reddit-client');\n\n// Pending auth promise for deduplicating concurrent auth calls\nlet pendingAuthPromise: Promise<string | null> | null = null;\n\n// \u2500\u2500 Decomposed Helpers \u2500\u2500\n\n/**\n * Fetch a Reddit post's JSON from the API\n */\nasync function fetchRedditJson(\n sub: string,\n id: string,\n token: string,\n userAgent: string,\n): Promise<RedditPostResponse> {\n const limit = Math.min(FETCH_LIMIT, 500);\n const apiUrl = 
`${REDDIT_API_BASE}/r/${sub}/comments/${id}?sort=top&limit=${limit}&depth=10&raw_json=1`;\n\n const res = await fetchWithTimeout(apiUrl, {\n headers: {\n 'Authorization': `Bearer ${token}`,\n 'User-Agent': userAgent,\n },\n timeoutMs: 30000,\n });\n\n if (res.status === 429) {\n const err = new Error('Rate limited by Reddit API');\n (err as Error & { status: number }).status = 429;\n throw err;\n }\n\n if (res.status === 404) {\n throw new Error(`Post not found: /r/${sub}/comments/${id}`);\n }\n\n if (!res.ok) {\n const err = new Error(`Reddit API error: ${res.status}`);\n (err as Error & { status: number }).status = res.status;\n throw err;\n }\n\n try {\n return await res.json() as RedditPostResponse;\n } catch {\n throw new Error('Failed to parse Reddit API response');\n }\n}\n\n/**\n * Extract structured post data from a Reddit listing\n */\nfunction parsePostData(\n postListing: RedditListing<RedditPostData>,\n sub: string,\n): Post {\n const p = postListing?.data?.children?.[0]?.data;\n if (!p) {\n throw new Error(`Post data not found in response for /r/${sub}`);\n }\n\n return {\n title: p.title || 'Untitled',\n author: p.author || '[deleted]',\n subreddit: p.subreddit || sub,\n body: formatBody(p),\n score: p.score || 0,\n commentCount: p.num_comments || 0,\n url: `https://reddit.com${p.permalink || ''}`,\n created: new Date((p.created_utc || 0) * 1000),\n flair: p.link_flair_text || undefined,\n isNsfw: p.over_18 || false,\n isPinned: p.stickied || false,\n };\n}\n\nfunction formatBody(p: RedditPostData): string {\n if (p.selftext?.trim()) return p.selftext;\n if (p.is_self) return '';\n if (p.url) return `**Link:** ${p.url}`;\n return '';\n}\n\n/** Safety cap on comment tree recursion depth */\nconst MAX_COMMENT_DEPTH = 15 as const;\n\n/**\n * Extract and sort comments from a Reddit comment listing\n */\nfunction parseCommentTree(\n commentListing: RedditListing<RedditCommentData>,\n opAuthor: string,\n): Comment[] {\n const result: Comment[] = [];\n\n 
const extract = (items: ReadonlyArray<{ readonly kind: string; readonly data: RedditCommentData }>, depth = 0): void => {\n if (depth > MAX_COMMENT_DEPTH) return;\n const sorted = [...items].sort((a, b) => (b.data?.score || 0) - (a.data?.score || 0));\n\n for (const c of sorted) {\n if (c.kind !== 't1' || !c.data?.author || c.data.author === '[deleted]') continue;\n\n result.push({\n author: c.data.author,\n body: c.data.body || '',\n score: c.data.score || 0,\n depth,\n isOP: c.data.author === opAuthor,\n });\n\n if (typeof c.data.replies === 'object' && c.data.replies?.data?.children) {\n extract(c.data.replies.data.children, depth + 1);\n }\n }\n };\n\n extract(commentListing?.data?.children || []);\n return result;\n}\n\n// \u2500\u2500 Batch Helpers \u2500\u2500\n\n/**\n * Process a single batch of Reddit URLs, returning results keyed by URL\n */\nasync function processBatch(\n client: RedditClient,\n batchUrls: string[],\n): Promise<{ results: Map<string, PostResult | Error>; rateLimitHits: number }> {\n const results = new Map<string, PostResult | Error>();\n let rateLimitHits = 0;\n\n const batchResults = await pMapSettled(\n batchUrls,\n url => client.getPost(url),\n CONCURRENCY.REDDIT,\n );\n\n for (let i = 0; i < batchResults.length; i++) {\n const result = batchResults[i];\n if (!result) continue;\n const url = batchUrls[i] ?? 
'';\n\n if (result.status === 'fulfilled') {\n results.set(url, result.value);\n } else {\n const errorMsg = result.reason?.message || String(result.reason);\n if (errorMsg.includes('429') || errorMsg.includes('rate')) rateLimitHits++;\n results.set(url, new Error(errorMsg));\n }\n }\n\n return { results, rateLimitHits };\n}\n\n// \u2500\u2500 RedditClient Class \u2500\u2500\n\nexport class RedditClient {\n private userAgent = `script:${USER_AGENT_VERSION} (by /u/research-powerpack)`;\n\n constructor(private clientId: string, private clientSecret: string) {}\n\n /**\n * Authenticate with Reddit API with retry logic\n * Uses module-level token cache and promise deduplication to prevent\n * concurrent auth calls from firing multiple token requests\n * Returns null on failure instead of throwing\n */\n private async auth(): Promise<string | null> {\n if (cachedToken && Date.now() < cachedTokenExpiry - TOKEN_EXPIRY_MS) {\n if (DEBUG_TOKEN_CACHE) clientLogger.debug('Token cache HIT');\n return cachedToken;\n }\n\n if (pendingAuthPromise) {\n if (DEBUG_TOKEN_CACHE) clientLogger.debug('Auth already in flight, awaiting...');\n return pendingAuthPromise;\n }\n\n pendingAuthPromise = this.performAuth();\n try {\n return await pendingAuthPromise;\n } finally {\n pendingAuthPromise = null;\n }\n }\n\n private async performAuth(): Promise<string | null> {\n if (DEBUG_TOKEN_CACHE) clientLogger.debug('Token cache MISS - authenticating');\n\n const credentials = Buffer.from(`${this.clientId}:${this.clientSecret}`).toString('base64');\n\n for (let attempt = 0; attempt < 3; attempt++) {\n try {\n const res = await fetchWithTimeout(REDDIT_TOKEN_URL, {\n method: 'POST',\n headers: {\n 'Authorization': `Basic ${credentials}`,\n 'Content-Type': 'application/x-www-form-urlencoded',\n 'User-Agent': this.userAgent,\n },\n body: 'grant_type=client_credentials',\n timeoutMs: 15000,\n });\n\n if (!res.ok) {\n const text = await res.text().catch(() => '');\n mcpLog('error', `Auth failed 
(${res.status}): ${text}`, 'reddit');\n\n if (res.status === 401 || res.status === 403) {\n cachedToken = null;\n cachedTokenExpiry = 0;\n return null;\n }\n\n if (res.status >= 500 && attempt < 2) {\n await sleep(calculateBackoff(attempt));\n continue;\n }\n\n return null;\n }\n\n const data = await res.json() as { access_token?: string; expires_in?: number };\n if (!data.access_token) {\n mcpLog('error', 'Auth response missing access_token', 'reddit');\n return null;\n }\n\n cachedToken = data.access_token;\n cachedTokenExpiry = Date.now() + (data.expires_in || 3600) * 1000;\n return cachedToken;\n\n } catch (error) {\n const err = classifyError(error);\n mcpLog('error', `Auth error (attempt ${attempt + 1}): ${err.message}`, 'reddit');\n\n if (err.code === ErrorCode.AUTH_ERROR) {\n cachedToken = null;\n cachedTokenExpiry = 0;\n }\n\n if (attempt < 2 && err.retryable) {\n await sleep(calculateBackoff(attempt));\n continue;\n }\n\n return null;\n }\n }\n\n return null;\n }\n\n private parseUrl(url: string): { sub: string; id: string } | null {\n const m = url.match(/reddit\\.com\\/r\\/([^\\/]+)\\/comments\\/([a-z0-9]+)/i);\n return m ? { sub: m[1]!, id: m[2]! 
} : null;\n }\n\n /**\n * Get a single Reddit post with comments\n * Returns PostResult or throws Error (for use with Promise.allSettled)\n */\n async getPost(url: string): Promise<PostResult> {\n const parsed = this.parseUrl(url);\n if (!parsed) {\n throw new Error(`Invalid Reddit URL format: ${url}`);\n }\n\n const token = await this.auth();\n if (!token) {\n throw new Error('Reddit authentication failed - check credentials');\n }\n\n let lastError: StructuredError | null = null;\n\n for (let attempt = 0; attempt < REDDIT.RETRY_COUNT; attempt++) {\n try {\n const data = await fetchRedditJson(parsed.sub, parsed.id, token, this.userAgent);\n const [postListing, commentListing] = data;\n\n const post = parsePostData(postListing, parsed.sub);\n const comments = parseCommentTree(commentListing, post.author);\n\n return { post, comments, actualComments: post.commentCount };\n\n } catch (error) {\n lastError = classifyError(error);\n\n // Rate limited \u2014 always retry with backoff\n const status = (error as Error & { status?: number }).status;\n if (status === 429) {\n const delay = REDDIT.RETRY_DELAYS[attempt] || 32000;\n mcpLog('warning', `Rate limited. Retry ${attempt + 1}/${REDDIT.RETRY_COUNT} after ${delay}ms`, 'reddit');\n await sleep(delay);\n continue;\n }\n\n if (!lastError.retryable) {\n throw error instanceof Error ? error : new Error(lastError.message);\n }\n\n if (attempt < REDDIT.RETRY_COUNT - 1) {\n const delay = REDDIT.RETRY_DELAYS[attempt] || 2000;\n mcpLog('warning', `${lastError.code}: ${lastError.message}. 
Retry ${attempt + 1}/${REDDIT.RETRY_COUNT}`, 'reddit');\n await sleep(delay);\n }\n }\n }\n\n throw new Error(lastError?.message || 'Failed to fetch Reddit post after retries');\n }\n\n async getPosts(urls: string[]): Promise<Map<string, PostResult | Error>> {\n if (urls.length <= REDDIT.BATCH_SIZE) {\n const results = await pMap(\n urls,\n u => this.getPost(u).catch(e => e as Error),\n CONCURRENCY.REDDIT,\n );\n return new Map(urls.map((u, i) => [u, results[i]!]));\n }\n return (await this.batchGetPosts(urls)).results;\n }\n\n async batchGetPosts(\n urls: string[],\n fetchComments = true,\n onBatchComplete?: (batchNum: number, totalBatches: number, processed: number) => void,\n ): Promise<BatchPostResult> {\n const allResults = new Map<string, PostResult | Error>();\n let rateLimitHits = 0;\n\n const totalBatches = Math.ceil(urls.length / REDDIT.BATCH_SIZE);\n mcpLog('info', `Fetching ${urls.length} posts in ${totalBatches} batch(es), up to ${FETCH_LIMIT} comments/post`, 'reddit');\n\n for (let batchNum = 0; batchNum < totalBatches; batchNum++) {\n const startIdx = batchNum * REDDIT.BATCH_SIZE;\n const batchUrls = urls.slice(startIdx, startIdx + REDDIT.BATCH_SIZE);\n\n mcpLog('info', `Batch ${batchNum + 1}/${totalBatches} (${batchUrls.length} posts)`, 'reddit');\n\n const batchResult = await processBatch(this, batchUrls);\n for (const [url, result] of batchResult.results) {\n allResults.set(url, result);\n }\n rateLimitHits += batchResult.rateLimitHits;\n\n try {\n onBatchComplete?.(batchNum + 1, totalBatches, allResults.size);\n } catch (callbackError) {\n mcpLog('error', `onBatchComplete callback error: ${callbackError}`, 'reddit');\n }\n\n mcpLog('info', `Batch ${batchNum + 1} complete (${allResults.size}/${urls.length})`, 'reddit');\n\n if (batchNum < totalBatches - 1) {\n await sleep(500);\n }\n }\n\n return { results: allResults, batchesProcessed: totalBatches, totalPosts: urls.length, rateLimitHits };\n }\n}\n"],
|
|
5
|
-
"mappings": "AAMA,SAAS,cAAc;AAEvB,SAAS,QAAQ,mBAAmB;AACpC,SAAS,0BAA0B;AACnC,SAAS,wBAAwB;AACjC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,MAAM,mBAAmB;AAClC,SAAS,cAAc;AAIvB,MAAM,mBAAmB;AACzB,MAAM,kBAAkB;AACxB,MAAM,kBAAkB;
|
|
4
|
+
"sourcesContent": ["/**\n * Reddit OAuth API Client\n * Fetches posts and comments sorted by score (most upvoted first)\n * Implements robust error handling that NEVER crashes\n */\n\nimport { Logger } from 'mcp-use';\n\nimport { REDDIT, CONCURRENCY } from '../config/index.js';\nimport { USER_AGENT_VERSION } from '../version.js';\nimport { calculateBackoff } from '../utils/retry.js';\nimport {\n classifyError,\n fetchWithTimeout,\n sleep,\n ErrorCode,\n type StructuredError,\n} from '../utils/errors.js';\nimport { pMap, pMapSettled } from '../utils/concurrency.js';\nimport { mcpLog } from '../utils/logger.js';\n\n// \u2500\u2500 Constants \u2500\u2500\n\nconst REDDIT_TOKEN_URL = 'https://www.reddit.com/api/v1/access_token' as const;\nconst REDDIT_API_BASE = 'https://oauth.reddit.com' as const;\nconst TOKEN_EXPIRY_MS = 55_000 as const; // 55 second expiry (conservative)\n\n// \u2500\u2500 Data Interfaces \u2500\u2500\n\ninterface Post {\n readonly title: string;\n readonly author: string;\n readonly subreddit: string;\n readonly body: string;\n readonly score: number;\n readonly commentCount: number;\n readonly url: string;\n readonly created: Date;\n readonly flair?: string;\n readonly isNsfw: boolean;\n readonly isPinned: boolean;\n}\n\nexport interface Comment {\n readonly author: string;\n readonly body: string;\n readonly score: number;\n readonly depth: number;\n readonly isOP: boolean;\n}\n\nexport interface PostResult {\n readonly post: Post;\n readonly comments: Comment[];\n readonly actualComments: number;\n}\n\ninterface BatchPostResult {\n readonly results: Map<string, PostResult | Error>;\n readonly batchesProcessed: number;\n readonly totalPosts: number;\n readonly rateLimitHits: number;\n}\n\n/** Reddit API \"Listing\" wrapper */\ninterface RedditListing<T> {\n readonly kind: string;\n readonly data: {\n readonly children: ReadonlyArray<{ readonly kind: string; readonly data: T }>;\n readonly after?: string;\n readonly before?: string;\n };\n}\n\n/** 
Reddit post data from API */\ninterface RedditPostData {\n readonly title: string;\n readonly selftext: string;\n readonly selftext_html?: string;\n readonly author: string;\n readonly subreddit: string;\n readonly score: number;\n readonly upvote_ratio: number;\n readonly num_comments: number;\n readonly created_utc: number;\n readonly url: string;\n readonly permalink: string;\n readonly is_self: boolean;\n readonly over_18: boolean;\n readonly stickied: boolean;\n readonly link_flair_text?: string;\n readonly [key: string]: unknown;\n}\n\n/** Reddit comment data from API */\ninterface RedditCommentData {\n readonly body?: string;\n readonly author?: string;\n readonly score?: number;\n readonly created_utc?: number;\n readonly replies?: RedditListing<RedditCommentData> | string;\n readonly [key: string]: unknown;\n}\n\ntype RedditPostResponse = [RedditListing<RedditPostData>, RedditListing<RedditCommentData>];\n\n/** Max comments to fetch per post from Reddit API */\n/** Reddit API caps at 500 comments per request */\nconst FETCH_LIMIT = 500;\n\n// ============================================================================\n// Module-Level Token Cache (shared across all RedditClient instances)\n// ============================================================================\nlet cachedToken: string | null = null;\nlet cachedTokenExpiry = 0;\n\n// Token cache logging only when DEBUG env is set\nconst DEBUG_TOKEN_CACHE = process.env.DEBUG_REDDIT === 'true';\nconst clientLogger = Logger.get('reddit-client');\n\n// Pending auth promise for deduplicating concurrent auth calls\nlet pendingAuthPromise: Promise<string | null> | null = null;\n\n// \u2500\u2500 Decomposed Helpers \u2500\u2500\n\n/**\n * Fetch a Reddit post's JSON from the API\n */\nasync function fetchRedditJson(\n sub: string,\n id: string,\n token: string,\n userAgent: string,\n): Promise<RedditPostResponse> {\n const limit = Math.min(FETCH_LIMIT, 500);\n const apiUrl = 
`${REDDIT_API_BASE}/r/${sub}/comments/${id}?sort=top&limit=${limit}&depth=10&raw_json=1`;\n\n const res = await fetchWithTimeout(apiUrl, {\n headers: {\n 'Authorization': `Bearer ${token}`,\n 'User-Agent': userAgent,\n },\n timeoutMs: 30000,\n });\n\n if (res.status === 429) {\n const err = new Error('Rate limited by Reddit API');\n (err as Error & { status: number }).status = 429;\n throw err;\n }\n\n if (res.status === 404) {\n throw new Error(`Post not found: /r/${sub}/comments/${id}`);\n }\n\n if (!res.ok) {\n const err = new Error(`Reddit API error: ${res.status}`);\n (err as Error & { status: number }).status = res.status;\n throw err;\n }\n\n try {\n return await res.json() as RedditPostResponse;\n } catch {\n throw new Error('Failed to parse Reddit API response');\n }\n}\n\n/**\n * Extract structured post data from a Reddit listing\n */\nfunction parsePostData(\n postListing: RedditListing<RedditPostData>,\n sub: string,\n): Post {\n const p = postListing?.data?.children?.[0]?.data;\n if (!p) {\n throw new Error(`Post data not found in response for /r/${sub}`);\n }\n\n return {\n title: p.title || 'Untitled',\n author: p.author || '[deleted]',\n subreddit: p.subreddit || sub,\n body: formatBody(p),\n score: p.score || 0,\n commentCount: p.num_comments || 0,\n url: `https://reddit.com${p.permalink || ''}`,\n created: new Date((p.created_utc || 0) * 1000),\n flair: p.link_flair_text || undefined,\n isNsfw: p.over_18 || false,\n isPinned: p.stickied || false,\n };\n}\n\nfunction formatBody(p: RedditPostData): string {\n if (p.selftext?.trim()) return p.selftext;\n if (p.is_self) return '';\n if (p.url) return `**Link:** ${p.url}`;\n return '';\n}\n\n/** Safety cap on comment tree recursion depth */\nconst MAX_COMMENT_DEPTH = 15 as const;\n\n/**\n * Extract and sort comments from a Reddit comment listing\n */\nfunction parseCommentTree(\n commentListing: RedditListing<RedditCommentData>,\n opAuthor: string,\n): Comment[] {\n const result: Comment[] = [];\n\n 
const extract = (items: ReadonlyArray<{ readonly kind: string; readonly data: RedditCommentData }>, depth = 0): void => {\n if (depth > MAX_COMMENT_DEPTH) return;\n const sorted = [...items].sort((a, b) => (b.data?.score || 0) - (a.data?.score || 0));\n\n for (const c of sorted) {\n if (c.kind !== 't1' || !c.data?.author || c.data.author === '[deleted]') continue;\n\n result.push({\n author: c.data.author,\n body: c.data.body || '',\n score: c.data.score || 0,\n depth,\n isOP: c.data.author === opAuthor,\n });\n\n if (typeof c.data.replies === 'object' && c.data.replies?.data?.children) {\n extract(c.data.replies.data.children, depth + 1);\n }\n }\n };\n\n extract(commentListing?.data?.children || []);\n return result;\n}\n\n// \u2500\u2500 Batch Helpers \u2500\u2500\n\n/**\n * Process a single batch of Reddit URLs, returning results keyed by URL\n */\nasync function processBatch(\n client: RedditClient,\n batchUrls: string[],\n): Promise<{ results: Map<string, PostResult | Error>; rateLimitHits: number }> {\n const results = new Map<string, PostResult | Error>();\n let rateLimitHits = 0;\n\n const batchResults = await pMapSettled(\n batchUrls,\n url => client.getPost(url),\n CONCURRENCY.REDDIT,\n );\n\n for (let i = 0; i < batchResults.length; i++) {\n const result = batchResults[i];\n if (!result) continue;\n const url = batchUrls[i] ?? 
'';\n\n if (result.status === 'fulfilled') {\n results.set(url, result.value);\n } else {\n const errorMsg = result.reason?.message || String(result.reason);\n if (errorMsg.includes('429') || errorMsg.includes('rate')) rateLimitHits++;\n results.set(url, new Error(errorMsg));\n }\n }\n\n return { results, rateLimitHits };\n}\n\n// \u2500\u2500 RedditClient Class \u2500\u2500\n\nexport class RedditClient {\n private userAgent = `script:${USER_AGENT_VERSION} (by /u/research-powerpack)`;\n\n constructor(private clientId: string, private clientSecret: string) {}\n\n /**\n * Authenticate with Reddit API with retry logic\n * Uses module-level token cache and promise deduplication to prevent\n * concurrent auth calls from firing multiple token requests\n * Returns null on failure instead of throwing\n */\n private async auth(): Promise<string | null> {\n if (cachedToken && Date.now() < cachedTokenExpiry - TOKEN_EXPIRY_MS) {\n if (DEBUG_TOKEN_CACHE) clientLogger.debug('Token cache HIT');\n return cachedToken;\n }\n\n if (pendingAuthPromise) {\n if (DEBUG_TOKEN_CACHE) clientLogger.debug('Auth already in flight, awaiting...');\n return pendingAuthPromise;\n }\n\n pendingAuthPromise = this.performAuth();\n try {\n return await pendingAuthPromise;\n } finally {\n pendingAuthPromise = null;\n }\n }\n\n private async performAuth(): Promise<string | null> {\n if (DEBUG_TOKEN_CACHE) clientLogger.debug('Token cache MISS - authenticating');\n\n const credentials = Buffer.from(`${this.clientId}:${this.clientSecret}`).toString('base64');\n\n for (let attempt = 0; attempt < 3; attempt++) {\n try {\n const res = await fetchWithTimeout(REDDIT_TOKEN_URL, {\n method: 'POST',\n headers: {\n 'Authorization': `Basic ${credentials}`,\n 'Content-Type': 'application/x-www-form-urlencoded',\n 'User-Agent': this.userAgent,\n },\n body: 'grant_type=client_credentials',\n timeoutMs: 15000,\n });\n\n if (!res.ok) {\n const text = await res.text().catch(() => '');\n mcpLog('error', `Auth failed 
(${res.status}): ${text}`, 'reddit');\n\n if (res.status === 401 || res.status === 403) {\n cachedToken = null;\n cachedTokenExpiry = 0;\n return null;\n }\n\n if (res.status >= 500 && attempt < 2) {\n await sleep(calculateBackoff(attempt));\n continue;\n }\n\n return null;\n }\n\n const data = await res.json() as { access_token?: string; expires_in?: number };\n if (!data.access_token) {\n mcpLog('error', 'Auth response missing access_token', 'reddit');\n return null;\n }\n\n cachedToken = data.access_token;\n cachedTokenExpiry = Date.now() + (data.expires_in || 3600) * 1000;\n return cachedToken;\n\n } catch (error) {\n const err = classifyError(error);\n mcpLog('error', `Auth error (attempt ${attempt + 1}): ${err.message}`, 'reddit');\n\n if (err.code === ErrorCode.AUTH_ERROR) {\n cachedToken = null;\n cachedTokenExpiry = 0;\n }\n\n if (attempt < 2 && err.retryable) {\n await sleep(calculateBackoff(attempt));\n continue;\n }\n\n return null;\n }\n }\n\n return null;\n }\n\n private parseUrl(url: string): { sub: string; id: string } | null {\n const m = url.match(/reddit\\.com\\/r\\/([^\\/]+)\\/comments\\/([a-z0-9]+)/i);\n return m ? { sub: m[1]!, id: m[2]! 
} : null;\n }\n\n /**\n * Get a single Reddit post with comments\n * Returns PostResult or throws Error (for use with Promise.allSettled)\n */\n async getPost(url: string): Promise<PostResult> {\n const parsed = this.parseUrl(url);\n if (!parsed) {\n throw new Error(`Invalid Reddit URL format: ${url}`);\n }\n\n const token = await this.auth();\n if (!token) {\n throw new Error('Reddit authentication failed - check credentials');\n }\n\n let lastError: StructuredError | null = null;\n\n for (let attempt = 0; attempt < REDDIT.RETRY_COUNT; attempt++) {\n try {\n const data = await fetchRedditJson(parsed.sub, parsed.id, token, this.userAgent);\n const [postListing, commentListing] = data;\n\n const post = parsePostData(postListing, parsed.sub);\n const comments = parseCommentTree(commentListing, post.author);\n\n return { post, comments, actualComments: post.commentCount };\n\n } catch (error) {\n lastError = classifyError(error);\n\n // Rate limited \u2014 always retry with backoff\n const status = (error as Error & { status?: number }).status;\n if (status === 429) {\n const delay = REDDIT.RETRY_DELAYS[attempt] || 32000;\n mcpLog('warning', `Rate limited. Retry ${attempt + 1}/${REDDIT.RETRY_COUNT} after ${delay}ms`, 'reddit');\n await sleep(delay);\n continue;\n }\n\n if (!lastError.retryable) {\n throw error instanceof Error ? error : new Error(lastError.message);\n }\n\n if (attempt < REDDIT.RETRY_COUNT - 1) {\n const delay = REDDIT.RETRY_DELAYS[attempt] || 2000;\n mcpLog('warning', `${lastError.code}: ${lastError.message}. 
Retry ${attempt + 1}/${REDDIT.RETRY_COUNT}`, 'reddit');\n await sleep(delay);\n }\n }\n }\n\n throw new Error(lastError?.message || 'Failed to fetch Reddit post after retries');\n }\n\n async getPosts(urls: string[]): Promise<Map<string, PostResult | Error>> {\n if (urls.length <= REDDIT.BATCH_SIZE) {\n const results = await pMap(\n urls,\n u => this.getPost(u).catch(e => e as Error),\n CONCURRENCY.REDDIT,\n );\n return new Map(urls.map((u, i) => [u, results[i]!]));\n }\n return (await this.batchGetPosts(urls)).results;\n }\n\n async batchGetPosts(\n urls: string[],\n fetchComments = true,\n onBatchComplete?: (batchNum: number, totalBatches: number, processed: number) => void,\n ): Promise<BatchPostResult> {\n const allResults = new Map<string, PostResult | Error>();\n let rateLimitHits = 0;\n\n const totalBatches = Math.ceil(urls.length / REDDIT.BATCH_SIZE);\n mcpLog('info', `Fetching ${urls.length} posts in ${totalBatches} batch(es), up to ${FETCH_LIMIT} comments/post`, 'reddit');\n\n for (let batchNum = 0; batchNum < totalBatches; batchNum++) {\n const startIdx = batchNum * REDDIT.BATCH_SIZE;\n const batchUrls = urls.slice(startIdx, startIdx + REDDIT.BATCH_SIZE);\n\n mcpLog('info', `Batch ${batchNum + 1}/${totalBatches} (${batchUrls.length} posts)`, 'reddit');\n\n const batchResult = await processBatch(this, batchUrls);\n for (const [url, result] of batchResult.results) {\n allResults.set(url, result);\n }\n rateLimitHits += batchResult.rateLimitHits;\n\n try {\n onBatchComplete?.(batchNum + 1, totalBatches, allResults.size);\n } catch (callbackError) {\n mcpLog('error', `onBatchComplete callback error: ${callbackError}`, 'reddit');\n }\n\n mcpLog('info', `Batch ${batchNum + 1} complete (${allResults.size}/${urls.length})`, 'reddit');\n\n if (batchNum < totalBatches - 1) {\n await sleep(500);\n }\n }\n\n return { results: allResults, batchesProcessed: totalBatches, totalPosts: urls.length, rateLimitHits };\n }\n}\n"],
|
|
5
|
+
"mappings": "AAMA,SAAS,cAAc;AAEvB,SAAS,QAAQ,mBAAmB;AACpC,SAAS,0BAA0B;AACnC,SAAS,wBAAwB;AACjC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,MAAM,mBAAmB;AAClC,SAAS,cAAc;AAIvB,MAAM,mBAAmB;AACzB,MAAM,kBAAkB;AACxB,MAAM,kBAAkB;AAmFxB,MAAM,cAAc;AAKpB,IAAI,cAA6B;AACjC,IAAI,oBAAoB;AAGxB,MAAM,oBAAoB,QAAQ,IAAI,iBAAiB;AACvD,MAAM,eAAe,OAAO,IAAI,eAAe;AAG/C,IAAI,qBAAoD;AAOxD,eAAe,gBACb,KACA,IACA,OACA,WAC6B;AAC7B,QAAM,QAAQ,KAAK,IAAI,aAAa,GAAG;AACvC,QAAM,SAAS,GAAG,eAAe,MAAM,GAAG,aAAa,EAAE,mBAAmB,KAAK;AAEjF,QAAM,MAAM,MAAM,iBAAiB,QAAQ;AAAA,IACzC,SAAS;AAAA,MACP,iBAAiB,UAAU,KAAK;AAAA,MAChC,cAAc;AAAA,IAChB;AAAA,IACA,WAAW;AAAA,EACb,CAAC;AAED,MAAI,IAAI,WAAW,KAAK;AACtB,UAAM,MAAM,IAAI,MAAM,4BAA4B;AAClD,IAAC,IAAmC,SAAS;AAC7C,UAAM;AAAA,EACR;AAEA,MAAI,IAAI,WAAW,KAAK;AACtB,UAAM,IAAI,MAAM,sBAAsB,GAAG,aAAa,EAAE,EAAE;AAAA,EAC5D;AAEA,MAAI,CAAC,IAAI,IAAI;AACX,UAAM,MAAM,IAAI,MAAM,qBAAqB,IAAI,MAAM,EAAE;AACvD,IAAC,IAAmC,SAAS,IAAI;AACjD,UAAM;AAAA,EACR;AAEA,MAAI;AACF,WAAO,MAAM,IAAI,KAAK;AAAA,EACxB,QAAQ;AACN,UAAM,IAAI,MAAM,qCAAqC;AAAA,EACvD;AACF;AAKA,SAAS,cACP,aACA,KACM;AACN,QAAM,IAAI,aAAa,MAAM,WAAW,CAAC,GAAG;AAC5C,MAAI,CAAC,GAAG;AACN,UAAM,IAAI,MAAM,0CAA0C,GAAG,EAAE;AAAA,EACjE;AAEA,SAAO;AAAA,IACL,OAAO,EAAE,SAAS;AAAA,IAClB,QAAQ,EAAE,UAAU;AAAA,IACpB,WAAW,EAAE,aAAa;AAAA,IAC1B,MAAM,WAAW,CAAC;AAAA,IAClB,OAAO,EAAE,SAAS;AAAA,IAClB,cAAc,EAAE,gBAAgB;AAAA,IAChC,KAAK,qBAAqB,EAAE,aAAa,EAAE;AAAA,IAC3C,SAAS,IAAI,MAAM,EAAE,eAAe,KAAK,GAAI;AAAA,IAC7C,OAAO,EAAE,mBAAmB;AAAA,IAC5B,QAAQ,EAAE,WAAW;AAAA,IACrB,UAAU,EAAE,YAAY;AAAA,EAC1B;AACF;AAEA,SAAS,WAAW,GAA2B;AAC7C,MAAI,EAAE,UAAU,KAAK,EAAG,QAAO,EAAE;AACjC,MAAI,EAAE,QAAS,QAAO;AACtB,MAAI,EAAE,IAAK,QAAO,aAAa,EAAE,GAAG;AACpC,SAAO;AACT;AAGA,MAAM,oBAAoB;AAK1B,SAAS,iBACP,gBACA,UACW;AACX,QAAM,SAAoB,CAAC;AAE3B,QAAM,UAAU,CAAC,OAAmF,QAAQ,MAAY;AACtH,QAAI,QAAQ,kBAAmB;AAC/B,UAAM,SAAS,CAAC,GAAG,KAAK,EAAE,KAAK,CAAC,GAAG,OAAO,EAAE,MAAM,SAAS,MAAM,EAAE,MAAM,SAAS,EAAE;AAEpF,eAAW,KAAK,QAAQ;AACtB,UAAI,EAAE,SAAS,QAAQ,CAAC,EAAE,MAAM,UAAU,EAAE,KAAK,WAAW,YAAa;AAEzE,aAAO,KAAK;AAAA,QACV,QAAQ
,EAAE,KAAK;AAAA,QACf,MAAM,EAAE,KAAK,QAAQ;AAAA,QACrB,OAAO,EAAE,KAAK,SAAS;AAAA,QACvB;AAAA,QACA,MAAM,EAAE,KAAK,WAAW;AAAA,MAC1B,CAAC;AAED,UAAI,OAAO,EAAE,KAAK,YAAY,YAAY,EAAE,KAAK,SAAS,MAAM,UAAU;AACxE,gBAAQ,EAAE,KAAK,QAAQ,KAAK,UAAU,QAAQ,CAAC;AAAA,MACjD;AAAA,IACF;AAAA,EACF;AAEA,UAAQ,gBAAgB,MAAM,YAAY,CAAC,CAAC;AAC5C,SAAO;AACT;AAOA,eAAe,aACb,QACA,WAC8E;AAC9E,QAAM,UAAU,oBAAI,IAAgC;AACpD,MAAI,gBAAgB;AAEpB,QAAM,eAAe,MAAM;AAAA,IACzB;AAAA,IACA,SAAO,OAAO,QAAQ,GAAG;AAAA,IACzB,YAAY;AAAA,EACd;AAEA,WAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,UAAM,SAAS,aAAa,CAAC;AAC7B,QAAI,CAAC,OAAQ;AACb,UAAM,MAAM,UAAU,CAAC,KAAK;AAE5B,QAAI,OAAO,WAAW,aAAa;AACjC,cAAQ,IAAI,KAAK,OAAO,KAAK;AAAA,IAC/B,OAAO;AACL,YAAM,WAAW,OAAO,QAAQ,WAAW,OAAO,OAAO,MAAM;AAC/D,UAAI,SAAS,SAAS,KAAK,KAAK,SAAS,SAAS,MAAM,EAAG;AAC3D,cAAQ,IAAI,KAAK,IAAI,MAAM,QAAQ,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,cAAc;AAClC;AAIO,MAAM,aAAa;AAAA,EAGxB,YAAoB,UAA0B,cAAsB;AAAhD;AAA0B;AAAA,EAAuB;AAAA,EAF7D,YAAY,UAAU,kBAAkB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUhD,MAAc,OAA+B;AAC3C,QAAI,eAAe,KAAK,IAAI,IAAI,oBAAoB,iBAAiB;AACnE,UAAI,kBAAmB,cAAa,MAAM,iBAAiB;AAC3D,aAAO;AAAA,IACT;AAEA,QAAI,oBAAoB;AACtB,UAAI,kBAAmB,cAAa,MAAM,qCAAqC;AAC/E,aAAO;AAAA,IACT;AAEA,yBAAqB,KAAK,YAAY;AACtC,QAAI;AACF,aAAO,MAAM;AAAA,IACf,UAAE;AACA,2BAAqB;AAAA,IACvB;AAAA,EACF;AAAA,EAEA,MAAc,cAAsC;AAClD,QAAI,kBAAmB,cAAa,MAAM,mCAAmC;AAE7E,UAAM,cAAc,OAAO,KAAK,GAAG,KAAK,QAAQ,IAAI,KAAK,YAAY,EAAE,EAAE,SAAS,QAAQ;AAE1F,aAAS,UAAU,GAAG,UAAU,GAAG,WAAW;AAC5C,UAAI;AACF,cAAM,MAAM,MAAM,iBAAiB,kBAAkB;AAAA,UACnD,QAAQ;AAAA,UACR,SAAS;AAAA,YACP,iBAAiB,SAAS,WAAW;AAAA,YACrC,gBAAgB;AAAA,YAChB,cAAc,KAAK;AAAA,UACrB;AAAA,UACA,MAAM;AAAA,UACN,WAAW;AAAA,QACb,CAAC;AAED,YAAI,CAAC,IAAI,IAAI;AACX,gBAAM,OAAO,MAAM,IAAI,KAAK,EAAE,MAAM,MAAM,EAAE;AAC5C,iBAAO,SAAS,gBAAgB,IAAI,MAAM,MAAM,IAAI,IAAI,QAAQ;AAEhE,cAAI,IAAI,WAAW,OAAO,IAAI,WAAW,KAAK;AAC5C,0BAAc;AACd,gCAAoB;AACpB,mBAAO;AAAA,UACT;AAEA,cAAI,IAAI,UAAU,OAAO,UAAU,GAAG;AACpC,kBAAM,MAAM,iBAAiB,OAAO,CAAC;AACrC;AAAA,UACF;AAEA,iBAAO;AAAA,QACT;AAEA,cAAM,OAAO,MAAM,IAAI,KAAK;AAC5B
,YAAI,CAAC,KAAK,cAAc;AACtB,iBAAO,SAAS,sCAAsC,QAAQ;AAC9D,iBAAO;AAAA,QACT;AAEA,sBAAc,KAAK;AACnB,4BAAoB,KAAK,IAAI,KAAK,KAAK,cAAc,QAAQ;AAC7D,eAAO;AAAA,MAET,SAAS,OAAO;AACd,cAAM,MAAM,cAAc,KAAK;AAC/B,eAAO,SAAS,uBAAuB,UAAU,CAAC,MAAM,IAAI,OAAO,IAAI,QAAQ;AAE/E,YAAI,IAAI,SAAS,UAAU,YAAY;AACrC,wBAAc;AACd,8BAAoB;AAAA,QACtB;AAEA,YAAI,UAAU,KAAK,IAAI,WAAW;AAChC,gBAAM,MAAM,iBAAiB,OAAO,CAAC;AACrC;AAAA,QACF;AAEA,eAAO;AAAA,MACT;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAAA,EAEQ,SAAS,KAAiD;AAChE,UAAM,IAAI,IAAI,MAAM,kDAAkD;AACtE,WAAO,IAAI,EAAE,KAAK,EAAE,CAAC,GAAI,IAAI,EAAE,CAAC,EAAG,IAAI;AAAA,EACzC;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,QAAQ,KAAkC;AAC9C,UAAM,SAAS,KAAK,SAAS,GAAG;AAChC,QAAI,CAAC,QAAQ;AACX,YAAM,IAAI,MAAM,8BAA8B,GAAG,EAAE;AAAA,IACrD;AAEA,UAAM,QAAQ,MAAM,KAAK,KAAK;AAC9B,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,kDAAkD;AAAA,IACpE;AAEA,QAAI,YAAoC;AAExC,aAAS,UAAU,GAAG,UAAU,OAAO,aAAa,WAAW;AAC7D,UAAI;AACF,cAAM,OAAO,MAAM,gBAAgB,OAAO,KAAK,OAAO,IAAI,OAAO,KAAK,SAAS;AAC/E,cAAM,CAAC,aAAa,cAAc,IAAI;AAEtC,cAAM,OAAO,cAAc,aAAa,OAAO,GAAG;AAClD,cAAM,WAAW,iBAAiB,gBAAgB,KAAK,MAAM;AAE7D,eAAO,EAAE,MAAM,UAAU,gBAAgB,KAAK,aAAa;AAAA,MAE7D,SAAS,OAAO;AACd,oBAAY,cAAc,KAAK;AAG/B,cAAM,SAAU,MAAsC;AACtD,YAAI,WAAW,KAAK;AAClB,gBAAM,QAAQ,OAAO,aAAa,OAAO,KAAK;AAC9C,iBAAO,WAAW,uBAAuB,UAAU,CAAC,IAAI,OAAO,WAAW,UAAU,KAAK,MAAM,QAAQ;AACvG,gBAAM,MAAM,KAAK;AACjB;AAAA,QACF;AAEA,YAAI,CAAC,UAAU,WAAW;AACxB,gBAAM,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,UAAU,OAAO;AAAA,QACpE;AAEA,YAAI,UAAU,OAAO,cAAc,GAAG;AACpC,gBAAM,QAAQ,OAAO,aAAa,OAAO,KAAK;AAC9C,iBAAO,WAAW,GAAG,UAAU,IAAI,KAAK,UAAU,OAAO,WAAW,UAAU,CAAC,IAAI,OAAO,WAAW,IAAI,QAAQ;AACjH,gBAAM,MAAM,KAAK;AAAA,QACnB;AAAA,MACF;AAAA,IACF;AAEA,UAAM,IAAI,MAAM,WAAW,WAAW,2CAA2C;AAAA,EACnF;AAAA,EAEA,MAAM,SAAS,MAA0D;AACvE,QAAI,KAAK,UAAU,OAAO,YAAY;AACpC,YAAM,UAAU,MAAM;AAAA,QACpB;AAAA,QACA,OAAK,KAAK,QAAQ,CAAC,EAAE,MAAM,OAAK,CAAU;AAAA,QAC1C,YAAY;AAAA,MACd;AACA,aAAO,IAAI,IAAI,KAAK,IAAI,CAAC,GAAG,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAE,CAAC,CAAC;AAAA,IACrD;AACA,YAAQ,MAAM,KAAK,cAAc,IAAI,GAAG;AAAA,EAC1C;AAAA,EAEA,MAAM,cACJ,MACA,gBAAgB,MAChB,iBAC
0B;AAC1B,UAAM,aAAa,oBAAI,IAAgC;AACvD,QAAI,gBAAgB;AAEpB,UAAM,eAAe,KAAK,KAAK,KAAK,SAAS,OAAO,UAAU;AAC9D,WAAO,QAAQ,YAAY,KAAK,MAAM,aAAa,YAAY,qBAAqB,WAAW,kBAAkB,QAAQ;AAEzH,aAAS,WAAW,GAAG,WAAW,cAAc,YAAY;AAC1D,YAAM,WAAW,WAAW,OAAO;AACnC,YAAM,YAAY,KAAK,MAAM,UAAU,WAAW,OAAO,UAAU;AAEnE,aAAO,QAAQ,SAAS,WAAW,CAAC,IAAI,YAAY,KAAK,UAAU,MAAM,WAAW,QAAQ;AAE5F,YAAM,cAAc,MAAM,aAAa,MAAM,SAAS;AACtD,iBAAW,CAAC,KAAK,MAAM,KAAK,YAAY,SAAS;AAC/C,mBAAW,IAAI,KAAK,MAAM;AAAA,MAC5B;AACA,uBAAiB,YAAY;AAE7B,UAAI;AACF,0BAAkB,WAAW,GAAG,cAAc,WAAW,IAAI;AAAA,MAC/D,SAAS,eAAe;AACtB,eAAO,SAAS,mCAAmC,aAAa,IAAI,QAAQ;AAAA,MAC9E;AAEA,aAAO,QAAQ,SAAS,WAAW,CAAC,cAAc,WAAW,IAAI,IAAI,KAAK,MAAM,KAAK,QAAQ;AAE7F,UAAI,WAAW,eAAe,GAAG;AAC/B,cAAM,MAAM,GAAG;AAAA,MACjB;AAAA,IACF;AAEA,WAAO,EAAE,SAAS,YAAY,kBAAkB,cAAc,YAAY,KAAK,QAAQ,cAAc;AAAA,EACvG;AACF;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/dist/src/config/index.js
CHANGED
|
@@ -102,9 +102,8 @@ const SCRAPER = {
|
|
|
102
102
|
};
|
|
103
103
|
const REDDIT = {
|
|
104
104
|
BATCH_SIZE: 10,
|
|
105
|
-
MAX_WORDS_PER_POST:
|
|
106
|
-
MAX_WORDS_TOTAL:
|
|
107
|
-
FETCH_LIMIT_PER_POST: 500,
|
|
105
|
+
MAX_WORDS_PER_POST: 5e4,
|
|
106
|
+
MAX_WORDS_TOTAL: 5e5,
|
|
108
107
|
MIN_POSTS: 1,
|
|
109
108
|
MAX_POSTS: 50,
|
|
110
109
|
RETRY_COUNT: 5,
|
|
@@ -146,7 +145,7 @@ let cachedLlmExtraction = null;
|
|
|
146
145
|
function getLlmExtraction() {
|
|
147
146
|
if (cachedLlmExtraction) return cachedLlmExtraction;
|
|
148
147
|
cachedLlmExtraction = {
|
|
149
|
-
MODEL: process.env.LLM_EXTRACTION_MODEL || "
|
|
148
|
+
MODEL: process.env.LLM_EXTRACTION_MODEL || "gpt-5.4-mini",
|
|
150
149
|
BASE_URL: process.env.LLM_EXTRACTION_BASE_URL || process.env.OPENROUTER_BASE_URL || "https://openrouter.ai/api/v1",
|
|
151
150
|
API_KEY: process.env.LLM_EXTRACTION_API_KEY || process.env.OPENROUTER_API_KEY || "",
|
|
152
151
|
MAX_TOKENS: 8e3,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/config/index.ts"],
|
|
4
|
-
"sourcesContent": ["/**\n * Consolidated configuration\n * All environment variables, constants, and LLM config in one place\n */\n\nimport { Logger } from 'mcp-use';\n\nimport { VERSION, PACKAGE_NAME, PACKAGE_DESCRIPTION } from '../version.js';\n\n// Import version utilities (not re-exported - use directly from version.ts if needed externally)\n\n// ============================================================================\n// Safe Integer Parsing Helper\n// ============================================================================\n\n/**\n * Safely parse an integer from environment variable with bounds checking\n * @param value - The string value to parse (from process.env)\n * @param defaultVal - Default value if parsing fails or value is undefined\n * @param min - Minimum allowed value (clamped if below)\n * @param max - Maximum allowed value (clamped if above)\n * @returns Parsed integer within bounds, or default value\n */\nfunction safeParseInt(\n value: string | undefined,\n defaultVal: number,\n min: number,\n max: number\n): number {\n const logger = Logger.get('config');\n\n if (!value) {\n return defaultVal;\n }\n \n const parsed = parseInt(value, 10);\n \n if (isNaN(parsed)) {\n logger.warn(`Invalid number \"${value}\", using default ${defaultVal}`);\n return defaultVal;\n }\n \n if (parsed < min) {\n logger.warn(`Value ${parsed} below minimum ${min}, clamping to ${min}`);\n return min;\n }\n \n if (parsed > max) {\n logger.warn(`Value ${parsed} above maximum ${max}, clamping to ${max}`);\n return max;\n }\n \n return parsed;\n}\n\n// ============================================================================\n// Reasoning Effort Validation\n// ============================================================================\n\nconst VALID_REASONING_EFFORTS = ['low', 'medium', 'high'] as const;\ntype ReasoningEffort = typeof VALID_REASONING_EFFORTS[number];\n\nfunction parseReasoningEffort(value: string | undefined): ReasoningEffort {\n if (value && 
VALID_REASONING_EFFORTS.includes(value as ReasoningEffort)) {\n return value as ReasoningEffort;\n }\n return 'high';\n}\n\n// ============================================================================\n// Environment Parsing\n// ============================================================================\n\ninterface EnvConfig {\n SCRAPER_API_KEY: string;\n SEARCH_API_KEY: string | undefined;\n REDDIT_CLIENT_ID: string | undefined;\n REDDIT_CLIENT_SECRET: string | undefined;\n}\n\nlet cachedEnv: EnvConfig | null = null;\n\nexport function resetEnvCache(): void {\n cachedEnv = null;\n cachedResearch = null;\n cachedLlmExtraction = null;\n}\n\nexport function parseEnv(): EnvConfig {\n if (cachedEnv) return cachedEnv;\n cachedEnv = {\n SCRAPER_API_KEY: process.env.SCRAPEDO_API_KEY || '',\n SEARCH_API_KEY: process.env.SERPER_API_KEY || undefined,\n REDDIT_CLIENT_ID: process.env.REDDIT_CLIENT_ID || undefined,\n REDDIT_CLIENT_SECRET: process.env.REDDIT_CLIENT_SECRET || undefined,\n };\n return cachedEnv;\n}\n\n// ============================================================================\n// Research API Configuration\n// ============================================================================\n\ninterface ResearchConfig {\n readonly BASE_URL: string;\n readonly MODEL: string;\n readonly FALLBACK_MODEL: string;\n readonly API_KEY: string;\n readonly TIMEOUT_MS: number;\n readonly REASONING_EFFORT: 'low' | 'medium' | 'high';\n readonly MAX_URLS: number;\n}\n\nlet cachedResearch: ResearchConfig | null = null;\n\nfunction getResearch(): ResearchConfig {\n if (cachedResearch) return cachedResearch;\n cachedResearch = {\n BASE_URL: process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1',\n MODEL: process.env.RESEARCH_MODEL || 'x-ai/grok-4-fast',\n FALLBACK_MODEL: process.env.RESEARCH_FALLBACK_MODEL || 'google/gemini-2.5-flash',\n API_KEY: process.env.OPENROUTER_API_KEY || '',\n TIMEOUT_MS: safeParseInt(process.env.API_TIMEOUT_MS, 1800000, 1000, 3600000),\n 
REASONING_EFFORT: parseReasoningEffort(process.env.DEFAULT_REASONING_EFFORT),\n MAX_URLS: safeParseInt(process.env.DEFAULT_MAX_URLS, 100, 10, 200),\n };\n return cachedResearch;\n}\n\n// Lazy proxy so existing code using RESEARCH.X still works\nexport const RESEARCH: ResearchConfig = new Proxy({} as ResearchConfig, {\n get(_target, prop: string) {\n return getResearch()[prop as keyof ResearchConfig];\n },\n});\n\n// ============================================================================\n// MCP Server Configuration\n// ============================================================================\n\n// Version is now automatically read from package.json via version.ts\n// No need to manually update version strings anymore!\nexport const SERVER = {\n NAME: PACKAGE_NAME,\n VERSION: VERSION,\n DESCRIPTION: PACKAGE_DESCRIPTION,\n} as const;\n\n// ============================================================================\n// Capability Detection (which features are available based on ENV)\n// ============================================================================\n\nexport interface Capabilities {\n reddit: boolean; // REDDIT_CLIENT_ID + REDDIT_CLIENT_SECRET\n search: boolean; // SERPER_API_KEY\n scraping: boolean; // SCRAPEDO_API_KEY\n llmExtraction: boolean; // LLM_EXTRACTION_API_KEY or OPENROUTER_API_KEY\n}\n\nexport function getCapabilities(): Capabilities {\n const env = parseEnv();\n return {\n reddit: !!(env.REDDIT_CLIENT_ID && env.REDDIT_CLIENT_SECRET),\n search: !!env.SEARCH_API_KEY,\n scraping: !!env.SCRAPER_API_KEY,\n llmExtraction: !!LLM_EXTRACTION.API_KEY,\n };\n}\n\nexport function getMissingEnvMessage(capability: keyof Capabilities): string {\n const messages: Record<keyof Capabilities, string> = {\n reddit: '\u274C **Reddit tools unavailable.** Set `REDDIT_CLIENT_ID` and `REDDIT_CLIENT_SECRET` to enable `get-reddit-post`.\\n\\n\uD83D\uDC49 Create a Reddit app at: https://www.reddit.com/prefs/apps (select \"script\" type)',\n search: '\u274C 
**Search unavailable.** Set `SERPER_API_KEY` to enable `web-search` and `search-reddit`.\\n\\n\uD83D\uDC49 Get your free API key at: https://serper.dev (2,500 free queries)',\n scraping: '\u274C **Web scraping unavailable.** Set `SCRAPEDO_API_KEY` to enable `scrape-links`.\\n\\n\uD83D\uDC49 Sign up at: https://scrape.do (1,000 free credits)',\n llmExtraction: '\u26A0\uFE0F **AI extraction disabled.** The `use_llm` and `what_to_extract` features require `LLM_EXTRACTION_API_KEY` or `OPENROUTER_API_KEY`.\\n\\nScraping will work but without intelligent content filtering.',\n };\n return messages[capability];\n}\n\n// ============================================================================\n// Scraper Configuration (Scrape.do implementation)\n// ============================================================================\n\n// ============================================================================\n// Concurrency Limits \u2014 tuned for 2-core deployments\n//\n// Bottleneck analysis (Node.js single-threaded event loop):\n// SEARCH: Pure I/O + tiny JSON parse (~5KB). High concurrency safe.\n// SCRAPER: I/O + Turndown HTML\u2192MD conversion (20-50ms/page, synchronous).\n// Too many concurrent = burst of responses blocks event loop.\n// 20 concurrent \u00D7 30ms avg = 600ms worst-case event loop stall.\n// REDDIT: I/O + moderate JSON. Reddit rate-limits at ~60 req/min.\n// LLM: I/O-only locally, but remote inference uses multiple cores per\n// request. Default 10 keeps remote server responsive.\n// ============================================================================\n\nexport const CONCURRENCY = {\n /** Serper API \u2014 tiny JSON responses, pure I/O, no CPU cost */\n SEARCH: 30,\n /** Scrape.do \u2014 HTML responses (50-500KB), Turndown conversion is CPU work */\n SCRAPER: 20,\n /** Reddit API \u2014 moderate payloads, aggressive rate limiting (60 req/min) */\n REDDIT: 10,\n /** LLM extraction \u2014 remote inference bottleneck. 
Tune via LLM_CONCURRENCY env */\n LLM_EXTRACTION: safeParseInt(process.env.LLM_CONCURRENCY, 10, 1, 50),\n} as const;\n\nexport const SCRAPER = {\n BATCH_SIZE: 30,\n EXTRACTION_PREFIX: 'Extract from document only \u2014 never hallucinate or add external knowledge.',\n EXTRACTION_SUFFIX: 'First line = content, not preamble. No confirmation messages.',\n} as const;\n\n// ============================================================================\n// Research Compression Prefix/Suffix\n// ============================================================================\n\n// ============================================================================\n// Reddit Configuration\n// ============================================================================\n\nexport const REDDIT = {\n BATCH_SIZE: 10,\n MAX_WORDS_PER_POST: 20_000,\n MAX_WORDS_TOTAL: 100_000,\n FETCH_LIMIT_PER_POST: 500,\n MIN_POSTS: 1,\n MAX_POSTS: 50,\n RETRY_COUNT: 5,\n RETRY_DELAYS: [2000, 4000, 8000, 16000, 32000] as const,\n EXTRACTION_SUFFIX: `\n---\n\n\u26A0\uFE0F IMPORTANT: Extract and synthesize the key insights, opinions, and recommendations from these Reddit discussions. Focus on:\n- Common themes and consensus across posts\n- Specific recommendations with context\n- Contrasting viewpoints and debates\n- Real-world experiences and lessons learned\n- Technical details and implementation tips\n\nBe comprehensive but concise. 
Prioritize actionable insights.\n\n---`,\n} as const;\n\n// ============================================================================\n// CTR Weights for URL Ranking (inspired from CTR research)\n// ============================================================================\n\nexport const CTR_WEIGHTS: Record<number, number> = {\n 1: 100.00,\n 2: 60.00,\n 3: 48.89,\n 4: 33.33,\n 5: 28.89,\n 6: 26.44,\n 7: 24.44,\n 8: 17.78,\n 9: 13.33,\n 10: 12.56,\n} as const;\n\n// ============================================================================\n// LLM Extraction Model (uses OPENROUTER for scrape-links AI extraction)\n// ============================================================================\n\ntype LlmReasoningEffort = ReasoningEffort | 'none';\n\nfunction parseLlmReasoningEffort(value: string | undefined): LlmReasoningEffort {\n if (value === 'none') return 'none';\n if (value && VALID_REASONING_EFFORTS.includes(value as ReasoningEffort)) {\n return value as ReasoningEffort;\n }\n return 'low';\n}\n\ninterface LlmExtractionConfig {\n readonly MODEL: string;\n readonly BASE_URL: string;\n readonly API_KEY: string;\n readonly MAX_TOKENS: number;\n readonly REASONING_EFFORT: LlmReasoningEffort;\n}\n\nlet cachedLlmExtraction: LlmExtractionConfig | null = null;\n\nfunction getLlmExtraction(): LlmExtractionConfig {\n if (cachedLlmExtraction) return cachedLlmExtraction;\n cachedLlmExtraction = {\n MODEL: process.env.LLM_EXTRACTION_MODEL || 'openai/gpt-oss-120b:nitro',\n BASE_URL: process.env.LLM_EXTRACTION_BASE_URL || process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1',\n API_KEY: process.env.LLM_EXTRACTION_API_KEY || process.env.OPENROUTER_API_KEY || '',\n MAX_TOKENS: 8000,\n REASONING_EFFORT: parseLlmReasoningEffort(process.env.LLM_EXTRACTION_REASONING),\n };\n return cachedLlmExtraction;\n}\n\nexport const LLM_EXTRACTION: LlmExtractionConfig = new Proxy({} as LlmExtractionConfig, {\n get(_target, prop: string) {\n return getLlmExtraction()[prop 
as keyof LlmExtractionConfig];\n },\n});\n\n"],
|
|
5
|
-
"mappings": "AAKA,SAAS,cAAc;AAEvB,SAAS,SAAS,cAAc,2BAA2B;AAgB3D,SAAS,aACP,OACA,YACA,KACA,KACQ;AACR,QAAM,SAAS,OAAO,IAAI,QAAQ;AAElC,MAAI,CAAC,OAAO;AACV,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,SAAS,OAAO,EAAE;AAEjC,MAAI,MAAM,MAAM,GAAG;AACjB,WAAO,KAAK,mBAAmB,KAAK,oBAAoB,UAAU,EAAE;AACpE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAMA,MAAM,0BAA0B,CAAC,OAAO,UAAU,MAAM;AAGxD,SAAS,qBAAqB,OAA4C;AACxE,MAAI,SAAS,wBAAwB,SAAS,KAAwB,GAAG;AACvE,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAaA,IAAI,YAA8B;AAE3B,SAAS,gBAAsB;AACpC,cAAY;AACZ,mBAAiB;AACjB,wBAAsB;AACxB;AAEO,SAAS,WAAsB;AACpC,MAAI,UAAW,QAAO;AACtB,cAAY;AAAA,IACV,iBAAiB,QAAQ,IAAI,oBAAoB;AAAA,IACjD,gBAAgB,QAAQ,IAAI,kBAAkB;AAAA,IAC9C,kBAAkB,QAAQ,IAAI,oBAAoB;AAAA,IAClD,sBAAsB,QAAQ,IAAI,wBAAwB;AAAA,EAC5D;AACA,SAAO;AACT;AAgBA,IAAI,iBAAwC;AAE5C,SAAS,cAA8B;AACrC,MAAI,eAAgB,QAAO;AAC3B,mBAAiB;AAAA,IACf,UAAU,QAAQ,IAAI,uBAAuB;AAAA,IAC7C,OAAO,QAAQ,IAAI,kBAAkB;AAAA,IACrC,gBAAgB,QAAQ,IAAI,2BAA2B;AAAA,IACvD,SAAS,QAAQ,IAAI,sBAAsB;AAAA,IAC3C,YAAY,aAAa,QAAQ,IAAI,gBAAgB,MAAS,KAAM,IAAO;AAAA,IAC3E,kBAAkB,qBAAqB,QAAQ,IAAI,wBAAwB;AAAA,IAC3E,UAAU,aAAa,QAAQ,IAAI,kBAAkB,KAAK,IAAI,GAAG;AAAA,EACnE;AACA,SAAO;AACT;AAGO,MAAM,WAA2B,IAAI,MAAM,CAAC,GAAqB;AAAA,EACtE,IAAI,SAAS,MAAc;AACzB,WAAO,YAAY,EAAE,IAA4B;AAAA,EACnD;AACF,CAAC;AAQM,MAAM,SAAS;AAAA,EACpB,MAAM;AAAA,EACN;AAAA,EACA,aAAa;AACf;AAaO,SAAS,kBAAgC;AAC9C,QAAM,MAAM,SAAS;AACrB,SAAO;AAAA,IACL,QAAQ,CAAC,EAAE,IAAI,oBAAoB,IAAI;AAAA,IACvC,QAAQ,CAAC,CAAC,IAAI;AAAA,IACd,UAAU,CAAC,CAAC,IAAI;AAAA,IAChB,eAAe,CAAC,CAAC,eAAe;AAAA,EAClC;AACF;AAEO,SAAS,qBAAqB,YAAwC;AAC3E,QAAM,WAA+C;AAAA,IACnD,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,UAAU;AAAA,IACV,eAAe;AAAA,EACjB;AACA,SAAO,SAAS,UAAU;AAC5B;AAmBO,MAAM,cAAc;AAAA;AAAA,EAEzB,QAAQ;AAAA;AAAA,EAER,SAAS;AAAA;AAAA,EAET,QAAQ;AAAA;AAAA,EAER,gBAAgB,aAAa,QAAQ,IAAI,iBAAiB,IAAI,GAAG,EAAE;AACrE;AAEO,MAAM,UAAU;AAAA,EACrB,YAAY;AAAA,EACZ,mBAAmB;AAAA,E
ACnB,mBAAmB;AACrB;AAUO,MAAM,SAAS;AAAA,EACpB,YAAY;AAAA,EACZ,oBAAoB;AAAA,EACpB,iBAAiB;AAAA,EACjB,
|
|
4
|
+
"sourcesContent": ["/**\n * Consolidated configuration\n * All environment variables, constants, and LLM config in one place\n */\n\nimport { Logger } from 'mcp-use';\n\nimport { VERSION, PACKAGE_NAME, PACKAGE_DESCRIPTION } from '../version.js';\n\n// Import version utilities (not re-exported - use directly from version.ts if needed externally)\n\n// ============================================================================\n// Safe Integer Parsing Helper\n// ============================================================================\n\n/**\n * Safely parse an integer from environment variable with bounds checking\n * @param value - The string value to parse (from process.env)\n * @param defaultVal - Default value if parsing fails or value is undefined\n * @param min - Minimum allowed value (clamped if below)\n * @param max - Maximum allowed value (clamped if above)\n * @returns Parsed integer within bounds, or default value\n */\nfunction safeParseInt(\n value: string | undefined,\n defaultVal: number,\n min: number,\n max: number\n): number {\n const logger = Logger.get('config');\n\n if (!value) {\n return defaultVal;\n }\n \n const parsed = parseInt(value, 10);\n \n if (isNaN(parsed)) {\n logger.warn(`Invalid number \"${value}\", using default ${defaultVal}`);\n return defaultVal;\n }\n \n if (parsed < min) {\n logger.warn(`Value ${parsed} below minimum ${min}, clamping to ${min}`);\n return min;\n }\n \n if (parsed > max) {\n logger.warn(`Value ${parsed} above maximum ${max}, clamping to ${max}`);\n return max;\n }\n \n return parsed;\n}\n\n// ============================================================================\n// Reasoning Effort Validation\n// ============================================================================\n\nconst VALID_REASONING_EFFORTS = ['low', 'medium', 'high'] as const;\ntype ReasoningEffort = typeof VALID_REASONING_EFFORTS[number];\n\nfunction parseReasoningEffort(value: string | undefined): ReasoningEffort {\n if (value && 
VALID_REASONING_EFFORTS.includes(value as ReasoningEffort)) {\n return value as ReasoningEffort;\n }\n return 'high';\n}\n\n// ============================================================================\n// Environment Parsing\n// ============================================================================\n\ninterface EnvConfig {\n SCRAPER_API_KEY: string;\n SEARCH_API_KEY: string | undefined;\n REDDIT_CLIENT_ID: string | undefined;\n REDDIT_CLIENT_SECRET: string | undefined;\n}\n\nlet cachedEnv: EnvConfig | null = null;\n\nexport function resetEnvCache(): void {\n cachedEnv = null;\n cachedResearch = null;\n cachedLlmExtraction = null;\n}\n\nexport function parseEnv(): EnvConfig {\n if (cachedEnv) return cachedEnv;\n cachedEnv = {\n SCRAPER_API_KEY: process.env.SCRAPEDO_API_KEY || '',\n SEARCH_API_KEY: process.env.SERPER_API_KEY || undefined,\n REDDIT_CLIENT_ID: process.env.REDDIT_CLIENT_ID || undefined,\n REDDIT_CLIENT_SECRET: process.env.REDDIT_CLIENT_SECRET || undefined,\n };\n return cachedEnv;\n}\n\n// ============================================================================\n// Research API Configuration\n// ============================================================================\n\ninterface ResearchConfig {\n readonly BASE_URL: string;\n readonly MODEL: string;\n readonly FALLBACK_MODEL: string;\n readonly API_KEY: string;\n readonly TIMEOUT_MS: number;\n readonly REASONING_EFFORT: 'low' | 'medium' | 'high';\n readonly MAX_URLS: number;\n}\n\nlet cachedResearch: ResearchConfig | null = null;\n\nfunction getResearch(): ResearchConfig {\n if (cachedResearch) return cachedResearch;\n cachedResearch = {\n BASE_URL: process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1',\n MODEL: process.env.RESEARCH_MODEL || 'x-ai/grok-4-fast',\n FALLBACK_MODEL: process.env.RESEARCH_FALLBACK_MODEL || 'google/gemini-2.5-flash',\n API_KEY: process.env.OPENROUTER_API_KEY || '',\n TIMEOUT_MS: safeParseInt(process.env.API_TIMEOUT_MS, 1800000, 1000, 3600000),\n 
REASONING_EFFORT: parseReasoningEffort(process.env.DEFAULT_REASONING_EFFORT),\n MAX_URLS: safeParseInt(process.env.DEFAULT_MAX_URLS, 100, 10, 200),\n };\n return cachedResearch;\n}\n\n// Lazy proxy so existing code using RESEARCH.X still works\nexport const RESEARCH: ResearchConfig = new Proxy({} as ResearchConfig, {\n get(_target, prop: string) {\n return getResearch()[prop as keyof ResearchConfig];\n },\n});\n\n// ============================================================================\n// MCP Server Configuration\n// ============================================================================\n\n// Version is now automatically read from package.json via version.ts\n// No need to manually update version strings anymore!\nexport const SERVER = {\n NAME: PACKAGE_NAME,\n VERSION: VERSION,\n DESCRIPTION: PACKAGE_DESCRIPTION,\n} as const;\n\n// ============================================================================\n// Capability Detection (which features are available based on ENV)\n// ============================================================================\n\nexport interface Capabilities {\n reddit: boolean; // REDDIT_CLIENT_ID + REDDIT_CLIENT_SECRET\n search: boolean; // SERPER_API_KEY\n scraping: boolean; // SCRAPEDO_API_KEY\n llmExtraction: boolean; // LLM_EXTRACTION_API_KEY or OPENROUTER_API_KEY\n}\n\nexport function getCapabilities(): Capabilities {\n const env = parseEnv();\n return {\n reddit: !!(env.REDDIT_CLIENT_ID && env.REDDIT_CLIENT_SECRET),\n search: !!env.SEARCH_API_KEY,\n scraping: !!env.SCRAPER_API_KEY,\n llmExtraction: !!LLM_EXTRACTION.API_KEY,\n };\n}\n\nexport function getMissingEnvMessage(capability: keyof Capabilities): string {\n const messages: Record<keyof Capabilities, string> = {\n reddit: '\u274C **Reddit tools unavailable.** Set `REDDIT_CLIENT_ID` and `REDDIT_CLIENT_SECRET` to enable `get-reddit-post`.\\n\\n\uD83D\uDC49 Create a Reddit app at: https://www.reddit.com/prefs/apps (select \"script\" type)',\n search: '\u274C 
**Search unavailable.** Set `SERPER_API_KEY` to enable `web-search` and `search-reddit`.\\n\\n\uD83D\uDC49 Get your free API key at: https://serper.dev (2,500 free queries)',\n scraping: '\u274C **Web scraping unavailable.** Set `SCRAPEDO_API_KEY` to enable `scrape-links`.\\n\\n\uD83D\uDC49 Sign up at: https://scrape.do (1,000 free credits)',\n llmExtraction: '\u26A0\uFE0F **AI extraction disabled.** The `use_llm` and `what_to_extract` features require `LLM_EXTRACTION_API_KEY` or `OPENROUTER_API_KEY`.\\n\\nScraping will work but without intelligent content filtering.',\n };\n return messages[capability];\n}\n\n// ============================================================================\n// Scraper Configuration (Scrape.do implementation)\n// ============================================================================\n\n// ============================================================================\n// Concurrency Limits \u2014 tuned for 2-core deployments\n//\n// Bottleneck analysis (Node.js single-threaded event loop):\n// SEARCH: Pure I/O + tiny JSON parse (~5KB). High concurrency safe.\n// SCRAPER: I/O + Turndown HTML\u2192MD conversion (20-50ms/page, synchronous).\n// Too many concurrent = burst of responses blocks event loop.\n// 20 concurrent \u00D7 30ms avg = 600ms worst-case event loop stall.\n// REDDIT: I/O + moderate JSON. Reddit rate-limits at ~60 req/min.\n// LLM: I/O-only locally, but remote inference uses multiple cores per\n// request. Default 10 keeps remote server responsive.\n// ============================================================================\n\nexport const CONCURRENCY = {\n /** Serper API \u2014 tiny JSON responses, pure I/O, no CPU cost */\n SEARCH: 30,\n /** Scrape.do \u2014 HTML responses (50-500KB), Turndown conversion is CPU work */\n SCRAPER: 20,\n /** Reddit API \u2014 moderate payloads, aggressive rate limiting (60 req/min) */\n REDDIT: 10,\n /** LLM extraction \u2014 remote inference bottleneck. 
Tune via LLM_CONCURRENCY env */\n LLM_EXTRACTION: safeParseInt(process.env.LLM_CONCURRENCY, 10, 1, 50),\n} as const;\n\nexport const SCRAPER = {\n BATCH_SIZE: 30,\n EXTRACTION_PREFIX: 'Extract from document only \u2014 never hallucinate or add external knowledge.',\n EXTRACTION_SUFFIX: 'First line = content, not preamble. No confirmation messages.',\n} as const;\n\n// ============================================================================\n// Research Compression Prefix/Suffix\n// ============================================================================\n\n// ============================================================================\n// Reddit Configuration\n// ============================================================================\n\nexport const REDDIT = {\n BATCH_SIZE: 10,\n MAX_WORDS_PER_POST: 50_000,\n MAX_WORDS_TOTAL: 500_000,\n MIN_POSTS: 1,\n MAX_POSTS: 50,\n RETRY_COUNT: 5,\n RETRY_DELAYS: [2000, 4000, 8000, 16000, 32000] as const,\n EXTRACTION_SUFFIX: `\n---\n\n\u26A0\uFE0F IMPORTANT: Extract and synthesize the key insights, opinions, and recommendations from these Reddit discussions. Focus on:\n- Common themes and consensus across posts\n- Specific recommendations with context\n- Contrasting viewpoints and debates\n- Real-world experiences and lessons learned\n- Technical details and implementation tips\n\nBe comprehensive but concise. 
Prioritize actionable insights.\n\n---`,\n} as const;\n\n// ============================================================================\n// CTR Weights for URL Ranking (inspired from CTR research)\n// ============================================================================\n\nexport const CTR_WEIGHTS: Record<number, number> = {\n 1: 100.00,\n 2: 60.00,\n 3: 48.89,\n 4: 33.33,\n 5: 28.89,\n 6: 26.44,\n 7: 24.44,\n 8: 17.78,\n 9: 13.33,\n 10: 12.56,\n} as const;\n\n// ============================================================================\n// LLM Extraction Model (uses OPENROUTER for scrape-links AI extraction)\n// ============================================================================\n\ntype LlmReasoningEffort = ReasoningEffort | 'none';\n\nfunction parseLlmReasoningEffort(value: string | undefined): LlmReasoningEffort {\n if (value === 'none') return 'none';\n if (value && VALID_REASONING_EFFORTS.includes(value as ReasoningEffort)) {\n return value as ReasoningEffort;\n }\n return 'low';\n}\n\ninterface LlmExtractionConfig {\n readonly MODEL: string;\n readonly BASE_URL: string;\n readonly API_KEY: string;\n readonly MAX_TOKENS: number;\n readonly REASONING_EFFORT: LlmReasoningEffort;\n}\n\nlet cachedLlmExtraction: LlmExtractionConfig | null = null;\n\nfunction getLlmExtraction(): LlmExtractionConfig {\n if (cachedLlmExtraction) return cachedLlmExtraction;\n cachedLlmExtraction = {\n MODEL: process.env.LLM_EXTRACTION_MODEL || 'gpt-5.4-mini',\n BASE_URL: process.env.LLM_EXTRACTION_BASE_URL || process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1',\n API_KEY: process.env.LLM_EXTRACTION_API_KEY || process.env.OPENROUTER_API_KEY || '',\n MAX_TOKENS: 8000,\n REASONING_EFFORT: parseLlmReasoningEffort(process.env.LLM_EXTRACTION_REASONING),\n };\n return cachedLlmExtraction;\n}\n\nexport const LLM_EXTRACTION: LlmExtractionConfig = new Proxy({} as LlmExtractionConfig, {\n get(_target, prop: string) {\n return getLlmExtraction()[prop as keyof 
LlmExtractionConfig];\n },\n});\n\n"],
|
|
5
|
+
"mappings": "AAKA,SAAS,cAAc;AAEvB,SAAS,SAAS,cAAc,2BAA2B;AAgB3D,SAAS,aACP,OACA,YACA,KACA,KACQ;AACR,QAAM,SAAS,OAAO,IAAI,QAAQ;AAElC,MAAI,CAAC,OAAO;AACV,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,SAAS,OAAO,EAAE;AAEjC,MAAI,MAAM,MAAM,GAAG;AACjB,WAAO,KAAK,mBAAmB,KAAK,oBAAoB,UAAU,EAAE;AACpE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAMA,MAAM,0BAA0B,CAAC,OAAO,UAAU,MAAM;AAGxD,SAAS,qBAAqB,OAA4C;AACxE,MAAI,SAAS,wBAAwB,SAAS,KAAwB,GAAG;AACvE,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAaA,IAAI,YAA8B;AAE3B,SAAS,gBAAsB;AACpC,cAAY;AACZ,mBAAiB;AACjB,wBAAsB;AACxB;AAEO,SAAS,WAAsB;AACpC,MAAI,UAAW,QAAO;AACtB,cAAY;AAAA,IACV,iBAAiB,QAAQ,IAAI,oBAAoB;AAAA,IACjD,gBAAgB,QAAQ,IAAI,kBAAkB;AAAA,IAC9C,kBAAkB,QAAQ,IAAI,oBAAoB;AAAA,IAClD,sBAAsB,QAAQ,IAAI,wBAAwB;AAAA,EAC5D;AACA,SAAO;AACT;AAgBA,IAAI,iBAAwC;AAE5C,SAAS,cAA8B;AACrC,MAAI,eAAgB,QAAO;AAC3B,mBAAiB;AAAA,IACf,UAAU,QAAQ,IAAI,uBAAuB;AAAA,IAC7C,OAAO,QAAQ,IAAI,kBAAkB;AAAA,IACrC,gBAAgB,QAAQ,IAAI,2BAA2B;AAAA,IACvD,SAAS,QAAQ,IAAI,sBAAsB;AAAA,IAC3C,YAAY,aAAa,QAAQ,IAAI,gBAAgB,MAAS,KAAM,IAAO;AAAA,IAC3E,kBAAkB,qBAAqB,QAAQ,IAAI,wBAAwB;AAAA,IAC3E,UAAU,aAAa,QAAQ,IAAI,kBAAkB,KAAK,IAAI,GAAG;AAAA,EACnE;AACA,SAAO;AACT;AAGO,MAAM,WAA2B,IAAI,MAAM,CAAC,GAAqB;AAAA,EACtE,IAAI,SAAS,MAAc;AACzB,WAAO,YAAY,EAAE,IAA4B;AAAA,EACnD;AACF,CAAC;AAQM,MAAM,SAAS;AAAA,EACpB,MAAM;AAAA,EACN;AAAA,EACA,aAAa;AACf;AAaO,SAAS,kBAAgC;AAC9C,QAAM,MAAM,SAAS;AACrB,SAAO;AAAA,IACL,QAAQ,CAAC,EAAE,IAAI,oBAAoB,IAAI;AAAA,IACvC,QAAQ,CAAC,CAAC,IAAI;AAAA,IACd,UAAU,CAAC,CAAC,IAAI;AAAA,IAChB,eAAe,CAAC,CAAC,eAAe;AAAA,EAClC;AACF;AAEO,SAAS,qBAAqB,YAAwC;AAC3E,QAAM,WAA+C;AAAA,IACnD,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,UAAU;AAAA,IACV,eAAe;AAAA,EACjB;AACA,SAAO,SAAS,UAAU;AAC5B;AAmBO,MAAM,cAAc;AAAA;AAAA,EAEzB,QAAQ;AAAA;AAAA,EAER,SAAS;AAAA;AAAA,EAET,QAAQ;AAAA;AAAA,EAER,gBAAgB,aAAa,QAAQ,IAAI,iBAAiB,IAAI,GAAG,EAAE;AACrE;AAEO,MAAM,UAAU;AAAA,EACrB,YAAY;AAAA,EACZ,mBAAmB;AAAA,E
ACnB,mBAAmB;AACrB;AAUO,MAAM,SAAS;AAAA,EACpB,YAAY;AAAA,EACZ,oBAAoB;AAAA,EACpB,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,WAAW;AAAA,EACX,aAAa;AAAA,EACb,cAAc,CAAC,KAAM,KAAM,KAAM,MAAO,IAAK;AAAA,EAC7C,mBAAmB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAarB;AAMO,MAAM,cAAsC;AAAA,EACjD,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,IAAI;AACN;AAQA,SAAS,wBAAwB,OAA+C;AAC9E,MAAI,UAAU,OAAQ,QAAO;AAC7B,MAAI,SAAS,wBAAwB,SAAS,KAAwB,GAAG;AACvE,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAUA,IAAI,sBAAkD;AAEtD,SAAS,mBAAwC;AAC/C,MAAI,oBAAqB,QAAO;AAChC,wBAAsB;AAAA,IACpB,OAAO,QAAQ,IAAI,wBAAwB;AAAA,IAC3C,UAAU,QAAQ,IAAI,2BAA2B,QAAQ,IAAI,uBAAuB;AAAA,IACpF,SAAS,QAAQ,IAAI,0BAA0B,QAAQ,IAAI,sBAAsB;AAAA,IACjF,YAAY;AAAA,IACZ,kBAAkB,wBAAwB,QAAQ,IAAI,wBAAwB;AAAA,EAChF;AACA,SAAO;AACT;AAEO,MAAM,iBAAsC,IAAI,MAAM,CAAC,GAA0B;AAAA,EACtF,IAAI,SAAS,MAAc;AACzB,WAAO,iBAAiB,EAAE,IAAiC;AAAA,EAC7D;AACF,CAAC;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
|
@@ -1,43 +1,32 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
const searchRedditParamsSchema = z.object({
|
|
3
3
|
queries: z.array(
|
|
4
|
-
z.string().min(1, { message: "search-reddit: Query cannot be empty" }).describe('A
|
|
5
|
-
).min(1, { message: "search-reddit: At least 1 query
|
|
6
|
-
'Array of 1\u201350 Reddit search queries. RECOMMENDED: 3\u20137 for solid consensus ranking (results are aggregated across queries and URLs appearing in multiple searches are flagged as high-confidence). Each query should target a different angle: direct topic, "best of" lists, comparisons, pain points, subreddit-specific (e.g., "r/programming topic"), or year-specific. Single-query lookups work but produce no consensus signal. More queries = better signal-to-noise.'
|
|
7
|
-
),
|
|
8
|
-
date_after: z.string().optional().describe("Optional lower date bound in YYYY-MM-DD format.")
|
|
9
|
-
}).strict();
|
|
10
|
-
const getRedditPostParamsSchema = z.object({
|
|
11
|
-
urls: z.array(
|
|
12
|
-
z.string().url({ message: "get-reddit-post: Each URL must be valid" }).describe('A full Reddit post URL (e.g., "https://www.reddit.com/r/subreddit/comments/id/title/"). Must be a valid URL pointing to a Reddit post. Typically sourced from search-reddit results.')
|
|
13
|
-
).min(1, { message: "get-reddit-post: At least 1 Reddit post URL is required" }).max(50, { message: "get-reddit-post: Maximum 50 Reddit post URLs allowed" }).describe("Array of 1\u201350 Reddit post URLs. RECOMMENDED: 2\u201310 for comparative research across multiple discussions. Supply URLs from search-reddit output or any Reddit post links. Each post gets up to 20K words of threaded comments within a 100K total word budget. More URLs = broader community perspective but less depth per post."),
|
|
14
|
-
fetch_comments: z.boolean().default(true).describe("Fetch threaded comment trees for each post. Defaults to true. Comments include author, score, OP markers, and nested replies up to the word budget. Set false only when you need post titles/selftext without community discussion."),
|
|
15
|
-
use_llm: z.boolean().default(false).describe("Run AI synthesis over fetched Reddit content. Defaults to false (recommended) \u2014 raw threaded comments preserve the full community voice. Only set true when you have lots of posts and individual comments don't matter, e.g., scanning 20+ threads for a quick consensus summary."),
|
|
16
|
-
what_to_extract: z.string().max(1e3, { message: "get-reddit-post: what_to_extract is too long" }).optional().describe("Optional extraction instructions used only when use_llm=true.")
|
|
4
|
+
z.string().min(1, { message: "search-reddit: Query cannot be empty" }).describe('A Reddit search query. "site:reddit.com" is appended automatically.')
|
|
5
|
+
).min(1, { message: "search-reddit: At least 1 query required" }).max(100, { message: "search-reddit: Maximum 100 queries allowed" }).describe("Search queries for Reddit. Each query is automatically scoped to reddit.com via Google. Returns deduplicated Reddit post URLs.")
|
|
17
6
|
}).strict();
|
|
18
7
|
const searchRedditOutputSchema = z.object({
|
|
19
|
-
content: z.string().describe("
|
|
8
|
+
content: z.string().describe("Newline-separated list of unique Reddit URLs."),
|
|
20
9
|
metadata: z.object({
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
10
|
+
total_items: z.number().int().nonnegative().describe("Number of queries executed."),
|
|
11
|
+
successful: z.number().int().nonnegative().describe("Queries that returned results."),
|
|
12
|
+
failed: z.number().int().nonnegative().describe("Queries that failed."),
|
|
13
|
+
execution_time_ms: z.number().int().nonnegative().describe("Wall clock time in milliseconds.")
|
|
14
|
+
}).strict()
|
|
15
|
+
}).strict();
|
|
16
|
+
const getRedditPostParamsSchema = z.object({
|
|
17
|
+
urls: z.array(
|
|
18
|
+
z.string().url({ message: "get-reddit-post: Each URL must be valid" }).describe("A Reddit post URL.")
|
|
19
|
+
).min(1, { message: "get-reddit-post: At least 1 URL required" }).max(100, { message: "get-reddit-post: Maximum 100 URLs allowed" }).describe("Reddit post URLs to fetch. Each post is returned with its full threaded comment tree.")
|
|
25
20
|
}).strict();
|
|
26
21
|
const getRedditPostOutputSchema = z.object({
|
|
27
|
-
content: z.string().describe("
|
|
22
|
+
content: z.string().describe("Raw Reddit posts with threaded comments including author, score, and OP markers."),
|
|
28
23
|
metadata: z.object({
|
|
29
|
-
|
|
30
|
-
successful: z.number().int().nonnegative().describe("
|
|
31
|
-
failed: z.number().int().nonnegative().describe("
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
llm_requested: z.boolean().describe("Whether LLM extraction was requested."),
|
|
36
|
-
llm_available: z.boolean().describe("Whether LLM extraction was actually available at runtime."),
|
|
37
|
-
llm_failures: z.number().int().nonnegative().describe("Count of posts where optional LLM extraction failed or was skipped."),
|
|
38
|
-
total_batches: z.number().int().nonnegative().describe("Number of Reddit API batches executed."),
|
|
39
|
-
rate_limit_hits: z.number().int().nonnegative().describe("Observed Reddit API rate-limit retries during the batch.")
|
|
40
|
-
}).strict().describe("Structured metadata about the Reddit post fetch batch.")
|
|
24
|
+
total_items: z.number().int().nonnegative().describe("Number of URLs processed."),
|
|
25
|
+
successful: z.number().int().nonnegative().describe("Posts fetched successfully."),
|
|
26
|
+
failed: z.number().int().nonnegative().describe("Posts that failed to fetch."),
|
|
27
|
+
execution_time_ms: z.number().int().nonnegative().describe("Wall clock time in milliseconds."),
|
|
28
|
+
rate_limit_hits: z.number().int().nonnegative().describe("Reddit API rate-limit retries.")
|
|
29
|
+
}).strict()
|
|
41
30
|
}).strict();
|
|
42
31
|
export {
|
|
43
32
|
getRedditPostOutputSchema,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/schemas/reddit.ts"],
|
|
4
|
-
"sourcesContent": ["import { z } from 'zod';\n\n// ============================================================================\n// search-reddit
|
|
5
|
-
"mappings": "AAAA,SAAS,SAAS;AAMX,MAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,SAAS,EACN;AAAA,IACC,
|
|
4
|
+
"sourcesContent": ["import { z } from 'zod';\n\n// ============================================================================\n// search-reddit\n// ============================================================================\n\nexport const searchRedditParamsSchema = z.object({\n queries: z\n .array(\n z.string()\n .min(1, { message: 'search-reddit: Query cannot be empty' })\n .describe('A Reddit search query. \"site:reddit.com\" is appended automatically.'),\n )\n .min(1, { message: 'search-reddit: At least 1 query required' })\n .max(100, { message: 'search-reddit: Maximum 100 queries allowed' })\n .describe('Search queries for Reddit. Each query is automatically scoped to reddit.com via Google. Returns deduplicated Reddit post URLs.'),\n}).strict();\n\nexport type SearchRedditParams = z.infer<typeof searchRedditParamsSchema>;\n\nexport const searchRedditOutputSchema = z.object({\n content: z\n .string()\n .describe('Newline-separated list of unique Reddit URLs.'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of queries executed.'),\n successful: z.number().int().nonnegative().describe('Queries that returned results.'),\n failed: z.number().int().nonnegative().describe('Queries that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n }).strict(),\n}).strict();\n\nexport type SearchRedditOutput = z.infer<typeof searchRedditOutputSchema>;\n\n// ============================================================================\n// get-reddit-post\n// ============================================================================\n\nexport const getRedditPostParamsSchema = z.object({\n urls: z\n .array(\n z.string()\n .url({ message: 'get-reddit-post: Each URL must be valid' })\n .describe('A Reddit post URL.'),\n )\n .min(1, { message: 'get-reddit-post: At least 1 URL required' })\n .max(100, { message: 'get-reddit-post: Maximum 100 URLs allowed' })\n .describe('Reddit post 
URLs to fetch. Each post is returned with its full threaded comment tree.'),\n}).strict();\n\nexport type GetRedditPostParams = z.infer<typeof getRedditPostParamsSchema>;\n\nexport const getRedditPostOutputSchema = z.object({\n content: z\n .string()\n .describe('Raw Reddit posts with threaded comments including author, score, and OP markers.'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of URLs processed.'),\n successful: z.number().int().nonnegative().describe('Posts fetched successfully.'),\n failed: z.number().int().nonnegative().describe('Posts that failed to fetch.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n rate_limit_hits: z.number().int().nonnegative().describe('Reddit API rate-limit retries.'),\n }).strict(),\n}).strict();\n\nexport type GetRedditPostOutput = z.infer<typeof getRedditPostOutputSchema>;\n"],
|
|
5
|
+
"mappings": "AAAA,SAAS,SAAS;AAMX,MAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,SAAS,EACN;AAAA,IACC,EAAE,OAAO,EACN,IAAI,GAAG,EAAE,SAAS,uCAAuC,CAAC,EAC1D,SAAS,qEAAqE;AAAA,EACnF,EACC,IAAI,GAAG,EAAE,SAAS,2CAA2C,CAAC,EAC9D,IAAI,KAAK,EAAE,SAAS,6CAA6C,CAAC,EAClE,SAAS,gIAAgI;AAC9I,CAAC,EAAE,OAAO;AAIH,MAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,SAAS,EACN,OAAO,EACP,SAAS,+CAA+C;AAAA,EAC3D,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IAClF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,gCAAgC;AAAA,IACpF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,sBAAsB;AAAA,IACtE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,EAC/F,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;AAQH,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,MAAM,EACH;AAAA,IACC,EAAE,OAAO,EACN,IAAI,EAAE,SAAS,0CAA0C,CAAC,EAC1D,SAAS,oBAAoB;AAAA,EAClC,EACC,IAAI,GAAG,EAAE,SAAS,2CAA2C,CAAC,EAC9D,IAAI,KAAK,EAAE,SAAS,4CAA4C,CAAC,EACjE,SAAS,uFAAuF;AACrG,CAAC,EAAE,OAAO;AAIH,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,SAAS,EACN,OAAO,EACP,SAAS,kFAAkF;AAAA,EAC9F,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,2BAA2B;AAAA,IAChF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IACjF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IAC7E,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,iBAAiB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,gCAAgC;AAAA,EAC3F,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
|
@@ -1,29 +1,21 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
-
const urlSchema = z.string(
|
|
2
|
+
const urlSchema = z.string().url({ message: "scrape-links: Invalid URL format" }).refine(
|
|
3
3
|
(url) => url.startsWith("http://") || url.startsWith("https://"),
|
|
4
|
-
{ message: "scrape-links: URL must use http:// or https://
|
|
5
|
-
).describe("A fully-qualified HTTP or HTTPS URL to
|
|
6
|
-
const
|
|
7
|
-
urls: z.array(urlSchema, {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
timeout: z.number({ error: "scrape-links: Timeout must be a number" }).min(5, { message: "scrape-links: Timeout must be at least 5 seconds" }).max(120, { message: "scrape-links: Timeout cannot exceed 120 seconds" }).default(30).describe("Timeout in seconds for each URL"),
|
|
11
|
-
use_llm: z.boolean({ error: "scrape-links: use_llm must be a boolean" }).default(true).describe("AI extraction enabled by default (requires OPENROUTER_API_KEY). Auto-filters nav/ads/footers, extracts ONLY what you specify. Set false only for raw HTML debugging."),
|
|
12
|
-
what_to_extract: z.string().max(1e3, { message: "scrape-links: Extraction instructions too long (max 1000 characters)" }).optional().describe('Extraction instructions for AI. Will be wrapped with compression prefix+suffix automatically. Formula: "Extract [target1] | [target2] | [target3] with focus on [aspect1], [aspect2]". Min 3 targets with | separator. Be specific (pricing tiers not pricing). Aim 5-10 targets.')
|
|
13
|
-
};
|
|
14
|
-
const scrapeLinksParamsSchema = z.object(scrapeLinksParamsShape).strict();
|
|
4
|
+
{ message: "scrape-links: URL must use http:// or https://" }
|
|
5
|
+
).describe("A fully-qualified HTTP or HTTPS URL to scrape.");
|
|
6
|
+
const scrapeLinksParamsSchema = z.object({
|
|
7
|
+
urls: z.array(urlSchema).min(1, { message: "scrape-links: At least 1 URL required" }).max(100, { message: "scrape-links: Maximum 100 URLs allowed" }).describe("Web page URLs to scrape and extract content from."),
|
|
8
|
+
extract: z.string().min(5, { message: "scrape-links: extract must be at least 5 characters" }).max(1e3, { message: "scrape-links: extract too long (max 1000 chars)" }).describe('What to pull from each page. The LLM reads the scraped content and returns only what you specify. Be specific: "pricing tiers | free tier limits | enterprise contact info" not "pricing".')
|
|
9
|
+
}).strict();
|
|
15
10
|
const scrapeLinksOutputSchema = z.object({
|
|
16
|
-
content: z.string().describe("
|
|
11
|
+
content: z.string().describe("LLM-extracted content from scraped pages per the extract instructions."),
|
|
17
12
|
metadata: z.object({
|
|
18
|
-
|
|
19
|
-
successful: z.number().int().nonnegative().describe("
|
|
20
|
-
failed: z.number().int().nonnegative().describe("
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
total_token_budget: z.number().int().nonnegative().optional().describe("Overall token budget available for extraction."),
|
|
25
|
-
batches_processed: z.number().int().nonnegative().optional().describe("Number of scrape batches executed.")
|
|
26
|
-
}).strict().describe("Structured metadata about the scrape batch.")
|
|
13
|
+
total_items: z.number().int().nonnegative().describe("Number of URLs processed."),
|
|
14
|
+
successful: z.number().int().nonnegative().describe("URLs fetched successfully."),
|
|
15
|
+
failed: z.number().int().nonnegative().describe("URLs that failed."),
|
|
16
|
+
execution_time_ms: z.number().int().nonnegative().describe("Wall clock time in milliseconds."),
|
|
17
|
+
total_credits: z.number().int().nonnegative().describe("External scraping credits consumed.")
|
|
18
|
+
}).strict()
|
|
27
19
|
}).strict();
|
|
28
20
|
export {
|
|
29
21
|
scrapeLinksOutputSchema,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/schemas/scrape-links.ts"],
|
|
4
|
-
"sourcesContent": ["import { z } from 'zod';\n\
|
|
5
|
-
"mappings": "AAAA,SAAS,SAAS;
|
|
4
|
+
"sourcesContent": ["import { z } from 'zod';\n\nconst urlSchema = z\n .string()\n .url({ message: 'scrape-links: Invalid URL format' })\n .refine(\n url => url.startsWith('http://') || url.startsWith('https://'),\n { message: 'scrape-links: URL must use http:// or https://' }\n )\n .describe('A fully-qualified HTTP or HTTPS URL to scrape.');\n\nexport const scrapeLinksParamsSchema = z.object({\n urls: z\n .array(urlSchema)\n .min(1, { message: 'scrape-links: At least 1 URL required' })\n .max(100, { message: 'scrape-links: Maximum 100 URLs allowed' })\n .describe('Web page URLs to scrape and extract content from.'),\n extract: z\n .string()\n .min(5, { message: 'scrape-links: extract must be at least 5 characters' })\n .max(1000, { message: 'scrape-links: extract too long (max 1000 chars)' })\n .describe('What to pull from each page. The LLM reads the scraped content and returns only what you specify. Be specific: \"pricing tiers | free tier limits | enterprise contact info\" not \"pricing\".'),\n}).strict();\n\nexport type ScrapeLinksParams = z.infer<typeof scrapeLinksParamsSchema>;\n\nexport const scrapeLinksOutputSchema = z.object({\n content: z\n .string()\n .describe('LLM-extracted content from scraped pages per the extract instructions.'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of URLs processed.'),\n successful: z.number().int().nonnegative().describe('URLs fetched successfully.'),\n failed: z.number().int().nonnegative().describe('URLs that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n total_credits: z.number().int().nonnegative().describe('External scraping credits consumed.'),\n }).strict(),\n}).strict();\n\nexport type ScrapeLinksOutput = z.infer<typeof scrapeLinksOutputSchema>;\n"],
|
|
5
|
+
"mappings": "AAAA,SAAS,SAAS;AAElB,MAAM,YAAY,EACf,OAAO,EACP,IAAI,EAAE,SAAS,mCAAmC,CAAC,EACnD;AAAA,EACC,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU;AAAA,EAC7D,EAAE,SAAS,iDAAiD;AAC9D,EACC,SAAS,gDAAgD;AAErD,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,MAAM,EACH,MAAM,SAAS,EACf,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D,IAAI,KAAK,EAAE,SAAS,yCAAyC,CAAC,EAC9D,SAAS,mDAAmD;AAAA,EAC/D,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,sDAAsD,CAAC,EACzE,IAAI,KAAM,EAAE,SAAS,kDAAkD,CAAC,EACxE,SAAS,4LAA4L;AAC1M,CAAC,EAAE,OAAO;AAIH,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,SAAS,EACN,OAAO,EACP,SAAS,wEAAwE;AAAA,EACpF,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,2BAA2B;AAAA,IAChF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,4BAA4B;AAAA,IAChF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,mBAAmB;AAAA,IACnE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,qCAAqC;AAAA,EAC9F,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
|
@@ -1,31 +1,27 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
-
const
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
).
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
}).
|
|
9
|
-
const webSearchParamsShape = {
|
|
10
|
-
keywords: keywordsSchema
|
|
11
|
-
};
|
|
12
|
-
const webSearchParamsSchema = z.object(webSearchParamsShape).strict();
|
|
2
|
+
const webSearchParamsSchema = z.object({
|
|
3
|
+
queries: z.array(
|
|
4
|
+
z.string().min(1, { message: "web-search: Query cannot be empty" }).max(500, { message: "web-search: Query too long (max 500 chars)" }).describe("A single Google search query. Each query runs as a separate parallel search.")
|
|
5
|
+
).min(1, { message: "web-search: At least 1 query required" }).max(100, { message: "web-search: Maximum 100 queries allowed" }).describe("Search queries to run in parallel via Google. More queries = broader coverage and stronger consensus signals across results."),
|
|
6
|
+
extract: z.string().min(5, { message: "web-search: extract must be at least 5 characters" }).max(500, { message: "web-search: extract too long (max 500 chars)" }).describe('What you are looking for. The LLM classifies each result by relevance and generates a synthesis. Be specific: "TypeScript MCP server frameworks with OAuth support" not "MCP servers".'),
|
|
7
|
+
raw: z.boolean().default(false).describe("Skip LLM classification and return the raw ranked URL list. Use when you need unprocessed results.")
|
|
8
|
+
}).strict();
|
|
13
9
|
const webSearchOutputSchema = z.object({
|
|
14
|
-
content: z.string().describe("
|
|
10
|
+
content: z.string().describe("Markdown report with tiered results (LLM mode) or ranked URL list (raw mode)."),
|
|
15
11
|
metadata: z.object({
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
12
|
+
total_items: z.number().int().nonnegative().describe("Number of queries executed."),
|
|
13
|
+
successful: z.number().int().nonnegative().describe("Queries that returned results."),
|
|
14
|
+
failed: z.number().int().nonnegative().describe("Queries that failed."),
|
|
15
|
+
execution_time_ms: z.number().int().nonnegative().describe("Wall clock time in milliseconds."),
|
|
16
|
+
llm_classified: z.boolean().describe("Whether LLM classification was applied."),
|
|
17
|
+
llm_error: z.string().optional().describe("LLM error if classification failed and fell back to raw."),
|
|
22
18
|
coverage_summary: z.array(z.object({
|
|
23
|
-
keyword: z.string().describe("The search
|
|
24
|
-
result_count: z.number().int().nonnegative().describe("
|
|
25
|
-
top_url: z.string().optional().describe("Domain of the top
|
|
26
|
-
})).optional().describe("Per-
|
|
27
|
-
low_yield_keywords: z.array(z.string()).optional().describe("
|
|
28
|
-
}).strict()
|
|
19
|
+
keyword: z.string().describe("The search query."),
|
|
20
|
+
result_count: z.number().int().nonnegative().describe("Results returned for this query."),
|
|
21
|
+
top_url: z.string().optional().describe("Domain of the top result.")
|
|
22
|
+
})).optional().describe("Per-query result counts and top URLs."),
|
|
23
|
+
low_yield_keywords: z.array(z.string()).optional().describe("Queries that produced 0-1 results.")
|
|
24
|
+
}).strict()
|
|
29
25
|
}).strict();
|
|
30
26
|
export {
|
|
31
27
|
webSearchOutputSchema,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/schemas/web-search.ts"],
|
|
4
|
-
"sourcesContent": ["import { z } from 'zod';\n\
|
|
5
|
-
"mappings": "AAAA,SAAS,SAAS;
|
|
4
|
+
"sourcesContent": ["import { z } from 'zod';\n\nexport const webSearchParamsSchema = z.object({\n queries: z\n .array(\n z.string()\n .min(1, { message: 'web-search: Query cannot be empty' })\n .max(500, { message: 'web-search: Query too long (max 500 chars)' })\n .describe('A single Google search query. Each query runs as a separate parallel search.'),\n )\n .min(1, { message: 'web-search: At least 1 query required' })\n .max(100, { message: 'web-search: Maximum 100 queries allowed' })\n .describe('Search queries to run in parallel via Google. More queries = broader coverage and stronger consensus signals across results.'),\n extract: z\n .string()\n .min(5, { message: 'web-search: extract must be at least 5 characters' })\n .max(500, { message: 'web-search: extract too long (max 500 chars)' })\n .describe('What you are looking for. The LLM classifies each result by relevance and generates a synthesis. Be specific: \"TypeScript MCP server frameworks with OAuth support\" not \"MCP servers\".'),\n raw: z\n .boolean()\n .default(false)\n .describe('Skip LLM classification and return the raw ranked URL list. 
Use when you need unprocessed results.'),\n}).strict();\n\nexport type WebSearchParams = z.infer<typeof webSearchParamsSchema>;\n\nexport const webSearchOutputSchema = z.object({\n content: z\n .string()\n .describe('Markdown report with tiered results (LLM mode) or ranked URL list (raw mode).'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of queries executed.'),\n successful: z.number().int().nonnegative().describe('Queries that returned results.'),\n failed: z.number().int().nonnegative().describe('Queries that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n llm_classified: z.boolean().describe('Whether LLM classification was applied.'),\n llm_error: z.string().optional().describe('LLM error if classification failed and fell back to raw.'),\n coverage_summary: z\n .array(z.object({\n keyword: z.string().describe('The search query.'),\n result_count: z.number().int().nonnegative().describe('Results returned for this query.'),\n top_url: z.string().optional().describe('Domain of the top result.'),\n }))\n .optional()\n .describe('Per-query result counts and top URLs.'),\n low_yield_keywords: z\n .array(z.string())\n .optional()\n .describe('Queries that produced 0-1 results.'),\n }).strict(),\n}).strict();\n\nexport type WebSearchOutput = z.infer<typeof webSearchOutputSchema>;\n"],
|
|
5
|
+
"mappings": "AAAA,SAAS,SAAS;AAEX,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN;AAAA,IACC,EAAE,OAAO,EACN,IAAI,GAAG,EAAE,SAAS,oCAAoC,CAAC,EACvD,IAAI,KAAK,EAAE,SAAS,6CAA6C,CAAC,EAClE,SAAS,8EAA8E;AAAA,EAC5F,EACC,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D,IAAI,KAAK,EAAE,SAAS,0CAA0C,CAAC,EAC/D,SAAS,8HAA8H;AAAA,EAC1I,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,oDAAoD,CAAC,EACvE,IAAI,KAAK,EAAE,SAAS,+CAA+C,CAAC,EACpE,SAAS,wLAAwL;AAAA,EACpM,KAAK,EACF,QAAQ,EACR,QAAQ,KAAK,EACb,SAAS,oGAAoG;AAClH,CAAC,EAAE,OAAO;AAIH,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN,OAAO,EACP,SAAS,+EAA+E;AAAA,EAC3F,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IAClF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,gCAAgC;AAAA,IACpF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,sBAAsB;AAAA,IACtE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,gBAAgB,EAAE,QAAQ,EAAE,SAAS,yCAAyC;AAAA,IAC9E,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0DAA0D;AAAA,IACpG,kBAAkB,EACf,MAAM,EAAE,OAAO;AAAA,MACd,SAAS,EAAE,OAAO,EAAE,SAAS,mBAAmB;AAAA,MAChD,cAAc,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,MACxF,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,2BAA2B;AAAA,IACrE,CAAC,CAAC,EACD,SAAS,EACT,SAAS,uCAAuC;AAAA,IACnD,oBAAoB,EACjB,MAAM,EAAE,OAAO,CAAC,EAChB,SAAS,EACT,SAAS,oCAAoC;AAAA,EAClD,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|