mcp-researchpowerpack-http 4.0.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-use.json +2 -2
- package/dist/src/schemas/reddit.js +20 -23
- package/dist/src/schemas/reddit.js.map +2 -2
- package/dist/src/schemas/scrape-links.js +12 -16
- package/dist/src/schemas/scrape-links.js.map +2 -2
- package/dist/src/schemas/web-search.js +18 -22
- package/dist/src/schemas/web-search.js.map +2 -2
- package/dist/src/tools/reddit.js +38 -109
- package/dist/src/tools/reddit.js.map +2 -2
- package/dist/src/tools/scrape.js +5 -8
- package/dist/src/tools/scrape.js.map +2 -2
- package/dist/src/tools/search.js +15 -17
- package/dist/src/tools/search.js.map +2 -2
- package/package.json +1 -1
package/dist/mcp-use.json
CHANGED
package/dist/src/schemas/reddit.js
CHANGED
@@ -1,35 +1,32 @@
  import { z } from "zod";
  const searchRedditParamsSchema = z.object({
  queries: z.array(
- z.string().min(1, { message: "search-reddit: Query cannot be empty" }).describe('A Reddit search query.
- ).min(1, { message: "search-reddit: At least 1 query
- }).strict();
- const getRedditPostParamsSchema = z.object({
- urls: z.array(
- z.string().url({ message: "get-reddit-post: Each URL must be valid" }).describe('A full Reddit post URL (e.g., "https://www.reddit.com/r/subreddit/comments/id/title/").')
- ).min(1, { message: "get-reddit-post: At least 1 Reddit post URL is required" }).max(50, { message: "get-reddit-post: Maximum 50 Reddit post URLs allowed" }).describe("Array of 1-50 Reddit post URLs. Each post is fetched with full comment trees, then the LLM extracts insights per what_to_extract. Best used after search-reddit."),
- fetch_comments: z.boolean().default(true).describe("Fetch threaded comment trees for each post. Defaults to true. Comments include author, score, OP markers, and nested replies. Set false only when you need post titles/selftext without community discussion."),
- what_to_extract: z.string({ error: "get-reddit-post: what_to_extract is required" }).min(5, { message: "get-reddit-post: what_to_extract must be at least 5 characters" }).max(1e3, { message: "get-reddit-post: what_to_extract is too long (max 1000 characters)" }).describe('REQUIRED. Extraction instructions for the LLM. Describes what insights, opinions, or data to pull from each post and its comments. Use pipe separators for multiple targets: "Extract recommendations | pain points | consensus on best practices | specific tools mentioned".')
+ z.string().min(1, { message: "search-reddit: Query cannot be empty" }).describe('A Reddit search query. "site:reddit.com" is appended automatically.')
+ ).min(1, { message: "search-reddit: At least 1 query required" }).max(100, { message: "search-reddit: Maximum 100 queries allowed" }).describe("Search queries for Reddit. Each query is automatically scoped to reddit.com via Google. Returns deduplicated Reddit post URLs.")
  }).strict();
  const searchRedditOutputSchema = z.object({
- content: z.string().describe("Newline-separated list of unique Reddit URLs
+ content: z.string().describe("Newline-separated list of unique Reddit URLs."),
  metadata: z.object({
-
-
-
+ total_items: z.number().int().nonnegative().describe("Number of queries executed."),
+ successful: z.number().int().nonnegative().describe("Queries that returned results."),
+ failed: z.number().int().nonnegative().describe("Queries that failed."),
+ execution_time_ms: z.number().int().nonnegative().describe("Wall clock time in milliseconds.")
+ }).strict()
+ }).strict();
+ const getRedditPostParamsSchema = z.object({
+ urls: z.array(
+ z.string().url({ message: "get-reddit-post: Each URL must be valid" }).describe("A Reddit post URL.")
+ ).min(1, { message: "get-reddit-post: At least 1 URL required" }).max(100, { message: "get-reddit-post: Maximum 100 URLs allowed" }).describe("Reddit post URLs to fetch. Each post is returned with its full threaded comment tree.")
  }).strict();
  const getRedditPostOutputSchema = z.object({
- content: z.string().describe("
+ content: z.string().describe("Raw Reddit posts with threaded comments including author, score, and OP markers."),
  metadata: z.object({
-
- successful: z.number().int().nonnegative().describe("
- failed: z.number().int().nonnegative().describe("
-
-
-
- total_batches: z.number().int().nonnegative().describe("Number of Reddit API batches executed."),
- rate_limit_hits: z.number().int().nonnegative().describe("Observed Reddit API rate-limit retries during the batch.")
- }).strict().describe("Metadata about the Reddit post fetch and extraction.")
+ total_items: z.number().int().nonnegative().describe("Number of URLs processed."),
+ successful: z.number().int().nonnegative().describe("Posts fetched successfully."),
+ failed: z.number().int().nonnegative().describe("Posts that failed to fetch."),
+ execution_time_ms: z.number().int().nonnegative().describe("Wall clock time in milliseconds."),
+ rate_limit_hits: z.number().int().nonnegative().describe("Reddit API rate-limit retries.")
+ }).strict()
  }).strict();
  export {
  getRedditPostOutputSchema,
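Net effect of the schema change: search-reddit now takes 1-100 plain query strings, get-reddit-post takes 1-100 post URLs with no fetch_comments or what_to_extract parameters, and both metadata blocks converge on the shared total_items / successful / failed / execution_time_ms counters. For orientation, a minimal sketch of the new parameter shapes and a sample parse; the import style and example values are illustrative, not taken from the package:

import { z } from "zod";

// Sketch of the 4.1.0 parameter shapes; error messages and .describe() text omitted.
const searchRedditParams = z.object({
  queries: z.array(z.string().min(1)).min(1).max(100), // 1-100 non-empty queries
}).strict();

const getRedditPostParams = z.object({
  urls: z.array(z.string().url()).min(1).max(100), // 1-100 Reddit post URLs
}).strict();

// Example arguments the new schemas accept (values are made up).
const searchArgs = searchRedditParams.parse({
  queries: ["best mechanical keyboards", "mechanical keyboard recommendations"],
});
const postArgs = getRedditPostParams.parse({
  urls: ["https://www.reddit.com/r/MechanicalKeyboards/comments/abc123/example/"],
});
console.log(searchArgs.queries.length, postArgs.urls.length);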
package/dist/src/schemas/reddit.js.map
CHANGED
@@ -1,7 +1,7 @@
  {
  "version": 3,
  "sources": ["../../../src/schemas/reddit.ts"],
- "sourcesContent": ["import { z } from 'zod';\n\n// ============================================================================\n// search-reddit
- "mappings": "AAAA,SAAS,SAAS;AAMX,MAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,SAAS,EACN;AAAA,IACC,
+
"sourcesContent": ["import { z } from 'zod';\n\n// ============================================================================\n// search-reddit\n// ============================================================================\n\nexport const searchRedditParamsSchema = z.object({\n queries: z\n .array(\n z.string()\n .min(1, { message: 'search-reddit: Query cannot be empty' })\n .describe('A Reddit search query. \"site:reddit.com\" is appended automatically.'),\n )\n .min(1, { message: 'search-reddit: At least 1 query required' })\n .max(100, { message: 'search-reddit: Maximum 100 queries allowed' })\n .describe('Search queries for Reddit. Each query is automatically scoped to reddit.com via Google. Returns deduplicated Reddit post URLs.'),\n}).strict();\n\nexport type SearchRedditParams = z.infer<typeof searchRedditParamsSchema>;\n\nexport const searchRedditOutputSchema = z.object({\n content: z\n .string()\n .describe('Newline-separated list of unique Reddit URLs.'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of queries executed.'),\n successful: z.number().int().nonnegative().describe('Queries that returned results.'),\n failed: z.number().int().nonnegative().describe('Queries that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n }).strict(),\n}).strict();\n\nexport type SearchRedditOutput = z.infer<typeof searchRedditOutputSchema>;\n\n// ============================================================================\n// get-reddit-post\n// ============================================================================\n\nexport const getRedditPostParamsSchema = z.object({\n urls: z\n .array(\n z.string()\n .url({ message: 'get-reddit-post: Each URL must be valid' })\n .describe('A Reddit post URL.'),\n )\n .min(1, { message: 'get-reddit-post: At least 1 URL required' })\n .max(100, { message: 'get-reddit-post: Maximum 100 URLs allowed' })\n .describe('Reddit post URLs to fetch. Each post is returned with its full threaded comment tree.'),\n}).strict();\n\nexport type GetRedditPostParams = z.infer<typeof getRedditPostParamsSchema>;\n\nexport const getRedditPostOutputSchema = z.object({\n content: z\n .string()\n .describe('Raw Reddit posts with threaded comments including author, score, and OP markers.'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of URLs processed.'),\n successful: z.number().int().nonnegative().describe('Posts fetched successfully.'),\n failed: z.number().int().nonnegative().describe('Posts that failed to fetch.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n rate_limit_hits: z.number().int().nonnegative().describe('Reddit API rate-limit retries.'),\n }).strict(),\n}).strict();\n\nexport type GetRedditPostOutput = z.infer<typeof getRedditPostOutputSchema>;\n"],
+
"mappings": "AAAA,SAAS,SAAS;AAMX,MAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,SAAS,EACN;AAAA,IACC,EAAE,OAAO,EACN,IAAI,GAAG,EAAE,SAAS,uCAAuC,CAAC,EAC1D,SAAS,qEAAqE;AAAA,EACnF,EACC,IAAI,GAAG,EAAE,SAAS,2CAA2C,CAAC,EAC9D,IAAI,KAAK,EAAE,SAAS,6CAA6C,CAAC,EAClE,SAAS,gIAAgI;AAC9I,CAAC,EAAE,OAAO;AAIH,MAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,SAAS,EACN,OAAO,EACP,SAAS,+CAA+C;AAAA,EAC3D,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IAClF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,gCAAgC;AAAA,IACpF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,sBAAsB;AAAA,IACtE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,EAC/F,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;AAQH,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,MAAM,EACH;AAAA,IACC,EAAE,OAAO,EACN,IAAI,EAAE,SAAS,0CAA0C,CAAC,EAC1D,SAAS,oBAAoB;AAAA,EAClC,EACC,IAAI,GAAG,EAAE,SAAS,2CAA2C,CAAC,EAC9D,IAAI,KAAK,EAAE,SAAS,4CAA4C,CAAC,EACjE,SAAS,uFAAuF;AACrG,CAAC,EAAE,OAAO;AAIH,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,SAAS,EACN,OAAO,EACP,SAAS,kFAAkF;AAAA,EAC9F,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,2BAA2B;AAAA,IAChF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IACjF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IAC7E,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,iBAAiB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,gCAAgC;AAAA,EAC3F,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
  "names": []
  }
package/dist/src/schemas/scrape-links.js
CHANGED
@@ -1,25 +1,21 @@
  import { z } from "zod";
- const urlSchema = z.string(
+ const urlSchema = z.string().url({ message: "scrape-links: Invalid URL format" }).refine(
  (url) => url.startsWith("http://") || url.startsWith("https://"),
- { message: "scrape-links: URL must use http:// or https://
- ).describe("A fully-qualified HTTP or HTTPS URL to
+ { message: "scrape-links: URL must use http:// or https://" }
+ ).describe("A fully-qualified HTTP or HTTPS URL to scrape.");
  const scrapeLinksParamsSchema = z.object({
- urls: z.array(urlSchema
-
- what_to_extract: z.string({ error: "scrape-links: what_to_extract is required" }).min(5, { message: "scrape-links: what_to_extract must be at least 5 characters" }).max(1e3, { message: "scrape-links: Extraction instructions too long (max 1000 characters)" }).describe('REQUIRED. Extraction instructions for the LLM. The LLM processes each scraped page and extracts ONLY what you specify. Formula: "Extract [target1] | [target2] | [target3] with focus on [aspect]". Be specific: "pricing tiers | monthly vs annual cost | free tier limits" not just "pricing".')
+ urls: z.array(urlSchema).min(1, { message: "scrape-links: At least 1 URL required" }).max(100, { message: "scrape-links: Maximum 100 URLs allowed" }).describe("Web page URLs to scrape and extract content from."),
+ extract: z.string().min(5, { message: "scrape-links: extract must be at least 5 characters" }).max(1e3, { message: "scrape-links: extract too long (max 1000 chars)" }).describe('What to pull from each page. The LLM reads the scraped content and returns only what you specify. Be specific: "pricing tiers | free tier limits | enterprise contact info" not "pricing".')
  }).strict();
  const scrapeLinksOutputSchema = z.object({
- content: z.string().describe("LLM-extracted content from scraped pages
+ content: z.string().describe("LLM-extracted content from scraped pages per the extract instructions."),
  metadata: z.object({
-
- successful: z.number().int().nonnegative().describe("
- failed: z.number().int().nonnegative().describe("
-
-
-
- total_token_budget: z.number().int().nonnegative().optional().describe("Overall token budget available for extraction."),
- batches_processed: z.number().int().nonnegative().optional().describe("Number of scrape batches executed.")
- }).strict().describe("Structured metadata about the scrape and extraction batch.")
+ total_items: z.number().int().nonnegative().describe("Number of URLs processed."),
+ successful: z.number().int().nonnegative().describe("URLs fetched successfully."),
+ failed: z.number().int().nonnegative().describe("URLs that failed."),
+ execution_time_ms: z.number().int().nonnegative().describe("Wall clock time in milliseconds."),
+ total_credits: z.number().int().nonnegative().describe("External scraping credits consumed.")
+ }).strict()
  }).strict();
  export {
  scrapeLinksOutputSchema,
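scrape-links keeps its two-field shape but renames what_to_extract to extract, caps the batch at 100 URLs, and its metadata now reports total_credits instead of the old token-budget and batch counters. An illustrative call shape; the URLs and instruction below are examples, not package defaults:

// Hypothetical argument object matching the 4.1.0 scrape-links params schema.
interface ScrapeLinksParams {
  urls: string[];   // 1-100 fully-qualified http(s) URLs
  extract: string;  // 5-1000 chars: what to pull from each page
}

const scrapeArgs: ScrapeLinksParams = {
  urls: ["https://example.com/pricing", "https://example.com/docs/limits"],
  extract: "pricing tiers | free tier limits | enterprise contact info",
};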
package/dist/src/schemas/scrape-links.js.map
CHANGED
@@ -1,7 +1,7 @@
  {
  "version": 3,
  "sources": ["../../../src/schemas/scrape-links.ts"],
- "sourcesContent": ["import { z } from 'zod';\n\
- "mappings": "AAAA,SAAS,SAAS;
+
"sourcesContent": ["import { z } from 'zod';\n\nconst urlSchema = z\n .string()\n .url({ message: 'scrape-links: Invalid URL format' })\n .refine(\n url => url.startsWith('http://') || url.startsWith('https://'),\n { message: 'scrape-links: URL must use http:// or https://' }\n )\n .describe('A fully-qualified HTTP or HTTPS URL to scrape.');\n\nexport const scrapeLinksParamsSchema = z.object({\n urls: z\n .array(urlSchema)\n .min(1, { message: 'scrape-links: At least 1 URL required' })\n .max(100, { message: 'scrape-links: Maximum 100 URLs allowed' })\n .describe('Web page URLs to scrape and extract content from.'),\n extract: z\n .string()\n .min(5, { message: 'scrape-links: extract must be at least 5 characters' })\n .max(1000, { message: 'scrape-links: extract too long (max 1000 chars)' })\n .describe('What to pull from each page. The LLM reads the scraped content and returns only what you specify. Be specific: \"pricing tiers | free tier limits | enterprise contact info\" not \"pricing\".'),\n}).strict();\n\nexport type ScrapeLinksParams = z.infer<typeof scrapeLinksParamsSchema>;\n\nexport const scrapeLinksOutputSchema = z.object({\n content: z\n .string()\n .describe('LLM-extracted content from scraped pages per the extract instructions.'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of URLs processed.'),\n successful: z.number().int().nonnegative().describe('URLs fetched successfully.'),\n failed: z.number().int().nonnegative().describe('URLs that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n total_credits: z.number().int().nonnegative().describe('External scraping credits consumed.'),\n }).strict(),\n}).strict();\n\nexport type ScrapeLinksOutput = z.infer<typeof scrapeLinksOutputSchema>;\n"],
+
"mappings": "AAAA,SAAS,SAAS;AAElB,MAAM,YAAY,EACf,OAAO,EACP,IAAI,EAAE,SAAS,mCAAmC,CAAC,EACnD;AAAA,EACC,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU;AAAA,EAC7D,EAAE,SAAS,iDAAiD;AAC9D,EACC,SAAS,gDAAgD;AAErD,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,MAAM,EACH,MAAM,SAAS,EACf,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D,IAAI,KAAK,EAAE,SAAS,yCAAyC,CAAC,EAC9D,SAAS,mDAAmD;AAAA,EAC/D,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,sDAAsD,CAAC,EACzE,IAAI,KAAM,EAAE,SAAS,kDAAkD,CAAC,EACxE,SAAS,4LAA4L;AAC1M,CAAC,EAAE,OAAO;AAIH,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,SAAS,EACN,OAAO,EACP,SAAS,wEAAwE;AAAA,EACpF,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,2BAA2B;AAAA,IAChF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,4BAA4B;AAAA,IAChF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,mBAAmB;AAAA,IACnE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,qCAAqC;AAAA,EAC9F,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
  "names": []
  }
package/dist/src/schemas/web-search.js
CHANGED
@@ -1,31 +1,27 @@
  import { z } from "zod";
- const keywordSchema = z.string({ error: "web-search: Keyword is required" }).min(1, { message: "web-search: Keyword cannot be empty" }).max(500, { message: "web-search: Keyword too long (max 500 characters)" }).refine(
- (k) => k.trim().length > 0,
- { message: "web-search: Keyword cannot be whitespace only" }
- ).describe('A single Google search query (1-500 chars). Each keyword runs as a separate parallel search. Use varied angles: direct topic, comparisons, "best of" lists, year-specific, site-specific (e.g., "site:github.com topic").');
  const webSearchParamsSchema = z.object({
-
-
-
+ queries: z.array(
+ z.string().min(1, { message: "web-search: Query cannot be empty" }).max(500, { message: "web-search: Query too long (max 500 chars)" }).describe("A single Google search query. Each query runs as a separate parallel search.")
+ ).min(1, { message: "web-search: At least 1 query required" }).max(100, { message: "web-search: Maximum 100 queries allowed" }).describe("Search queries to run in parallel via Google. More queries = broader coverage and stronger consensus signals across results."),
+ extract: z.string().min(5, { message: "web-search: extract must be at least 5 characters" }).max(500, { message: "web-search: extract too long (max 500 chars)" }).describe('What you are looking for. The LLM classifies each result by relevance and generates a synthesis. Be specific: "TypeScript MCP server frameworks with OAuth support" not "MCP servers".'),
+ raw: z.boolean().default(false).describe("Skip LLM classification and return the raw ranked URL list. Use when you need unprocessed results.")
  }).strict();
  const webSearchOutputSchema = z.object({
- content: z.string().describe("Markdown report
+ content: z.string().describe("Markdown report with tiered results (LLM mode) or ranked URL list (raw mode)."),
  metadata: z.object({
-
-
-
-
-
-
- llm_classified: z.boolean().describe("Whether LLM classification was applied to the results."),
- llm_error: z.string().optional().describe("LLM classification error message if classification failed and fell back to raw output."),
+ total_items: z.number().int().nonnegative().describe("Number of queries executed."),
+ successful: z.number().int().nonnegative().describe("Queries that returned results."),
+ failed: z.number().int().nonnegative().describe("Queries that failed."),
+ execution_time_ms: z.number().int().nonnegative().describe("Wall clock time in milliseconds."),
+ llm_classified: z.boolean().describe("Whether LLM classification was applied."),
+ llm_error: z.string().optional().describe("LLM error if classification failed and fell back to raw."),
  coverage_summary: z.array(z.object({
- keyword: z.string().describe("The search
- result_count: z.number().int().nonnegative().describe("
- top_url: z.string().optional().describe("Domain of the top
- })).optional().describe("Per-
- low_yield_keywords: z.array(z.string()).optional().describe("
- }).strict()
+ keyword: z.string().describe("The search query."),
+ result_count: z.number().int().nonnegative().describe("Results returned for this query."),
+ top_url: z.string().optional().describe("Domain of the top result.")
+ })).optional().describe("Per-query result counts and top URLs."),
+ low_yield_keywords: z.array(z.string()).optional().describe("Queries that produced 0-1 results.")
+ }).strict()
  }).strict();
  export {
  webSearchOutputSchema,
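web-search drops the old keyword-centric parameters in favour of queries, extract, and an optional raw flag; the output metadata keeps coverage_summary and low_yield_keywords but adds the shared counters. An illustrative call; the query text and extract instruction are examples only:

// Hypothetical argument object matching the 4.1.0 web-search params schema.
interface WebSearchParams {
  queries: string[]; // 1-100 Google queries, each run as a separate parallel search
  extract: string;   // 5-500 chars: what the LLM should look for in the results
  raw?: boolean;     // defaults to false; true skips LLM classification
}

const webSearchArgs: WebSearchParams = {
  queries: [
    "TypeScript MCP server frameworks",
    "MCP server OAuth support comparison",
  ],
  extract: "TypeScript MCP server frameworks with OAuth support",
  raw: false,
};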
package/dist/src/schemas/web-search.js.map
CHANGED
@@ -1,7 +1,7 @@
  {
  "version": 3,
  "sources": ["../../../src/schemas/web-search.ts"],
- "sourcesContent": ["import { z } from 'zod';\n\
- "mappings": "AAAA,SAAS,SAAS;
+
"sourcesContent": ["import { z } from 'zod';\n\nexport const webSearchParamsSchema = z.object({\n queries: z\n .array(\n z.string()\n .min(1, { message: 'web-search: Query cannot be empty' })\n .max(500, { message: 'web-search: Query too long (max 500 chars)' })\n .describe('A single Google search query. Each query runs as a separate parallel search.'),\n )\n .min(1, { message: 'web-search: At least 1 query required' })\n .max(100, { message: 'web-search: Maximum 100 queries allowed' })\n .describe('Search queries to run in parallel via Google. More queries = broader coverage and stronger consensus signals across results.'),\n extract: z\n .string()\n .min(5, { message: 'web-search: extract must be at least 5 characters' })\n .max(500, { message: 'web-search: extract too long (max 500 chars)' })\n .describe('What you are looking for. The LLM classifies each result by relevance and generates a synthesis. Be specific: \"TypeScript MCP server frameworks with OAuth support\" not \"MCP servers\".'),\n raw: z\n .boolean()\n .default(false)\n .describe('Skip LLM classification and return the raw ranked URL list. Use when you need unprocessed results.'),\n}).strict();\n\nexport type WebSearchParams = z.infer<typeof webSearchParamsSchema>;\n\nexport const webSearchOutputSchema = z.object({\n content: z\n .string()\n .describe('Markdown report with tiered results (LLM mode) or ranked URL list (raw mode).'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of queries executed.'),\n successful: z.number().int().nonnegative().describe('Queries that returned results.'),\n failed: z.number().int().nonnegative().describe('Queries that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n llm_classified: z.boolean().describe('Whether LLM classification was applied.'),\n llm_error: z.string().optional().describe('LLM error if classification failed and fell back to raw.'),\n coverage_summary: z\n .array(z.object({\n keyword: z.string().describe('The search query.'),\n result_count: z.number().int().nonnegative().describe('Results returned for this query.'),\n top_url: z.string().optional().describe('Domain of the top result.'),\n }))\n .optional()\n .describe('Per-query result counts and top URLs.'),\n low_yield_keywords: z\n .array(z.string())\n .optional()\n .describe('Queries that produced 0-1 results.'),\n }).strict(),\n}).strict();\n\nexport type WebSearchOutput = z.infer<typeof webSearchOutputSchema>;\n"],
+
"mappings": "AAAA,SAAS,SAAS;AAEX,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN;AAAA,IACC,EAAE,OAAO,EACN,IAAI,GAAG,EAAE,SAAS,oCAAoC,CAAC,EACvD,IAAI,KAAK,EAAE,SAAS,6CAA6C,CAAC,EAClE,SAAS,8EAA8E;AAAA,EAC5F,EACC,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D,IAAI,KAAK,EAAE,SAAS,0CAA0C,CAAC,EAC/D,SAAS,8HAA8H;AAAA,EAC1I,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,oDAAoD,CAAC,EACvE,IAAI,KAAK,EAAE,SAAS,+CAA+C,CAAC,EACpE,SAAS,wLAAwL;AAAA,EACpM,KAAK,EACF,QAAQ,EACR,QAAQ,KAAK,EACb,SAAS,oGAAoG;AAClH,CAAC,EAAE,OAAO;AAIH,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN,OAAO,EACP,SAAS,+EAA+E;AAAA,EAC3F,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IAClF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,gCAAgC;AAAA,IACpF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,sBAAsB;AAAA,IACtE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,gBAAgB,EAAE,QAAQ,EAAE,SAAS,yCAAyC;AAAA,IAC9E,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0DAA0D;AAAA,IACpG,kBAAkB,EACf,MAAM,EAAE,OAAO;AAAA,MACd,SAAS,EAAE,OAAO,EAAE,SAAS,mBAAmB;AAAA,MAChD,cAAc,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,MACxF,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,2BAA2B;AAAA,IACrE,CAAC,CAAC,EACD,SAAS,EACT,SAAS,uCAAuC;AAAA,IACnD,oBAAoB,EACjB,MAAM,EAAE,OAAO,CAAC,EAChB,SAAS,EACT,SAAS,oCAAoC;AAAA,EAClD,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
  "names": []
  }
package/dist/src/tools/reddit.js
CHANGED
@@ -6,16 +6,12 @@ import {
  } from "../schemas/reddit.js";
  import { SearchClient } from "../clients/search.js";
  import { RedditClient } from "../clients/reddit.js";
- import { REDDIT,
+ import { REDDIT, getCapabilities, getMissingEnvMessage, parseEnv } from "../config/index.js";
  import { classifyError } from "../utils/errors.js";
- import { createLLMProcessor, processContentWithLLM } from "../services/llm-processor.js";
- import { pMap } from "../utils/concurrency.js";
  import {
- mcpLog,
  formatSuccess,
  formatError,
- formatBatchHeader
- TOKEN_BUDGETS
+ formatBatchHeader
  } from "./utils.js";
  import {
  createToolReporter,
@@ -84,20 +80,17 @@ ${post.body}
  wordsUsed += commentsResult.wordsUsed;
  if (commentsResult.truncated > 0) {
  md += `
- _${commentsResult.truncated} more comments not shown (word budget reached).
+ _${commentsResult.truncated} more comments not shown (word budget reached)._
  
  `;
  }
- } else if (!fetchComments) {
- md += `_Comments not fetched (fetch_comments=false)_
-
- `;
  }
  return { md, wordsUsed, commentsShown, commentsTruncated };
  }
  async function handleSearchReddit(queries, apiKey, reporter = NOOP_REPORTER) {
  try {
- const
+ const startTime = Date.now();
+ const limited = queries.slice(0, 100);
  const client = new SearchClient(apiKey);
  await reporter.log("info", `Searching Reddit with ${limited.length} queries`);
  await reporter.progress(15, 100, "Searching Reddit");
@@ -114,18 +107,21 @@ async function handleSearchReddit(queries, apiKey, reporter = NOOP_REPORTER) {
  message: `No Reddit URLs found for any of the ${limited.length} queries`,
  toolName: "search-reddit",
  howToFix: ["Try broader or simpler search terms", "Check spelling"],
- alternatives: ['web-search(
+ alternatives: ['web-search(queries=["topic reddit discussion"], extract="...") \u2014 broader Google search']
  }));
  }
  const urlList = [...allUrls];
  const content = urlList.join("\n");
  await reporter.log("info", `Found ${urlList.length} unique Reddit URLs across ${limited.length} queries`);
  await reporter.progress(100, 100, "Reddit search complete");
+ const executionTime = Date.now() - startTime;
  return toolSuccess(content, {
  content,
  metadata: {
-
-
+ total_items: limited.length,
+ successful: urlList.length,
+ failed: 0,
+ execution_time_ms: executionTime
  }
  });
  } catch (error) {
@@ -139,11 +135,6 @@ async function handleSearchReddit(queries, apiKey, reporter = NOOP_REPORTER) {
  }));
  }
  }
- function enhanceExtractionInstruction(instruction) {
- return `${instruction}
-
- ${REDDIT.EXTRACTION_SUFFIX}`;
- }
  function validatePostCount(urlCount) {
  if (urlCount < REDDIT.MIN_POSTS) {
  return formatError({
@@ -166,35 +157,13 @@ function validatePostCount(urlCount) {
  }
  return null;
  }
- async function
- mcpLog("info", `[${index}/${total}] Applying LLM extraction to ${url}`, "reddit");
- const llmResult = await processContentWithLLM(
- postContent,
- { use_llm: true, what_to_extract: enhancedInstruction, max_tokens: tokensPerUrl },
- llmProcessor
- );
- if (llmResult.processed) {
- mcpLog("debug", `[${index}/${total}] LLM extraction complete`, "reddit");
- const header = `## LLM Analysis: ${result.post.title}
-
- **r/${result.post.subreddit}** \u2022 u/${result.post.author} \u2022 \u2B06\uFE0F ${result.post.score} \u2022 \u{1F4AC} ${result.post.commentCount} comments
- \u{1F517} ${result.post.url}
-
- `;
- return { content: header + llmResult.content, llmFailed: false };
- }
- mcpLog("warning", `[${index}/${total}] LLM extraction failed: ${llmResult.error || "unknown"}`, "reddit");
- return { content: postContent, llmFailed: true };
- }
- async function fetchAndProcessPosts(results, urls, fetchComments, what_to_extract) {
- const llmProcessor = createLLMProcessor();
- const tokensPerUrl = Math.floor(TOKEN_BUDGETS.RESEARCH / urls.length);
- const enhancedInstruction = enhanceExtractionInstruction(what_to_extract);
+ async function fetchAndProcessPosts(results) {
  let failed = 0;
  const failedContents = [];
- const
-
+ const successContents = [];
+ let successful = 0;
  let totalWordsUsed = 0;
+ const skippedUrls = [];
  for (const [url, result] of results) {
  if (result instanceof Error) {
  failed++;
@@ -207,36 +176,13 @@ _${result.message}_`)
  skippedUrls.push(url);
  continue;
  }
- const formatted = formatPost(result,
+ const formatted = formatPost(result, true, REDDIT.MAX_WORDS_PER_POST);
  totalWordsUsed += formatted.wordsUsed;
-
-
- let llmErrors = 0;
- let processedEntries;
- if (llmProcessor && successEntries.length > 0) {
- const llmResults = await pMap(successEntries, async (entry, index) => {
- const llmOut = await applyLlmToPost(
- entry.content,
- entry.result,
- entry.url,
- llmProcessor,
- enhancedInstruction,
- tokensPerUrl,
- index + 1,
- successEntries.length
- );
- if (llmOut.llmFailed) llmErrors++;
- return { ...entry, content: llmOut.content };
- }, CONCURRENCY.LLM_EXTRACTION);
- processedEntries = llmResults;
- } else {
- if (!llmProcessor) {
- mcpLog("warning", "LLM unavailable (LLM_EXTRACTION_API_KEY not set). Returning raw content.", "reddit");
- }
- processedEntries = successEntries;
+ successContents.push(formatted.md);
+ successful++;
  }
- const contents = [...failedContents, ...
- return { successful
+ const contents = [...failedContents, ...successContents];
+ return { successful, failed, llmErrors: 0, llmAvailable: false, contents, totalWordsUsed, skippedUrls };
  }
  function buildRedditStatusExtras(rateLimitHits, llmAvailable, llmErrors) {
  const extras = [];
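With the LLM pass gone, the post-processing loop collapses to formatting and counting. Reassembled from the added lines above (and the updated source embedded in the tools/reddit.js source map), the new fetchAndProcessPosts is roughly the following sketch; PostResult, formatPost, and REDDIT come from the package itself:

// Reconstruction of the 4.1.0 fetchAndProcessPosts; types come from ../clients/reddit.js and ../config/index.js.
async function fetchAndProcessPosts(results: Map<string, PostResult | Error>) {
  let failed = 0;
  let successful = 0;
  let totalWordsUsed = 0;
  const failedContents: string[] = [];
  const successContents: string[] = [];
  const skippedUrls: string[] = [];

  for (const [url, result] of results) {
    if (result instanceof Error) {
      failed++;
      failedContents.push(`## \u274C Failed: ${url}\n\n_${result.message}_`);
      continue;
    }
    if (totalWordsUsed >= REDDIT.MAX_WORDS_TOTAL) {
      skippedUrls.push(url); // word budget exhausted; the URL is reported instead of its content
      continue;
    }
    const formatted = formatPost(result, true, REDDIT.MAX_WORDS_PER_POST); // comments always included
    totalWordsUsed += formatted.wordsUsed;
    successContents.push(formatted.md);
    successful++;
  }

  // llmErrors/llmAvailable are kept in the result shape so downstream formatting stays unchanged.
  const contents = [...failedContents, ...successContents];
  return { successful, failed, llmErrors: 0, llmAvailable: false, contents, totalWordsUsed, skippedUrls };
}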
@@ -296,56 +242,40 @@ function formatGetRedditPostsError(error) {
  ]
  });
  }
- async function handleGetRedditPosts(urls, clientId, clientSecret,
+ async function handleGetRedditPosts(urls, clientId, clientSecret, reporter = NOOP_REPORTER) {
+ const startTime = Date.now();
  try {
- const { fetchComments = true, what_to_extract } = options;
  const validationError = validatePostCount(urls.length);
  if (validationError) return toolFailure(validationError);
  const totalBatches = Math.ceil(urls.length / REDDIT.BATCH_SIZE);
- await reporter.log("info", `Fetching ${urls.length} Reddit post(s)
- await reporter.progress(20, 100, "Fetching Reddit
+ await reporter.log("info", `Fetching ${urls.length} Reddit post(s)`);
+ await reporter.progress(20, 100, "Fetching Reddit posts");
  const client = new RedditClient(clientId, clientSecret);
- const batchResult = await client.batchGetPosts(urls,
- await reporter.
-
-
- );
- await reporter.progress(55, 100, "Processing Reddit posts and LLM extraction");
- const processResult = await fetchAndProcessPosts(
- batchResult.results,
- urls,
- fetchComments,
- what_to_extract
- );
- await reporter.log(
- "info",
- `Processed ${processResult.successful} successful post(s) with ${processResult.failed} failure(s), ${processResult.totalWordsUsed.toLocaleString()} words`
- );
- await reporter.progress(85, 100, "Formatting Reddit output");
- const tokensPerUrl = Math.floor(TOKEN_BUDGETS.RESEARCH / urls.length);
+ const batchResult = await client.batchGetPosts(urls, true);
+ await reporter.progress(55, 100, "Formatting posts and comments");
+ const processResult = await fetchAndProcessPosts(batchResult.results);
+ await reporter.progress(85, 100, "Building output");
  const extraStatus = buildRedditStatusExtras(
  batchResult.rateLimitHits,
-
-
+ false,
+ 0
  );
  const content = formatRedditOutput(
  urls,
  processResult,
-
+ true,
  totalBatches,
-
+ 0,
  extraStatus
  );
+ const executionTime = Date.now() - startTime;
  return toolSuccess(content, {
  content,
  metadata: {
-
+ total_items: urls.length,
  successful: processResult.successful,
  failed: processResult.failed,
-
- total_words_used: processResult.totalWordsUsed,
- llm_failures: processResult.llmErrors,
- total_batches: totalBatches,
+ execution_time_ms: executionTime,
  rate_limit_hits: batchResult.rateLimitHits
  }
  });
@@ -358,7 +288,7 @@ function registerSearchRedditTool(server) {
  {
  name: "search-reddit",
  title: "Search Reddit",
- description:
+ description: "Search Google for Reddit posts matching up to 100 queries. Returns a flat list of unique Reddit URLs ready to pipe into get-reddit-post.",
  schema: searchRedditParamsSchema,
  outputSchema: searchRedditOutputSchema,
  annotations: {
@@ -385,7 +315,7 @@ function registerGetRedditPostTool(server) {
  {
  name: "get-reddit-post",
  title: "Get Reddit Post",
- description:
+ description: "Fetch up to 100 Reddit posts with full threaded comment trees. Returns the raw post content and all comments with author, score, and OP markers.",
  schema: getRedditPostParamsSchema,
  outputSchema: getRedditPostOutputSchema,
  annotations: {
@@ -395,7 +325,7 @@ function registerGetRedditPostTool(server) {
  openWorldHint: true
  }
  },
- async ({ urls
+ async ({ urls }, ctx) => {
  if (!getCapabilities().reddit) {
  return toToolResponse(toolFailure(getMissingEnvMessage("reddit")));
  }
@@ -405,7 +335,6 @@ function registerGetRedditPostTool(server) {
  urls,
  env.REDDIT_CLIENT_ID,
  env.REDDIT_CLIENT_SECRET,
- { fetchComments: fetch_comments, what_to_extract },
  reporter
  );
  await reporter.progress(100, 100, result.isError ? "Reddit fetch failed" : "Reddit fetch complete");
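Since the registered handler now destructures only { urls } (and search-reddit only { queries }), a client call under 4.1.0 carries nothing but the target lists; both params schemas are .strict(), so the retired fetch_comments and what_to_extract fields would be rejected. A hypothetical pair of tool-call argument payloads (the queries and URLs are placeholders):

// Hypothetical 4.1.0 tool-call arguments; values are illustrative only.
const searchRedditArgs = {
  queries: ["rust web framework recommendations", "axum vs actix experiences"],
};

const getRedditPostArgs = {
  urls: [
    "https://www.reddit.com/r/rust/comments/abc123/axum_vs_actix/",
    "https://www.reddit.com/r/rust/comments/def456/which_web_framework/",
  ],
};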
package/dist/src/tools/reddit.js.map
CHANGED
@@ -1,7 +1,7 @@
  {
  "version": 3,
  "sources": ["../../../src/tools/reddit.ts"],
-
"sourcesContent": ["/**\n * Reddit Tools - Search and Fetch\n * NEVER throws - always returns structured response for graceful degradation\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport {\n searchRedditParamsSchema,\n searchRedditOutputSchema,\n getRedditPostParamsSchema,\n getRedditPostOutputSchema,\n type SearchRedditOutput,\n type GetRedditPostOutput,\n} from '../schemas/reddit.js';\nimport { SearchClient } from '../clients/search.js';\nimport { RedditClient, type PostResult, type Comment } from '../clients/reddit.js';\nimport { REDDIT, CONCURRENCY, getCapabilities, getMissingEnvMessage, parseEnv } from '../config/index.js';\nimport { classifyError } from '../utils/errors.js';\nimport { createLLMProcessor, processContentWithLLM } from '../services/llm-processor.js';\nimport { pMap } from '../utils/concurrency.js';\nimport {\n mcpLog,\n formatSuccess,\n formatError,\n formatBatchHeader,\n TOKEN_BUDGETS,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\n\n// ============================================================================\n// Formatters\n// ============================================================================\n\nfunction countWords(text: string): number {\n const plain = text.replace(/[*_~`#>|[\\]()!-]/g, '');\n return plain.split(/\\s+/).filter(w => w.length > 0).length;\n}\n\ninterface FormattedCommentsResult {\n md: string;\n wordsUsed: number;\n shown: number;\n truncated: number;\n}\n\nfunction formatComments(comments: Comment[], maxWords: number): FormattedCommentsResult {\n let md = '';\n let wordsUsed = 0;\n let shown = 0;\n\n for (const c of comments) {\n const indent = ' '.repeat(c.depth);\n const op = c.isOP ? ' **[OP]**' : '';\n const score = c.score >= 0 ? 
`+${c.score}` : `${c.score}`;\n const authorLine = `${indent}- **u/${c.author}**${op} _(${score})_\\n`;\n const bodyLines = c.body.split('\\n').map(line => `${indent} ${line}`).join('\\n');\n const commentMd = `${authorLine}${bodyLines}\\n\\n`;\n const commentWords = countWords(commentMd);\n\n if (wordsUsed + commentWords > maxWords && shown > 0) break;\n\n md += commentMd;\n wordsUsed += commentWords;\n shown++;\n }\n\n return { md, wordsUsed, shown, truncated: comments.length - shown };\n}\n\ninterface FormattedPostResult {\n md: string;\n wordsUsed: number;\n commentsShown: number;\n commentsTruncated: number;\n}\n\nfunction formatPost(result: PostResult, fetchComments: boolean, maxWords: number): FormattedPostResult {\n const { post, comments } = result;\n let md = `## ${post.title}\\n\\n`;\n md += `**r/${post.subreddit}** \u2022 u/${post.author} \u2022 \u2B06\uFE0F ${post.score} \u2022 \uD83D\uDCAC ${post.commentCount} comments\\n`;\n md += `\uD83D\uDD17 ${post.url}\\n\\n`;\n\n let wordsUsed = countWords(md);\n\n if (post.body) {\n const bodySection = `### Post Content\\n\\n${post.body}\\n\\n`;\n wordsUsed += countWords(bodySection);\n md += bodySection;\n }\n\n let commentsShown = 0;\n let commentsTruncated = 0;\n\n if (fetchComments && comments.length > 0) {\n const remainingWords = Math.max(0, maxWords - wordsUsed);\n const commentsResult = formatComments(comments, remainingWords);\n commentsShown = commentsResult.shown;\n commentsTruncated = commentsResult.truncated;\n\n md += `### Top Comments (${commentsResult.shown}/${post.commentCount} shown, ${commentsResult.wordsUsed.toLocaleString()} words)\\n\\n`;\n md += commentsResult.md;\n wordsUsed += commentsResult.wordsUsed;\n\n if (commentsResult.truncated > 0) {\n md += `\\n_${commentsResult.truncated} more comments not shown (word budget reached). 
Use use_llm=true for AI-synthesized summary._\\n\\n`;\n }\n } else if (!fetchComments) {\n md += `_Comments not fetched (fetch_comments=false)_\\n\\n`;\n }\n\n return { md, wordsUsed, commentsShown, commentsTruncated };\n}\n\n// ============================================================================\n// Search Reddit Handler (simplified \u2014 returns flat URL list)\n// ============================================================================\n\nexport async function handleSearchReddit(\n queries: string[],\n apiKey: string,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<SearchRedditOutput>> {\n try {\n const limited = queries.slice(0, 50);\n const client = new SearchClient(apiKey);\n await reporter.log('info', `Searching Reddit with ${limited.length} queries`);\n await reporter.progress(15, 100, 'Searching Reddit');\n const results = await client.searchRedditMultiple(limited);\n\n // Collect all unique URLs\n const allUrls = new Set<string>();\n for (const resultSet of results.values()) {\n for (const result of resultSet) {\n if (result.url) allUrls.add(result.url);\n }\n }\n\n if (allUrls.size === 0) {\n return toolFailure(formatError({\n code: 'NO_RESULTS',\n message: `No Reddit URLs found for any of the ${limited.length} queries`,\n toolName: 'search-reddit',\n howToFix: ['Try broader or simpler search terms', 'Check spelling'],\n alternatives: ['web-search(keywords=[\"topic reddit discussion\"], objective=\"...\") \u2014 broader Google search'],\n }));\n }\n\n const urlList = [...allUrls];\n const content = urlList.join('\\n');\n\n await reporter.log('info', `Found ${urlList.length} unique Reddit URLs across ${limited.length} queries`);\n await reporter.progress(100, 100, 'Reddit search complete');\n\n return toolSuccess(content, {\n content,\n metadata: {\n query_count: limited.length,\n total_urls: urlList.length,\n },\n });\n } catch (error) {\n const structuredError = classifyError(error);\n return toolFailure(formatError({\n code: structuredError.code,\n message: structuredError.message,\n retryable: structuredError.retryable,\n toolName: 'search-reddit',\n howToFix: ['Verify SERPER_API_KEY is set correctly'],\n }));\n }\n}\n\n// ============================================================================\n// Get Reddit Posts Handler\n// ============================================================================\n\ninterface GetRedditPostsOptions {\n fetchComments?: boolean;\n what_to_extract: string;\n}\n\nfunction enhanceExtractionInstruction(instruction: string): string {\n return `${instruction}\\n\\n${REDDIT.EXTRACTION_SUFFIX}`;\n}\n\n// --- Internal types ---\n\ninterface PostProcessResult {\n successful: number;\n failed: number;\n llmErrors: number;\n llmAvailable: boolean;\n contents: string[];\n totalWordsUsed: number;\n skippedUrls: string[];\n}\n\n// --- Helpers ---\n\nfunction validatePostCount(urlCount: number): string | null {\n if (urlCount < REDDIT.MIN_POSTS) {\n return formatError({\n code: 'MIN_POSTS',\n message: `Minimum ${REDDIT.MIN_POSTS} Reddit posts required. 
Received: ${urlCount}`,\n toolName: 'get-reddit-post',\n howToFix: [`Add at least ${REDDIT.MIN_POSTS - urlCount} more Reddit URL(s)`],\n alternatives: [\n `search-reddit(queries=[\"topic discussion\", \"topic recommendations\", \"topic experiences\"]) \u2014 find more Reddit posts first, then call get-reddit-post with ${REDDIT.MIN_POSTS}+ URLs`,\n ],\n });\n }\n if (urlCount > REDDIT.MAX_POSTS) {\n return formatError({\n code: 'MAX_POSTS',\n message: `Maximum ${REDDIT.MAX_POSTS} Reddit posts allowed. Received: ${urlCount}`,\n toolName: 'get-reddit-post',\n howToFix: [`Remove ${urlCount - REDDIT.MAX_POSTS} URL(s) and retry`],\n });\n }\n return null;\n}\n\nasync function applyLlmToPost(\n postContent: string,\n result: PostResult,\n url: string,\n llmProcessor: NonNullable<ReturnType<typeof createLLMProcessor>>,\n enhancedInstruction: string | undefined,\n tokensPerUrl: number,\n index: number,\n total: number,\n): Promise<{ content: string; llmFailed: boolean }> {\n mcpLog('info', `[${index}/${total}] Applying LLM extraction to ${url}`, 'reddit');\n\n const llmResult = await processContentWithLLM(\n postContent,\n { use_llm: true, what_to_extract: enhancedInstruction, max_tokens: tokensPerUrl },\n llmProcessor,\n );\n\n if (llmResult.processed) {\n mcpLog('debug', `[${index}/${total}] LLM extraction complete`, 'reddit');\n const header = `## LLM Analysis: ${result.post.title}\\n\\n**r/${result.post.subreddit}** \u2022 u/${result.post.author} \u2022 \u2B06\uFE0F ${result.post.score} \u2022 \uD83D\uDCAC ${result.post.commentCount} comments\\n\uD83D\uDD17 ${result.post.url}\\n\\n`;\n return { content: header + llmResult.content, llmFailed: false };\n }\n\n mcpLog('warning', `[${index}/${total}] LLM extraction failed: ${llmResult.error || 'unknown'}`, 'reddit');\n return { content: postContent, llmFailed: true };\n}\n\nasync function fetchAndProcessPosts(\n results: Map<string, PostResult | Error>,\n urls: string[],\n fetchComments: boolean,\n what_to_extract: string,\n): Promise<PostProcessResult> {\n const llmProcessor = createLLMProcessor();\n const tokensPerUrl = Math.floor(TOKEN_BUDGETS.RESEARCH / urls.length);\n const enhancedInstruction = enhanceExtractionInstruction(what_to_extract);\n\n let failed = 0;\n const failedContents: string[] = [];\n const successEntries: { url: string; result: PostResult; content: string; wordsUsed: number }[] = [];\n const skippedUrls: string[] = [];\n let totalWordsUsed = 0;\n\n for (const [url, result] of results) {\n if (result instanceof Error) {\n failed++;\n failedContents.push(`## \u274C Failed: ${url}\\n\\n_${result.message}_`);\n continue;\n }\n\n if (totalWordsUsed >= REDDIT.MAX_WORDS_TOTAL) {\n skippedUrls.push(url);\n continue;\n }\n\n const formatted = formatPost(result, fetchComments, REDDIT.MAX_WORDS_PER_POST);\n totalWordsUsed += formatted.wordsUsed;\n successEntries.push({ url, result, content: formatted.md, wordsUsed: formatted.wordsUsed });\n }\n\n let llmErrors = 0;\n let processedEntries: typeof successEntries;\n\n // Always run LLM when available\n if (llmProcessor && successEntries.length > 0) {\n const llmResults = await pMap(successEntries, async (entry, index) => {\n const llmOut = await applyLlmToPost(\n entry.content, entry.result, entry.url, llmProcessor, enhancedInstruction,\n tokensPerUrl, index + 1, successEntries.length,\n );\n if (llmOut.llmFailed) llmErrors++;\n return { ...entry, content: llmOut.content };\n }, CONCURRENCY.LLM_EXTRACTION);\n processedEntries = llmResults;\n } else {\n if (!llmProcessor) {\n 
mcpLog('warning', 'LLM unavailable (LLM_EXTRACTION_API_KEY not set). Returning raw content.', 'reddit');\n }\n processedEntries = successEntries;\n }\n\n const contents = [...failedContents, ...processedEntries.map(e => e.content)];\n\n return { successful: successEntries.length, failed, llmErrors, llmAvailable: llmProcessor !== null, contents, totalWordsUsed, skippedUrls };\n}\n\nfunction buildRedditStatusExtras(\n rateLimitHits: number,\n llmAvailable: boolean,\n llmErrors: number,\n): string {\n const extras: string[] = [];\n if (rateLimitHits > 0) extras.push(`\u26A0\uFE0F ${rateLimitHits} rate limit retries`);\n if (!llmAvailable) {\n extras.push('\u26A0\uFE0F LLM unavailable (LLM_EXTRACTION_API_KEY not set) \u2014 raw content returned');\n } else if (llmErrors > 0) {\n extras.push(`\u26A0\uFE0F ${llmErrors} LLM extraction failures`);\n }\n return extras.length > 0 ? `\\n${extras.join(' | ')}` : '';\n}\n\nfunction formatRedditOutput(\n urls: string[],\n processResult: PostProcessResult,\n fetchComments: boolean,\n totalBatches: number,\n tokensPerUrl: number,\n extraStatus: string,\n): string {\n const batchHeader = formatBatchHeader({\n title: `Reddit Posts`,\n totalItems: urls.length,\n successful: processResult.successful,\n failed: processResult.failed,\n ...(fetchComments ? { extras: { 'Words used': processResult.totalWordsUsed.toLocaleString() } } : {}),\n tokensPerItem: tokensPerUrl,\n batches: totalBatches,\n });\n\n let data = processResult.contents.join('\\n\\n---\\n\\n');\n\n if (processResult.skippedUrls.length > 0) {\n data += '\\n\\n---\\n\\n';\n data += `**Word limit reached (${REDDIT.MAX_WORDS_TOTAL.toLocaleString()} words).** The following posts were not included:\\n`;\n for (const url of processResult.skippedUrls) {\n data += `- ${url}\\n`;\n }\n data += `\\nCall get-reddit-post again with just these URLs.`;\n }\n\n return formatSuccess({\n title: `Reddit Posts Fetched (${processResult.successful}/${urls.length})`,\n summary: batchHeader + extraStatus,\n data,\n nextSteps: [\n processResult.successful > 0 ? 'web-search to verify claims from Reddit discussions' : null,\n processResult.successful > 0 ? 'scrape-links on URLs referenced in comments' : null,\n processResult.failed > 0 ? 
'Retry failed URLs individually' : null,\n ].filter(Boolean) as string[],\n });\n}\n\nfunction formatGetRedditPostsError(error: unknown): string {\n const structuredError = classifyError(error);\n return formatError({\n code: structuredError.code,\n message: structuredError.message,\n retryable: structuredError.retryable,\n toolName: 'get-reddit-post',\n howToFix: ['Verify REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET are set'],\n alternatives: [\n 'web-search(keywords=[\"topic reddit discussion\", \"topic reddit recommendations\"]) \u2014 search for Reddit content via web search instead',\n 'scrape-links(urls=[...the Reddit URLs...], use_llm=true, what_to_extract=\"Extract post content | top comments | recommendations\") \u2014 scrape Reddit pages directly as a fallback',\n ],\n });\n}\n\nexport async function handleGetRedditPosts(\n urls: string[],\n clientId: string,\n clientSecret: string,\n options: GetRedditPostsOptions,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<GetRedditPostOutput>> {\n try {\n const { fetchComments = true, what_to_extract } = options;\n\n const validationError = validatePostCount(urls.length);\n if (validationError) return toolFailure(validationError);\n\n const totalBatches = Math.ceil(urls.length / REDDIT.BATCH_SIZE);\n\n await reporter.log('info', `Fetching ${urls.length} Reddit post(s) across ${totalBatches} batch(es)`);\n await reporter.progress(20, 100, 'Fetching Reddit post content');\n const client = new RedditClient(clientId, clientSecret);\n const batchResult = await client.batchGetPosts(urls, fetchComments);\n await reporter.log(\n 'info',\n `Fetched Reddit batch results with ${batchResult.rateLimitHits} rate-limit retry/retries`,\n );\n await reporter.progress(55, 100, 'Processing Reddit posts and LLM extraction');\n\n const processResult = await fetchAndProcessPosts(\n batchResult.results, urls, fetchComments, what_to_extract,\n );\n await reporter.log(\n 'info',\n `Processed ${processResult.successful} successful post(s) with ${processResult.failed} failure(s), ${processResult.totalWordsUsed.toLocaleString()} words`,\n );\n await reporter.progress(85, 100, 'Formatting Reddit output');\n\n const tokensPerUrl = Math.floor(TOKEN_BUDGETS.RESEARCH / urls.length);\n const extraStatus = buildRedditStatusExtras(\n batchResult.rateLimitHits, processResult.llmAvailable, processResult.llmErrors,\n );\n const content = formatRedditOutput(\n urls,\n processResult,\n fetchComments,\n totalBatches,\n tokensPerUrl,\n extraStatus,\n );\n\n return toolSuccess(content, {\n content,\n metadata: {\n total_urls: urls.length,\n successful: processResult.successful,\n failed: processResult.failed,\n fetch_comments: fetchComments,\n total_words_used: processResult.totalWordsUsed,\n llm_failures: processResult.llmErrors,\n total_batches: totalBatches,\n rate_limit_hits: batchResult.rateLimitHits,\n },\n });\n } catch (error) {\n return toolFailure(formatGetRedditPostsError(error));\n }\n}\n\nexport function registerSearchRedditTool(server: MCPServer): void {\n server.tool(\n {\n name: 'search-reddit',\n title: 'Search Reddit',\n description:\n 'Search for Reddit posts by appending \"site:reddit.com\" to 1-50 queries via Google. Returns a flat list of unique Reddit URLs. No ranking, no LLM processing \u2014 just URL discovery. 
Pipe results into get-reddit-post to fetch and analyze the actual content.',\n schema: searchRedditParamsSchema,\n outputSchema: searchRedditOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async ({ queries }, ctx) => {\n if (!getCapabilities().search) {\n return toToolResponse(toolFailure(getMissingEnvMessage('search')));\n }\n\n const env = parseEnv();\n const reporter = createToolReporter(ctx, 'search-reddit');\n const result = await handleSearchReddit(queries, env.SEARCH_API_KEY!, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Reddit search failed' : 'Reddit search complete');\n return toToolResponse(result);\n },\n );\n}\n\nexport function registerGetRedditPostTool(server: MCPServer): void {\n server.tool(\n {\n name: 'get-reddit-post',\n title: 'Get Reddit Post',\n description:\n 'Fetch 1-50 Reddit posts with full comment trees and run LLM extraction. Provide what_to_extract with specific instructions (e.g., \"Extract recommendations | pain points | consensus opinions\"). The LLM synthesizes posts and comments into focused insights. Best used after search-reddit.',\n schema: getRedditPostParamsSchema,\n outputSchema: getRedditPostOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async ({ urls, fetch_comments, what_to_extract }, ctx) => {\n if (!getCapabilities().reddit) {\n return toToolResponse(toolFailure(getMissingEnvMessage('reddit')));\n }\n\n const env = parseEnv();\n const reporter = createToolReporter(ctx, 'get-reddit-post');\n const result = await handleGetRedditPosts(\n urls,\n env.REDDIT_CLIENT_ID!,\n env.REDDIT_CLIENT_SECRET!,\n { fetchComments: fetch_comments, what_to_extract },\n reporter,\n );\n\n await reporter.progress(100, 100, result.isError ? 'Reddit fetch failed' : 'Reddit fetch complete');\n return toToolResponse(result);\n },\n );\n}\n"],
-
"mappings": "AAOA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AACP,SAAS,oBAAoB;AAC7B,SAAS,oBAAmD;AAC5D,SAAS,QAAQ,
+
"sourcesContent": ["/**\n * Reddit Tools - Search and Fetch\n * NEVER throws - always returns structured response for graceful degradation\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport {\n searchRedditParamsSchema,\n searchRedditOutputSchema,\n getRedditPostParamsSchema,\n getRedditPostOutputSchema,\n type SearchRedditOutput,\n type GetRedditPostOutput,\n} from '../schemas/reddit.js';\nimport { SearchClient } from '../clients/search.js';\nimport { RedditClient, type PostResult, type Comment } from '../clients/reddit.js';\nimport { REDDIT, getCapabilities, getMissingEnvMessage, parseEnv } from '../config/index.js';\nimport { classifyError } from '../utils/errors.js';\nimport {\n mcpLog,\n formatSuccess,\n formatError,\n formatBatchHeader,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\n\n// ============================================================================\n// Formatters\n// ============================================================================\n\nfunction countWords(text: string): number {\n const plain = text.replace(/[*_~`#>|[\\]()!-]/g, '');\n return plain.split(/\\s+/).filter(w => w.length > 0).length;\n}\n\ninterface FormattedCommentsResult {\n md: string;\n wordsUsed: number;\n shown: number;\n truncated: number;\n}\n\nfunction formatComments(comments: Comment[], maxWords: number): FormattedCommentsResult {\n let md = '';\n let wordsUsed = 0;\n let shown = 0;\n\n for (const c of comments) {\n const indent = ' '.repeat(c.depth);\n const op = c.isOP ? ' **[OP]**' : '';\n const score = c.score >= 0 ? `+${c.score}` : `${c.score}`;\n const authorLine = `${indent}- **u/${c.author}**${op} _(${score})_\\n`;\n const bodyLines = c.body.split('\\n').map(line => `${indent} ${line}`).join('\\n');\n const commentMd = `${authorLine}${bodyLines}\\n\\n`;\n const commentWords = countWords(commentMd);\n\n if (wordsUsed + commentWords > maxWords && shown > 0) break;\n\n md += commentMd;\n wordsUsed += commentWords;\n shown++;\n }\n\n return { md, wordsUsed, shown, truncated: comments.length - shown };\n}\n\ninterface FormattedPostResult {\n md: string;\n wordsUsed: number;\n commentsShown: number;\n commentsTruncated: number;\n}\n\nfunction formatPost(result: PostResult, fetchComments: boolean, maxWords: number): FormattedPostResult {\n const { post, comments } = result;\n let md = `## ${post.title}\\n\\n`;\n md += `**r/${post.subreddit}** \u2022 u/${post.author} \u2022 \u2B06\uFE0F ${post.score} \u2022 \uD83D\uDCAC ${post.commentCount} comments\\n`;\n md += `\uD83D\uDD17 ${post.url}\\n\\n`;\n\n let wordsUsed = countWords(md);\n\n if (post.body) {\n const bodySection = `### Post Content\\n\\n${post.body}\\n\\n`;\n wordsUsed += countWords(bodySection);\n md += bodySection;\n }\n\n let commentsShown = 0;\n let commentsTruncated = 0;\n\n if (fetchComments && comments.length > 0) {\n const remainingWords = Math.max(0, maxWords - wordsUsed);\n const commentsResult = formatComments(comments, remainingWords);\n commentsShown = commentsResult.shown;\n commentsTruncated = commentsResult.truncated;\n\n md += `### Top Comments (${commentsResult.shown}/${post.commentCount} shown, ${commentsResult.wordsUsed.toLocaleString()} words)\\n\\n`;\n md += commentsResult.md;\n wordsUsed += commentsResult.wordsUsed;\n\n if (commentsResult.truncated > 0) {\n md += `\\n_${commentsResult.truncated} more comments not shown (word budget 
reached)._\\n\\n`;\n }\n }\n\n return { md, wordsUsed, commentsShown, commentsTruncated };\n}\n\n// ============================================================================\n// Search Reddit Handler (simplified \u2014 returns flat URL list)\n// ============================================================================\n\nexport async function handleSearchReddit(\n queries: string[],\n apiKey: string,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<SearchRedditOutput>> {\n try {\n const startTime = Date.now();\n const limited = queries.slice(0, 100);\n const client = new SearchClient(apiKey);\n await reporter.log('info', `Searching Reddit with ${limited.length} queries`);\n await reporter.progress(15, 100, 'Searching Reddit');\n const results = await client.searchRedditMultiple(limited);\n\n // Collect all unique URLs\n const allUrls = new Set<string>();\n for (const resultSet of results.values()) {\n for (const result of resultSet) {\n if (result.url) allUrls.add(result.url);\n }\n }\n\n if (allUrls.size === 0) {\n return toolFailure(formatError({\n code: 'NO_RESULTS',\n message: `No Reddit URLs found for any of the ${limited.length} queries`,\n toolName: 'search-reddit',\n howToFix: ['Try broader or simpler search terms', 'Check spelling'],\n alternatives: ['web-search(queries=[\"topic reddit discussion\"], extract=\"...\") \u2014 broader Google search'],\n }));\n }\n\n const urlList = [...allUrls];\n const content = urlList.join('\\n');\n\n await reporter.log('info', `Found ${urlList.length} unique Reddit URLs across ${limited.length} queries`);\n await reporter.progress(100, 100, 'Reddit search complete');\n\n const executionTime = Date.now() - startTime;\n return toolSuccess(content, {\n content,\n metadata: {\n total_items: limited.length,\n successful: urlList.length,\n failed: 0,\n execution_time_ms: executionTime,\n },\n });\n } catch (error) {\n const structuredError = classifyError(error);\n return toolFailure(formatError({\n code: structuredError.code,\n message: structuredError.message,\n retryable: structuredError.retryable,\n toolName: 'search-reddit',\n howToFix: ['Verify SERPER_API_KEY is set correctly'],\n }));\n }\n}\n\n// ============================================================================\n// Get Reddit Posts Handler\n// ============================================================================\n\n// get-reddit-post no longer uses LLM \u2014 returns raw posts + comments\n\n// --- Internal types ---\n\ninterface PostProcessResult {\n successful: number;\n failed: number;\n llmErrors: number;\n llmAvailable: boolean;\n contents: string[];\n totalWordsUsed: number;\n skippedUrls: string[];\n}\n\n// --- Helpers ---\n\nfunction validatePostCount(urlCount: number): string | null {\n if (urlCount < REDDIT.MIN_POSTS) {\n return formatError({\n code: 'MIN_POSTS',\n message: `Minimum ${REDDIT.MIN_POSTS} Reddit posts required. Received: ${urlCount}`,\n toolName: 'get-reddit-post',\n howToFix: [`Add at least ${REDDIT.MIN_POSTS - urlCount} more Reddit URL(s)`],\n alternatives: [\n `search-reddit(queries=[\"topic discussion\", \"topic recommendations\", \"topic experiences\"]) \u2014 find more Reddit posts first, then call get-reddit-post with ${REDDIT.MIN_POSTS}+ URLs`,\n ],\n });\n }\n if (urlCount > REDDIT.MAX_POSTS) {\n return formatError({\n code: 'MAX_POSTS',\n message: `Maximum ${REDDIT.MAX_POSTS} Reddit posts allowed. 
Received: ${urlCount}`,\n toolName: 'get-reddit-post',\n howToFix: [`Remove ${urlCount - REDDIT.MAX_POSTS} URL(s) and retry`],\n });\n }\n return null;\n}\n\nasync function fetchAndProcessPosts(\n results: Map<string, PostResult | Error>,\n): Promise<PostProcessResult> {\n let failed = 0;\n const failedContents: string[] = [];\n const successContents: string[] = [];\n let successful = 0;\n let totalWordsUsed = 0;\n const skippedUrls: string[] = [];\n\n for (const [url, result] of results) {\n if (result instanceof Error) {\n failed++;\n failedContents.push(`## \u274C Failed: ${url}\\n\\n_${result.message}_`);\n continue;\n }\n\n if (totalWordsUsed >= REDDIT.MAX_WORDS_TOTAL) {\n skippedUrls.push(url);\n continue;\n }\n\n const formatted = formatPost(result, true, REDDIT.MAX_WORDS_PER_POST);\n totalWordsUsed += formatted.wordsUsed;\n successContents.push(formatted.md);\n successful++;\n }\n\n const contents = [...failedContents, ...successContents];\n return { successful, failed, llmErrors: 0, llmAvailable: false, contents, totalWordsUsed, skippedUrls };\n}\n\nfunction buildRedditStatusExtras(\n rateLimitHits: number,\n llmAvailable: boolean,\n llmErrors: number,\n): string {\n const extras: string[] = [];\n if (rateLimitHits > 0) extras.push(`\u26A0\uFE0F ${rateLimitHits} rate limit retries`);\n if (!llmAvailable) {\n extras.push('\u26A0\uFE0F LLM unavailable (LLM_EXTRACTION_API_KEY not set) \u2014 raw content returned');\n } else if (llmErrors > 0) {\n extras.push(`\u26A0\uFE0F ${llmErrors} LLM extraction failures`);\n }\n return extras.length > 0 ? `\\n${extras.join(' | ')}` : '';\n}\n\nfunction formatRedditOutput(\n urls: string[],\n processResult: PostProcessResult,\n fetchComments: boolean,\n totalBatches: number,\n tokensPerUrl: number,\n extraStatus: string,\n): string {\n const batchHeader = formatBatchHeader({\n title: `Reddit Posts`,\n totalItems: urls.length,\n successful: processResult.successful,\n failed: processResult.failed,\n ...(fetchComments ? { extras: { 'Words used': processResult.totalWordsUsed.toLocaleString() } } : {}),\n tokensPerItem: tokensPerUrl,\n batches: totalBatches,\n });\n\n let data = processResult.contents.join('\\n\\n---\\n\\n');\n\n if (processResult.skippedUrls.length > 0) {\n data += '\\n\\n---\\n\\n';\n data += `**Word limit reached (${REDDIT.MAX_WORDS_TOTAL.toLocaleString()} words).** The following posts were not included:\\n`;\n for (const url of processResult.skippedUrls) {\n data += `- ${url}\\n`;\n }\n data += `\\nCall get-reddit-post again with just these URLs.`;\n }\n\n return formatSuccess({\n title: `Reddit Posts Fetched (${processResult.successful}/${urls.length})`,\n summary: batchHeader + extraStatus,\n data,\n nextSteps: [\n processResult.successful > 0 ? 'web-search to verify claims from Reddit discussions' : null,\n processResult.successful > 0 ? 'scrape-links on URLs referenced in comments' : null,\n processResult.failed > 0 ? 
'Retry failed URLs individually' : null,\n ].filter(Boolean) as string[],\n });\n}\n\nfunction formatGetRedditPostsError(error: unknown): string {\n const structuredError = classifyError(error);\n return formatError({\n code: structuredError.code,\n message: structuredError.message,\n retryable: structuredError.retryable,\n toolName: 'get-reddit-post',\n howToFix: ['Verify REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET are set'],\n alternatives: [\n 'web-search(keywords=[\"topic reddit discussion\", \"topic reddit recommendations\"]) \u2014 search for Reddit content via web search instead',\n 'scrape-links(urls=[...the Reddit URLs...], use_llm=true, what_to_extract=\"Extract post content | top comments | recommendations\") \u2014 scrape Reddit pages directly as a fallback',\n ],\n });\n}\n\nexport async function handleGetRedditPosts(\n urls: string[],\n clientId: string,\n clientSecret: string,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<GetRedditPostOutput>> {\n const startTime = Date.now();\n try {\n const validationError = validatePostCount(urls.length);\n if (validationError) return toolFailure(validationError);\n\n const totalBatches = Math.ceil(urls.length / REDDIT.BATCH_SIZE);\n\n await reporter.log('info', `Fetching ${urls.length} Reddit post(s)`);\n await reporter.progress(20, 100, 'Fetching Reddit posts');\n const client = new RedditClient(clientId, clientSecret);\n const batchResult = await client.batchGetPosts(urls, true);\n await reporter.progress(55, 100, 'Formatting posts and comments');\n\n const processResult = await fetchAndProcessPosts(batchResult.results);\n await reporter.progress(85, 100, 'Building output');\n\n const extraStatus = buildRedditStatusExtras(\n batchResult.rateLimitHits, false, 0,\n );\n const content = formatRedditOutput(\n urls, processResult, true, totalBatches, 0, extraStatus,\n );\n\n const executionTime = Date.now() - startTime;\n return toolSuccess(content, {\n content,\n metadata: {\n total_items: urls.length,\n successful: processResult.successful,\n failed: processResult.failed,\n execution_time_ms: executionTime,\n rate_limit_hits: batchResult.rateLimitHits,\n },\n });\n } catch (error) {\n return toolFailure(formatGetRedditPostsError(error));\n }\n}\n\nexport function registerSearchRedditTool(server: MCPServer): void {\n server.tool(\n {\n name: 'search-reddit',\n title: 'Search Reddit',\n description:\n 'Search Google for Reddit posts matching up to 100 queries. Returns a flat list of unique Reddit URLs ready to pipe into get-reddit-post.',\n schema: searchRedditParamsSchema,\n outputSchema: searchRedditOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async ({ queries }, ctx) => {\n if (!getCapabilities().search) {\n return toToolResponse(toolFailure(getMissingEnvMessage('search')));\n }\n\n const env = parseEnv();\n const reporter = createToolReporter(ctx, 'search-reddit');\n const result = await handleSearchReddit(queries, env.SEARCH_API_KEY!, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Reddit search failed' : 'Reddit search complete');\n return toToolResponse(result);\n },\n );\n}\n\nexport function registerGetRedditPostTool(server: MCPServer): void {\n server.tool(\n {\n name: 'get-reddit-post',\n title: 'Get Reddit Post',\n description:\n 'Fetch up to 100 Reddit posts with full threaded comment trees. 
Returns the raw post content and all comments with author, score, and OP markers.',\n schema: getRedditPostParamsSchema,\n outputSchema: getRedditPostOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async ({ urls }, ctx) => {\n if (!getCapabilities().reddit) {\n return toToolResponse(toolFailure(getMissingEnvMessage('reddit')));\n }\n\n const env = parseEnv();\n const reporter = createToolReporter(ctx, 'get-reddit-post');\n const result = await handleGetRedditPosts(\n urls,\n env.REDDIT_CLIENT_ID!,\n env.REDDIT_CLIENT_SECRET!,\n reporter,\n );\n\n await reporter.progress(100, 100, result.isError ? 'Reddit fetch failed' : 'Reddit fetch complete');\n return toToolResponse(result);\n },\n );\n}\n"],
|
|
5
|
+
"mappings": "AAOA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AACP,SAAS,oBAAoB;AAC7B,SAAS,oBAAmD;AAC5D,SAAS,QAAQ,iBAAiB,sBAAsB,gBAAgB;AACxE,SAAS,qBAAqB;AAC9B;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AAMP,SAAS,WAAW,MAAsB;AACxC,QAAM,QAAQ,KAAK,QAAQ,qBAAqB,EAAE;AAClD,SAAO,MAAM,MAAM,KAAK,EAAE,OAAO,OAAK,EAAE,SAAS,CAAC,EAAE;AACtD;AASA,SAAS,eAAe,UAAqB,UAA2C;AACtF,MAAI,KAAK;AACT,MAAI,YAAY;AAChB,MAAI,QAAQ;AAEZ,aAAW,KAAK,UAAU;AACxB,UAAM,SAAS,KAAK,OAAO,EAAE,KAAK;AAClC,UAAM,KAAK,EAAE,OAAO,cAAc;AAClC,UAAM,QAAQ,EAAE,SAAS,IAAI,IAAI,EAAE,KAAK,KAAK,GAAG,EAAE,KAAK;AACvD,UAAM,aAAa,GAAG,MAAM,SAAS,EAAE,MAAM,KAAK,EAAE,MAAM,KAAK;AAAA;AAC/D,UAAM,YAAY,EAAE,KAAK,MAAM,IAAI,EAAE,IAAI,UAAQ,GAAG,MAAM,KAAK,IAAI,EAAE,EAAE,KAAK,IAAI;AAChF,UAAM,YAAY,GAAG,UAAU,GAAG,SAAS;AAAA;AAAA;AAC3C,UAAM,eAAe,WAAW,SAAS;AAEzC,QAAI,YAAY,eAAe,YAAY,QAAQ,EAAG;AAEtD,UAAM;AACN,iBAAa;AACb;AAAA,EACF;AAEA,SAAO,EAAE,IAAI,WAAW,OAAO,WAAW,SAAS,SAAS,MAAM;AACpE;AASA,SAAS,WAAW,QAAoB,eAAwB,UAAuC;AACrG,QAAM,EAAE,MAAM,SAAS,IAAI;AAC3B,MAAI,KAAK,MAAM,KAAK,KAAK;AAAA;AAAA;AACzB,QAAM,OAAO,KAAK,SAAS,eAAU,KAAK,MAAM,wBAAS,KAAK,KAAK,qBAAS,KAAK,YAAY;AAAA;AAC7F,QAAM,aAAM,KAAK,GAAG;AAAA;AAAA;AAEpB,MAAI,YAAY,WAAW,EAAE;AAE7B,MAAI,KAAK,MAAM;AACb,UAAM,cAAc;AAAA;AAAA,EAAuB,KAAK,IAAI;AAAA;AAAA;AACpD,iBAAa,WAAW,WAAW;AACnC,UAAM;AAAA,EACR;AAEA,MAAI,gBAAgB;AACpB,MAAI,oBAAoB;AAExB,MAAI,iBAAiB,SAAS,SAAS,GAAG;AACxC,UAAM,iBAAiB,KAAK,IAAI,GAAG,WAAW,SAAS;AACvD,UAAM,iBAAiB,eAAe,UAAU,cAAc;AAC9D,oBAAgB,eAAe;AAC/B,wBAAoB,eAAe;AAEnC,UAAM,qBAAqB,eAAe,KAAK,IAAI,KAAK,YAAY,WAAW,eAAe,UAAU,eAAe,CAAC;AAAA;AAAA;AACxH,UAAM,eAAe;AACrB,iBAAa,eAAe;AAE5B,QAAI,eAAe,YAAY,GAAG;AAChC,YAAM;AAAA,GAAM,eAAe,SAAS;AAAA;AAAA;AAAA,IACtC;AAAA,EACF;AAEA,SAAO,EAAE,IAAI,WAAW,eAAe,kBAAkB;AAC3D;AAMA,eAAsB,mBACpB,SACA,QACA,WAAyB,eACyB;AAClD,MAAI;AACF,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,UAAU,QAAQ,MAAM,GAAG,GAAG;AACpC,UAAM,SAAS,IAAI,aAAa,MAAM;AACtC,UAAM,SAAS,IAAI,QAAQ,yBAAyB,QAAQ,MAAM,UAAU;AAC5E,UAAM,SAAS,SAAS,IAAI,KAAK,kBAAkB;AACnD,UAAM,UAAU,MAAM,OAAO,qBAAqB,OAAO;AAGzD,UAAM,UAAU,oBAAI,IAAY;AAChC,eAAW,aAAa,QAAQ,OAAO,GAAG;AACxC,iBAAW,UAAU,WAAW;AAC9B,YAAI,OAAO,IAAK,SAAQ,IAAI,OAAO,GAAG;AAAA,MACxC;AAAA,IACF;AAEA,QAAI,QAAQ,SAAS,GAAG;AACtB,aAAO,YAAY,YAAY;AAAA,QAC7B,MAAM;AAAA,QACN,SAAS,uCAAuC,QAAQ,MAAM;AAAA,QAC9D,UAAU;AAAA,QACV,UAAU,CAAC,uCAAuC,gBAAgB;AAAA,QAClE,cAAc,CAAC,6FAAwF;AAAA,MACzG,CAAC,CAAC;AAAA,IACJ;AAEA,UAAM,UAAU,CAAC,GAAG,OAAO;AAC3B,UAAM,UAAU,QAAQ,KAAK,IAAI;AAEjC,UAAM,SAAS,IAAI,QAAQ,SAAS,QAAQ,MAAM,8BAA8B,QAAQ,MAAM,UAAU;AACxG,UAAM,SAAS,SAAS,KAAK,KAAK,wBAAwB;AAE1D,UAAM,gBAAgB,KAAK,IAAI,IAAI;AACnC,WAAO,YAAY,SAAS;AAAA,MAC1B;AAAA,MACA,UAAU;AAAA,QACR,aAAa,QAAQ;AAAA,QACrB,YAAY,QAAQ;AAAA,QACpB,QAAQ;AAAA,QACR,mBAAmB;AAAA,MACrB;AAAA,IACF,CAAC;AAAA,EACH,SAAS,OAAO;AACd,UAAM,kBAAkB,cAAc,KAAK;AAC3C,WAAO,YAAY,YAAY;AAAA,MAC7B,MAAM,gBAAgB;AAAA,MACtB,SAAS,gBAAgB;AAAA,MACzB,WAAW,gBAAgB;AAAA,MAC3B,UAAU;AAAA,MACV,UAAU,CAAC,wCAAwC;AAAA,IACrD,CAAC,CAAC;AAAA,EACJ;AACF;AAsBA,SAAS,kBAAkB,UAAiC;AAC1D,MAAI,WAAW,OAAO,WAAW;AAC/B,WAAO,YAAY;AAAA,MACjB,MAAM;AAAA,MACN,SAAS,WAAW,OAAO,SAAS,qCAAqC,QAAQ;AAAA,MACjF,UAAU;AAAA,MACV,UAAU,CAAC,gBAAgB,OAAO,YAAY,QAAQ,qBAAqB;AAAA,MAC3E,cAAc;AAAA,QACZ,iKAA4J,OAAO,SAAS;AAAA,MAC9K;AAAA,IACF,CAAC;AAAA,EACH;AACA,MAAI,WAAW,OAAO,WAAW;AAC/B,WAAO,YAAY;AAAA,MACjB,MAAM;AAAA,MACN,SAAS,WAAW,OAAO,SAAS,oCAAoC,QAAQ;AAAA,MAChF,UAAU;AAAA,MACV,UAAU,CAAC,UAAU,WAAW,OAAO,SAAS,mBAAmB;AAAA,IACrE,CAAC;AAAA,EACH;AACA,SAAO;AACT;AAEA,eAAe,qBACb,SAC4B;AAC5B,MAAI,SAAS;AACb,QAAM,iBAA2B,CAAC;AAClC,QAAM,kBAA4B,CAAC;AACnC,MAAI,aAAa;AACjB,MAAI,iBAAiB;AACrB,QAAM,cAAwB,CAAC;AAE/B,aAAW,CA
AC,KAAK,MAAM,KAAK,SAAS;AACnC,QAAI,kBAAkB,OAAO;AAC3B;AACA,qBAAe,KAAK,qBAAgB,GAAG;AAAA;AAAA,GAAQ,OAAO,OAAO,GAAG;AAChE;AAAA,IACF;AAEA,QAAI,kBAAkB,OAAO,iBAAiB;AAC5C,kBAAY,KAAK,GAAG;AACpB;AAAA,IACF;AAEA,UAAM,YAAY,WAAW,QAAQ,MAAM,OAAO,kBAAkB;AACpE,sBAAkB,UAAU;AAC5B,oBAAgB,KAAK,UAAU,EAAE;AACjC;AAAA,EACF;AAEA,QAAM,WAAW,CAAC,GAAG,gBAAgB,GAAG,eAAe;AACvD,SAAO,EAAE,YAAY,QAAQ,WAAW,GAAG,cAAc,OAAO,UAAU,gBAAgB,YAAY;AACxG;AAEA,SAAS,wBACP,eACA,cACA,WACQ;AACR,QAAM,SAAmB,CAAC;AAC1B,MAAI,gBAAgB,EAAG,QAAO,KAAK,gBAAM,aAAa,qBAAqB;AAC3E,MAAI,CAAC,cAAc;AACjB,WAAO,KAAK,2FAA4E;AAAA,EAC1F,WAAW,YAAY,GAAG;AACxB,WAAO,KAAK,gBAAM,SAAS,0BAA0B;AAAA,EACvD;AACA,SAAO,OAAO,SAAS,IAAI;AAAA,EAAK,OAAO,KAAK,KAAK,CAAC,KAAK;AACzD;AAEA,SAAS,mBACP,MACA,eACA,eACA,cACA,cACA,aACQ;AACR,QAAM,cAAc,kBAAkB;AAAA,IACpC,OAAO;AAAA,IACP,YAAY,KAAK;AAAA,IACjB,YAAY,cAAc;AAAA,IAC1B,QAAQ,cAAc;AAAA,IACtB,GAAI,gBAAgB,EAAE,QAAQ,EAAE,cAAc,cAAc,eAAe,eAAe,EAAE,EAAE,IAAI,CAAC;AAAA,IACnG,eAAe;AAAA,IACf,SAAS;AAAA,EACX,CAAC;AAED,MAAI,OAAO,cAAc,SAAS,KAAK,aAAa;AAEpD,MAAI,cAAc,YAAY,SAAS,GAAG;AACxC,YAAQ;AACR,YAAQ,yBAAyB,OAAO,gBAAgB,eAAe,CAAC;AAAA;AACxE,eAAW,OAAO,cAAc,aAAa;AAC3C,cAAQ,KAAK,GAAG;AAAA;AAAA,IAClB;AACA,YAAQ;AAAA;AAAA,EACV;AAEA,SAAO,cAAc;AAAA,IACnB,OAAO,yBAAyB,cAAc,UAAU,IAAI,KAAK,MAAM;AAAA,IACvE,SAAS,cAAc;AAAA,IACvB;AAAA,IACA,WAAW;AAAA,MACT,cAAc,aAAa,IAAI,wDAAwD;AAAA,MACvF,cAAc,aAAa,IAAI,gDAAgD;AAAA,MAC/E,cAAc,SAAS,IAAI,mCAAmC;AAAA,IAChE,EAAE,OAAO,OAAO;AAAA,EAClB,CAAC;AACH;AAEA,SAAS,0BAA0B,OAAwB;AACzD,QAAM,kBAAkB,cAAc,KAAK;AAC3C,SAAO,YAAY;AAAA,IACjB,MAAM,gBAAgB;AAAA,IACtB,SAAS,gBAAgB;AAAA,IACzB,WAAW,gBAAgB;AAAA,IAC3B,UAAU;AAAA,IACV,UAAU,CAAC,0DAA0D;AAAA,IACrE,cAAc;AAAA,MACZ;AAAA,MACA;AAAA,IACF;AAAA,EACF,CAAC;AACH;AAEA,eAAsB,qBACpB,MACA,UACA,cACA,WAAyB,eAC0B;AACnD,QAAM,YAAY,KAAK,IAAI;AAC3B,MAAI;AACF,UAAM,kBAAkB,kBAAkB,KAAK,MAAM;AACrD,QAAI,gBAAiB,QAAO,YAAY,eAAe;AAEvD,UAAM,eAAe,KAAK,KAAK,KAAK,SAAS,OAAO,UAAU;AAE9D,UAAM,SAAS,IAAI,QAAQ,YAAY,KAAK,MAAM,iBAAiB;AACnE,UAAM,SAAS,SAAS,IAAI,KAAK,uBAAuB;AACxD,UAAM,SAAS,IAAI,aAAa,UAAU,YAAY;AACtD,UAAM,cAAc,MAAM,OAAO,cAAc,MAAM,IAAI;AACzD,UAAM,SAAS,SAAS,IAAI,KAAK,+BAA+B;AAEhE,UAAM,gBAAgB,MAAM,qBAAqB,YAAY,OAAO;AACpE,UAAM,SAAS,SAAS,IAAI,KAAK,iBAAiB;AAElD,UAAM,cAAc;AAAA,MAClB,YAAY;AAAA,MAAe;AAAA,MAAO;AAAA,IACpC;AACA,UAAM,UAAU;AAAA,MACd;AAAA,MAAM;AAAA,MAAe;AAAA,MAAM;AAAA,MAAc;AAAA,MAAG;AAAA,IAC9C;AAEA,UAAM,gBAAgB,KAAK,IAAI,IAAI;AACnC,WAAO,YAAY,SAAS;AAAA,MAC1B;AAAA,MACA,UAAU;AAAA,QACR,aAAa,KAAK;AAAA,QAClB,YAAY,cAAc;AAAA,QAC1B,QAAQ,cAAc;AAAA,QACtB,mBAAmB;AAAA,QACnB,iBAAiB,YAAY;AAAA,MAC/B;AAAA,IACF,CAAC;AAAA,EACH,SAAS,OAAO;AACd,WAAO,YAAY,0BAA0B,KAAK,CAAC;AAAA,EACrD;AACF;AAEO,SAAS,yBAAyB,QAAyB;AAChE,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,EAAE,QAAQ,GAAG,QAAQ;AAC1B,UAAI,CAAC,gBAAgB,EAAE,QAAQ;AAC7B,eAAO,eAAe,YAAY,qBAAqB,QAAQ,CAAC,CAAC;AAAA,MACnE;AAEA,YAAM,MAAM,SAAS;AACrB,YAAM,WAAW,mBAAmB,KAAK,eAAe;AACxD,YAAM,SAAS,MAAM,mBAAmB,SAAS,IAAI,gBAAiB,QAAQ;AAE9E,YAAM,SAAS,SAAS,KAAK,KAAK,OAAO,UAAU,yBAAyB,wBAAwB;AACpG,aAAO,eAAe,MAAM;AAAA,IAC9B;AAAA,EACF;AACF;AAEO,SAAS,0BAA0B,QAAyB;AACjE,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,EAAE,KAAK,GAAG,QAAQ;AACvB,UAAI,CAAC,gBAAgB,EAAE,QAAQ;AAC7B,eAAO,eAAe,YAAY,qBAAqB,QAAQ,CAAC,CAAC;AAAA,MACnE;AAEA,YAAM,MAAM,SAAS;AACrB,YAAM,WAAW,mBAAmB,KAAK,iBAAiB;AAC1D,YAAM,SAAS,MAAM;AAAA,QACnB;AAAA,QA
CA,IAAI;AAAA,QACJ,IAAI;AAAA,QACJ;AAAA,MACF;AAEA,YAAM,SAAS,SAAS,KAAK,KAAK,OAAO,UAAU,wBAAwB,uBAAuB;AAClG,aAAO,eAAe,MAAM;AAAA,IAC9B;AAAA,EACF;AACF;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
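As the reddit.js sources above show, 4.1.0 drops the per-post LLM extraction step: search-reddit returns a flat, newline-separated list of unique Reddit URLs, and get-reddit-post accepts only a urls array and returns raw posts with their threaded comments. A minimal client-side sketch of how the two tools chain, assuming a generic callTool helper that is not part of this package, might look like:

```typescript
// Sketch only: `callTool` is a stand-in for whatever MCP client helper you use;
// it is not exported by mcp-researchpowerpack-http.
declare function callTool(
  name: string,
  args: Record<string, unknown>,
): Promise<{ content: string }>;

// search-reddit returns newline-separated Reddit post URLs;
// get-reddit-post now takes only a `urls` array.
async function redditPipeline(topic: string): Promise<string> {
  const search = await callTool('search-reddit', {
    queries: [`${topic} recommendations`, `${topic} experiences`],
  });
  const urls = search.content.split('\n').filter(Boolean).slice(0, 10);
  const posts = await callTool('get-reddit-post', { urls });
  return posts.content;
}
```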
package/dist/src/tools/scrape.js
CHANGED
|
@@ -157,14 +157,11 @@ ${content}`);
|
|
|
157
157
|
}
|
|
158
158
|
function buildScrapeMetadata(params, metrics, tokensPerUrl, totalBatches, executionTime) {
|
|
159
159
|
return {
|
|
160
|
-
|
|
160
|
+
total_items: params.urls.length,
|
|
161
161
|
successful: metrics.successful,
|
|
162
162
|
failed: metrics.failed,
|
|
163
|
-
total_credits: metrics.totalCredits,
|
|
164
163
|
execution_time_ms: executionTime,
|
|
165
|
-
|
|
166
|
-
total_token_budget: TOKEN_BUDGETS.SCRAPER,
|
|
167
|
-
batches_processed: totalBatches
|
|
164
|
+
total_credits: metrics.totalCredits
|
|
168
165
|
};
|
|
169
166
|
}
|
|
170
167
|
function buildScrapeResponse(params, contents, metrics, tokensPerUrl, totalBatches, llmErrors, executionTime) {
|
|
@@ -223,9 +220,9 @@ async function handleScrapeLinks(params, reporter = NOOP_REPORTER) {
|
|
|
223
220
|
'search-reddit(queries=["topic discussion", "topic recommendations"]) \u2014 get community insights as an alternative'
|
|
224
221
|
]);
|
|
225
222
|
}
|
|
226
|
-
const enhancedInstruction = enhanceExtractionInstruction(params.what_to_extract);
|
|
223
|
+
const enhancedInstruction = enhanceExtractionInstruction(params.extract);
|
|
227
224
|
await reporter.progress(35, 100, "Fetching page content");
|
|
228
|
-
const results = await clients.client.scrapeMultiple(validUrls, { timeout: params.timeout });
|
|
225
|
+
const results = await clients.client.scrapeMultiple(validUrls, { timeout: 300 });
|
|
229
226
|
mcpLog("info", `Scraping complete. Processing ${results.length} results...`, "scrape");
|
|
230
227
|
await reporter.log("info", `Fetched ${results.length} scrape response(s) from the provider`);
|
|
231
228
|
await reporter.progress(60, 100, "Cleaning and classifying scrape results");
|
|
@@ -262,7 +259,7 @@ function registerScrapeLinksTool(server) {
|
|
|
262
259
|
{
|
|
263
260
|
name: "scrape-links",
|
|
264
261
|
title: "Scrape Links",
|
|
265
|
-
description:
|
|
262
|
+
description: "Scrape up to 100 web pages and run LLM extraction on each. Returns only the data you specify in the extract field \u2014 everything else is filtered out.",
|
|
266
263
|
schema: scrapeLinksParamsSchema,
|
|
267
264
|
outputSchema: scrapeLinksOutputSchema,
|
|
268
265
|
annotations: {
|
package/dist/src/tools/scrape.js.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/tools/scrape.ts"],
|
|
4
|
-
"sourcesContent": ["/**\n * Scrape Links Tool Handler\n * Implements robust error handling that NEVER crashes the MCP server\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport { SCRAPER, CONCURRENCY, getCapabilities, getMissingEnvMessage } from '../config/index.js';\nimport {\n scrapeLinksOutputSchema,\n scrapeLinksParamsSchema,\n type ScrapeLinksParams,\n type ScrapeLinksOutput,\n} from '../schemas/scrape-links.js';\nimport { ScraperClient } from '../clients/scraper.js';\nimport { MarkdownCleaner } from '../services/markdown-cleaner.js';\nimport { createLLMProcessor, processContentWithLLM } from '../services/llm-processor.js';\nimport { removeMetaTags } from '../utils/markdown-formatter.js';\nimport { classifyError } from '../utils/errors.js';\nimport { pMap } from '../utils/concurrency.js';\nimport {\n mcpLog,\n formatSuccess,\n formatError,\n formatBatchHeader,\n formatDuration,\n TOKEN_BUDGETS,\n calculateTokenAllocation,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\n\n// Module-level singleton - MarkdownCleaner is stateless\nconst markdownCleaner = new MarkdownCleaner();\n\n// Extraction prefix/suffix are kept in runtime config to avoid YAML indirection.\nfunction getExtractionPrefix(): string {\n return SCRAPER.EXTRACTION_PREFIX;\n}\n\nfunction getExtractionSuffix(): string {\n return SCRAPER.EXTRACTION_SUFFIX;\n}\n\nfunction enhanceExtractionInstruction(instruction: string | undefined): string {\n const base = instruction || 'Extract the main content and key information from this page.';\n return `${getExtractionPrefix()}\\n\\n${base}\\n\\n${getExtractionSuffix()}`;\n}\n\n// --- Internal types for decomposed helpers ---\n\ninterface ProcessedResult {\n url: string;\n content: string;\n index: number;\n}\n\ninterface ScrapeMetrics {\n successful: number;\n failed: number;\n totalCredits: number;\n}\n\ninterface ScrapePhaseResult {\n successItems: ProcessedResult[];\n failedContents: string[];\n metrics: ScrapeMetrics;\n}\n\ninterface ScrapeClients {\n client: ScraperClient;\n llmProcessor: ReturnType<typeof createLLMProcessor>;\n}\n\n// --- Helpers ---\n\nfunction createScrapeErrorResponse(\n code: string,\n message: string,\n startTime: number,\n totalUrls: number,\n retryable = false,\n alternatives?: string[],\n): ToolExecutionResult<ScrapeLinksOutput> {\n return toolFailure(\n `${formatError({\n code,\n message,\n retryable,\n toolName: 'scrape-links',\n howToFix: code === 'NO_URLS' ? 
['Provide at least one valid URL'] : undefined,\n alternatives,\n })}\\n\\nExecution time: ${formatDuration(Date.now() - startTime)}`,\n );\n}\n\nfunction validateAndPartitionUrls(urls: string[]): { validUrls: string[]; invalidUrls: string[] } {\n const validUrls: string[] = [];\n const invalidUrls: string[] = [];\n for (const url of urls) {\n try {\n new URL(url);\n validUrls.push(url);\n } catch {\n invalidUrls.push(url);\n }\n }\n return { validUrls, invalidUrls };\n}\n\nfunction initializeScrapeClients(): ScrapeClients {\n const client = new ScraperClient();\n const llmProcessor = createLLMProcessor();\n return { client, llmProcessor };\n}\n\nfunction processScrapeResults(\n results: Awaited<ReturnType<ScraperClient['scrapeMultiple']>>,\n invalidUrls: string[],\n): ScrapePhaseResult {\n const successItems: ProcessedResult[] = [];\n const failedContents: string[] = [];\n let successful = 0;\n let failed = 0;\n let totalCredits = 0;\n\n for (const invalidUrl of invalidUrls) {\n failed++;\n failedContents.push(`## ${invalidUrl}\\n\\n\u274C Invalid URL format`);\n }\n\n for (let i = 0; i < results.length; i++) {\n const result = results[i];\n if (!result) {\n failed++;\n failedContents.push(`## Unknown URL\\n\\n\u274C No result returned`);\n continue;\n }\n\n mcpLog('debug', `[${i + 1}/${results.length}] Processing ${result.url}`, 'scrape');\n\n if (result.error || result.statusCode < 200 || result.statusCode >= 300) {\n failed++;\n const errorMsg = result.error?.message || result.content || `HTTP ${result.statusCode}`;\n failedContents.push(`## ${result.url}\\n\\n\u274C Failed to scrape: ${errorMsg}`);\n mcpLog('warning', `[${i + 1}/${results.length}] Failed: ${errorMsg}`, 'scrape');\n continue;\n }\n\n successful++;\n totalCredits += result.credits;\n\n let content: string;\n try {\n content = markdownCleaner.processContent(result.content);\n } catch {\n content = result.content;\n }\n\n successItems.push({ url: result.url, content, index: i });\n }\n\n return { successItems, failedContents, metrics: { successful, failed, totalCredits } };\n}\n\nasync function processItemsWithLlm(\n successItems: ProcessedResult[],\n enhancedInstruction: string,\n tokensPerUrl: number,\n llmProcessor: ReturnType<typeof createLLMProcessor>,\n): Promise<{ items: ProcessedResult[]; llmErrors: number }> {\n let llmErrors = 0;\n\n if (!llmProcessor || successItems.length === 0) {\n if (!llmProcessor && successItems.length > 0) {\n mcpLog('warning', 'LLM unavailable (LLM_EXTRACTION_API_KEY not set). 
Returning raw scraped content.', 'scrape');\n }\n return { items: successItems, llmErrors };\n }\n\n mcpLog('info', `Starting parallel LLM extraction for ${successItems.length} pages (concurrency: ${CONCURRENCY.LLM_EXTRACTION})`, 'scrape');\n\n const llmResults = await pMap(successItems, async (item) => {\n mcpLog('debug', `LLM extracting ${item.url} (${tokensPerUrl} tokens)...`, 'scrape');\n\n const llmResult = await processContentWithLLM(\n item.content,\n { use_llm: true, what_to_extract: enhancedInstruction, max_tokens: tokensPerUrl },\n llmProcessor,\n );\n\n if (llmResult.processed) {\n mcpLog('debug', `LLM extraction complete for ${item.url}`, 'scrape');\n return { ...item, content: llmResult.content };\n }\n\n llmErrors++;\n mcpLog('warning', `LLM extraction failed for ${item.url}: ${llmResult.error || 'unknown reason'}`, 'scrape');\n return item;\n }, CONCURRENCY.LLM_EXTRACTION);\n\n return { items: llmResults, llmErrors };\n}\n\nfunction assembleContentEntries(successItems: ProcessedResult[], failedContents: string[]): string[] {\n const contents = [...failedContents];\n for (const item of successItems) {\n let content = item.content;\n try {\n content = removeMetaTags(content);\n } catch {\n // Use content as-is\n }\n contents.push(`## ${item.url}\\n\\n${content}`);\n }\n return contents;\n}\n\nfunction buildScrapeMetadata(\n params: ScrapeLinksParams,\n metrics: ScrapeMetrics,\n tokensPerUrl: number,\n totalBatches: number,\n executionTime: number,\n): ScrapeLinksOutput['metadata'] {\n return {\n total_urls: params.urls.length,\n successful: metrics.successful,\n failed: metrics.failed,\n total_credits: metrics.totalCredits,\n execution_time_ms: executionTime,\n tokens_per_url: tokensPerUrl,\n total_token_budget: TOKEN_BUDGETS.SCRAPER,\n batches_processed: totalBatches,\n };\n}\n\nfunction buildScrapeResponse(\n params: ScrapeLinksParams,\n contents: string[],\n metrics: ScrapeMetrics,\n tokensPerUrl: number,\n totalBatches: number,\n llmErrors: number,\n executionTime: number,\n): { content: string; structuredContent: ScrapeLinksOutput } {\n const batchHeader = formatBatchHeader({\n title: `Scraped Content (${params.urls.length} URLs)`,\n totalItems: params.urls.length,\n successful: metrics.successful,\n failed: metrics.failed,\n tokensPerItem: tokensPerUrl,\n batches: totalBatches,\n extras: {\n 'Credits used': metrics.totalCredits,\n ...(llmErrors > 0 ? { 'LLM extraction failures': llmErrors } : {}),\n },\n });\n\n const formattedContent = formatSuccess({\n title: 'Scraping Complete',\n summary: batchHeader,\n data: contents.join('\\n\\n---\\n\\n'),\n nextSteps: [\n metrics.successful > 0 ? 'web-search or search-reddit to cross-check claims from scraped content' : null,\n metrics.failed > 0 ? 
'Retry failed URLs with timeout=60' : null,\n ].filter(Boolean) as string[],\n metadata: {\n 'Execution time': formatDuration(executionTime),\n 'Token budget': TOKEN_BUDGETS.SCRAPER.toLocaleString(),\n },\n });\n\n const metadata = buildScrapeMetadata(params, metrics, tokensPerUrl, totalBatches, executionTime);\n return { content: formattedContent, structuredContent: { content: formattedContent, metadata } };\n}\n\n/**\n * Handle scrape links request\n * NEVER throws - always returns a valid response with content and metadata\n */\nexport async function handleScrapeLinks(\n params: ScrapeLinksParams,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<ScrapeLinksOutput>> {\n const startTime = Date.now();\n\n if (!params.urls || params.urls.length === 0) {\n return createScrapeErrorResponse('NO_URLS', 'No URLs provided', startTime, params.urls?.length || 0);\n }\n\n const { validUrls, invalidUrls } = validateAndPartitionUrls(params.urls);\n await reporter.log('info', `Validated ${validUrls.length} scrapeable URL(s) and ${invalidUrls.length} invalid URL(s)`);\n\n if (validUrls.length === 0) {\n return createScrapeErrorResponse('INVALID_URLS', `All ${params.urls.length} URLs are invalid`, startTime, params.urls.length, false, [\n 'web-search(keywords=[\"topic documentation\", \"topic guide\"]) \u2014 search for valid URLs first, then scrape the results',\n 'search-reddit(queries=[\"topic recommendations\"]) \u2014 find discussion URLs to scrape instead',\n ]);\n }\n\n const tokensPerUrl = calculateTokenAllocation(validUrls.length, TOKEN_BUDGETS.SCRAPER);\n const totalBatches = Math.ceil(validUrls.length / SCRAPER.BATCH_SIZE);\n\n mcpLog('info', `Starting scrape: ${validUrls.length} URL(s), ${tokensPerUrl} tokens/URL, ${totalBatches} batch(es)`, 'scrape');\n await reporter.progress(15, 100, 'Preparing scraper clients');\n\n let clients: ScrapeClients;\n try {\n clients = initializeScrapeClients();\n } catch (error) {\n const err = classifyError(error);\n return createScrapeErrorResponse('CLIENT_INIT_FAILED', `Failed to initialize scraper: ${err.message}`, startTime, params.urls.length, false, [\n 'web-search(keywords=[\"topic key findings\", \"topic summary\", \"topic overview\"]) \u2014 search for information instead of scraping',\n 'search-reddit(queries=[\"topic discussion\", \"topic recommendations\"]) \u2014 get community insights as an alternative',\n ]);\n }\n\n const enhancedInstruction = enhanceExtractionInstruction(params.what_to_extract);\n\n await reporter.progress(35, 100, 'Fetching page content');\n const results = await clients.client.scrapeMultiple(validUrls, { timeout: params.timeout });\n mcpLog('info', `Scraping complete. 
Processing ${results.length} results...`, 'scrape');\n await reporter.log('info', `Fetched ${results.length} scrape response(s) from the provider`);\n await reporter.progress(60, 100, 'Cleaning and classifying scrape results');\n\n const { successItems, failedContents, metrics } = processScrapeResults(results, invalidUrls);\n\n if (successItems.length > 0) {\n await reporter.progress(80, 100, 'Running LLM extraction over scraped pages');\n }\n const { items: processedItems, llmErrors } = await processItemsWithLlm(\n successItems, enhancedInstruction, tokensPerUrl, clients.llmProcessor,\n );\n\n const contents = assembleContentEntries(processedItems, failedContents);\n const executionTime = Date.now() - startTime;\n\n mcpLog('info', `Completed: ${metrics.successful} successful, ${metrics.failed} failed, ${metrics.totalCredits} credits used`, 'scrape');\n await reporter.log(\n 'info',\n `Scrape completed with ${metrics.successful} success(es), ${metrics.failed} failure(s), and ${llmErrors} LLM extraction issue(s)`,\n );\n\n const result = buildScrapeResponse(\n params,\n contents,\n metrics,\n tokensPerUrl,\n totalBatches,\n llmErrors,\n executionTime,\n );\n return toolSuccess(result.content, result.structuredContent);\n}\n\nexport function registerScrapeLinksTool(server: MCPServer): void {\n server.tool(\n {\n name: 'scrape-links',\n title: 'Scrape Links',\n description:\n 'Scrape 1-50 web pages and run LLM extraction on each. Provide what_to_extract with specific targets (e.g., \"Extract pricing tiers | feature limits | API rate limits\"). Token budget (32K) is split across URLs: 3 URLs get ~10K tokens each (deep), 10 get ~3K (balanced), 50 get ~640 (scan).',\n schema: scrapeLinksParamsSchema,\n outputSchema: scrapeLinksOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async (args, ctx) => {\n if (!getCapabilities().scraping) {\n return toToolResponse(toolFailure(getMissingEnvMessage('scraping')));\n }\n\n const reporter = createToolReporter(ctx, 'scrape-links');\n const result = await handleScrapeLinks(args, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Scrape failed' : 'Scrape complete');\n return toToolResponse(result);\n },\n );\n}\n"],
|
|
5
|
-
"mappings": "AAOA,SAAS,SAAS,aAAa,iBAAiB,4BAA4B;AAC5E;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP,SAAS,qBAAqB;AAC9B,SAAS,uBAAuB;AAChC,SAAS,oBAAoB,6BAA6B;AAC1D,SAAS,sBAAsB;AAC/B,SAAS,qBAAqB;AAC9B,SAAS,YAAY;AACrB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AAGP,MAAM,kBAAkB,IAAI,gBAAgB;AAG5C,SAAS,sBAA8B;AACrC,SAAO,QAAQ;AACjB;AAEA,SAAS,sBAA8B;AACrC,SAAO,QAAQ;AACjB;AAEA,SAAS,6BAA6B,aAAyC;AAC7E,QAAM,OAAO,eAAe;AAC5B,SAAO,GAAG,oBAAoB,CAAC;AAAA;AAAA,EAAO,IAAI;AAAA;AAAA,EAAO,oBAAoB,CAAC;AACxE;AA6BA,SAAS,0BACP,MACA,SACA,WACA,WACA,YAAY,OACZ,cACwC;AACxC,SAAO;AAAA,IACL,GAAG,YAAY;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,MACV,UAAU,SAAS,YAAY,CAAC,gCAAgC,IAAI;AAAA,MACpE;AAAA,IACF,CAAC,CAAC;AAAA;AAAA,kBAAuB,eAAe,KAAK,IAAI,IAAI,SAAS,CAAC;AAAA,EACjE;AACF;AAEA,SAAS,yBAAyB,MAAgE;AAChG,QAAM,YAAsB,CAAC;AAC7B,QAAM,cAAwB,CAAC;AAC/B,aAAW,OAAO,MAAM;AACtB,QAAI;AACF,UAAI,IAAI,GAAG;AACX,gBAAU,KAAK,GAAG;AAAA,IACpB,QAAQ;AACN,kBAAY,KAAK,GAAG;AAAA,IACtB;AAAA,EACF;AACA,SAAO,EAAE,WAAW,YAAY;AAClC;AAEA,SAAS,0BAAyC;AAChD,QAAM,SAAS,IAAI,cAAc;AACjC,QAAM,eAAe,mBAAmB;AACxC,SAAO,EAAE,QAAQ,aAAa;AAChC;AAEA,SAAS,qBACP,SACA,aACmB;AACnB,QAAM,eAAkC,CAAC;AACzC,QAAM,iBAA2B,CAAC;AAClC,MAAI,aAAa;AACjB,MAAI,SAAS;AACb,MAAI,eAAe;AAEnB,aAAW,cAAc,aAAa;AACpC;AACA,mBAAe,KAAK,MAAM,UAAU;AAAA;AAAA,0BAA0B;AAAA,EAChE;AAEA,WAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,UAAM,SAAS,QAAQ,CAAC;AACxB,QAAI,CAAC,QAAQ;AACX;AACA,qBAAe,KAAK;AAAA;AAAA,0BAAwC;AAC5D;AAAA,IACF;AAEA,WAAO,SAAS,IAAI,IAAI,CAAC,IAAI,QAAQ,MAAM,gBAAgB,OAAO,GAAG,IAAI,QAAQ;AAEjF,QAAI,OAAO,SAAS,OAAO,aAAa,OAAO,OAAO,cAAc,KAAK;AACvE;AACA,YAAM,WAAW,OAAO,OAAO,WAAW,OAAO,WAAW,QAAQ,OAAO,UAAU;AACrF,qBAAe,KAAK,MAAM,OAAO,GAAG;AAAA;AAAA,2BAA2B,QAAQ,EAAE;AACzE,aAAO,WAAW,IAAI,IAAI,CAAC,IAAI,QAAQ,MAAM,aAAa,QAAQ,IAAI,QAAQ;AAC9E;AAAA,IACF;AAEA;AACA,oBAAgB,OAAO;AAEvB,QAAI;AACJ,QAAI;AACF,gBAAU,gBAAgB,eAAe,OAAO,OAAO;AAAA,IACzD,QAAQ;AACN,gBAAU,OAAO;AAAA,IACnB;AAEA,iBAAa,KAAK,EAAE,KAAK,OAAO,KAAK,SAAS,OAAO,EAAE,CAAC;AAAA,EAC1D;AAEA,SAAO,EAAE,cAAc,gBAAgB,SAAS,EAAE,YAAY,QAAQ,aAAa,EAAE;AACvF;AAEA,eAAe,oBACb,cACA,qBACA,cACA,cAC0D;AAC1D,MAAI,YAAY;AAEhB,MAAI,CAAC,gBAAgB,aAAa,WAAW,GAAG;AAC9C,QAAI,CAAC,gBAAgB,aAAa,SAAS,GAAG;AAC5C,aAAO,WAAW,oFAAoF,QAAQ;AAAA,IAChH;AACA,WAAO,EAAE,OAAO,cAAc,UAAU;AAAA,EAC1C;AAEA,SAAO,QAAQ,wCAAwC,aAAa,MAAM,wBAAwB,YAAY,cAAc,KAAK,QAAQ;AAEzI,QAAM,aAAa,MAAM,KAAK,cAAc,OAAO,SAAS;AAC1D,WAAO,SAAS,kBAAkB,KAAK,GAAG,KAAK,YAAY,eAAe,QAAQ;AAElF,UAAM,YAAY,MAAM;AAAA,MACtB,KAAK;AAAA,MACL,EAAE,SAAS,MAAM,iBAAiB,qBAAqB,YAAY,aAAa;AAAA,MAChF;AAAA,IACF;AAEA,QAAI,UAAU,WAAW;AACvB,aAAO,SAAS,+BAA+B,KAAK,GAAG,IAAI,QAAQ;AACnE,aAAO,EAAE,GAAG,MAAM,SAAS,UAAU,QAAQ;AAAA,IAC/C;AAEA;AACA,WAAO,WAAW,6BAA6B,KAAK,GAAG,KAAK,UAAU,SAAS,gBAAgB,IAAI,QAAQ;AAC3G,WAAO;AAAA,EACT,GAAG,YAAY,cAAc;AAE7B,SAAO,EAAE,OAAO,YAAY,UAAU;AACxC;AAEA,SAAS,uBAAuB,cAAiC,gBAAoC;AACnG,QAAM,WAAW,CAAC,GAAG,cAAc;AACnC,aAAW,QAAQ,cAAc;AAC/B,QAAI,UAAU,KAAK;AACnB,QAAI;AACF,gBAAU,eAAe,OAAO;AAAA,IAClC,QAAQ;AAAA,IAER;AACA,aAAS,KAAK,MAAM,KAAK,GAAG;AAAA;AAAA,EAAO,OAAO,EAAE;AAAA,EAC9C;AACA,SAAO;AACT;AAEA,SAAS,oBACP,QACA,SACA,cACA,cACA,eAC+B;AAC/B,SAAO;AAAA,IACL,
|
|
4
|
+
"sourcesContent": ["/**\n * Scrape Links Tool Handler\n * Implements robust error handling that NEVER crashes the MCP server\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport { SCRAPER, CONCURRENCY, getCapabilities, getMissingEnvMessage } from '../config/index.js';\nimport {\n scrapeLinksOutputSchema,\n scrapeLinksParamsSchema,\n type ScrapeLinksParams,\n type ScrapeLinksOutput,\n} from '../schemas/scrape-links.js';\nimport { ScraperClient } from '../clients/scraper.js';\nimport { MarkdownCleaner } from '../services/markdown-cleaner.js';\nimport { createLLMProcessor, processContentWithLLM } from '../services/llm-processor.js';\nimport { removeMetaTags } from '../utils/markdown-formatter.js';\nimport { classifyError } from '../utils/errors.js';\nimport { pMap } from '../utils/concurrency.js';\nimport {\n mcpLog,\n formatSuccess,\n formatError,\n formatBatchHeader,\n formatDuration,\n TOKEN_BUDGETS,\n calculateTokenAllocation,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\n\n// Module-level singleton - MarkdownCleaner is stateless\nconst markdownCleaner = new MarkdownCleaner();\n\n// Extraction prefix/suffix are kept in runtime config to avoid YAML indirection.\nfunction getExtractionPrefix(): string {\n return SCRAPER.EXTRACTION_PREFIX;\n}\n\nfunction getExtractionSuffix(): string {\n return SCRAPER.EXTRACTION_SUFFIX;\n}\n\nfunction enhanceExtractionInstruction(instruction: string | undefined): string {\n const base = instruction || 'Extract the main content and key information from this page.';\n return `${getExtractionPrefix()}\\n\\n${base}\\n\\n${getExtractionSuffix()}`;\n}\n\n// --- Internal types for decomposed helpers ---\n\ninterface ProcessedResult {\n url: string;\n content: string;\n index: number;\n}\n\ninterface ScrapeMetrics {\n successful: number;\n failed: number;\n totalCredits: number;\n}\n\ninterface ScrapePhaseResult {\n successItems: ProcessedResult[];\n failedContents: string[];\n metrics: ScrapeMetrics;\n}\n\ninterface ScrapeClients {\n client: ScraperClient;\n llmProcessor: ReturnType<typeof createLLMProcessor>;\n}\n\n// --- Helpers ---\n\nfunction createScrapeErrorResponse(\n code: string,\n message: string,\n startTime: number,\n totalUrls: number,\n retryable = false,\n alternatives?: string[],\n): ToolExecutionResult<ScrapeLinksOutput> {\n return toolFailure(\n `${formatError({\n code,\n message,\n retryable,\n toolName: 'scrape-links',\n howToFix: code === 'NO_URLS' ? 
['Provide at least one valid URL'] : undefined,\n alternatives,\n })}\\n\\nExecution time: ${formatDuration(Date.now() - startTime)}`,\n );\n}\n\nfunction validateAndPartitionUrls(urls: string[]): { validUrls: string[]; invalidUrls: string[] } {\n const validUrls: string[] = [];\n const invalidUrls: string[] = [];\n for (const url of urls) {\n try {\n new URL(url);\n validUrls.push(url);\n } catch {\n invalidUrls.push(url);\n }\n }\n return { validUrls, invalidUrls };\n}\n\nfunction initializeScrapeClients(): ScrapeClients {\n const client = new ScraperClient();\n const llmProcessor = createLLMProcessor();\n return { client, llmProcessor };\n}\n\nfunction processScrapeResults(\n results: Awaited<ReturnType<ScraperClient['scrapeMultiple']>>,\n invalidUrls: string[],\n): ScrapePhaseResult {\n const successItems: ProcessedResult[] = [];\n const failedContents: string[] = [];\n let successful = 0;\n let failed = 0;\n let totalCredits = 0;\n\n for (const invalidUrl of invalidUrls) {\n failed++;\n failedContents.push(`## ${invalidUrl}\\n\\n\u274C Invalid URL format`);\n }\n\n for (let i = 0; i < results.length; i++) {\n const result = results[i];\n if (!result) {\n failed++;\n failedContents.push(`## Unknown URL\\n\\n\u274C No result returned`);\n continue;\n }\n\n mcpLog('debug', `[${i + 1}/${results.length}] Processing ${result.url}`, 'scrape');\n\n if (result.error || result.statusCode < 200 || result.statusCode >= 300) {\n failed++;\n const errorMsg = result.error?.message || result.content || `HTTP ${result.statusCode}`;\n failedContents.push(`## ${result.url}\\n\\n\u274C Failed to scrape: ${errorMsg}`);\n mcpLog('warning', `[${i + 1}/${results.length}] Failed: ${errorMsg}`, 'scrape');\n continue;\n }\n\n successful++;\n totalCredits += result.credits;\n\n let content: string;\n try {\n content = markdownCleaner.processContent(result.content);\n } catch {\n content = result.content;\n }\n\n successItems.push({ url: result.url, content, index: i });\n }\n\n return { successItems, failedContents, metrics: { successful, failed, totalCredits } };\n}\n\nasync function processItemsWithLlm(\n successItems: ProcessedResult[],\n enhancedInstruction: string,\n tokensPerUrl: number,\n llmProcessor: ReturnType<typeof createLLMProcessor>,\n): Promise<{ items: ProcessedResult[]; llmErrors: number }> {\n let llmErrors = 0;\n\n if (!llmProcessor || successItems.length === 0) {\n if (!llmProcessor && successItems.length > 0) {\n mcpLog('warning', 'LLM unavailable (LLM_EXTRACTION_API_KEY not set). 
Returning raw scraped content.', 'scrape');\n }\n return { items: successItems, llmErrors };\n }\n\n mcpLog('info', `Starting parallel LLM extraction for ${successItems.length} pages (concurrency: ${CONCURRENCY.LLM_EXTRACTION})`, 'scrape');\n\n const llmResults = await pMap(successItems, async (item) => {\n mcpLog('debug', `LLM extracting ${item.url} (${tokensPerUrl} tokens)...`, 'scrape');\n\n const llmResult = await processContentWithLLM(\n item.content,\n { use_llm: true, what_to_extract: enhancedInstruction, max_tokens: tokensPerUrl },\n llmProcessor,\n );\n\n if (llmResult.processed) {\n mcpLog('debug', `LLM extraction complete for ${item.url}`, 'scrape');\n return { ...item, content: llmResult.content };\n }\n\n llmErrors++;\n mcpLog('warning', `LLM extraction failed for ${item.url}: ${llmResult.error || 'unknown reason'}`, 'scrape');\n return item;\n }, CONCURRENCY.LLM_EXTRACTION);\n\n return { items: llmResults, llmErrors };\n}\n\nfunction assembleContentEntries(successItems: ProcessedResult[], failedContents: string[]): string[] {\n const contents = [...failedContents];\n for (const item of successItems) {\n let content = item.content;\n try {\n content = removeMetaTags(content);\n } catch {\n // Use content as-is\n }\n contents.push(`## ${item.url}\\n\\n${content}`);\n }\n return contents;\n}\n\nfunction buildScrapeMetadata(\n params: ScrapeLinksParams,\n metrics: ScrapeMetrics,\n tokensPerUrl: number,\n totalBatches: number,\n executionTime: number,\n): ScrapeLinksOutput['metadata'] {\n return {\n total_items: params.urls.length,\n successful: metrics.successful,\n failed: metrics.failed,\n execution_time_ms: executionTime,\n total_credits: metrics.totalCredits,\n };\n}\n\nfunction buildScrapeResponse(\n params: ScrapeLinksParams,\n contents: string[],\n metrics: ScrapeMetrics,\n tokensPerUrl: number,\n totalBatches: number,\n llmErrors: number,\n executionTime: number,\n): { content: string; structuredContent: ScrapeLinksOutput } {\n const batchHeader = formatBatchHeader({\n title: `Scraped Content (${params.urls.length} URLs)`,\n totalItems: params.urls.length,\n successful: metrics.successful,\n failed: metrics.failed,\n tokensPerItem: tokensPerUrl,\n batches: totalBatches,\n extras: {\n 'Credits used': metrics.totalCredits,\n ...(llmErrors > 0 ? { 'LLM extraction failures': llmErrors } : {}),\n },\n });\n\n const formattedContent = formatSuccess({\n title: 'Scraping Complete',\n summary: batchHeader,\n data: contents.join('\\n\\n---\\n\\n'),\n nextSteps: [\n metrics.successful > 0 ? 'web-search or search-reddit to cross-check claims from scraped content' : null,\n metrics.failed > 0 ? 
'Retry failed URLs with timeout=60' : null,\n ].filter(Boolean) as string[],\n metadata: {\n 'Execution time': formatDuration(executionTime),\n 'Token budget': TOKEN_BUDGETS.SCRAPER.toLocaleString(),\n },\n });\n\n const metadata = buildScrapeMetadata(params, metrics, tokensPerUrl, totalBatches, executionTime);\n return { content: formattedContent, structuredContent: { content: formattedContent, metadata } };\n}\n\n/**\n * Handle scrape links request\n * NEVER throws - always returns a valid response with content and metadata\n */\nexport async function handleScrapeLinks(\n params: ScrapeLinksParams,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<ScrapeLinksOutput>> {\n const startTime = Date.now();\n\n if (!params.urls || params.urls.length === 0) {\n return createScrapeErrorResponse('NO_URLS', 'No URLs provided', startTime, params.urls?.length || 0);\n }\n\n const { validUrls, invalidUrls } = validateAndPartitionUrls(params.urls);\n await reporter.log('info', `Validated ${validUrls.length} scrapeable URL(s) and ${invalidUrls.length} invalid URL(s)`);\n\n if (validUrls.length === 0) {\n return createScrapeErrorResponse('INVALID_URLS', `All ${params.urls.length} URLs are invalid`, startTime, params.urls.length, false, [\n 'web-search(keywords=[\"topic documentation\", \"topic guide\"]) \u2014 search for valid URLs first, then scrape the results',\n 'search-reddit(queries=[\"topic recommendations\"]) \u2014 find discussion URLs to scrape instead',\n ]);\n }\n\n const tokensPerUrl = calculateTokenAllocation(validUrls.length, TOKEN_BUDGETS.SCRAPER);\n const totalBatches = Math.ceil(validUrls.length / SCRAPER.BATCH_SIZE);\n\n mcpLog('info', `Starting scrape: ${validUrls.length} URL(s), ${tokensPerUrl} tokens/URL, ${totalBatches} batch(es)`, 'scrape');\n await reporter.progress(15, 100, 'Preparing scraper clients');\n\n let clients: ScrapeClients;\n try {\n clients = initializeScrapeClients();\n } catch (error) {\n const err = classifyError(error);\n return createScrapeErrorResponse('CLIENT_INIT_FAILED', `Failed to initialize scraper: ${err.message}`, startTime, params.urls.length, false, [\n 'web-search(keywords=[\"topic key findings\", \"topic summary\", \"topic overview\"]) \u2014 search for information instead of scraping',\n 'search-reddit(queries=[\"topic discussion\", \"topic recommendations\"]) \u2014 get community insights as an alternative',\n ]);\n }\n\n const enhancedInstruction = enhanceExtractionInstruction(params.extract);\n\n await reporter.progress(35, 100, 'Fetching page content');\n const results = await clients.client.scrapeMultiple(validUrls, { timeout: 300 });\n mcpLog('info', `Scraping complete. 
Processing ${results.length} results...`, 'scrape');\n await reporter.log('info', `Fetched ${results.length} scrape response(s) from the provider`);\n await reporter.progress(60, 100, 'Cleaning and classifying scrape results');\n\n const { successItems, failedContents, metrics } = processScrapeResults(results, invalidUrls);\n\n if (successItems.length > 0) {\n await reporter.progress(80, 100, 'Running LLM extraction over scraped pages');\n }\n const { items: processedItems, llmErrors } = await processItemsWithLlm(\n successItems, enhancedInstruction, tokensPerUrl, clients.llmProcessor,\n );\n\n const contents = assembleContentEntries(processedItems, failedContents);\n const executionTime = Date.now() - startTime;\n\n mcpLog('info', `Completed: ${metrics.successful} successful, ${metrics.failed} failed, ${metrics.totalCredits} credits used`, 'scrape');\n await reporter.log(\n 'info',\n `Scrape completed with ${metrics.successful} success(es), ${metrics.failed} failure(s), and ${llmErrors} LLM extraction issue(s)`,\n );\n\n const result = buildScrapeResponse(\n params,\n contents,\n metrics,\n tokensPerUrl,\n totalBatches,\n llmErrors,\n executionTime,\n );\n return toolSuccess(result.content, result.structuredContent);\n}\n\nexport function registerScrapeLinksTool(server: MCPServer): void {\n server.tool(\n {\n name: 'scrape-links',\n title: 'Scrape Links',\n description:\n 'Scrape up to 100 web pages and run LLM extraction on each. Returns only the data you specify in the extract field \u2014 everything else is filtered out.',\n schema: scrapeLinksParamsSchema,\n outputSchema: scrapeLinksOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async (args, ctx) => {\n if (!getCapabilities().scraping) {\n return toToolResponse(toolFailure(getMissingEnvMessage('scraping')));\n }\n\n const reporter = createToolReporter(ctx, 'scrape-links');\n const result = await handleScrapeLinks(args, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Scrape failed' : 'Scrape complete');\n return toToolResponse(result);\n },\n );\n}\n"],
|
|
5
|
+
"mappings": "AAOA,SAAS,SAAS,aAAa,iBAAiB,4BAA4B;AAC5E;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP,SAAS,qBAAqB;AAC9B,SAAS,uBAAuB;AAChC,SAAS,oBAAoB,6BAA6B;AAC1D,SAAS,sBAAsB;AAC/B,SAAS,qBAAqB;AAC9B,SAAS,YAAY;AACrB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AAGP,MAAM,kBAAkB,IAAI,gBAAgB;AAG5C,SAAS,sBAA8B;AACrC,SAAO,QAAQ;AACjB;AAEA,SAAS,sBAA8B;AACrC,SAAO,QAAQ;AACjB;AAEA,SAAS,6BAA6B,aAAyC;AAC7E,QAAM,OAAO,eAAe;AAC5B,SAAO,GAAG,oBAAoB,CAAC;AAAA;AAAA,EAAO,IAAI;AAAA;AAAA,EAAO,oBAAoB,CAAC;AACxE;AA6BA,SAAS,0BACP,MACA,SACA,WACA,WACA,YAAY,OACZ,cACwC;AACxC,SAAO;AAAA,IACL,GAAG,YAAY;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,MACV,UAAU,SAAS,YAAY,CAAC,gCAAgC,IAAI;AAAA,MACpE;AAAA,IACF,CAAC,CAAC;AAAA;AAAA,kBAAuB,eAAe,KAAK,IAAI,IAAI,SAAS,CAAC;AAAA,EACjE;AACF;AAEA,SAAS,yBAAyB,MAAgE;AAChG,QAAM,YAAsB,CAAC;AAC7B,QAAM,cAAwB,CAAC;AAC/B,aAAW,OAAO,MAAM;AACtB,QAAI;AACF,UAAI,IAAI,GAAG;AACX,gBAAU,KAAK,GAAG;AAAA,IACpB,QAAQ;AACN,kBAAY,KAAK,GAAG;AAAA,IACtB;AAAA,EACF;AACA,SAAO,EAAE,WAAW,YAAY;AAClC;AAEA,SAAS,0BAAyC;AAChD,QAAM,SAAS,IAAI,cAAc;AACjC,QAAM,eAAe,mBAAmB;AACxC,SAAO,EAAE,QAAQ,aAAa;AAChC;AAEA,SAAS,qBACP,SACA,aACmB;AACnB,QAAM,eAAkC,CAAC;AACzC,QAAM,iBAA2B,CAAC;AAClC,MAAI,aAAa;AACjB,MAAI,SAAS;AACb,MAAI,eAAe;AAEnB,aAAW,cAAc,aAAa;AACpC;AACA,mBAAe,KAAK,MAAM,UAAU;AAAA;AAAA,0BAA0B;AAAA,EAChE;AAEA,WAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,UAAM,SAAS,QAAQ,CAAC;AACxB,QAAI,CAAC,QAAQ;AACX;AACA,qBAAe,KAAK;AAAA;AAAA,0BAAwC;AAC5D;AAAA,IACF;AAEA,WAAO,SAAS,IAAI,IAAI,CAAC,IAAI,QAAQ,MAAM,gBAAgB,OAAO,GAAG,IAAI,QAAQ;AAEjF,QAAI,OAAO,SAAS,OAAO,aAAa,OAAO,OAAO,cAAc,KAAK;AACvE;AACA,YAAM,WAAW,OAAO,OAAO,WAAW,OAAO,WAAW,QAAQ,OAAO,UAAU;AACrF,qBAAe,KAAK,MAAM,OAAO,GAAG;AAAA;AAAA,2BAA2B,QAAQ,EAAE;AACzE,aAAO,WAAW,IAAI,IAAI,CAAC,IAAI,QAAQ,MAAM,aAAa,QAAQ,IAAI,QAAQ;AAC9E;AAAA,IACF;AAEA;AACA,oBAAgB,OAAO;AAEvB,QAAI;AACJ,QAAI;AACF,gBAAU,gBAAgB,eAAe,OAAO,OAAO;AAAA,IACzD,QAAQ;AACN,gBAAU,OAAO;AAAA,IACnB;AAEA,iBAAa,KAAK,EAAE,KAAK,OAAO,KAAK,SAAS,OAAO,EAAE,CAAC;AAAA,EAC1D;AAEA,SAAO,EAAE,cAAc,gBAAgB,SAAS,EAAE,YAAY,QAAQ,aAAa,EAAE;AACvF;AAEA,eAAe,oBACb,cACA,qBACA,cACA,cAC0D;AAC1D,MAAI,YAAY;AAEhB,MAAI,CAAC,gBAAgB,aAAa,WAAW,GAAG;AAC9C,QAAI,CAAC,gBAAgB,aAAa,SAAS,GAAG;AAC5C,aAAO,WAAW,oFAAoF,QAAQ;AAAA,IAChH;AACA,WAAO,EAAE,OAAO,cAAc,UAAU;AAAA,EAC1C;AAEA,SAAO,QAAQ,wCAAwC,aAAa,MAAM,wBAAwB,YAAY,cAAc,KAAK,QAAQ;AAEzI,QAAM,aAAa,MAAM,KAAK,cAAc,OAAO,SAAS;AAC1D,WAAO,SAAS,kBAAkB,KAAK,GAAG,KAAK,YAAY,eAAe,QAAQ;AAElF,UAAM,YAAY,MAAM;AAAA,MACtB,KAAK;AAAA,MACL,EAAE,SAAS,MAAM,iBAAiB,qBAAqB,YAAY,aAAa;AAAA,MAChF;AAAA,IACF;AAEA,QAAI,UAAU,WAAW;AACvB,aAAO,SAAS,+BAA+B,KAAK,GAAG,IAAI,QAAQ;AACnE,aAAO,EAAE,GAAG,MAAM,SAAS,UAAU,QAAQ;AAAA,IAC/C;AAEA;AACA,WAAO,WAAW,6BAA6B,KAAK,GAAG,KAAK,UAAU,SAAS,gBAAgB,IAAI,QAAQ;AAC3G,WAAO;AAAA,EACT,GAAG,YAAY,cAAc;AAE7B,SAAO,EAAE,OAAO,YAAY,UAAU;AACxC;AAEA,SAAS,uBAAuB,cAAiC,gBAAoC;AACnG,QAAM,WAAW,CAAC,GAAG,cAAc;AACnC,aAAW,QAAQ,cAAc;AAC/B,QAAI,UAAU,KAAK;AACnB,QAAI;AACF,gBAAU,eAAe,OAAO;AAAA,IAClC,QAAQ;AAAA,IAER;AACA,aAAS,KAAK,MAAM,KAAK,GAAG;AAAA;AAAA,EAAO,OAAO,EAAE;AAAA,EAC9C;AACA,SAAO;AACT;AAEA,SAAS,oBACP,QACA,SACA,cACA,cACA,eAC+B;AAC/B,SAAO;AAAA,IACL,aAAa,OAAO,KAAK;AAAA,IACzB,YAAY,QAAQ;AAAA,IACpB,QAAQ,QAAQ;AAAA,IAChB,mBAAmB;AAAA,IACnB,eAAe,QAAQ;AAAA,EACzB;AACF;AAEA,SAAS,oBACP,QACA,UACA,SACA,cACA,cACA,WACA,eAC2D;AAC3D,QAAM,cAAc,kBAAkB;AAAA,IACpC,OAAO,oBAAoB,OAAO,KAAK,MAAM;AAAA,IAC7C,YAAY,OAAO,KAAK;AAAA,IACxB,YAAY,QAAQ;AAAA,IACpB,QAAQ,QAAQ;AAAA,IAChB,eAAe;AAAA,IACf,SAAS;AAAA,IACT,QAAQ;AAAA,MACN,gBAAgB,QAAQ;AAAA,MACxB,GAAI,YAAY,IAAI,EAAE,2BAA2B,UAAU,IAAI,CAAC;AAAA,IAClE;AAAA,EACF,CA
AC;AAED,QAAM,mBAAmB,cAAc;AAAA,IACrC,OAAO;AAAA,IACP,SAAS;AAAA,IACT,MAAM,SAAS,KAAK,aAAa;AAAA,IACjC,WAAW;AAAA,MACT,QAAQ,aAAa,IAAI,2EAA2E;AAAA,MACpG,QAAQ,SAAS,IAAI,sCAAsC;AAAA,IAC7D,EAAE,OAAO,OAAO;AAAA,IAChB,UAAU;AAAA,MACR,kBAAkB,eAAe,aAAa;AAAA,MAC9C,gBAAgB,cAAc,QAAQ,eAAe;AAAA,IACvD;AAAA,EACF,CAAC;AAED,QAAM,WAAW,oBAAoB,QAAQ,SAAS,cAAc,cAAc,aAAa;AAC/F,SAAO,EAAE,SAAS,kBAAkB,mBAAmB,EAAE,SAAS,kBAAkB,SAAS,EAAE;AACjG;AAMA,eAAsB,kBACpB,QACA,WAAyB,eACwB;AACjD,QAAM,YAAY,KAAK,IAAI;AAE3B,MAAI,CAAC,OAAO,QAAQ,OAAO,KAAK,WAAW,GAAG;AAC5C,WAAO,0BAA0B,WAAW,oBAAoB,WAAW,OAAO,MAAM,UAAU,CAAC;AAAA,EACrG;AAEA,QAAM,EAAE,WAAW,YAAY,IAAI,yBAAyB,OAAO,IAAI;AACvE,QAAM,SAAS,IAAI,QAAQ,aAAa,UAAU,MAAM,0BAA0B,YAAY,MAAM,iBAAiB;AAErH,MAAI,UAAU,WAAW,GAAG;AAC1B,WAAO,0BAA0B,gBAAgB,OAAO,OAAO,KAAK,MAAM,qBAAqB,WAAW,OAAO,KAAK,QAAQ,OAAO;AAAA,MACnI;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAEA,QAAM,eAAe,yBAAyB,UAAU,QAAQ,cAAc,OAAO;AACrF,QAAM,eAAe,KAAK,KAAK,UAAU,SAAS,QAAQ,UAAU;AAEpE,SAAO,QAAQ,oBAAoB,UAAU,MAAM,YAAY,YAAY,gBAAgB,YAAY,cAAc,QAAQ;AAC7H,QAAM,SAAS,SAAS,IAAI,KAAK,2BAA2B;AAE5D,MAAI;AACJ,MAAI;AACF,cAAU,wBAAwB;AAAA,EACpC,SAAS,OAAO;AACd,UAAM,MAAM,cAAc,KAAK;AAC/B,WAAO,0BAA0B,sBAAsB,iCAAiC,IAAI,OAAO,IAAI,WAAW,OAAO,KAAK,QAAQ,OAAO;AAAA,MAC3I;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAEA,QAAM,sBAAsB,6BAA6B,OAAO,OAAO;AAEvE,QAAM,SAAS,SAAS,IAAI,KAAK,uBAAuB;AACxD,QAAM,UAAU,MAAM,QAAQ,OAAO,eAAe,WAAW,EAAE,SAAS,IAAI,CAAC;AAC/E,SAAO,QAAQ,iCAAiC,QAAQ,MAAM,eAAe,QAAQ;AACrF,QAAM,SAAS,IAAI,QAAQ,WAAW,QAAQ,MAAM,uCAAuC;AAC3F,QAAM,SAAS,SAAS,IAAI,KAAK,yCAAyC;AAE1E,QAAM,EAAE,cAAc,gBAAgB,QAAQ,IAAI,qBAAqB,SAAS,WAAW;AAE3F,MAAI,aAAa,SAAS,GAAG;AAC3B,UAAM,SAAS,SAAS,IAAI,KAAK,2CAA2C;AAAA,EAC9E;AACA,QAAM,EAAE,OAAO,gBAAgB,UAAU,IAAI,MAAM;AAAA,IACjD;AAAA,IAAc;AAAA,IAAqB;AAAA,IAAc,QAAQ;AAAA,EAC3D;AAEA,QAAM,WAAW,uBAAuB,gBAAgB,cAAc;AACtE,QAAM,gBAAgB,KAAK,IAAI,IAAI;AAEnC,SAAO,QAAQ,cAAc,QAAQ,UAAU,gBAAgB,QAAQ,MAAM,YAAY,QAAQ,YAAY,iBAAiB,QAAQ;AACtI,QAAM,SAAS;AAAA,IACb;AAAA,IACA,yBAAyB,QAAQ,UAAU,iBAAiB,QAAQ,MAAM,oBAAoB,SAAS;AAAA,EACzG;AAEA,QAAM,SAAS;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,SAAO,YAAY,OAAO,SAAS,OAAO,iBAAiB;AAC7D;AAEO,SAAS,wBAAwB,QAAyB;AAC/D,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,MAAM,QAAQ;AACnB,UAAI,CAAC,gBAAgB,EAAE,UAAU;AAC/B,eAAO,eAAe,YAAY,qBAAqB,UAAU,CAAC,CAAC;AAAA,MACrE;AAEA,YAAM,WAAW,mBAAmB,KAAK,cAAc;AACvD,YAAM,SAAS,MAAM,kBAAkB,MAAM,QAAQ;AAErD,YAAM,SAAS,SAAS,KAAK,KAAK,OAAO,UAAU,kBAAkB,iBAAiB;AACtF,aAAO,eAAe,MAAM;AAAA,IAC9B;AAAA,EACF;AACF;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
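In the scrape.js changes above, the extraction instruction now comes from params.extract and the structured metadata is trimmed to the five fields built by buildScrapeMetadata in the new sourcesContent. A rough sketch of the resulting shapes, written here for illustration and not exported by the package:

```typescript
// Illustrative only; field names mirror the 4.1.0 sources above, but the package
// does not necessarily export these types, and the optionality of `extract` is assumed.
interface ScrapeLinksArgs {
  urls: string[];    // up to 100 pages per the new tool description
  extract?: string;  // extraction instruction fed to enhanceExtractionInstruction
}

interface ScrapeLinksMetadata {
  total_items: number;
  successful: number;
  failed: number;
  execution_time_ms: number;
  total_credits: number;
}

// Example call shape, using the hypothetical callTool helper from the earlier sketch:
// await callTool('scrape-links', {
//   urls: ['https://example.com/docs'],
//   extract: 'Extract pricing tiers | API rate limits',
// });
```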
package/dist/src/tools/search.js
CHANGED
|
@@ -41,7 +41,7 @@ function buildRawOutput(keywords, aggregation, searches) {
|
|
|
41
41
|
aggregation.thresholdNote
|
|
42
42
|
);
|
|
43
43
|
}
|
|
44
|
-
function buildClassifiedOutput(classification, aggregation, objective, totalKeywords) {
|
|
44
|
+
function buildClassifiedOutput(classification, aggregation, extract, totalKeywords) {
|
|
45
45
|
const rankedUrls = aggregation.rankedUrls;
|
|
46
46
|
const urlByRank = new Map(rankedUrls.map((u) => [u.rank, u]));
|
|
47
47
|
const tiers = {
|
|
@@ -62,7 +62,7 @@ function buildClassifiedOutput(classification, aggregation, objective, totalKeywords) {
|
|
|
62
62
|
}
|
|
63
63
|
const lines = [];
|
|
64
64
|
lines.push(`## ${classification.title}`);
|
|
65
|
-
lines.push(`>
|
|
65
|
+
lines.push(`> Looking for: ${extract}`);
|
|
66
66
|
lines.push(`> ${totalKeywords} queries \u2192 ${rankedUrls.length} URLs \u2192 ${tiers.high.length} highly relevant, ${tiers.maybe.length} possibly relevant`);
|
|
67
67
|
lines.push("");
|
|
68
68
|
lines.push(`**Summary:** ${classification.synthesis}`);
|
|
@@ -120,12 +120,10 @@ function buildMetadata(aggregation, executionTime, totalKeywords, searches, llmC
|
|
|
120
120
|
});
|
|
121
121
|
const lowYieldKeywords = searches.filter((s) => s.results.length <= 1).map((s) => s.keyword);
|
|
122
122
|
return {
|
|
123
|
-
|
|
124
|
-
|
|
123
|
+
total_items: totalKeywords,
|
|
124
|
+
successful: aggregation.rankedUrls.length,
|
|
125
|
+
failed: totalKeywords - searches.filter((s) => s.results.length > 0).length,
|
|
125
126
|
execution_time_ms: executionTime,
|
|
126
|
-
total_unique_urls: aggregation.totalUniqueUrls,
|
|
127
|
-
consensus_url_count: aggregation.rankedUrls.filter((u) => u.isConsensus).length,
|
|
128
|
-
frequency_threshold: aggregation.frequencyThreshold,
|
|
129
127
|
llm_classified: llmClassified,
|
|
130
128
|
...llmError ? { llm_error: llmError } : {},
|
|
131
129
|
coverage_summary: coverageSummary,
|
|
@@ -151,16 +149,16 @@ function buildWebSearchError(error, params, startTime) {
|
|
|
151
149
|
`${errorContent}
|
|
152
150
|
|
|
153
151
|
Execution time: ${formatDuration(executionTime)}
|
|
154
|
-
Keywords: ${params.keywords.length}`
|
|
152
|
+
Queries: ${params.queries.length}`
|
|
155
153
|
);
|
|
156
154
|
}
|
|
157
155
|
async function handleWebSearch(params, reporter = NOOP_REPORTER) {
|
|
158
156
|
const startTime = Date.now();
|
|
159
157
|
try {
|
|
160
|
-
mcpLog("info", `Searching for ${params.
|
|
161
|
-
await reporter.log("info", `Searching for ${params.
|
|
158
|
+
mcpLog("info", `Searching for ${params.queries.length} query/queries`, "search");
|
|
159
|
+
await reporter.log("info", `Searching for ${params.queries.length} query/queries`);
|
|
162
160
|
await reporter.progress(15, 100, "Submitting search queries");
|
|
163
|
-
const response = await executeSearches(params.keywords);
|
|
161
|
+
const response = await executeSearches(params.queries);
|
|
164
162
|
await reporter.progress(50, 100, "Collected search results");
|
|
165
163
|
const { aggregation } = processResults(response);
|
|
166
164
|
await reporter.log(
|
|
@@ -177,13 +175,13 @@ async function handleWebSearch(params, reporter = NOOP_REPORTER) {
|
|
|
177
175
|
llmError = "LLM unavailable (LLM_EXTRACTION_API_KEY not set). Falling back to raw output.";
|
|
178
176
|
mcpLog("warning", llmError, "search");
|
|
179
177
|
}
|
|
180
|
-
markdown = buildRawOutput(params.keywords, aggregation, response.searches);
|
|
178
|
+
markdown = buildRawOutput(params.queries, aggregation, response.searches);
|
|
181
179
|
await reporter.progress(80, 100, "Ranking search results");
|
|
182
180
|
} else {
|
|
183
181
|
await reporter.progress(65, 100, "Classifying results by relevance");
|
|
184
182
|
const classification = await classifySearchResults(
|
|
185
183
|
aggregation.rankedUrls,
|
|
186
|
-
params.objective,
|
|
184
|
+
params.extract,
|
|
187
185
|
response.totalKeywords,
|
|
188
186
|
llmProcessor
|
|
189
187
|
);
|
|
@@ -191,7 +189,7 @@ async function handleWebSearch(params, reporter = NOOP_REPORTER) {
|
|
|
191
189
|
markdown = buildClassifiedOutput(
|
|
192
190
|
classification.result,
|
|
193
191
|
aggregation,
|
|
194
|
-
params.objective,
|
|
192
|
+
params.extract,
|
|
195
193
|
response.totalKeywords
|
|
196
194
|
);
|
|
197
195
|
llmClassified = true;
|
|
@@ -199,7 +197,7 @@ async function handleWebSearch(params, reporter = NOOP_REPORTER) {
|
|
|
199
197
|
} else {
|
|
200
198
|
llmError = classification.error ?? "Unknown classification error";
|
|
201
199
|
mcpLog("warning", `Classification failed, falling back to raw: ${llmError}`, "search");
|
|
202
|
-
markdown = buildRawOutput(params.keywords, aggregation, response.searches);
|
|
200
|
+
markdown = buildRawOutput(params.queries, aggregation, response.searches);
|
|
203
201
|
await reporter.progress(85, 100, "Classification failed, using raw output");
|
|
204
202
|
}
|
|
205
203
|
}
|
|
@@ -216,7 +214,7 @@ async function handleWebSearch(params, reporter = NOOP_REPORTER) {
|
|
|
216
214
|
await reporter.log("info", `Search completed with ${aggregation.rankedUrls.length} URLs (classified: ${llmClassified})`);
|
|
217
215
|
const footer = `
|
|
218
216
|
---
|
|
219
|
-
*${formatDuration(executionTime)} | ${aggregation.totalUniqueUrls} unique URLs | ${metadata.consensus_url_count} consensus | threshold \u2265${aggregation.frequencyThreshold}${llmClassified ? " | LLM classified" : ""}*`;
|
|
217
|
+
*${formatDuration(executionTime)} | ${aggregation.totalUniqueUrls} unique URLs${llmClassified ? " | LLM classified" : ""}*`;
|
|
220
218
|
const fullMarkdown = markdown + footer;
|
|
221
219
|
return toolSuccess(fullMarkdown, { content: fullMarkdown, metadata });
|
|
222
220
|
} catch (error) {
|
|
@@ -228,7 +226,7 @@ function registerWebSearchTool(server) {
|
|
|
228
226
|
{
|
|
229
227
|
name: "web-search",
|
|
230
228
|
title: "Web Search",
|
|
231
|
-
description: "
|
|
229
|
+
description: "Run up to 100 Google searches in parallel, aggregate and deduplicate results, then classify each URL by relevance to your extract goal. Returns a tiered table: highly relevant, maybe relevant, and other. Set raw=true for unclassified ranked results.",
|
|
232
230
|
schema: webSearchParamsSchema,
|
|
233
231
|
outputSchema: webSearchOutputSchema,
|
|
234
232
|
annotations: {
|
|
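For orientation: the search.js changes above amount to a parameter rename on the web-search tool. `keywords` becomes `queries`, `objective` becomes `extract`, and `raw` keeps its meaning. A minimal sketch of the two argument shapes, with illustrative values only (the field names come from this diff; everything else is hypothetical):

```ts
// 4.0.0 argument shape (removed in this release)
const legacyArgs = {
  keywords: ["best static site generator", "hugo vs astro"],
  objective: "Find consensus on the most recommended static site generator",
  raw: false,
};

// 4.1.0 argument shape (introduced in this release)
const currentArgs = {
  queries: ["best static site generator", "hugo vs astro"],
  extract: "Find consensus on the most recommended static site generator",
  raw: false, // set true to skip LLM classification and get the plain ranked list
};
```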
package/dist/src/tools/search.js.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/tools/search.ts"],
|
|
4
|
-
"sourcesContent": ["/**\n * Web Search Tool Handler\n * NEVER throws - always returns structured response for graceful degradation\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport { getCapabilities, getMissingEnvMessage } from '../config/index.js';\nimport {\n webSearchOutputSchema,\n webSearchParamsSchema,\n type WebSearchParams,\n type WebSearchOutput,\n} from '../schemas/web-search.js';\nimport { SearchClient } from '../clients/search.js';\nimport {\n aggregateAndRank,\n generateUnifiedOutput,\n} from '../utils/url-aggregator.js';\nimport { createLLMProcessor, classifySearchResults, type ClassificationResult } from '../services/llm-processor.js';\nimport { classifyError } from '../utils/errors.js';\nimport {\n mcpLog,\n formatError,\n formatDuration,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\n\n// --- Internal types ---\n\ninterface SearchAggregation {\n readonly rankedUrls: ReturnType<typeof aggregateAndRank>['rankedUrls'];\n readonly totalUniqueUrls: number;\n readonly frequencyThreshold: number;\n readonly thresholdNote?: string;\n}\n\ninterface SearchResponse {\n searches: Parameters<typeof aggregateAndRank>[0];\n totalKeywords: number;\n}\n\n// --- Helpers ---\n\nasync function executeSearches(keywords: string[]): Promise<SearchResponse> {\n const client = new SearchClient();\n return client.searchMultiple(keywords);\n}\n\nfunction processResults(response: SearchResponse): {\n aggregation: SearchAggregation;\n consensusUrls: SearchAggregation['rankedUrls'];\n} {\n const aggregation = aggregateAndRank(response.searches, 5);\n const consensusUrls = aggregation.rankedUrls.filter(u => u.isConsensus);\n return { aggregation, consensusUrls };\n}\n\n// --- Raw output (traditional unified ranked list) ---\n\nfunction buildRawOutput(\n keywords: string[],\n aggregation: SearchAggregation,\n searches: SearchResponse['searches'],\n): string {\n return generateUnifiedOutput(\n aggregation.rankedUrls, keywords, searches,\n aggregation.totalUniqueUrls,\n aggregation.frequencyThreshold, aggregation.thresholdNote,\n );\n}\n\n// --- Classified output (3-tier LLM-classified table) ---\n\nfunction buildClassifiedOutput(\n classification: ClassificationResult,\n aggregation: SearchAggregation,\n objective: string,\n totalKeywords: number,\n): string {\n const rankedUrls = aggregation.rankedUrls;\n\n // Build lookup from rank \u2192 url data\n const urlByRank = new Map(rankedUrls.map(u => [u.rank, u]));\n\n // Build tier \u2192 entries mapping\n const tiers = {\n high: [] as typeof rankedUrls,\n maybe: [] as typeof rankedUrls,\n other: [] as typeof rankedUrls,\n };\n\n // Classify based on LLM response\n const tierMap = new Map(classification.results.map(r => [r.rank, r.tier]));\n\n for (const url of rankedUrls) {\n const tier = tierMap.get(url.rank);\n if (tier === 'HIGHLY_RELEVANT') {\n tiers.high.push(url);\n } else if (tier === 'MAYBE_RELEVANT') {\n tiers.maybe.push(url);\n } else {\n tiers.other.push(url);\n }\n }\n\n const lines: string[] = [];\n\n // Header with generated title and synthesis\n lines.push(`## ${classification.title}`);\n lines.push(`> Objective: ${objective}`);\n lines.push(`> ${totalKeywords} queries \u2192 ${rankedUrls.length} URLs \u2192 ${tiers.high.length} highly relevant, ${tiers.maybe.length} possibly relevant`);\n lines.push('');\n lines.push(`**Summary:** ${classification.synthesis}`);\n 
lines.push('');\n\n // Highly Relevant tier\n if (tiers.high.length > 0) {\n lines.push(`### Highly Relevant (${tiers.high.length})`);\n lines.push('| # | URL | Seen in |');\n lines.push('|---|-----|---------|');\n for (const url of tiers.high) {\n const coveragePct = Math.round(url.coverageRatio * 100);\n const queries = url.queries.map(q => `\"${q}\"`).join(', ');\n lines.push(`| ${url.rank} | [${url.title}](${url.url}) | ${url.frequency}/${totalKeywords} (${coveragePct}%) |`);\n }\n lines.push('');\n }\n\n // Maybe Relevant tier\n if (tiers.maybe.length > 0) {\n lines.push(`### Maybe Relevant (${tiers.maybe.length})`);\n lines.push('| # | URL | Seen in |');\n lines.push('|---|-----|---------|');\n for (const url of tiers.maybe) {\n const coveragePct = Math.round(url.coverageRatio * 100);\n lines.push(`| ${url.rank} | [${url.title}](${url.url}) | ${url.frequency}/${totalKeywords} (${coveragePct}%) |`);\n }\n lines.push('');\n }\n\n // Other tier \u2014 with keyword attribution\n if (tiers.other.length > 0) {\n lines.push(`### Other Results (${tiers.other.length})`);\n lines.push('| # | URL | Score | Keywords |');\n lines.push('|---|-----|-------|----------|');\n for (const url of tiers.other) {\n const keywords = url.queries.map(q => `\"${q}\"`).join(', ');\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n lines.push(`| ${url.rank} | ${domain} | ${url.score.toFixed(1)} | ${keywords} |`);\n }\n lines.push('');\n }\n\n return lines.join('\\n');\n}\n\n// --- Metadata builder ---\n\nfunction buildMetadata(\n aggregation: SearchAggregation,\n executionTime: number,\n totalKeywords: number,\n searches: SearchResponse['searches'],\n llmClassified: boolean,\n llmError?: string,\n) {\n const coverageSummary = searches.map(s => {\n let topDomain: string | undefined;\n const topResult = s.results[0];\n if (topResult) {\n try { topDomain = new URL(topResult.link).hostname.replace(/^www\\./, ''); } catch { /* ignore */ }\n }\n return { keyword: s.keyword, result_count: s.results.length, top_url: topDomain };\n });\n const lowYieldKeywords = searches\n .filter(s => s.results.length <= 1)\n .map(s => s.keyword);\n\n return {\n total_keywords: totalKeywords,\n total_results: aggregation.rankedUrls.length,\n execution_time_ms: executionTime,\n total_unique_urls: aggregation.totalUniqueUrls,\n consensus_url_count: aggregation.rankedUrls.filter(u => u.isConsensus).length,\n frequency_threshold: aggregation.frequencyThreshold,\n llm_classified: llmClassified,\n ...(llmError ? { llm_error: llmError } : {}),\n coverage_summary: coverageSummary,\n ...(lowYieldKeywords.length > 0 ? 
{ low_yield_keywords: lowYieldKeywords } : {}),\n };\n}\n\n// --- Error builder ---\n\nfunction buildWebSearchError(\n error: unknown,\n params: WebSearchParams,\n startTime: number,\n): ToolExecutionResult<WebSearchOutput> {\n const structuredError = classifyError(error);\n const executionTime = Date.now() - startTime;\n\n mcpLog('error', `web-search: ${structuredError.message}`, 'search');\n\n const errorContent = formatError({\n code: structuredError.code,\n message: structuredError.message,\n retryable: structuredError.retryable,\n toolName: 'web-search',\n howToFix: ['Verify SERPER_API_KEY is set correctly'],\n alternatives: [\n 'search-reddit(queries=[\"topic recommendations\"]) \u2014 returns Reddit URLs via Google search',\n 'scrape-links(urls=[...], what_to_extract=\"...\") \u2014 if you have URLs from prior steps, scrape them now',\n ],\n });\n\n return toolFailure(\n `${errorContent}\\n\\nExecution time: ${formatDuration(executionTime)}\\nKeywords: ${params.keywords.length}`,\n );\n}\n\n// --- Main handler ---\n\nexport async function handleWebSearch(\n params: WebSearchParams,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<WebSearchOutput>> {\n const startTime = Date.now();\n\n try {\n mcpLog('info', `Searching for ${params.keywords.length} keyword(s)`, 'search');\n await reporter.log('info', `Searching for ${params.keywords.length} keyword(s)`);\n await reporter.progress(15, 100, 'Submitting search queries');\n\n const response = await executeSearches(params.keywords);\n await reporter.progress(50, 100, 'Collected search results');\n\n const { aggregation } = processResults(response);\n await reporter.log(\n 'info',\n `Collected ${aggregation.totalUniqueUrls} unique URLs across ${response.totalKeywords} queries`,\n );\n\n // Decide: raw output or LLM classification\n const useRaw = params.raw;\n const llmProcessor = useRaw ? null : createLLMProcessor();\n\n let markdown: string;\n let llmClassified = false;\n let llmError: string | undefined;\n\n if (useRaw || !llmProcessor) {\n // Raw path: traditional unified ranked list\n if (!useRaw && !llmProcessor) {\n llmError = 'LLM unavailable (LLM_EXTRACTION_API_KEY not set). Falling back to raw output.';\n mcpLog('warning', llmError, 'search');\n }\n markdown = buildRawOutput(params.keywords, aggregation, response.searches);\n await reporter.progress(80, 100, 'Ranking search results');\n } else {\n // LLM classification path\n await reporter.progress(65, 100, 'Classifying results by relevance');\n const classification = await classifySearchResults(\n aggregation.rankedUrls,\n params.objective,\n response.totalKeywords,\n llmProcessor,\n );\n\n if (classification.result) {\n markdown = buildClassifiedOutput(\n classification.result, aggregation, params.objective, response.totalKeywords,\n );\n llmClassified = true;\n await reporter.progress(85, 100, 'Formatted classified results');\n } else {\n // Classification failed \u2014 fall back to raw\n llmError = classification.error ?? 
'Unknown classification error';\n mcpLog('warning', `Classification failed, falling back to raw: ${llmError}`, 'search');\n markdown = buildRawOutput(params.keywords, aggregation, response.searches);\n await reporter.progress(85, 100, 'Classification failed, using raw output');\n }\n }\n\n const executionTime = Date.now() - startTime;\n const metadata = buildMetadata(\n aggregation, executionTime, response.totalKeywords, response.searches, llmClassified, llmError,\n );\n\n mcpLog('info', `Search completed: ${aggregation.rankedUrls.length} URLs, classified=${llmClassified}`, 'search');\n await reporter.log('info', `Search completed with ${aggregation.rankedUrls.length} URLs (classified: ${llmClassified})`);\n\n const footer = `\\n---\\n*${formatDuration(executionTime)} | ${aggregation.totalUniqueUrls} unique URLs | ${metadata.consensus_url_count} consensus | threshold \u2265${aggregation.frequencyThreshold}${llmClassified ? ' | LLM classified' : ''}*`;\n const fullMarkdown = markdown + footer;\n\n return toolSuccess(fullMarkdown, { content: fullMarkdown, metadata });\n } catch (error) {\n return buildWebSearchError(error, params, startTime);\n }\n}\n\nexport function registerWebSearchTool(server: MCPServer): void {\n server.tool(\n {\n name: 'web-search',\n title: 'Web Search',\n description:\n 'Search Google with 1-100 keywords in parallel. Each keyword runs as a separate search; results are aggregated, deduplicated, and ranked. Provide an objective describing what you need \u2014 an LLM classifies every result into 3 tiers (highly relevant / maybe relevant / other) based on titles and snippets alone (no URL fetching). Output: synthesis paragraph + tiered markdown table. Set raw=true to skip classification and get the traditional consensus-ranked URL list.',\n schema: webSearchParamsSchema,\n outputSchema: webSearchOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async (args, ctx) => {\n if (!getCapabilities().search) {\n return toToolResponse(toolFailure(getMissingEnvMessage('search')));\n }\n\n const reporter = createToolReporter(ctx, 'web-search');\n const result = await handleWebSearch(args, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Search failed' : 'Search complete');\n return toToolResponse(result);\n },\n );\n}\n"],
|
|
5
|
-
"mappings": "AAOA,SAAS,iBAAiB,4BAA4B;AACtD;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP,SAAS,oBAAoB;AAC7B;AAAA,EACE;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB,6BAAwD;AACrF,SAAS,qBAAqB;AAC9B;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AAkBP,eAAe,gBAAgB,UAA6C;AAC1E,QAAM,SAAS,IAAI,aAAa;AAChC,SAAO,OAAO,eAAe,QAAQ;AACvC;AAEA,SAAS,eAAe,UAGtB;AACA,QAAM,cAAc,iBAAiB,SAAS,UAAU,CAAC;AACzD,QAAM,gBAAgB,YAAY,WAAW,OAAO,OAAK,EAAE,WAAW;AACtE,SAAO,EAAE,aAAa,cAAc;AACtC;AAIA,SAAS,eACP,UACA,aACA,UACQ;AACR,SAAO;AAAA,IACL,YAAY;AAAA,IAAY;AAAA,IAAU;AAAA,IAClC,YAAY;AAAA,IACZ,YAAY;AAAA,IAAoB,YAAY;AAAA,EAC9C;AACF;AAIA,SAAS,sBACP,gBACA,aACA,
|
|
4
|
+
"sourcesContent": ["/**\n * Web Search Tool Handler\n * NEVER throws - always returns structured response for graceful degradation\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport { getCapabilities, getMissingEnvMessage } from '../config/index.js';\nimport {\n webSearchOutputSchema,\n webSearchParamsSchema,\n type WebSearchParams,\n type WebSearchOutput,\n} from '../schemas/web-search.js';\nimport { SearchClient } from '../clients/search.js';\nimport {\n aggregateAndRank,\n generateUnifiedOutput,\n} from '../utils/url-aggregator.js';\nimport { createLLMProcessor, classifySearchResults, type ClassificationResult } from '../services/llm-processor.js';\nimport { classifyError } from '../utils/errors.js';\nimport {\n mcpLog,\n formatError,\n formatDuration,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\n\n// --- Internal types ---\n\ninterface SearchAggregation {\n readonly rankedUrls: ReturnType<typeof aggregateAndRank>['rankedUrls'];\n readonly totalUniqueUrls: number;\n readonly frequencyThreshold: number;\n readonly thresholdNote?: string;\n}\n\ninterface SearchResponse {\n searches: Parameters<typeof aggregateAndRank>[0];\n totalKeywords: number;\n}\n\n// --- Helpers ---\n\nasync function executeSearches(keywords: string[]): Promise<SearchResponse> {\n const client = new SearchClient();\n return client.searchMultiple(keywords);\n}\n\nfunction processResults(response: SearchResponse): {\n aggregation: SearchAggregation;\n consensusUrls: SearchAggregation['rankedUrls'];\n} {\n const aggregation = aggregateAndRank(response.searches, 5);\n const consensusUrls = aggregation.rankedUrls.filter(u => u.isConsensus);\n return { aggregation, consensusUrls };\n}\n\n// --- Raw output (traditional unified ranked list) ---\n\nfunction buildRawOutput(\n keywords: string[],\n aggregation: SearchAggregation,\n searches: SearchResponse['searches'],\n): string {\n return generateUnifiedOutput(\n aggregation.rankedUrls, keywords, searches,\n aggregation.totalUniqueUrls,\n aggregation.frequencyThreshold, aggregation.thresholdNote,\n );\n}\n\n// --- Classified output (3-tier LLM-classified table) ---\n\nfunction buildClassifiedOutput(\n classification: ClassificationResult,\n aggregation: SearchAggregation,\n extract: string,\n totalKeywords: number,\n): string {\n const rankedUrls = aggregation.rankedUrls;\n\n // Build lookup from rank \u2192 url data\n const urlByRank = new Map(rankedUrls.map(u => [u.rank, u]));\n\n // Build tier \u2192 entries mapping\n const tiers = {\n high: [] as typeof rankedUrls,\n maybe: [] as typeof rankedUrls,\n other: [] as typeof rankedUrls,\n };\n\n // Classify based on LLM response\n const tierMap = new Map(classification.results.map(r => [r.rank, r.tier]));\n\n for (const url of rankedUrls) {\n const tier = tierMap.get(url.rank);\n if (tier === 'HIGHLY_RELEVANT') {\n tiers.high.push(url);\n } else if (tier === 'MAYBE_RELEVANT') {\n tiers.maybe.push(url);\n } else {\n tiers.other.push(url);\n }\n }\n\n const lines: string[] = [];\n\n // Header with generated title and synthesis\n lines.push(`## ${classification.title}`);\n lines.push(`> Looking for: ${extract}`);\n lines.push(`> ${totalKeywords} queries \u2192 ${rankedUrls.length} URLs \u2192 ${tiers.high.length} highly relevant, ${tiers.maybe.length} possibly relevant`);\n lines.push('');\n lines.push(`**Summary:** ${classification.synthesis}`);\n 
lines.push('');\n\n // Highly Relevant tier\n if (tiers.high.length > 0) {\n lines.push(`### Highly Relevant (${tiers.high.length})`);\n lines.push('| # | URL | Seen in |');\n lines.push('|---|-----|---------|');\n for (const url of tiers.high) {\n const coveragePct = Math.round(url.coverageRatio * 100);\n const queries = url.queries.map(q => `\"${q}\"`).join(', ');\n lines.push(`| ${url.rank} | [${url.title}](${url.url}) | ${url.frequency}/${totalKeywords} (${coveragePct}%) |`);\n }\n lines.push('');\n }\n\n // Maybe Relevant tier\n if (tiers.maybe.length > 0) {\n lines.push(`### Maybe Relevant (${tiers.maybe.length})`);\n lines.push('| # | URL | Seen in |');\n lines.push('|---|-----|---------|');\n for (const url of tiers.maybe) {\n const coveragePct = Math.round(url.coverageRatio * 100);\n lines.push(`| ${url.rank} | [${url.title}](${url.url}) | ${url.frequency}/${totalKeywords} (${coveragePct}%) |`);\n }\n lines.push('');\n }\n\n // Other tier \u2014 with keyword attribution\n if (tiers.other.length > 0) {\n lines.push(`### Other Results (${tiers.other.length})`);\n lines.push('| # | URL | Score | Keywords |');\n lines.push('|---|-----|-------|----------|');\n for (const url of tiers.other) {\n const keywords = url.queries.map(q => `\"${q}\"`).join(', ');\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n lines.push(`| ${url.rank} | ${domain} | ${url.score.toFixed(1)} | ${keywords} |`);\n }\n lines.push('');\n }\n\n return lines.join('\\n');\n}\n\n// --- Metadata builder ---\n\nfunction buildMetadata(\n aggregation: SearchAggregation,\n executionTime: number,\n totalKeywords: number,\n searches: SearchResponse['searches'],\n llmClassified: boolean,\n llmError?: string,\n) {\n const coverageSummary = searches.map(s => {\n let topDomain: string | undefined;\n const topResult = s.results[0];\n if (topResult) {\n try { topDomain = new URL(topResult.link).hostname.replace(/^www\\./, ''); } catch { /* ignore */ }\n }\n return { keyword: s.keyword, result_count: s.results.length, top_url: topDomain };\n });\n const lowYieldKeywords = searches\n .filter(s => s.results.length <= 1)\n .map(s => s.keyword);\n\n return {\n total_items: totalKeywords,\n successful: aggregation.rankedUrls.length,\n failed: totalKeywords - searches.filter(s => s.results.length > 0).length,\n execution_time_ms: executionTime,\n llm_classified: llmClassified,\n ...(llmError ? { llm_error: llmError } : {}),\n coverage_summary: coverageSummary,\n ...(lowYieldKeywords.length > 0 ? 
{ low_yield_keywords: lowYieldKeywords } : {}),\n };\n}\n\n// --- Error builder ---\n\nfunction buildWebSearchError(\n error: unknown,\n params: WebSearchParams,\n startTime: number,\n): ToolExecutionResult<WebSearchOutput> {\n const structuredError = classifyError(error);\n const executionTime = Date.now() - startTime;\n\n mcpLog('error', `web-search: ${structuredError.message}`, 'search');\n\n const errorContent = formatError({\n code: structuredError.code,\n message: structuredError.message,\n retryable: structuredError.retryable,\n toolName: 'web-search',\n howToFix: ['Verify SERPER_API_KEY is set correctly'],\n alternatives: [\n 'search-reddit(queries=[\"topic recommendations\"]) \u2014 returns Reddit URLs via Google search',\n 'scrape-links(urls=[...], what_to_extract=\"...\") \u2014 if you have URLs from prior steps, scrape them now',\n ],\n });\n\n return toolFailure(\n `${errorContent}\\n\\nExecution time: ${formatDuration(executionTime)}\\nQueries: ${params.queries.length}`,\n );\n}\n\n// --- Main handler ---\n\nexport async function handleWebSearch(\n params: WebSearchParams,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<WebSearchOutput>> {\n const startTime = Date.now();\n\n try {\n mcpLog('info', `Searching for ${params.queries.length} query/queries`, 'search');\n await reporter.log('info', `Searching for ${params.queries.length} query/queries`);\n await reporter.progress(15, 100, 'Submitting search queries');\n\n const response = await executeSearches(params.queries);\n await reporter.progress(50, 100, 'Collected search results');\n\n const { aggregation } = processResults(response);\n await reporter.log(\n 'info',\n `Collected ${aggregation.totalUniqueUrls} unique URLs across ${response.totalKeywords} queries`,\n );\n\n // Decide: raw output or LLM classification\n const useRaw = params.raw;\n const llmProcessor = useRaw ? null : createLLMProcessor();\n\n let markdown: string;\n let llmClassified = false;\n let llmError: string | undefined;\n\n if (useRaw || !llmProcessor) {\n // Raw path: traditional unified ranked list\n if (!useRaw && !llmProcessor) {\n llmError = 'LLM unavailable (LLM_EXTRACTION_API_KEY not set). Falling back to raw output.';\n mcpLog('warning', llmError, 'search');\n }\n markdown = buildRawOutput(params.queries, aggregation, response.searches);\n await reporter.progress(80, 100, 'Ranking search results');\n } else {\n // LLM classification path\n await reporter.progress(65, 100, 'Classifying results by relevance');\n const classification = await classifySearchResults(\n aggregation.rankedUrls,\n params.extract,\n response.totalKeywords,\n llmProcessor,\n );\n\n if (classification.result) {\n markdown = buildClassifiedOutput(\n classification.result, aggregation, params.extract, response.totalKeywords,\n );\n llmClassified = true;\n await reporter.progress(85, 100, 'Formatted classified results');\n } else {\n // Classification failed \u2014 fall back to raw\n llmError = classification.error ?? 
'Unknown classification error';\n mcpLog('warning', `Classification failed, falling back to raw: ${llmError}`, 'search');\n markdown = buildRawOutput(params.queries, aggregation, response.searches);\n await reporter.progress(85, 100, 'Classification failed, using raw output');\n }\n }\n\n const executionTime = Date.now() - startTime;\n const metadata = buildMetadata(\n aggregation, executionTime, response.totalKeywords, response.searches, llmClassified, llmError,\n );\n\n mcpLog('info', `Search completed: ${aggregation.rankedUrls.length} URLs, classified=${llmClassified}`, 'search');\n await reporter.log('info', `Search completed with ${aggregation.rankedUrls.length} URLs (classified: ${llmClassified})`);\n\n const footer = `\\n---\\n*${formatDuration(executionTime)} | ${aggregation.totalUniqueUrls} unique URLs${llmClassified ? ' | LLM classified' : ''}*`;\n const fullMarkdown = markdown + footer;\n\n return toolSuccess(fullMarkdown, { content: fullMarkdown, metadata });\n } catch (error) {\n return buildWebSearchError(error, params, startTime);\n }\n}\n\nexport function registerWebSearchTool(server: MCPServer): void {\n server.tool(\n {\n name: 'web-search',\n title: 'Web Search',\n description:\n 'Run up to 100 Google searches in parallel, aggregate and deduplicate results, then classify each URL by relevance to your extract goal. Returns a tiered table: highly relevant, maybe relevant, and other. Set raw=true for unclassified ranked results.',\n schema: webSearchParamsSchema,\n outputSchema: webSearchOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async (args, ctx) => {\n if (!getCapabilities().search) {\n return toToolResponse(toolFailure(getMissingEnvMessage('search')));\n }\n\n const reporter = createToolReporter(ctx, 'web-search');\n const result = await handleWebSearch(args, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Search failed' : 'Search complete');\n return toToolResponse(result);\n },\n );\n}\n"],
|
|
5
|
+
"mappings": "AAOA,SAAS,iBAAiB,4BAA4B;AACtD;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP,SAAS,oBAAoB;AAC7B;AAAA,EACE;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB,6BAAwD;AACrF,SAAS,qBAAqB;AAC9B;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AAkBP,eAAe,gBAAgB,UAA6C;AAC1E,QAAM,SAAS,IAAI,aAAa;AAChC,SAAO,OAAO,eAAe,QAAQ;AACvC;AAEA,SAAS,eAAe,UAGtB;AACA,QAAM,cAAc,iBAAiB,SAAS,UAAU,CAAC;AACzD,QAAM,gBAAgB,YAAY,WAAW,OAAO,OAAK,EAAE,WAAW;AACtE,SAAO,EAAE,aAAa,cAAc;AACtC;AAIA,SAAS,eACP,UACA,aACA,UACQ;AACR,SAAO;AAAA,IACL,YAAY;AAAA,IAAY;AAAA,IAAU;AAAA,IAClC,YAAY;AAAA,IACZ,YAAY;AAAA,IAAoB,YAAY;AAAA,EAC9C;AACF;AAIA,SAAS,sBACP,gBACA,aACA,SACA,eACQ;AACR,QAAM,aAAa,YAAY;AAG/B,QAAM,YAAY,IAAI,IAAI,WAAW,IAAI,OAAK,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;AAG1D,QAAM,QAAQ;AAAA,IACZ,MAAM,CAAC;AAAA,IACP,OAAO,CAAC;AAAA,IACR,OAAO,CAAC;AAAA,EACV;AAGA,QAAM,UAAU,IAAI,IAAI,eAAe,QAAQ,IAAI,OAAK,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC;AAEzE,aAAW,OAAO,YAAY;AAC5B,UAAM,OAAO,QAAQ,IAAI,IAAI,IAAI;AACjC,QAAI,SAAS,mBAAmB;AAC9B,YAAM,KAAK,KAAK,GAAG;AAAA,IACrB,WAAW,SAAS,kBAAkB;AACpC,YAAM,MAAM,KAAK,GAAG;AAAA,IACtB,OAAO;AACL,YAAM,MAAM,KAAK,GAAG;AAAA,IACtB;AAAA,EACF;AAEA,QAAM,QAAkB,CAAC;AAGzB,QAAM,KAAK,MAAM,eAAe,KAAK,EAAE;AACvC,QAAM,KAAK,kBAAkB,OAAO,EAAE;AACtC,QAAM,KAAK,KAAK,aAAa,mBAAc,WAAW,MAAM,gBAAW,MAAM,KAAK,MAAM,qBAAqB,MAAM,MAAM,MAAM,oBAAoB;AACnJ,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,gBAAgB,eAAe,SAAS,EAAE;AACrD,QAAM,KAAK,EAAE;AAGb,MAAI,MAAM,KAAK,SAAS,GAAG;AACzB,UAAM,KAAK,wBAAwB,MAAM,KAAK,MAAM,GAAG;AACvD,UAAM,KAAK,uBAAuB;AAClC,UAAM,KAAK,uBAAuB;AAClC,eAAW,OAAO,MAAM,MAAM;AAC5B,YAAM,cAAc,KAAK,MAAM,IAAI,gBAAgB,GAAG;AACtD,YAAM,UAAU,IAAI,QAAQ,IAAI,OAAK,IAAI,CAAC,GAAG,EAAE,KAAK,IAAI;AACxD,YAAM,KAAK,KAAK,IAAI,IAAI,OAAO,IAAI,KAAK,KAAK,IAAI,GAAG,OAAO,IAAI,SAAS,IAAI,aAAa,KAAK,WAAW,MAAM;AAAA,IACjH;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAGA,MAAI,MAAM,MAAM,SAAS,GAAG;AAC1B,UAAM,KAAK,uBAAuB,MAAM,MAAM,MAAM,GAAG;AACvD,UAAM,KAAK,uBAAuB;AAClC,UAAM,KAAK,uBAAuB;AAClC,eAAW,OAAO,MAAM,OAAO;AAC7B,YAAM,cAAc,KAAK,MAAM,IAAI,gBAAgB,GAAG;AACtD,YAAM,KAAK,KAAK,IAAI,IAAI,OAAO,IAAI,KAAK,KAAK,IAAI,GAAG,OAAO,IAAI,SAAS,IAAI,aAAa,KAAK,WAAW,MAAM;AAAA,IACjH;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAGA,MAAI,MAAM,MAAM,SAAS,GAAG;AAC1B,UAAM,KAAK,sBAAsB,MAAM,MAAM,MAAM,GAAG;AACtD,UAAM,KAAK,gCAAgC;AAC3C,UAAM,KAAK,gCAAgC;AAC3C,eAAW,OAAO,MAAM,OAAO;AAC7B,YAAM,WAAW,IAAI,QAAQ,IAAI,OAAK,IAAI,CAAC,GAAG,EAAE,KAAK,IAAI;AACzD,UAAI;AACJ,UAAI;AACF,iBAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,MACzD,QAAQ;AACN,iBAAS,IAAI;AAAA,MACf;AACA,YAAM,KAAK,KAAK,IAAI,IAAI,MAAM,MAAM,MAAM,IAAI,MAAM,QAAQ,CAAC,CAAC,MAAM,QAAQ,IAAI;AAAA,IAClF;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,SAAO,MAAM,KAAK,IAAI;AACxB;AAIA,SAAS,cACP,aACA,eACA,eACA,UACA,eACA,UACA;AACA,QAAM,kBAAkB,SAAS,IAAI,OAAK;AACxC,QAAI;AACJ,UAAM,YAAY,EAAE,QAAQ,CAAC;AAC7B,QAAI,WAAW;AACb,UAAI;AAAE,oBAAY,IAAI,IAAI,UAAU,IAAI,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,MAAG,QAAQ;AAAA,MAAe;AAAA,IACnG;AACA,WAAO,EAAE,SAAS,EAAE,SAAS,cAAc,EAAE,QAAQ,QAAQ,SAAS,UAAU;AAAA,EAClF,CAAC;AACD,QAAM,mBAAmB,SACtB,OAAO,OAAK,EAAE,QAAQ,UAAU,CAAC,EACjC,IAAI,OAAK,EAAE,OAAO;AAErB,SAAO;AAAA,IACL,aAAa;AAAA,IACb,YAAY,YAAY,WAAW;AAAA,IACnC,QAAQ,gBAAgB,SAAS,OAAO,OAAK,EAAE,QAAQ,SAAS,CAAC,EAAE;AAAA,IACnE,mBAAmB;AAAA,IACnB,gBAAgB;AAAA,IAChB,GAAI,WAAW,EAAE,WAAW,SAAS,IAAI,CAAC;AAAA,IAC1C,kBAAkB;AAAA,IAClB,GAAI,iBAAiB,SAAS,IAAI,EAAE,oBAAoB,iBAAiB,IAAI,CAAC;AAAA,EAChF;AACF;AAIA,SAAS,oBACP,OACA,QACA,WACsC;AACtC,QAAM,kBAAkB,cAAc,KAAK;AAC3C,QAAM,gBAAgB,KAAK,IAAI,IAAI;AAEnC,SAAO,SAAS,eAAe,gBAAgB,OAAO,IAAI,QAAQ;AAElE,QAAM,eAAe,YAAY;AAAA,IAC/B,MAAM,gBAAgB;AAAA,IACtB,SAAS,gBAAgB;AAAA,IACzB,WAAW,gBAAgB;AAAA,IAC3B,UAAU;AAA
A,IACV,UAAU,CAAC,wCAAwC;AAAA,IACnD,cAAc;AAAA,MACZ;AAAA,MACA;AAAA,IACF;AAAA,EACF,CAAC;AAED,SAAO;AAAA,IACL,GAAG,YAAY;AAAA;AAAA,kBAAuB,eAAe,aAAa,CAAC;AAAA,WAAc,OAAO,QAAQ,MAAM;AAAA,EACxG;AACF;AAIA,eAAsB,gBACpB,QACA,WAAyB,eACsB;AAC/C,QAAM,YAAY,KAAK,IAAI;AAE3B,MAAI;AACF,WAAO,QAAQ,iBAAiB,OAAO,QAAQ,MAAM,kBAAkB,QAAQ;AAC/E,UAAM,SAAS,IAAI,QAAQ,iBAAiB,OAAO,QAAQ,MAAM,gBAAgB;AACjF,UAAM,SAAS,SAAS,IAAI,KAAK,2BAA2B;AAE5D,UAAM,WAAW,MAAM,gBAAgB,OAAO,OAAO;AACrD,UAAM,SAAS,SAAS,IAAI,KAAK,0BAA0B;AAE3D,UAAM,EAAE,YAAY,IAAI,eAAe,QAAQ;AAC/C,UAAM,SAAS;AAAA,MACb;AAAA,MACA,aAAa,YAAY,eAAe,uBAAuB,SAAS,aAAa;AAAA,IACvF;AAGA,UAAM,SAAS,OAAO;AACtB,UAAM,eAAe,SAAS,OAAO,mBAAmB;AAExD,QAAI;AACJ,QAAI,gBAAgB;AACpB,QAAI;AAEJ,QAAI,UAAU,CAAC,cAAc;AAE3B,UAAI,CAAC,UAAU,CAAC,cAAc;AAC5B,mBAAW;AACX,eAAO,WAAW,UAAU,QAAQ;AAAA,MACtC;AACA,iBAAW,eAAe,OAAO,SAAS,aAAa,SAAS,QAAQ;AACxE,YAAM,SAAS,SAAS,IAAI,KAAK,wBAAwB;AAAA,IAC3D,OAAO;AAEL,YAAM,SAAS,SAAS,IAAI,KAAK,kCAAkC;AACnE,YAAM,iBAAiB,MAAM;AAAA,QAC3B,YAAY;AAAA,QACZ,OAAO;AAAA,QACP,SAAS;AAAA,QACT;AAAA,MACF;AAEA,UAAI,eAAe,QAAQ;AACzB,mBAAW;AAAA,UACT,eAAe;AAAA,UAAQ;AAAA,UAAa,OAAO;AAAA,UAAS,SAAS;AAAA,QAC/D;AACA,wBAAgB;AAChB,cAAM,SAAS,SAAS,IAAI,KAAK,8BAA8B;AAAA,MACjE,OAAO;AAEL,mBAAW,eAAe,SAAS;AACnC,eAAO,WAAW,+CAA+C,QAAQ,IAAI,QAAQ;AACrF,mBAAW,eAAe,OAAO,SAAS,aAAa,SAAS,QAAQ;AACxE,cAAM,SAAS,SAAS,IAAI,KAAK,yCAAyC;AAAA,MAC5E;AAAA,IACF;AAEA,UAAM,gBAAgB,KAAK,IAAI,IAAI;AACnC,UAAM,WAAW;AAAA,MACf;AAAA,MAAa;AAAA,MAAe,SAAS;AAAA,MAAe,SAAS;AAAA,MAAU;AAAA,MAAe;AAAA,IACxF;AAEA,WAAO,QAAQ,qBAAqB,YAAY,WAAW,MAAM,qBAAqB,aAAa,IAAI,QAAQ;AAC/G,UAAM,SAAS,IAAI,QAAQ,yBAAyB,YAAY,WAAW,MAAM,sBAAsB,aAAa,GAAG;AAEvH,UAAM,SAAS;AAAA;AAAA,GAAW,eAAe,aAAa,CAAC,MAAM,YAAY,eAAe,eAAe,gBAAgB,sBAAsB,EAAE;AAC/I,UAAM,eAAe,WAAW;AAEhC,WAAO,YAAY,cAAc,EAAE,SAAS,cAAc,SAAS,CAAC;AAAA,EACtE,SAAS,OAAO;AACd,WAAO,oBAAoB,OAAO,QAAQ,SAAS;AAAA,EACrD;AACF;AAEO,SAAS,sBAAsB,QAAyB;AAC7D,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,MAAM,QAAQ;AACnB,UAAI,CAAC,gBAAgB,EAAE,QAAQ;AAC7B,eAAO,eAAe,YAAY,qBAAqB,QAAQ,CAAC,CAAC;AAAA,MACnE;AAEA,YAAM,WAAW,mBAAmB,KAAK,YAAY;AACrD,YAAM,SAAS,MAAM,gBAAgB,MAAM,QAAQ;AAEnD,YAAM,SAAS,SAAS,KAAK,KAAK,OAAO,UAAU,kBAAkB,iBAAiB;AACtF,aAAO,eAAe,MAAM;AAAA,IAC9B;AAAA,EACF;AACF;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
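The buildMetadata() rewrite visible in the search.js and search.js.map diffs also reshapes the metadata object returned with each search. A hedged sketch of the before and after shapes as TypeScript interfaces (field names are read off the diff; the interface names and exact types are assumptions inferred from the compiled code):

```ts
// Assumed shapes; the names SearchMetadataV40/V41 are illustrative, not from the package.
interface SearchMetadataV40 {
  total_keywords: number;
  total_results: number;
  execution_time_ms: number;
  total_unique_urls: number;
  consensus_url_count: number;
  frequency_threshold: number;
  llm_classified: boolean;
  llm_error?: string;
  coverage_summary: { keyword: string; result_count: number; top_url?: string }[];
  low_yield_keywords?: string[];
}

interface SearchMetadataV41 {
  total_items: number;   // was total_keywords
  successful: number;    // count of ranked URLs
  failed: number;        // queries that returned no results
  execution_time_ms: number;
  llm_classified: boolean;
  llm_error?: string;
  coverage_summary: { keyword: string; result_count: number; top_url?: string }[];
  low_yield_keywords?: string[];
}
```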
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mcp-researchpowerpack-http",
|
|
3
|
-
"version": "4.
|
|
3
|
+
"version": "4.1.0",
|
|
4
4
|
"description": "The ultimate research MCP toolkit: Reddit mining, web search with CTR aggregation, and intelligent web scraping - all in one modular package",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|