npm - mcp-scraper - Versions diffs - 0.1.6 → 0.1.7 - Mend

mcp-scraper 0.1.6 → 0.1.7

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (36)

package/README.md +13 -2
package/dist/bin/api-server.cjs +572 -171
package/dist/bin/api-server.cjs.map +1 -1
package/dist/bin/api-server.js +2 -2
package/dist/bin/mcp-stdio-server.cjs +299 -149
package/dist/bin/mcp-stdio-server.cjs.map +1 -1
package/dist/bin/mcp-stdio-server.js +2 -1
package/dist/bin/mcp-stdio-server.js.map +1 -1
package/dist/bin/paa-harvest.cjs +22 -1
package/dist/bin/paa-harvest.cjs.map +1 -1
package/dist/bin/paa-harvest.js +2 -1
package/dist/bin/paa-harvest.js.map +1 -1
package/dist/{chunk-6TWZS2FQ.js → chunk-3OIRNUF5.js} +302 -150
package/dist/chunk-3OIRNUF5.js.map +1 -0
package/dist/{chunk-W4P2U5VF.js → chunk-LUBDFS67.js} +32 -32
package/dist/chunk-LUBDFS67.js.map +1 -0
package/dist/{chunk-7HB7NDOY.js → chunk-ZK456YXN.js} +12 -2
package/dist/chunk-ZK456YXN.js.map +1 -0
package/dist/chunk-ZMOWIBMK.js +36 -0
package/dist/chunk-ZMOWIBMK.js.map +1 -0
package/dist/index.cjs +22 -1
package/dist/index.cjs.map +1 -1
package/dist/index.js +2 -1
package/dist/index.js.map +1 -1
package/dist/{server-2Y27U4TO.js → server-YNJHP5PU.js} +235 -22
package/dist/server-YNJHP5PU.js.map +1 -0
package/dist/{worker-UT4ZQU2T.js → worker-PBG6LGET.js} +4 -3
package/dist/{worker-UT4ZQU2T.js.map → worker-PBG6LGET.js.map} +1 -1
package/docs/adr/0001-in-page-graphql-interception-for-anti-bot-scraping.md +58 -0
package/docs/adr/README.md +11 -0
package/docs/mcp-tool-quality-spec.md +238 -0
package/package.json +5 -4
package/dist/chunk-6TWZS2FQ.js.map +0 -1
package/dist/chunk-7HB7NDOY.js.map +0 -1
package/dist/chunk-W4P2U5VF.js.map +0 -1
package/dist/server-2Y27U4TO.js.map +0 -1

package/dist/{chunk-6TWZS2FQ.js → chunk-3OIRNUF5.js} (renamed)

@@ -1,3 +1,7 @@
+import {
+  sanitizeVendorName
+} from "./chunk-ZMOWIBMK.js";
 // src/harvest-timeout.ts
 var VERCEL_FUNCTION_MAX_MS = 3e5;
 var CLIENT_OVER_SERVER_MARGIN_MS = 15e3;
@@ -15,6 +19,9 @@ function harvestTimeoutBudget(maxQuestions, serpOnly = false) {
 // src/mcp/paa-mcp-server.ts
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+// src/version.ts
+var PACKAGE_VERSION = "0.1.7";
 // src/mcp/mcp-tool-schemas.ts
 import { z } from "zod";
 var HarvestPaaInputSchema = {
@@ -77,6 +84,93 @@ var MapsPlaceIntelInputSchema = {
   includeReviews: z.boolean().default(false).describe("Whether to fetch individual review cards"),
   maxReviews: z.number().int().min(1).max(500).default(50).describe("Max review cards to return (requires includeReviews: true)")
 };
+var MapsSearchInputSchema = {
+  query: z.string().min(1).describe('Business category, niche, keyword, or search term. If the user says "roofers in Denver CO", use query="roofers" and location="Denver, CO". Do not put the location here when it can be separated.'),
+  location: z.string().optional().describe('City, region, country, or service area for the Maps search, e.g. "Denver, CO". Infer from the user request when present.'),
+  gl: z.string().length(2).default("us").describe("Google country code inferred from location."),
+  hl: z.string().length(2).default("en").describe("Language inferred from user request."),
+  maxResults: z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more.")
+};
+var NullableString = z.string().nullable();
+var MapsSearchOutputSchema = {
+  query: z.string(),
+  location: z.string().nullable(),
+  searchQuery: z.string(),
+  searchUrl: z.string().url(),
+  extractedAt: z.string(),
+  requestedMaxResults: z.number().int().min(1).max(50),
+  resultCount: z.number().int().min(0).max(50),
+  results: z.array(z.object({
+    position: z.number().int().min(1),
+    name: z.string(),
+    placeUrl: z.string().url(),
+    cid: NullableString,
+    cidDecimal: NullableString,
+    rating: NullableString,
+    reviewCount: NullableString,
+    category: NullableString,
+    address: NullableString,
+    websiteUrl: NullableString,
+    directionsUrl: NullableString,
+    metadata: z.array(z.string())
+  })),
+  durationMs: z.number().int().min(0)
+};
+var MapSiteUrlsOutputSchema = {
+  startUrl: z.string(),
+  totalFound: z.number().int().min(0),
+  truncated: z.boolean(),
+  okCount: z.number().int().min(0),
+  redirectCount: z.number().int().min(0),
+  brokenCount: z.number().int().min(0),
+  urls: z.array(z.object({
+    url: z.string(),
+    status: z.number().int().nullable()
+  })),
+  durationMs: z.number().min(0)
+};
+var YoutubeHarvestOutputSchema = {
+  mode: z.string(),
+  videoCount: z.number().int().min(0),
+  channel: z.object({
+    title: NullableString,
+    subscriberCount: NullableString
+  }).nullable(),
+  videos: z.array(z.object({
+    videoId: z.string(),
+    title: z.string(),
+    channelName: NullableString,
+    views: NullableString,
+    duration: NullableString,
+    url: NullableString
+  }))
+};
+var FacebookAdSearchOutputSchema = {
+  query: z.string(),
+  advertiserCount: z.number().int().min(0),
+  advertisers: z.array(z.object({
+    name: NullableString,
+    adCount: z.number().int().nullable(),
+    libraryId: NullableString
+  }))
+};
+var FacebookPageIntelOutputSchema = {
+  advertiserName: NullableString,
+  totalAds: z.number().int().min(0),
+  activeCount: z.number().int().min(0),
+  videoCount: z.number().int().min(0),
+  imageCount: z.number().int().min(0),
+  ads: z.array(z.object({
+    libraryId: NullableString,
+    status: NullableString,
+    creativeType: NullableString,
+    headline: NullableString,
+    cta: NullableString,
+    startDate: NullableString,
+    videoUrl: NullableString,
+    variations: z.number().int().nullable()
+  }))
+};
 var CreditsInfoInputSchema = {
   item: z.string().optional().describe('Optional tool, action, or feature to look up, e.g. "maps reviews", "extract_url", or "YouTube transcription"'),
   includeLedger: z.boolean().default(false).describe("Whether to include recent credit ledger entries")
@@ -126,6 +220,15 @@ var CaptureSerpPageSnapshotsInputSchema = {
 import { mkdirSync, writeFileSync } from "fs";
 import { homedir } from "os";
 import { join } from "path";
+var reportSavingEnabled = true;
+function configureReportSaving(enabled) {
+  reportSavingEnabled = enabled;
+}
+function sanitizeVendorText(text) {
+  return sanitizeVendorName(
+    text.replace(/kernel_session_id/gi, "browser_session_id").replace(/kernel_delete_succeeded/gi, "session_cleanup_succeeded").replace(/kernel_delete_started/gi, "session_cleanup_started").replace(/kernel_delete_error/gi, "session_cleanup_error").replace(/kernelSessionId/g, "browserSessionId").replace(/kernelProxyId/g, "proxyId").replace(/KERNEL_API_KEY/g, "BROWSER_SERVICE_API_KEY").replace(/"kernel"\s*:/gi, '"browserRuntime":')
+  );
+}
 function slugifyReportName(input) {
   return input.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80) || "mcp-scraper-report";
 }
@@ -137,7 +240,7 @@ function outputBaseDir() {
   return process.env.MCP_SCRAPER_OUTPUT_DIR?.trim() || join(homedir(), "Downloads", "mcp-scraper");
 }
 function saveFullReport(full) {
-  if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
+  if (!reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
   const outDir = outputBaseDir();
   try {
     mkdirSync(outDir, { recursive: true });
@@ -150,7 +253,7 @@ function saveFullReport(full) {
   }
 }
 function persistScreenshotLocally(base64, url) {
-  if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
+  if (!reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
   try {
     const dir = join(outputBaseDir(), "screenshots");
     mkdirSync(dir, { recursive: true });
@@ -190,11 +293,11 @@ function parseData(raw) {
   const text = first?.type === "text" ? first.text : "";
   try {
     const parsed = JSON.parse(text || "{}");
-    if (raw.isError || parsed.error || parsed.error_code) return { error: formatStructuredError(parsed, text) };
+    if (raw.isError || parsed.error || parsed.error_code) return { error: sanitizeVendorText(formatStructuredError(parsed, text)) };
     const data = parsed.result ?? parsed;
     return { data };
   } catch {
-    if (raw.isError) return { error: text || "Tool error" };
+    if (raw.isError) return { error: sanitizeVendorText(text || "Tool error") };
     return { error: "Failed to parse tool response" };
   }
 }
@@ -208,15 +311,6 @@ function entityIdsSection(ids) {
 ## Entity IDs
 ${lines.join("\n")}` : "";
 }
-function entityIdsSummaryLine(ids) {
-  if (!ids) return "";
-  const parts = [];
-  if (ids.kgIds?.length) parts.push(`KG MID: ${ids.kgIds[0]}`);
-  if (ids.cids?.length) parts.push(`CID: ${ids.cids[0]}`);
-  if (ids.gcids?.length) parts.push(`GCID: ${ids.gcids[0]}`);
-  return parts.length ? `
-**Entity IDs:** ${parts.join(" \xB7 ")}` : "";
-}
 function truncate(s, max) {
   if (!s) return "";
   return s.length > max ? s.slice(0, max) + "\u2026" : s;
@@ -246,7 +340,7 @@ function debugSection(debug) {
   if (locationEvidence) {
     lines.push(`- Location evidence: ${locationEvidence.status}${locationEvidence.expected ? ` \xB7 expected ${locationEvidence.expected.city}${locationEvidence.expected.regionCode ? `, ${locationEvidence.expected.regionCode}` : ""}` : ""}${candidates ? ` \xB7 candidates ${candidates}` : ""}`);
   }
-  return lines.join("\n");
+  return sanitizeVendorText(lines.join("\n"));
 }
 function errorAttemptsSection(body) {
   const attempts = Array.isArray(body.attempts) ? body.attempts : [];
@@ -300,26 +394,12 @@ ${serpRows}` : "";
   const tips = `
 ---
 \u{1F4A1} **Tips**
-- Max questions: \`maxQuestions: 150\` (current: ${input.maxQuestions ?? 30})
+- Max questions: \`maxQuestions: 200\` (current: ${input.maxQuestions ?? 30})
 - Organic results only: use \`search_serp\`
 - Dig into a result: use \`extract_url\` on any organic URL`;
   const full = `# PAA Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
 ${paaTable}${serpTable}${entityIdsSection(entityIds)}${aiSection}${statsLine}${debugSection(diagnostics?.debug)}${tips}`;
-  const topQ = flat.slice(0, 10).map((r, i) => `${i + 1}. ${r.question}`).join("\n");
-  const topO = organic.slice(0, 5).map((r) => `${r.position}. [${r.title}](${r.url}) \u2014 ${r.domain}`).join("\n");
-  const summary = [
-    `**PAA: "${input.query}"** \u2014 ${flat.length} questions extracted`,
-    topQ ? `
-**Top questions:**
-${topQ}` : "",
-    organic.length ? `
-**Top organic results:**
-${topO}` : "",
-    entityIdsSummaryLine(entityIds),
-    `
-\u{1F4A1} \`maxQuestions\` up to 150 \xB7Use \`extract_url\` to dig into any result`
-  ].filter(Boolean).join("\n");
   return oneBlock(full);
 }
 function formatSearchSerp(raw, input) {
@@ -358,18 +438,6 @@ ${localRows}` : "";
   const full = `# SERP Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
 ${serpTable}${localSection}${entityIdsSection(entityIds)}${aiSection}${debugSection(diagnostics?.debug)}${tips}`;
-  const topO = organic.slice(0, 5).map((r) => `${r.position}. [${r.title}](${r.url}) \u2014 ${r.domain}`).join("\n");
-  const summary = [
-    `**SERP: "${input.query}"** \u2014 ${organic.length} organic results`,
-    topO ? `
-**Top results:**
-${topO}` : "",
-    localPack.length ? `
-**Local Pack:** ${localPack.map((b) => b.name).join(", ")}` : "",
-    entityIdsSummaryLine(entityIds),
-    `
-\u{1F4A1} Use \`harvest_paa\` for questions \xB7 \`extract_url\` to scrape any result`
-  ].filter(Boolean).join("\n");
   return oneBlock(full);
 }
 function formatExtractUrl(raw, input) {
@@ -480,15 +548,19 @@ ${broken.map((u) => `- ${u.url} (${u.status})`).join("\n")}` : "",
 - Extract content from all pages: use \`extract_site\`
 - Scrape a single page: use \`extract_url\``
   ].filter(Boolean).join("\n");
-  const summary = [
-    `**URL Map: ${input.url}**`,
-    `${d.totalFound} URLs \u2014 ${ok.length} OK \xB7 ${broken.length} broken \xB7 ${redirects.length} redirects`,
-    broken.length ? `
-**Broken URLs:** ${broken.slice(0, 3).map((u) => u.url).join(", ")}` : "",
-    `
-\u{1F4A1} Use \`extract_site\` to extract content from all pages`
-  ].filter(Boolean).join("\n");
-  return oneBlock(full);
+  return {
+    ...oneBlock(full),
+    structuredContent: {
+      startUrl: d.startUrl ?? input.url,
+      totalFound: d.totalFound ?? urls.length,
+      truncated: d.truncated === true,
+      okCount: ok.length,
+      redirectCount: redirects.length,
+      brokenCount: broken.length,
+      urls: urls.map((u) => ({ url: u.url, status: u.status ?? null })),
+      durationMs: d.durationMs ?? 0
+    }
+  };
 }
 function formatExtractSite(raw, input) {
   const parsed = parseData(raw);
@@ -513,13 +585,6 @@ ${pageRows}`,
 - Map URLs first: use \`map_site_urls\`
 - Inspect a single page: use \`extract_url\``
   ].join("\n");
-  const summary = [
-    `**Site Extract: ${input.url}** \u2014 ${pages.length} pages`,
-    pages.slice(0, 5).map((p) => `- ${p.title ?? p.url}`).join("\n"),
-    pages.length > 5 ? `- \u2026 and ${pages.length - 5} more` : "",
-    `
-\u{1F4A1} Use \`extract_url\` to inspect any individual page`
-  ].filter(Boolean).join("\n");
   return oneBlock(full);
 }
 function formatYoutubeHarvest(raw, input) {
@@ -550,16 +615,22 @@ ${videoRows}`,
 - Transcribe a video: use \`youtube_transcribe\` with the \`videoId\` above
 - Switch mode: \`mode: "channel"\` with \`channelHandle\` or \`mode: "search"\` with \`query\``
   ].filter(Boolean).join("\n");
-  const top5 = videos.slice(0, 5).map((v, i) => `${i + 1}. ${v.title} (\`${v.videoId}\`)`).join("\n");
-  const summary = [
-    `**YouTube: ${label}** \u2014 ${videos.length} videos`,
-    `
-**Top videos:**
-${top5}`,
-    `
-\u{1F4A1} Transcribe any video: \`youtube_transcribe\` with its videoId`
-  ].join("\n");
-  return oneBlock(full);
+  return {
+    ...oneBlock(full),
+    structuredContent: {
+      mode: input.mode,
+      videoCount: videos.length,
+      channel: d.channelMeta ? { title: d.channelMeta.title ?? null, subscriberCount: d.channelMeta.subscriberCount ?? null } : null,
+      videos: videos.map((v) => ({
+        videoId: String(v.videoId ?? ""),
+        title: String(v.title ?? ""),
+        channelName: v.channelName ?? null,
+        views: v.views ?? null,
+        duration: v.duration ?? null,
+        url: v.url ?? null
+      }))
+    }
+  };
 }
 function formatYoutubeTranscribe(raw, input) {
   const parsed = parseData(raw);
@@ -589,14 +660,6 @@ ${chunkRows}` : "",
 ---
 \u{1F4A1} Harvest more from this channel: use \`youtube_harvest\` with \`mode: "channel"\``
   ].filter(Boolean).join("\n");
-  const summary = [
-    `**YouTube Transcript: \`${input.videoId}\`** \u2014 ${text.split(" ").length} words \xB7 ${durSec}s`,
-    `
-**Preview:**
-> ${truncate(text, 300)}`,
-    `
-\u{1F4A1} Full transcript in artifact above`
-  ].join("\n");
   return oneBlock(full);
 }
 function formatFacebookPageIntel(raw, input) {
@@ -625,19 +688,26 @@ ${adBlocks}`,
 - Transcribe video ads: use \`facebook_ad_transcribe\` with the \`videoUrl\` above
 - Find other advertisers: use \`facebook_ad_search\``
   ].filter(Boolean).join("\n");
-  const activeAds = ads.filter((a) => a.status?.toLowerCase() === "active").slice(0, 5);
-  const adSummary = activeAds.map((a, i) => `${i + 1}. ${truncate(a.headline ?? a.primaryText, 80)} (${a.creativeType ?? "\u2014"})`).join("\n");
-  const videoCount = ads.filter((a) => a.videoUrl).length;
-  const summary = [
-    `**Facebook Ads: ${advertiser}** \u2014 ${s.totalAds} ads (${s.activeCount} active)`,
-    adSummary ? `
-**Active ads:**
-${adSummary}` : "",
-    `**Creative mix:** ${s.videoCount} video \xB7 ${s.imageCount} image`,
-    videoCount ? `
-\u{1F4A1} ${videoCount} video ads \u2014 transcribe with \`facebook_ad_transcribe\` using the videoUrl` : ""
-  ].filter(Boolean).join("\n");
-  return oneBlock(full);
+  return {
+    ...oneBlock(full),
+    structuredContent: {
+      advertiserName: d.advertiserName ?? null,
+      totalAds: s.totalAds ?? 0,
+      activeCount: s.activeCount ?? 0,
+      videoCount: s.videoCount ?? 0,
+      imageCount: s.imageCount ?? 0,
+      ads: ads.map((ad) => ({
+        libraryId: ad.libraryId ?? null,
+        status: ad.status ?? null,
+        creativeType: ad.creativeType ?? null,
+        headline: ad.headline ?? null,
+        cta: ad.cta ?? null,
+        startDate: ad.startDate ?? null,
+        videoUrl: ad.videoUrl ?? null,
+        variations: typeof ad.variations === "number" ? ad.variations : null
+      }))
+    }
+  };
 }
 function formatFacebookAdSearch(raw, input) {
   const parsed = parseData(raw);
@@ -661,15 +731,18 @@ ${rows}`,
 - Scan all ads: use \`facebook_page_intel\` with \`libraryId\`
 - Or pass the advertiser name as \`query\` in \`facebook_page_intel\``
   ].join("\n");
-  const summary = [
-    `**Facebook Ad Search: "${input.query}"** \u2014 ${advertisers.length} advertisers`,
-    advertisers.slice(0, 5).map(
-      (a, i) => `${i + 1}. ${a.name}${a.adCount ? ` (${a.adCount} ads)` : ""} \u2014 \`${a.libraryId ?? "\u2014"}\``
-    ).join("\n"),
-    `
-\u{1F4A1} Scan ads with \`facebook_page_intel\` using \`libraryId\``
-  ].filter(Boolean).join("\n");
-  return oneBlock(full);
+  return {
+    ...oneBlock(full),
+    structuredContent: {
+      query: input.query,
+      advertiserCount: advertisers.length,
+      advertisers: advertisers.map((a) => ({
+        name: a.pageName ?? a.name ?? null,
+        adCount: typeof a.adCount === "number" ? a.adCount : null,
+        libraryId: a.sampleLibraryId ?? a.libraryId ?? null
+      }))
+    }
+  };
 }
 function formatCreditsInfo(raw, input) {
   const parsed = parseData(raw);
@@ -708,16 +781,58 @@ ${costRows}` : "",
 | Date | Operation | Credits | Description |
 |------|-----------|---------|-------------|
 ${ledgerRows}` : ""
-  ].filter(Boolean).join("\n");
-  const summary = [
-    `**Credit balance:** ${balance ?? "unknown"} credits`,
-    matched ? `
-**${matched.label}:** ${matched.credits} credits ${matched.unit}` : null,
-    input.includeLedger && ledger.length ? `
-Recent ledger entries included in the full report.` : null
   ].filter(Boolean).join("\n");
   return oneBlock(full);
 }
+function formatMapsSearch(raw, input) {
+  const parsed = parseData(raw);
+  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
+  const d = parsed.data;
+  const results = d.results ?? [];
+  const searchQuery = d.searchQuery ?? [input.query, input.location].filter(Boolean).join(" ");
+  const requestedMax = d.requestedMaxResults ?? input.maxResults ?? 10;
+  const durationMs = d.durationMs;
+  const rows = results.map((r) => {
+    const rating = [r.rating, r.reviewCount ? `(${r.reviewCount})` : null].filter(Boolean).join(" ");
+    return `| ${r.position} | ${cell(r.name)} | ${cell(r.category)} | ${cell(rating)} | ${cell(r.address)} | ${r.cidDecimal ? `\`${r.cidDecimal}\`` : "\u2014"} | ${r.websiteUrl ? `[site](${r.websiteUrl})` : "\u2014"} | [maps](${r.placeUrl}) |`;
+  }).join("\n");
+  const metadataSection = results.length ? `
+## Candidate Metadata
+${results.map((r) => {
+    const meta = r.metadata?.length ? r.metadata.slice(0, 8).map((m) => `  - ${m}`).join("\n") : "  - none";
+    return `### ${r.position}. ${r.name}
+${meta}`;
+  }).join("\n\n")}` : "";
+  const full = [
+    `# Google Maps Search: "${searchQuery}"`,
+    `**Returned:** ${results.length} profile candidate${results.length === 1 ? "" : "s"} \xB7 **Requested max:** ${requestedMax} \xB7 **Limit:** 50`,
+    `
+## Results
+| # | Name | Category | Rating | Address | CID | Website | Maps |
+|---|------|----------|--------|---------|-----|---------|------|
+${rows}`,
+    metadataSection,
+    `
+---
+\u{1F4A1} **Next step:** use \`maps_place_intel\` with a selected business name and location to hydrate full hours, phone, review topics, and optional review cards.`,
+    durationMs != null ? `
+*Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
+  ].filter(Boolean).join("\n");
+  return {
+    ...oneBlock(full),
+    structuredContent: {
+      query: d.query,
+      location: d.location ?? null,
+      searchQuery: d.searchQuery,
+      searchUrl: d.searchUrl,
+      extractedAt: d.extractedAt,
+      requestedMaxResults: requestedMax,
+      resultCount: results.length,
+      results,
+      durationMs: durationMs ?? 0
+    }
+  };
+}
 function formatMapsPlaceIntel(raw, input) {
   const parsed = parseData(raw);
   if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
@@ -815,19 +930,6 @@ ${entitySection}` : null,
     durationMs != null ? `
 ---
 *Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
-  ].filter(Boolean).join("\n");
-  const summary = [
-    `**${name}** \u2014 ${category ?? "Business"} \xB7 ${ratingLine || "No rating"}`,
-    address ? `\u{1F4CD} ${address}` : null,
-    phone ? `\u{1F4DE} ${phone}` : null,
-    hoursSummary ? `\u{1F550} ${hoursSummary}` : null,
-    website ? `\u{1F310} ${website}` : null,
-    reviewsStatus === "collected" && reviews.length ? `
-\u{1F4AC} ${reviews.length} reviews fetched \u2014 full list in artifact above` : null,
-    reviewsStatus === "unavailable" ? `
-\u26A0\uFE0F Reviews could not be retrieved this run` : null,
-    reviewsStatus === "none_exist" ? `
-\u{1F4AC} No reviews on Google Maps` : null
   ].filter(Boolean).join("\n");
   return oneBlock(full);
 }
@@ -859,67 +961,112 @@ ${chunkRows}` : "",
 ---
 \u{1F4A1} Get more ads from this advertiser: use \`facebook_page_intel\``
   ].filter(Boolean).join("\n");
-  const summary = [
-    `**Facebook Ad Transcript** \u2014 ${text.split(" ").length} words \xB7 ${durSec}s`,
-    `
-**Preview:**
-> ${truncate(text, 300)}`,
-    `
-\u{1F4A1} Full transcript in artifact above`
-  ].join("\n");
   return oneBlock(full);
 }
 // src/mcp/paa-mcp-server.ts
-function buildPaaExtractorMcpServer(executor) {
-  const server = new McpServer({ name: "mcp-scraper", version: "1.0.0" });
+function liveWebToolAnnotations(title) {
+  return {
+    title,
+    readOnlyHint: true,
+    destructiveHint: false,
+    idempotentHint: false,
+    openWorldHint: true
+  };
+}
+function buildPaaExtractorMcpServer(executor, options = {}) {
+  const savesReports = options.savesReportsLocally !== false;
+  const reportNote = savesReports ? " Saves a full Markdown report locally." : " Reports are returned inline; no files are saved on this hosted endpoint.";
+  const withReportNote = (description) => `${description}${reportNote}`;
+  const server = new McpServer({ name: "mcp-scraper", version: PACKAGE_VERSION });
   server.registerTool("harvest_paa", {
-    description: 'Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-150 for "full", "deep", "all", or comprehensive research. Credits are charged by extracted question; unused request hold is refunded. Saves a full Markdown report locally.',
-    inputSchema: HarvestPaaInputSchema
+    title: "Google PAA + SERP Harvest",
+    description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-150 for "full", "deep", "all", or comprehensive research. Credits are charged by extracted question; unused request hold is refunded.'),
+    inputSchema: HarvestPaaInputSchema,
+    annotations: liveWebToolAnnotations("Google PAA + SERP Harvest")
   }, async (input) => formatHarvestPaa(await executor.harvestPaa(input), input));
   server.registerTool("search_serp", {
-    description: "Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request. Saves a full Markdown report locally.",
-    inputSchema: SearchSerpInputSchema
+    title: "Google SERP Lookup",
+    description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request."),
+    inputSchema: SearchSerpInputSchema,
+    annotations: liveWebToolAnnotations("Google SERP Lookup")
   }, async (input) => formatSearchSerp(await executor.searchSerp(input), input));
   server.registerTool("extract_url", {
-    description: "Extract structured data from one public URL: page content as Markdown, heading structure, JSON-LD schema, entity details, NAP score, metadata, and missing schema fields. Use when the user provides a single URL or asks to inspect/scrape one page. Saves a full Markdown report locally.",
-    inputSchema: ExtractUrlInputSchema
+    title: "Single URL Extract",
+    description: withReportNote("Extract structured data from one public URL: page content as Markdown, heading structure, JSON-LD schema, entity details, NAP score, metadata, and missing schema fields. Use when the user provides a single URL or asks to inspect/scrape one page."),
+    inputSchema: ExtractUrlInputSchema,
+    annotations: liveWebToolAnnotations("Single URL Extract")
   }, async (input) => formatExtractUrl(await executor.extractUrl(input), input));
   server.registerTool("map_site_urls", {
-    description: "Map/crawl a public website to build a URL inventory with HTTP status codes, broken links, redirects, and site scope. Use before extract_site for audits or when the user asks for a sitemap/URL inventory. Saves a full Markdown report locally.",
-    inputSchema: MapSiteUrlsInputSchema
+    title: "Site URL Map",
+    description: withReportNote("Map/crawl a public website to build a URL inventory with HTTP status codes, broken links, redirects, and site scope. Use before extract_site for audits or when the user asks for a sitemap/URL inventory."),
+    inputSchema: MapSiteUrlsInputSchema,
+    outputSchema: MapSiteUrlsOutputSchema,
+    annotations: liveWebToolAnnotations("Site URL Map")
   }, async (input) => formatMapSiteUrls(await executor.mapSiteUrls(input), input));
   server.registerTool("extract_site", {
-    description: "Run multi-page extraction across a public website. Returns per-page titles, H1s, metadata, headings, schema/entity data, canonical URLs, and content. Use for website audits, competitor audits, and full-site extraction. Saves a full Markdown report locally.",
-    inputSchema: ExtractSiteInputSchema
+    title: "Multi-Page Site Extract",
+    description: withReportNote("Run multi-page extraction across a public website. Returns per-page titles, H1s, metadata, headings, schema/entity data, canonical URLs, and content. Use for website audits, competitor audits, and full-site extraction."),
+    inputSchema: ExtractSiteInputSchema,
+    annotations: liveWebToolAnnotations("Multi-Page Site Extract")
   }, async (input) => formatExtractSite(await executor.extractSite(input), input));
   server.registerTool("youtube_harvest", {
-    description: 'Harvest YouTube video metadata by search query or channel handle/ID/URL. Use mode "search" for keyword/topic requests and mode "channel" for @handles, channel IDs, or channel URLs. Returns titles, views, dates, durations, URLs, thumbnails, and videoIds for follow-up transcription. Saves a full Markdown report locally.',
-    inputSchema: YoutubeHarvestInputSchema
+    title: "YouTube Video Harvest",
+    description: withReportNote('Harvest YouTube video metadata by search query or channel handle/ID/URL. Use mode "search" for keyword/topic requests and mode "channel" for @handles, channel IDs, or channel URLs. Returns titles, views, dates, durations, URLs, thumbnails, and videoIds for follow-up transcription.'),
+    inputSchema: YoutubeHarvestInputSchema,
+    outputSchema: YoutubeHarvestOutputSchema,
+    annotations: liveWebToolAnnotations("YouTube Video Harvest")
   }, async (input) => formatYoutubeHarvest(await executor.youtubeHarvest(input), input));
   server.registerTool("youtube_transcribe", {
-    description: "Fetch and transcribe captions from a YouTube video. Returns full transcript, timestamped chunks, and word count. Pass a videoId from youtube_harvest results or infer it from a YouTube URL if the user provided one. Saves a full Markdown report locally.",
-    inputSchema: YoutubeTranscribeInputSchema
+    title: "YouTube Transcription",
+    description: withReportNote("Fetch and transcribe captions from a YouTube video. Returns full transcript, timestamped chunks, and word count. Pass a videoId from youtube_harvest results or infer it from a YouTube URL if the user provided one."),
+    inputSchema: YoutubeTranscribeInputSchema,
+    annotations: liveWebToolAnnotations("YouTube Transcription")
   }, async (input) => formatYoutubeTranscribe(await executor.youtubeTranscribe(input), input));
   server.registerTool("facebook_page_intel", {
-    description: "Harvest ads from a Facebook advertiser. Returns ad copy, headlines, CTAs, creative type, status, landing URLs, and video URLs ready for transcription. Accepts pageId, libraryId, or a brand/advertiser name as query. Use after facebook_ad_search when possible. Saves a full Markdown report locally.",
-    inputSchema: FacebookPageIntelInputSchema
+    title: "Facebook Advertiser Ad Intel",
+    description: withReportNote("Harvest ads from a Facebook advertiser. Returns ad copy, headlines, CTAs, creative type, status, landing URLs, and video URLs ready for transcription. Accepts pageId, libraryId, or a brand/advertiser name as query. Use after facebook_ad_search when possible."),
+    inputSchema: FacebookPageIntelInputSchema,
+    outputSchema: FacebookPageIntelOutputSchema,
+    annotations: liveWebToolAnnotations("Facebook Advertiser Ad Intel")
   }, async (input) => formatFacebookPageIntel(await executor.facebookPageIntel(input), input));
   server.registerTool("facebook_ad_search", {
-    description: "Search Facebook Ad Library by brand, advertiser, competitor, niche, or keyword. Returns advertisers with ad counts and library IDs. Use to discover competitors, then pass libraryId to facebook_page_intel. Saves a full Markdown report locally.",
-    inputSchema: FacebookAdSearchInputSchema
+    title: "Facebook Ad Library Search",
+    description: withReportNote("Search Facebook Ad Library by brand, advertiser, competitor, niche, or keyword. Returns advertisers with ad counts and library IDs. Use to discover competitors, then pass libraryId to facebook_page_intel."),
+    inputSchema: FacebookAdSearchInputSchema,
+    outputSchema: FacebookAdSearchOutputSchema,
+    annotations: liveWebToolAnnotations("Facebook Ad Library Search")
   }, async (input) => formatFacebookAdSearch(await executor.facebookAdSearch(input), input));
   server.registerTool("facebook_ad_transcribe", {
+    title: "Facebook Ad Transcription",
     description: "Transcribe audio from a Facebook ad video. Returns full transcript and timestamped chunks. Use the videoUrl value from facebook_page_intel results.",
-    inputSchema: FacebookAdTranscribeInputSchema
+    inputSchema: FacebookAdTranscribeInputSchema,
+    annotations: liveWebToolAnnotations("Facebook Ad Transcription")
   }, async (input) => formatFacebookAdTranscribe(await executor.facebookAdTranscribe(input), input));
   server.registerTool("maps_place_intel", {
-    description: 'Extract Google Maps business intelligence for a named business: rating, review count, category, address, phone, website, hours, booking URL, review histogram, review topics, about attributes, entity IDs, and optional review cards. Split business name from location (e.g. "Elite Roofing Denver CO" => businessName "Elite Roofing", location "Denver, CO"). Pass includeReviews true when the user asks for reviews/customer pain. Saves a full Markdown report locally.',
-    inputSchema: MapsPlaceIntelInputSchema
+    title: "Google Maps Business Profile Details",
+    description: withReportNote('Extract Google Maps business intelligence for one known/named business: rating, review count, category, address, phone, website, hours, booking URL, review histogram, review topics, about attributes, entity IDs, and optional review cards. Do not use this for category searches, local market prospect lists, or requests for multiple GMB/GBP profiles; use maps_search first for those. Split business name from location (e.g. "Elite Roofing Denver CO" => businessName "Elite Roofing", location "Denver, CO"). Pass includeReviews true when the user asks for reviews/customer pain.'),
+    inputSchema: MapsPlaceIntelInputSchema,
+    annotations: liveWebToolAnnotations("Google Maps Business Profile Details")
   }, async (input) => formatMapsPlaceIntel(await executor.mapsPlaceIntel(input), input));
+  server.registerTool("maps_search", {
+    title: "Google Maps Business Search",
+    description: withReportNote('Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.'),
+    inputSchema: MapsSearchInputSchema,
+    outputSchema: MapsSearchOutputSchema,
+    annotations: liveWebToolAnnotations("Google Maps Business Search")
+  }, async (input) => formatMapsSearch(await executor.mapsSearch(input), input));
   server.registerTool("credits_info", {
+    title: "MCP Scraper Credits & Costs",
     description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",
-    inputSchema: CreditsInfoInputSchema
+    inputSchema: CreditsInfoInputSchema,
+    annotations: {
+      title: "MCP Scraper Credits & Costs",
+      readOnlyHint: true,
+      destructiveHint: false,
+      idempotentHint: true,
+      openWorldHint: false
+    }
   }, async (input) => formatCreditsInfo(await executor.creditsInfo(input), input));
   return server;
 }
@@ -1013,6 +1160,9 @@ var HttpMcpToolExecutor = class {
   mapsPlaceIntel(input) {
     return this.call("/maps/place", input);
   }
+  mapsSearch(input) {
+    return this.call("/maps/search", input);
+  }
   creditsInfo(input) {
     return this.call("/billing/credits", input);
   }
@@ -1028,7 +1178,9 @@ export {
   harvestTimeoutBudget,
   CaptureSerpSnapshotInputSchema,
   CaptureSerpPageSnapshotsInputSchema,
+  configureReportSaving,
+  liveWebToolAnnotations,
   buildPaaExtractorMcpServer,
   HttpMcpToolExecutor
 };
-//# sourceMappingURL=chunk-6TWZS2FQ.js.map
+//# sourceMappingURL=chunk-3OIRNUF5.js.map