mcp-scraper 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,9 +2,9 @@
2
2
  "use strict";
3
3
 
4
4
  // bin/mcp-stdio-server.ts
5
- var import_node_fs2 = require("fs");
5
+ var import_node_fs3 = require("fs");
6
6
  var import_node_os2 = require("os");
7
- var import_node_path2 = require("path");
7
+ var import_node_path3 = require("path");
8
8
  var import_stdio = require("@modelcontextprotocol/sdk/server/stdio.js");
9
9
 
10
10
  // src/harvest-timeout.ts
@@ -126,203 +126,11 @@ var HttpMcpToolExecutor = class {
126
126
 
127
127
  // src/mcp/paa-mcp-server.ts
128
128
  var import_mcp = require("@modelcontextprotocol/sdk/server/mcp.js");
129
+ var import_node_fs2 = require("fs");
130
+ var import_node_path2 = require("path");
129
131
 
130
132
  // src/version.ts
131
- var PACKAGE_VERSION = "0.1.7";
132
-
133
- // src/mcp/mcp-tool-schemas.ts
134
- var import_zod = require("zod");
135
- var HarvestPaaInputSchema = {
136
- query: import_zod.z.string().min(1).describe('Core search topic only. If the user says "best hvac company in Denver CO", use query="best hvac company" and location="Denver, CO". Do not include the location in query when it can be separated.'),
137
- location: import_zod.z.string().optional().describe('City, region, or country for geo-targeted results, inferred from the user request when present, e.g. "Denver, CO", "Tokyo, Japan", "London, UK".'),
138
- maxQuestions: import_zod.z.number().int().min(1).max(200).default(30).describe("Number of PAA questions to extract. Default 30. Maximum 200. Use 10 for quick probes, 30 for normal research, 100-200 when the user asks for everything/full/deep research. Larger harvests get a longer server time budget (151-200 questions \u2192 up to 280s). Credits are charged by extracted question; unused request hold is refunded."),
139
- gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location or user language. Examples: United States us, United Kingdom gb, Japan jp, Canada ca, Australia au."),
140
- hl: import_zod.z.string().default("en").describe("Google interface/content language inferred from the user request. Use en unless the user asks for another language or locale."),
141
- device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
142
- proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default so city/state searches create or reuse a matching residential proxy. Use configured for the static configured proxy. Use none only for direct-network debugging."),
143
- proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use only when the user gives a specific ZIP or city-center proxy targeting needs to be forced."),
144
- debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/session/location diagnostics in the response. Use true when debugging localization, CAPTCHA, or proxy behavior.")
145
- };
146
- var ExtractUrlInputSchema = {
147
- url: import_zod.z.string().url().describe("Public http/https URL to extract. Use this when the user provides one specific page URL."),
148
- screenshot: import_zod.z.boolean().default(false).describe("Also capture a full-page screenshot of the URL. Saved to ~/Downloads/mcp-scraper/screenshots/ and returned inline. Use when the user asks to see or capture the page visually."),
149
- screenshotDevice: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("Viewport for screenshot. desktop = 1440\xD7900. mobile = 390\xD7844. Default desktop."),
150
- extractBranding: import_zod.z.boolean().default(false).describe("Extract brand colors, fonts, logo, and favicon using a rendered browser session. Returns colorScheme (light/dark), colors (primary/accent/background/text/heading as hex), fonts (heading/body family names), and assets (logo URL, favicon URL). Use when the user asks about brand colors, site theme, or brand assets."),
151
- downloadMedia: import_zod.z.boolean().default(false).describe("Extract and download all page media (images, video, audio) to ~/Downloads/mcp-scraper/media/. Ad networks, tracking pixels, and noise URLs are filtered automatically. Use when the user asks to download or harvest assets from a page."),
152
- mediaTypes: import_zod.z.array(import_zod.z.enum(["image", "video", "audio"])).default(["image", "video", "audio"]).describe("Which media types to download. Default all three."),
153
- allowLocal: import_zod.z.boolean().default(false).describe("Allow localhost and private-network URLs. For local development only.")
154
- };
155
- var MapSiteUrlsInputSchema = {
156
- url: import_zod.z.string().url().describe("Public website URL or domain to crawl for internal URLs. Use before extract_site when the user asks to audit/map/crawl a site."),
157
- maxUrls: import_zod.z.number().int().min(1).max(500).optional().describe("Maximum URLs to discover. Use 100 for normal maps, higher when the user asks for a full inventory.")
158
- };
159
- var ExtractSiteInputSchema = {
160
- url: import_zod.z.string().url().describe("Public website URL or domain to extract across multiple pages. Use when the user asks for a site audit, website crawl, or full-site content/schema extraction."),
161
- maxPages: import_zod.z.number().int().min(1).max(50).optional().describe("Maximum pages to extract. Use 50 when the user asks for full results or a complete crawl within MCP limits.")
162
- };
163
- var YoutubeHarvestInputSchema = {
164
- mode: import_zod.z.enum(["search", "channel"]).describe("Use search for topic/keyword requests. Use channel when the user provides @handle, channel ID, or channel URL."),
165
- query: import_zod.z.string().optional().describe("Required when mode is search. The YouTube search topic in the user\u2019s words."),
166
- channelHandle: import_zod.z.string().optional().describe("YouTube channel handle, channel ID, or URL. Examples: @mkbhd, UC..., https://youtube.com/@mkbhd."),
167
- maxVideos: import_zod.z.number().int().min(1).max(500).default(50).describe("Number of videos to return. Default 50. Increase when user asks for full channel/history.")
168
- };
169
- var YoutubeTranscribeInputSchema = {
170
- videoId: import_zod.z.string().min(1).describe("YouTube video ID, e.g. dQw4w9WgXcQ")
171
- };
172
- var FacebookPageIntelInputSchema = {
173
- pageId: import_zod.z.string().optional(),
174
- libraryId: import_zod.z.string().optional(),
175
- query: import_zod.z.string().optional().describe("Advertiser or brand name when pageId/libraryId is not known. One of pageId, libraryId, or query is required."),
176
- maxAds: import_zod.z.number().int().min(1).max(200).default(50),
177
- country: import_zod.z.string().length(2).default("US")
178
- };
179
- var FacebookAdSearchInputSchema = {
180
- query: import_zod.z.string().min(1).describe("Advertiser, brand, competitor, niche, or keyword to search in Facebook Ad Library."),
181
- country: import_zod.z.string().length(2).default("US"),
182
- maxResults: import_zod.z.number().int().min(1).max(20).default(10)
183
- };
184
- var FacebookAdTranscribeInputSchema = {
185
- videoUrl: import_zod.z.string().url().describe("Facebook CDN video URL from a facebook_page_intel result")
186
- };
187
- var MapsPlaceIntelInputSchema = {
188
- businessName: import_zod.z.string().min(1).describe('Business name only. If user says "Elite Roofing Denver CO", use businessName="Elite Roofing" and location="Denver, CO".'),
189
- location: import_zod.z.string().min(1).describe('City/region/country where the business should be searched, e.g. "Denver, CO". Infer from the user request when possible.'),
190
- gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location."),
191
- hl: import_zod.z.string().length(2).default("en").describe("Language inferred from user request."),
192
- includeReviews: import_zod.z.boolean().default(false).describe("Whether to fetch individual review cards"),
193
- maxReviews: import_zod.z.number().int().min(1).max(500).default(50).describe("Max review cards to return (requires includeReviews: true)")
194
- };
195
- var MapsSearchInputSchema = {
196
- query: import_zod.z.string().min(1).describe('Business category, niche, keyword, or search term. If the user says "roofers in Denver CO", use query="roofers" and location="Denver, CO". Do not put the location here when it can be separated.'),
197
- location: import_zod.z.string().optional().describe('City, region, country, or service area for the Maps search, e.g. "Denver, CO". Infer from the user request when present.'),
198
- gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location."),
199
- hl: import_zod.z.string().length(2).default("en").describe("Language inferred from user request."),
200
- maxResults: import_zod.z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more.")
201
- };
202
- var NullableString = import_zod.z.string().nullable();
203
- var MapsSearchOutputSchema = {
204
- query: import_zod.z.string(),
205
- location: import_zod.z.string().nullable(),
206
- searchQuery: import_zod.z.string(),
207
- searchUrl: import_zod.z.string().url(),
208
- extractedAt: import_zod.z.string(),
209
- requestedMaxResults: import_zod.z.number().int().min(1).max(50),
210
- resultCount: import_zod.z.number().int().min(0).max(50),
211
- results: import_zod.z.array(import_zod.z.object({
212
- position: import_zod.z.number().int().min(1),
213
- name: import_zod.z.string(),
214
- placeUrl: import_zod.z.string().url(),
215
- cid: NullableString,
216
- cidDecimal: NullableString,
217
- rating: NullableString,
218
- reviewCount: NullableString,
219
- category: NullableString,
220
- address: NullableString,
221
- websiteUrl: NullableString,
222
- directionsUrl: NullableString,
223
- metadata: import_zod.z.array(import_zod.z.string())
224
- })),
225
- durationMs: import_zod.z.number().int().min(0)
226
- };
227
- var MapSiteUrlsOutputSchema = {
228
- startUrl: import_zod.z.string(),
229
- totalFound: import_zod.z.number().int().min(0),
230
- truncated: import_zod.z.boolean(),
231
- okCount: import_zod.z.number().int().min(0),
232
- redirectCount: import_zod.z.number().int().min(0),
233
- brokenCount: import_zod.z.number().int().min(0),
234
- urls: import_zod.z.array(import_zod.z.object({
235
- url: import_zod.z.string(),
236
- status: import_zod.z.number().int().nullable()
237
- })),
238
- durationMs: import_zod.z.number().min(0)
239
- };
240
- var YoutubeHarvestOutputSchema = {
241
- mode: import_zod.z.string(),
242
- videoCount: import_zod.z.number().int().min(0),
243
- channel: import_zod.z.object({
244
- title: NullableString,
245
- subscriberCount: NullableString
246
- }).nullable(),
247
- videos: import_zod.z.array(import_zod.z.object({
248
- videoId: import_zod.z.string(),
249
- title: import_zod.z.string(),
250
- channelName: NullableString,
251
- views: NullableString,
252
- duration: NullableString,
253
- url: NullableString
254
- }))
255
- };
256
- var FacebookAdSearchOutputSchema = {
257
- query: import_zod.z.string(),
258
- advertiserCount: import_zod.z.number().int().min(0),
259
- advertisers: import_zod.z.array(import_zod.z.object({
260
- name: NullableString,
261
- adCount: import_zod.z.number().int().nullable(),
262
- libraryId: NullableString
263
- }))
264
- };
265
- var FacebookPageIntelOutputSchema = {
266
- advertiserName: NullableString,
267
- totalAds: import_zod.z.number().int().min(0),
268
- activeCount: import_zod.z.number().int().min(0),
269
- videoCount: import_zod.z.number().int().min(0),
270
- imageCount: import_zod.z.number().int().min(0),
271
- ads: import_zod.z.array(import_zod.z.object({
272
- libraryId: NullableString,
273
- status: NullableString,
274
- creativeType: NullableString,
275
- headline: NullableString,
276
- cta: NullableString,
277
- startDate: NullableString,
278
- videoUrl: NullableString,
279
- variations: import_zod.z.number().int().nullable()
280
- }))
281
- };
282
- var CreditsInfoInputSchema = {
283
- item: import_zod.z.string().optional().describe('Optional tool, action, or feature to look up, e.g. "maps reviews", "extract_url", or "YouTube transcription"'),
284
- includeLedger: import_zod.z.boolean().default(false).describe("Whether to include recent credit ledger entries")
285
- };
286
- var SearchSerpInputSchema = {
287
- query: import_zod.z.string().min(1).describe('Core search topic only. Separate location when possible. If user says "best dentist in Brooklyn NY serp", use query="best dentist" and location="Brooklyn, NY".'),
288
- location: import_zod.z.string().optional().describe("City, region, or country for geo-targeted results, inferred from user request when present."),
289
- gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location or user language."),
290
- hl: import_zod.z.string().default("en").describe("Google interface/content language inferred from user request."),
291
- device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
292
- proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default so city/state searches create or reuse a matching residential proxy. Use configured for the static configured proxy. Use none only for direct-network debugging."),
293
- proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use only when the user gives a specific ZIP or city-center proxy targeting needs to be forced."),
294
- debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/session/location diagnostics in the response. Use true when debugging localization, CAPTCHA, or proxy behavior."),
295
- pages: import_zod.z.number().int().min(1).max(2).default(1).describe("Number of result pages to fetch (1\u20132)")
296
- };
297
- var CaptureSerpSnapshotInputSchema = {
298
- query: import_zod.z.string().min(1).describe("Core search query to capture as a structured SERP Intelligence snapshot. Separate the place into location when the user gives a city, region, country, or ZIP."),
299
- location: import_zod.z.string().optional().describe("City, region, country, or service area used for localized Google results. MCP Scraper records location evidence; UULE alone is not proof of localization."),
300
- gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from the requested market, e.g. us, gb, ca, au."),
301
- hl: import_zod.z.string().default("en").describe("Google interface/content language inferred from the user request."),
302
- device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use mobile only when the user asks for mobile rankings or mobile SERP evidence."),
303
- proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy behavior for capture. Use location for localized residential proxy targeting, configured for the static residential proxy, and none only for direct-network debugging."),
304
- proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting when a precise city-center or ZIP proxy is needed."),
305
- pages: import_zod.z.number().int().min(1).max(2).default(1).describe("Number of Google result pages to capture. Use 1 normally and 2 only when the user needs deeper ranking evidence."),
306
- debug: import_zod.z.boolean().default(false).describe("Include sanitized browser, proxy, and location diagnostics. Use true when debugging localization, CAPTCHA, proxy selection, or capture reliability."),
307
- includePageSnapshots: import_zod.z.boolean().default(false).describe("Also capture ranking-page snapshots for selected SERP URLs through the same product capture path."),
308
- pageSnapshotLimit: import_zod.z.number().int().min(0).max(10).default(0).describe("Maximum ranking-page snapshots to capture when includePageSnapshots is true. Use 0 when only SERP evidence is needed.")
309
- };
310
- var ScreenshotInputSchema = {
311
- url: import_zod.z.string().url().describe("URL to capture as a full-page screenshot. Use http or https. Pass allowLocal: true to capture localhost or private-network URLs during development."),
312
- device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("Viewport profile. desktop = 1440\xD7900. mobile = 390\xD7844. Use desktop by default; use mobile when the user asks for a mobile view."),
313
- allowLocal: import_zod.z.boolean().default(false).describe("Allow localhost and private-network URLs (127.x, 192.168.x, 10.x, etc.). For local development only \u2014 not for production use.")
314
- };
315
- var CaptureSerpPageSnapshotsInputSchema = {
316
- urls: import_zod.z.array(import_zod.z.string().url()).min(1).max(25).describe("Public HTTP/HTTPS URLs to capture as SERP Intelligence page snapshots. Do not pass localhost, private IPs, file URLs, or internal admin URLs."),
317
- targets: import_zod.z.array(import_zod.z.object({
318
- url: import_zod.z.string().url().describe("Public HTTP/HTTPS URL to capture."),
319
- sourceKind: import_zod.z.enum(["organic", "ai_citation", "local_pack_website", "configured_target", "site_subject"]).default("configured_target").describe("Why this page is being captured for SERP Intelligence evidence."),
320
- sourcePosition: import_zod.z.number().int().min(1).optional().describe("Ranking or citation position when the page came from SERP evidence.")
321
- }).strict()).min(1).max(25).optional().describe("Structured page snapshot targets. Use this instead of urls when source kind or position should be preserved."),
322
- maxConcurrency: import_zod.z.number().int().min(1).max(5).default(2).describe("Parallel page captures. Use 2 normally; higher values can increase proxy/browser pressure."),
323
- timeoutMs: import_zod.z.number().int().min(1e3).max(6e4).default(15e3).describe("Per-page capture timeout in milliseconds. Increase for slow pages; timeout artifacts are returned as structured capture failures."),
324
- debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/proxy diagnostics for page snapshot debugging. Use true for capture, network, or proxy troubleshooting.")
325
- };
133
+ var PACKAGE_VERSION = "0.1.9";
326
134
 
327
135
  // src/mcp/mcp-response-formatter.ts
328
136
  var import_node_fs = require("fs");
@@ -434,7 +242,7 @@ function debugSection(debug) {
434
242
  if (!debug || typeof debug !== "object") return "";
435
243
  const request = debug.request ?? {};
436
244
  const browser = debug.browser ?? {};
437
- const kernel = browser.kernel ?? {};
245
+ const kernel = browser.browserRuntime ?? browser.kernel ?? {};
438
246
  const network = browser.networkLocation ?? {};
439
247
  const nav = browser.serpNavigation ?? {};
440
248
  const proxyResolution = kernel.proxyResolution ?? {};
@@ -460,12 +268,14 @@ function errorAttemptsSection(body) {
460
268
  const lines = attempts.slice(0, 5).map((attempt) => {
461
269
  const debug = attempt.debug ?? {};
462
270
  const browser = debug.browser ?? {};
463
- const kernel = browser.kernel ?? {};
271
+ const kernel = browser.browserRuntime ?? browser.kernel ?? {};
464
272
  const proxyResolution = kernel.proxyResolution ?? {};
465
273
  const network = browser.networkLocation ?? {};
466
274
  const nav = browser.serpNavigation ?? {};
467
275
  const geo = [network.ip, network.city, network.region].filter(Boolean).join(" / ") || "geo unknown";
468
- return `- Attempt ${attempt.attempt_number ?? "?"}: ${attempt.outcome ?? attempt.status ?? "unknown"} \xB7 session ${attempt.kernel_session_id ?? kernel.sessionId ?? "unknown"} \xB7 proxy ${debug.request?.proxyMode ?? kernel.proxyMode ?? "unknown"}${proxyResolution.source ? `/${proxyResolution.source}` : ""} \xB7 ${geo} \xB7 CAPTCHA ${nav.captchaDetected === true ? "yes" : nav.captchaDetected === false ? "no" : "unknown"} \xB7 deleted ${attempt.kernel_delete_succeeded === true ? "yes" : attempt.kernel_delete_succeeded === false ? "no" : "unknown"}`;
276
+ const sessionId = attempt.browser_session_id ?? attempt.kernel_session_id ?? kernel.sessionId ?? "unknown";
277
+ const cleanupSucceeded = attempt.session_cleanup_succeeded ?? attempt.kernel_delete_succeeded;
278
+ return `- Attempt ${attempt.attempt_number ?? "?"}: ${attempt.outcome ?? attempt.status ?? "unknown"} \xB7 session ${sessionId} \xB7 proxy ${debug.request?.proxyMode ?? kernel.proxyMode ?? "unknown"}${proxyResolution.source ? `/${proxyResolution.source}` : ""} \xB7 ${geo} \xB7 CAPTCHA ${nav.captchaDetected === true ? "yes" : nav.captchaDetected === false ? "no" : "unknown"} \xB7 cleanup ${cleanupSucceeded === true ? "yes" : cleanupSucceeded === false ? "no" : "unknown"}`;
469
279
  });
470
280
  return `
471
281
 
@@ -512,7 +322,31 @@ ${serpRows}` : "";
512
322
  const full = `# PAA Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
513
323
 
514
324
  ${paaTable}${serpTable}${entityIdsSection(entityIds)}${aiSection}${statsLine}${debugSection(diagnostics?.debug)}${tips}`;
515
- return oneBlock(full);
325
+ return {
326
+ ...oneBlock(full),
327
+ structuredContent: {
328
+ query: input.query,
329
+ location: input.location ?? null,
330
+ questionCount: flat.length,
331
+ completionStatus: diagnostics?.completionStatus ?? null,
332
+ questions: flat.map((r) => ({
333
+ question: String(r.question ?? ""),
334
+ answer: r.answer ?? null,
335
+ sourceTitle: r.source_title ?? null,
336
+ sourceSite: r.source_site ?? null
337
+ })),
338
+ organicResults: organic.map((r) => ({
339
+ position: Number(r.position) || 0,
340
+ title: String(r.title ?? ""),
341
+ url: String(r.url ?? ""),
342
+ domain: String(r.domain ?? ""),
343
+ snippet: r.snippet ?? null
344
+ })),
345
+ aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
346
+ entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null,
347
+ durationMs: durationMs ?? null
348
+ }
349
+ };
516
350
  }
517
351
  function formatSearchSerp(raw, input) {
518
352
  const parsed = parseData(raw);
@@ -550,7 +384,29 @@ ${localRows}` : "";
550
384
  const full = `# SERP Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
551
385
 
552
386
  ${serpTable}${localSection}${entityIdsSection(entityIds)}${aiSection}${debugSection(diagnostics?.debug)}${tips}`;
553
- return oneBlock(full);
387
+ return {
388
+ ...oneBlock(full),
389
+ structuredContent: {
390
+ query: input.query,
391
+ location: input.location ?? null,
392
+ organicResults: organic.map((r) => ({
393
+ position: Number(r.position) || 0,
394
+ title: String(r.title ?? ""),
395
+ url: String(r.url ?? ""),
396
+ domain: String(r.domain ?? ""),
397
+ snippet: r.snippet ?? null
398
+ })),
399
+ localPack: localPack.map((b) => ({
400
+ position: Number(b.position) || 0,
401
+ name: String(b.name ?? ""),
402
+ rating: b.rating ?? null,
403
+ reviewCount: b.reviewCount ?? null,
404
+ websiteUrl: b.websiteUrl ?? null
405
+ })),
406
+ aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
407
+ entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null
408
+ }
409
+ };
554
410
  }
555
411
  function formatExtractUrl(raw, input) {
556
412
  const parsed = parseData(raw);
@@ -619,15 +475,27 @@ ${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
619
475
  **${title}**
620
476
  ${headingSection}${kpoSection}${brandingSection}${bodySection}${screenshotSection}${mediaSection}${tips}`;
621
477
  const textResult = oneBlock(full);
478
+ const structuredContent = {
479
+ url,
480
+ title: d.title ?? null,
481
+ headings: headings.map((h) => ({ level: Number(h.level) || 0, text: String(h.text ?? "") })),
482
+ schemaBlockCount: schemaCount,
483
+ entityName: kpo?.entityName ?? null,
484
+ entityTypes: kpo?.type ?? [],
485
+ napScore: kpo?.napScore ?? null,
486
+ missingSchemaFields: kpo?.missingFields ?? [],
487
+ screenshotSaved: screenshotPath ?? null
488
+ };
622
489
  if (screenshotMeta?.base64) {
623
490
  return {
624
491
  content: [
625
492
  ...textResult.content,
626
493
  { type: "image", data: screenshotMeta.base64, mimeType: "image/png" }
627
- ]
494
+ ],
495
+ structuredContent
628
496
  };
629
497
  }
630
- return textResult;
498
+ return { ...textResult, structuredContent };
631
499
  }
632
500
  function formatMapSiteUrls(raw, input) {
633
501
  const parsed = parseData(raw);
@@ -697,7 +565,19 @@ ${pageRows}`,
697
565
  - Map URLs first: use \`map_site_urls\`
698
566
  - Inspect a single page: use \`extract_url\``
699
567
  ].join("\n");
700
- return oneBlock(full);
568
+ return {
569
+ ...oneBlock(full),
570
+ structuredContent: {
571
+ url: input.url,
572
+ pageCount: pages.length,
573
+ pages: pages.map((p) => ({
574
+ url: String(p.url ?? ""),
575
+ title: p.title ?? null,
576
+ schemaTypes: p.kpo?.type ?? []
577
+ })),
578
+ durationMs: d.durationMs ?? 0
579
+ }
580
+ };
701
581
  }
702
582
  function formatYoutubeHarvest(raw, input) {
703
583
  const parsed = parseData(raw);
@@ -894,7 +774,26 @@ ${costRows}` : "",
894
774
  |------|-----------|---------|-------------|
895
775
  ${ledgerRows}` : ""
896
776
  ].filter(Boolean).join("\n");
897
- return oneBlock(full);
777
+ return {
778
+ ...oneBlock(full),
779
+ structuredContent: {
780
+ balanceCredits: typeof balance === "number" ? balance : null,
781
+ matchedCost: matched ? { label: matched.label, credits: matched.credits, unit: matched.unit, notes: matched.notes ?? null } : null,
782
+ costs: costs.map((c) => ({
783
+ key: c.key,
784
+ label: c.label,
785
+ credits: c.credits,
786
+ unit: c.unit,
787
+ notes: c.notes ?? null
788
+ })),
789
+ ledger: ledger.map((row) => ({
790
+ createdAt: String(row.created_at ?? ""),
791
+ operation: String(row.operation ?? ""),
792
+ credits: row.amount_mc / 1e3,
793
+ description: row.description ?? null
794
+ }))
795
+ }
796
+ };
898
797
  }
899
798
  function formatMapsSearch(raw, input) {
900
799
  const parsed = parseData(raw);
@@ -1043,7 +942,28 @@ ${entitySection}` : null,
1043
942
  ---
1044
943
  *Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
1045
944
  ].filter(Boolean).join("\n");
1046
- return oneBlock(full);
945
+ return {
946
+ ...oneBlock(full),
947
+ structuredContent: {
948
+ name,
949
+ rating: rating ?? null,
950
+ reviewCount: reviewCount ?? null,
951
+ category: category ?? null,
952
+ address: address ?? null,
953
+ phone: phone ?? null,
954
+ website: website ?? null,
955
+ hoursSummary: hoursSummary ?? null,
956
+ bookingUrl: bookingUrl ?? null,
957
+ kgmid: kgmid ?? null,
958
+ cidDecimal: cidDecimal ?? null,
959
+ cidUrl: cidUrl ?? null,
960
+ lat: lat ?? null,
961
+ lng: lng ?? null,
962
+ reviewsStatus,
963
+ reviewsCollected: reviews.length,
964
+ reviewTopics: topics.map((t) => ({ label: String(t.label ?? ""), count: String(t.count ?? "") }))
965
+ }
966
+ };
1047
967
  }
1048
968
  function formatFacebookAdTranscribe(raw, input) {
1049
969
  const parsed = parseData(raw);
@@ -1076,6 +996,314 @@ ${chunkRows}` : "",
1076
996
  return oneBlock(full);
1077
997
  }
1078
998
 
999
+ // src/mcp/mcp-tool-schemas.ts
1000
+ var import_zod = require("zod");
1001
+ var HarvestPaaInputSchema = {
1002
+ query: import_zod.z.string().min(1).describe('Core search topic only. If the user says "best hvac company in Denver CO", use query="best hvac company" and location="Denver, CO". Do not include the location in query when it can be separated.'),
1003
+ location: import_zod.z.string().optional().describe('City, region, or country for geo-targeted results, inferred from the user request when present, e.g. "Denver, CO", "Tokyo, Japan", "London, UK".'),
1004
+ maxQuestions: import_zod.z.number().int().min(1).max(200).default(30).describe("Number of PAA questions to extract. Default 30. Maximum 200. Use 10 for quick probes, 30 for normal research, 100-200 when the user asks for everything/full/deep research. Larger harvests get a longer server time budget (151-200 questions \u2192 up to 280s). Credits are charged by extracted question; unused request hold is refunded."),
1005
+ gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location or user language. Examples: United States us, United Kingdom gb, Japan jp, Canada ca, Australia au."),
1006
+ hl: import_zod.z.string().default("en").describe("Google interface/content language inferred from the user request. Use en unless the user asks for another language or locale."),
1007
+ device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
1008
+ proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default so city/state searches create or reuse a matching residential proxy. Use configured for the static configured proxy. Use none only for direct-network debugging."),
1009
+ proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use only when the user gives a specific ZIP or city-center proxy targeting needs to be forced."),
1010
+ debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/session/location diagnostics in the response. Use true when debugging localization, CAPTCHA, or proxy behavior.")
1011
+ };
1012
+ var ExtractUrlInputSchema = {
1013
+ url: import_zod.z.string().url().describe("Public http/https URL to extract. Use this when the user provides one specific page URL."),
1014
+ screenshot: import_zod.z.boolean().default(false).describe("Also capture a full-page screenshot of the URL. Saved to ~/Downloads/mcp-scraper/screenshots/ and returned inline. Use when the user asks to see or capture the page visually."),
1015
+ screenshotDevice: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("Viewport for screenshot. desktop = 1440\xD7900. mobile = 390\xD7844. Default desktop."),
1016
+ extractBranding: import_zod.z.boolean().default(false).describe("Extract brand colors, fonts, logo, and favicon using a rendered browser session. Returns colorScheme (light/dark), colors (primary/accent/background/text/heading as hex), fonts (heading/body family names), and assets (logo URL, favicon URL). Use when the user asks about brand colors, site theme, or brand assets."),
1017
+ downloadMedia: import_zod.z.boolean().default(false).describe("Extract and download all page media (images, video, audio) to ~/Downloads/mcp-scraper/media/. Ad networks, tracking pixels, and noise URLs are filtered automatically. Use when the user asks to download or harvest assets from a page."),
1018
+ mediaTypes: import_zod.z.array(import_zod.z.enum(["image", "video", "audio"])).default(["image", "video", "audio"]).describe("Which media types to download. Default all three."),
1019
+ allowLocal: import_zod.z.boolean().default(false).describe("Allow localhost and private-network URLs. For local development only.")
1020
+ };
1021
+ var MapSiteUrlsInputSchema = {
1022
+ url: import_zod.z.string().url().describe("Public website URL or domain to crawl for internal URLs. Use before extract_site when the user asks to audit/map/crawl a site."),
1023
+ maxUrls: import_zod.z.number().int().min(1).max(500).optional().describe("Maximum URLs to discover. Use 100 for normal maps, higher when the user asks for a full inventory.")
1024
+ };
1025
+ var ExtractSiteInputSchema = {
1026
+ url: import_zod.z.string().url().describe("Public website URL or domain to extract across multiple pages. Use when the user asks for a site audit, website crawl, or full-site content/schema extraction."),
1027
+ maxPages: import_zod.z.number().int().min(1).max(50).optional().describe("Maximum pages to extract. Use 50 when the user asks for full results or a complete crawl within MCP limits.")
1028
+ };
1029
+ var YoutubeHarvestInputSchema = {
1030
+ mode: import_zod.z.enum(["search", "channel"]).describe("Use search for topic/keyword requests. Use channel when the user provides @handle, channel ID, or channel URL."),
1031
+ query: import_zod.z.string().optional().describe("Required when mode is search. The YouTube search topic in the user\u2019s words."),
1032
+ channelHandle: import_zod.z.string().optional().describe("YouTube channel handle, channel ID, or URL. Examples: @mkbhd, UC..., https://youtube.com/@mkbhd."),
1033
+ maxVideos: import_zod.z.number().int().min(1).max(500).default(50).describe("Number of videos to return. Default 50. Increase when user asks for full channel/history.")
1034
+ };
1035
+ var YoutubeTranscribeInputSchema = {
1036
+ videoId: import_zod.z.string().min(1).describe("YouTube video ID, e.g. dQw4w9WgXcQ")
1037
+ };
1038
+ var FacebookPageIntelInputSchema = {
1039
+ pageId: import_zod.z.string().optional(),
1040
+ libraryId: import_zod.z.string().optional(),
1041
+ query: import_zod.z.string().optional().describe("Advertiser or brand name when pageId/libraryId is not known. One of pageId, libraryId, or query is required."),
1042
+ maxAds: import_zod.z.number().int().min(1).max(200).default(50),
1043
+ country: import_zod.z.string().length(2).default("US")
1044
+ };
1045
+ var FacebookAdSearchInputSchema = {
1046
+ query: import_zod.z.string().min(1).describe("Advertiser, brand, competitor, niche, or keyword to search in Facebook Ad Library."),
1047
+ country: import_zod.z.string().length(2).default("US"),
1048
+ maxResults: import_zod.z.number().int().min(1).max(20).default(10)
1049
+ };
1050
+ var FacebookAdTranscribeInputSchema = {
1051
+ videoUrl: import_zod.z.string().url().describe("Facebook CDN video URL from a facebook_page_intel result")
1052
+ };
1053
+ var MapsPlaceIntelInputSchema = {
1054
+ businessName: import_zod.z.string().min(1).describe('Business name only. If user says "Elite Roofing Denver CO", use businessName="Elite Roofing" and location="Denver, CO".'),
1055
+ location: import_zod.z.string().min(1).describe('City/region/country where the business should be searched, e.g. "Denver, CO". Infer from the user request when possible.'),
1056
+ gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location."),
1057
+ hl: import_zod.z.string().length(2).default("en").describe("Language inferred from user request."),
1058
+ includeReviews: import_zod.z.boolean().default(false).describe("Whether to fetch individual review cards"),
1059
+ maxReviews: import_zod.z.number().int().min(1).max(500).default(50).describe("Max review cards to return (requires includeReviews: true)")
1060
+ };
1061
+ var MapsSearchInputSchema = {
1062
+ query: import_zod.z.string().min(1).describe('Business category, niche, keyword, or search term. If the user says "roofers in Denver CO", use query="roofers" and location="Denver, CO". Do not put the location here when it can be separated.'),
1063
+ location: import_zod.z.string().optional().describe('City, region, country, or service area for the Maps search, e.g. "Denver, CO". Infer from the user request when present.'),
1064
+ gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location."),
1065
+ hl: import_zod.z.string().length(2).default("en").describe("Language inferred from user request."),
1066
+ maxResults: import_zod.z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more.")
1067
+ };
1068
+ var NullableString = import_zod.z.string().nullable();
1069
+ var MapsSearchOutputSchema = {
1070
+ query: import_zod.z.string(),
1071
+ location: import_zod.z.string().nullable(),
1072
+ searchQuery: import_zod.z.string(),
1073
+ searchUrl: import_zod.z.string().url(),
1074
+ extractedAt: import_zod.z.string(),
1075
+ requestedMaxResults: import_zod.z.number().int().min(1).max(50),
1076
+ resultCount: import_zod.z.number().int().min(0).max(50),
1077
+ results: import_zod.z.array(import_zod.z.object({
1078
+ position: import_zod.z.number().int().min(1),
1079
+ name: import_zod.z.string(),
1080
+ placeUrl: import_zod.z.string().url(),
1081
+ cid: NullableString,
1082
+ cidDecimal: NullableString,
1083
+ rating: NullableString,
1084
+ reviewCount: NullableString,
1085
+ category: NullableString,
1086
+ address: NullableString,
1087
+ websiteUrl: NullableString,
1088
+ directionsUrl: NullableString,
1089
+ metadata: import_zod.z.array(import_zod.z.string())
1090
+ })),
1091
+ durationMs: import_zod.z.number().int().min(0)
1092
+ };
1093
+ var OrganicResultOutput = import_zod.z.object({
1094
+ position: import_zod.z.number().int(),
1095
+ title: import_zod.z.string(),
1096
+ url: import_zod.z.string(),
1097
+ domain: import_zod.z.string(),
1098
+ snippet: NullableString
1099
+ });
1100
+ var AiOverviewOutput = import_zod.z.object({
1101
+ detected: import_zod.z.boolean(),
1102
+ text: NullableString
1103
+ }).nullable();
1104
+ var EntityIdsOutput = import_zod.z.object({
1105
+ kgIds: import_zod.z.array(import_zod.z.string()),
1106
+ cids: import_zod.z.array(import_zod.z.string()),
1107
+ gcids: import_zod.z.array(import_zod.z.string())
1108
+ }).nullable();
1109
+ var HarvestPaaOutputSchema = {
1110
+ query: import_zod.z.string(),
1111
+ location: NullableString,
1112
+ questionCount: import_zod.z.number().int().min(0),
1113
+ completionStatus: NullableString,
1114
+ questions: import_zod.z.array(import_zod.z.object({
1115
+ question: import_zod.z.string(),
1116
+ answer: NullableString,
1117
+ sourceTitle: NullableString,
1118
+ sourceSite: NullableString
1119
+ })),
1120
+ organicResults: import_zod.z.array(OrganicResultOutput),
1121
+ aiOverview: AiOverviewOutput,
1122
+ entityIds: EntityIdsOutput,
1123
+ durationMs: import_zod.z.number().min(0).nullable()
1124
+ };
1125
+ var SearchSerpOutputSchema = {
1126
+ query: import_zod.z.string(),
1127
+ location: NullableString,
1128
+ organicResults: import_zod.z.array(OrganicResultOutput),
1129
+ localPack: import_zod.z.array(import_zod.z.object({
1130
+ position: import_zod.z.number().int(),
1131
+ name: import_zod.z.string(),
1132
+ rating: NullableString,
1133
+ reviewCount: NullableString,
1134
+ websiteUrl: NullableString
1135
+ })),
1136
+ aiOverview: AiOverviewOutput,
1137
+ entityIds: EntityIdsOutput
1138
+ };
1139
+ var ExtractUrlOutputSchema = {
1140
+ url: import_zod.z.string(),
1141
+ title: NullableString,
1142
+ headings: import_zod.z.array(import_zod.z.object({
1143
+ level: import_zod.z.number().int(),
1144
+ text: import_zod.z.string()
1145
+ })),
1146
+ schemaBlockCount: import_zod.z.number().int().min(0),
1147
+ entityName: NullableString,
1148
+ entityTypes: import_zod.z.array(import_zod.z.string()),
1149
+ napScore: import_zod.z.number().nullable(),
1150
+ missingSchemaFields: import_zod.z.array(import_zod.z.string()),
1151
+ screenshotSaved: NullableString
1152
+ };
1153
+ var ExtractSiteOutputSchema = {
1154
+ url: import_zod.z.string(),
1155
+ pageCount: import_zod.z.number().int().min(0),
1156
+ pages: import_zod.z.array(import_zod.z.object({
1157
+ url: import_zod.z.string(),
1158
+ title: NullableString,
1159
+ schemaTypes: import_zod.z.array(import_zod.z.string())
1160
+ })),
1161
+ durationMs: import_zod.z.number().min(0)
1162
+ };
1163
+ var MapsPlaceIntelOutputSchema = {
1164
+ name: import_zod.z.string(),
1165
+ rating: NullableString,
1166
+ reviewCount: NullableString,
1167
+ category: NullableString,
1168
+ address: NullableString,
1169
+ phone: NullableString,
1170
+ website: NullableString,
1171
+ hoursSummary: NullableString,
1172
+ bookingUrl: NullableString,
1173
+ kgmid: NullableString,
1174
+ cidDecimal: NullableString,
1175
+ cidUrl: NullableString,
1176
+ lat: import_zod.z.number().nullable(),
1177
+ lng: import_zod.z.number().nullable(),
1178
+ reviewsStatus: import_zod.z.string(),
1179
+ reviewsCollected: import_zod.z.number().int().min(0),
1180
+ reviewTopics: import_zod.z.array(import_zod.z.object({
1181
+ label: import_zod.z.string(),
1182
+ count: import_zod.z.string()
1183
+ }))
1184
+ };
1185
+ var CreditsInfoOutputSchema = {
1186
+ balanceCredits: import_zod.z.number().nullable(),
1187
+ matchedCost: import_zod.z.object({
1188
+ label: import_zod.z.string(),
1189
+ credits: import_zod.z.number(),
1190
+ unit: import_zod.z.string(),
1191
+ notes: NullableString
1192
+ }).nullable(),
1193
+ costs: import_zod.z.array(import_zod.z.object({
1194
+ key: import_zod.z.string(),
1195
+ label: import_zod.z.string(),
1196
+ credits: import_zod.z.number(),
1197
+ unit: import_zod.z.string(),
1198
+ notes: NullableString
1199
+ })),
1200
+ ledger: import_zod.z.array(import_zod.z.object({
1201
+ createdAt: import_zod.z.string(),
1202
+ operation: import_zod.z.string(),
1203
+ credits: import_zod.z.number(),
1204
+ description: NullableString
1205
+ }))
1206
+ };
1207
+ var MapSiteUrlsOutputSchema = {
1208
+ startUrl: import_zod.z.string(),
1209
+ totalFound: import_zod.z.number().int().min(0),
1210
+ truncated: import_zod.z.boolean(),
1211
+ okCount: import_zod.z.number().int().min(0),
1212
+ redirectCount: import_zod.z.number().int().min(0),
1213
+ brokenCount: import_zod.z.number().int().min(0),
1214
+ urls: import_zod.z.array(import_zod.z.object({
1215
+ url: import_zod.z.string(),
1216
+ status: import_zod.z.number().int().nullable()
1217
+ })),
1218
+ durationMs: import_zod.z.number().min(0)
1219
+ };
1220
+ var YoutubeHarvestOutputSchema = {
1221
+ mode: import_zod.z.string(),
1222
+ videoCount: import_zod.z.number().int().min(0),
1223
+ channel: import_zod.z.object({
1224
+ title: NullableString,
1225
+ subscriberCount: NullableString
1226
+ }).nullable(),
1227
+ videos: import_zod.z.array(import_zod.z.object({
1228
+ videoId: import_zod.z.string(),
1229
+ title: import_zod.z.string(),
1230
+ channelName: NullableString,
1231
+ views: NullableString,
1232
+ duration: NullableString,
1233
+ url: NullableString
1234
+ }))
1235
+ };
1236
+ var FacebookAdSearchOutputSchema = {
1237
+ query: import_zod.z.string(),
1238
+ advertiserCount: import_zod.z.number().int().min(0),
1239
+ advertisers: import_zod.z.array(import_zod.z.object({
1240
+ name: NullableString,
1241
+ adCount: import_zod.z.number().int().nullable(),
1242
+ libraryId: NullableString
1243
+ }))
1244
+ };
1245
+ var FacebookPageIntelOutputSchema = {
1246
+ advertiserName: NullableString,
1247
+ totalAds: import_zod.z.number().int().min(0),
1248
+ activeCount: import_zod.z.number().int().min(0),
1249
+ videoCount: import_zod.z.number().int().min(0),
1250
+ imageCount: import_zod.z.number().int().min(0),
1251
+ ads: import_zod.z.array(import_zod.z.object({
1252
+ libraryId: NullableString,
1253
+ status: NullableString,
1254
+ creativeType: NullableString,
1255
+ headline: NullableString,
1256
+ cta: NullableString,
1257
+ startDate: NullableString,
1258
+ videoUrl: NullableString,
1259
+ variations: import_zod.z.number().int().nullable()
1260
+ }))
1261
+ };
1262
+ var CreditsInfoInputSchema = {
1263
+ item: import_zod.z.string().optional().describe('Optional tool, action, or feature to look up, e.g. "maps reviews", "extract_url", or "YouTube transcription"'),
1264
+ includeLedger: import_zod.z.boolean().default(false).describe("Whether to include recent credit ledger entries")
1265
+ };
1266
+ var SearchSerpInputSchema = {
1267
+ query: import_zod.z.string().min(1).describe('Core search topic only. Separate location when possible. If user says "best dentist in Brooklyn NY serp", use query="best dentist" and location="Brooklyn, NY".'),
1268
+ location: import_zod.z.string().optional().describe("City, region, or country for geo-targeted results, inferred from user request when present."),
1269
+ gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location or user language."),
1270
+ hl: import_zod.z.string().default("en").describe("Google interface/content language inferred from user request."),
1271
+ device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
1272
+ proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default so city/state searches create or reuse a matching residential proxy. Use configured for the static configured proxy. Use none only for direct-network debugging."),
1273
+ proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use only when the user gives a specific ZIP or city-center proxy targeting needs to be forced."),
1274
+ debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/session/location diagnostics in the response. Use true when debugging localization, CAPTCHA, or proxy behavior."),
1275
+ pages: import_zod.z.number().int().min(1).max(2).default(1).describe("Number of result pages to fetch (1\u20132)")
1276
+ };
1277
+ var CaptureSerpSnapshotInputSchema = {
1278
+ query: import_zod.z.string().min(1).describe("Core search query to capture as a structured SERP Intelligence snapshot. Separate the place into location when the user gives a city, region, country, or ZIP."),
1279
+ location: import_zod.z.string().optional().describe("City, region, country, or service area used for localized Google results. MCP Scraper records location evidence; UULE alone is not proof of localization."),
1280
+ gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from the requested market, e.g. us, gb, ca, au."),
1281
+ hl: import_zod.z.string().default("en").describe("Google interface/content language inferred from the user request."),
1282
+ device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use mobile only when the user asks for mobile rankings or mobile SERP evidence."),
1283
+ proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy behavior for capture. Use location for localized residential proxy targeting, configured for the static residential proxy, and none only for direct-network debugging."),
1284
+ proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting when a precise city-center or ZIP proxy is needed."),
1285
+ pages: import_zod.z.number().int().min(1).max(2).default(1).describe("Number of Google result pages to capture. Use 1 normally and 2 only when the user needs deeper ranking evidence."),
1286
+ debug: import_zod.z.boolean().default(false).describe("Include sanitized browser, proxy, and location diagnostics. Use true when debugging localization, CAPTCHA, proxy selection, or capture reliability."),
1287
+ includePageSnapshots: import_zod.z.boolean().default(false).describe("Also capture ranking-page snapshots for selected SERP URLs through the same product capture path."),
1288
+ pageSnapshotLimit: import_zod.z.number().int().min(0).max(10).default(0).describe("Maximum ranking-page snapshots to capture when includePageSnapshots is true. Use 0 when only SERP evidence is needed.")
1289
+ };
1290
+ var ScreenshotInputSchema = {
1291
+ url: import_zod.z.string().url().describe("URL to capture as a full-page screenshot. Use http or https. Pass allowLocal: true to capture localhost or private-network URLs during development."),
1292
+ device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("Viewport profile. desktop = 1440\xD7900. mobile = 390\xD7844. Use desktop by default; use mobile when the user asks for a mobile view."),
1293
+ allowLocal: import_zod.z.boolean().default(false).describe("Allow localhost and private-network URLs (127.x, 192.168.x, 10.x, etc.). For local development only \u2014 not for production use.")
1294
+ };
1295
+ var CaptureSerpPageSnapshotsInputSchema = {
1296
+ urls: import_zod.z.array(import_zod.z.string().url()).min(1).max(25).describe("Public HTTP/HTTPS URLs to capture as SERP Intelligence page snapshots. Do not pass localhost, private IPs, file URLs, or internal admin URLs."),
1297
+ targets: import_zod.z.array(import_zod.z.object({
1298
+ url: import_zod.z.string().url().describe("Public HTTP/HTTPS URL to capture."),
1299
+ sourceKind: import_zod.z.enum(["organic", "ai_citation", "local_pack_website", "configured_target", "site_subject"]).default("configured_target").describe("Why this page is being captured for SERP Intelligence evidence."),
1300
+ sourcePosition: import_zod.z.number().int().min(1).optional().describe("Ranking or citation position when the page came from SERP evidence.")
1301
+ }).strict()).min(1).max(25).optional().describe("Structured page snapshot targets. Use this instead of urls when source kind or position should be preserved."),
1302
+ maxConcurrency: import_zod.z.number().int().min(1).max(5).default(2).describe("Parallel page captures. Use 2 normally; higher values can increase proxy/browser pressure."),
1303
+ timeoutMs: import_zod.z.number().int().min(1e3).max(6e4).default(15e3).describe("Per-page capture timeout in milliseconds. Increase for slow pages; timeout artifacts are returned as structured capture failures."),
1304
+ debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/proxy diagnostics for page snapshot debugging. Use true for capture, network, or proxy troubleshooting.")
1305
+ };
1306
+
1079
1307
  // src/mcp/paa-mcp-server.ts
1080
1308
  function liveWebToolAnnotations(title) {
1081
1309
  return {
@@ -1086,27 +1314,65 @@ function liveWebToolAnnotations(title) {
1086
1314
  openWorldHint: true
1087
1315
  };
1088
1316
  }
1317
+ function listSavedReports() {
1318
+ try {
1319
+ const dir = outputBaseDir();
1320
+ return (0, import_node_fs2.readdirSync)(dir).filter((f) => f.endsWith(".md")).map((f) => ({ filename: f, mtimeMs: (0, import_node_fs2.statSync)((0, import_node_path2.join)(dir, f)).mtimeMs })).sort((a, b) => b.mtimeMs - a.mtimeMs).slice(0, 100);
1321
+ } catch {
1322
+ return [];
1323
+ }
1324
+ }
1325
+ function registerSavedReportResources(server2) {
1326
+ server2.registerResource(
1327
+ "saved-report",
1328
+ new import_mcp.ResourceTemplate("report://{filename}", {
1329
+ list: () => ({
1330
+ resources: listSavedReports().map((r) => ({
1331
+ uri: `report://${encodeURIComponent(r.filename)}`,
1332
+ name: r.filename,
1333
+ mimeType: "text/markdown"
1334
+ }))
1335
+ })
1336
+ }),
1337
+ {
1338
+ title: "Saved MCP Scraper Reports",
1339
+ description: "Markdown research reports saved by previous MCP Scraper tool calls. Read a report to reuse prior research without re-scraping or spending credits.",
1340
+ mimeType: "text/markdown"
1341
+ },
1342
+ async (uri, variables) => {
1343
+ const requested = Array.isArray(variables.filename) ? variables.filename[0] : variables.filename;
1344
+ const filename = (0, import_node_path2.basename)(decodeURIComponent(String(requested ?? "")));
1345
+ if (!filename.endsWith(".md")) throw new Error("Only saved .md reports can be read");
1346
+ const text = (0, import_node_fs2.readFileSync)((0, import_node_path2.join)(outputBaseDir(), filename), "utf8");
1347
+ return { contents: [{ uri: uri.href, mimeType: "text/markdown", text }] };
1348
+ }
1349
+ );
1350
+ }
1089
1351
  function buildPaaExtractorMcpServer(executor2, options = {}) {
1090
1352
  const savesReports = options.savesReportsLocally !== false;
1091
1353
  const reportNote = savesReports ? " Saves a full Markdown report locally." : " Reports are returned inline; no files are saved on this hosted endpoint.";
1092
1354
  const withReportNote = (description) => `${description}${reportNote}`;
1093
1355
  const server2 = new import_mcp.McpServer({ name: "mcp-scraper", version: PACKAGE_VERSION });
1356
+ if (savesReports) registerSavedReportResources(server2);
1094
1357
  server2.registerTool("harvest_paa", {
1095
1358
  title: "Google PAA + SERP Harvest",
1096
- description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-150 for "full", "deep", "all", or comprehensive research. Credits are charged by extracted question; unused request hold is refunded.'),
1359
+ description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-200 for "full", "deep", "all", or comprehensive research. Deep harvests above 100 questions can run for several minutes with no interim progress \u2014 warn the user before starting one and keep maxQuestions at or below 100 unless they explicitly want a deep harvest. Credits are charged by extracted question; unused request hold is refunded.'),
1097
1360
  inputSchema: HarvestPaaInputSchema,
1361
+ outputSchema: HarvestPaaOutputSchema,
1098
1362
  annotations: liveWebToolAnnotations("Google PAA + SERP Harvest")
1099
1363
  }, async (input) => formatHarvestPaa(await executor2.harvestPaa(input), input));
1100
1364
  server2.registerTool("search_serp", {
1101
1365
  title: "Google SERP Lookup",
1102
1366
  description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request."),
1103
1367
  inputSchema: SearchSerpInputSchema,
1368
+ outputSchema: SearchSerpOutputSchema,
1104
1369
  annotations: liveWebToolAnnotations("Google SERP Lookup")
1105
1370
  }, async (input) => formatSearchSerp(await executor2.searchSerp(input), input));
1106
1371
  server2.registerTool("extract_url", {
1107
1372
  title: "Single URL Extract",
1108
1373
  description: withReportNote("Extract structured data from one public URL: page content as Markdown, heading structure, JSON-LD schema, entity details, NAP score, metadata, and missing schema fields. Use when the user provides a single URL or asks to inspect/scrape one page."),
1109
1374
  inputSchema: ExtractUrlInputSchema,
1375
+ outputSchema: ExtractUrlOutputSchema,
1110
1376
  annotations: liveWebToolAnnotations("Single URL Extract")
1111
1377
  }, async (input) => formatExtractUrl(await executor2.extractUrl(input), input));
1112
1378
  server2.registerTool("map_site_urls", {
@@ -1120,6 +1386,7 @@ function buildPaaExtractorMcpServer(executor2, options = {}) {
1120
1386
  title: "Multi-Page Site Extract",
1121
1387
  description: withReportNote("Run multi-page extraction across a public website. Returns per-page titles, H1s, metadata, headings, schema/entity data, canonical URLs, and content. Use for website audits, competitor audits, and full-site extraction."),
1122
1388
  inputSchema: ExtractSiteInputSchema,
1389
+ outputSchema: ExtractSiteOutputSchema,
1123
1390
  annotations: liveWebToolAnnotations("Multi-Page Site Extract")
1124
1391
  }, async (input) => formatExtractSite(await executor2.extractSite(input), input));
1125
1392
  server2.registerTool("youtube_harvest", {
@@ -1159,6 +1426,7 @@ function buildPaaExtractorMcpServer(executor2, options = {}) {
1159
1426
  title: "Google Maps Business Profile Details",
1160
1427
  description: withReportNote('Extract Google Maps business intelligence for one known/named business: rating, review count, category, address, phone, website, hours, booking URL, review histogram, review topics, about attributes, entity IDs, and optional review cards. Do not use this for category searches, local market prospect lists, or requests for multiple GMB/GBP profiles; use maps_search first for those. Split business name from location (e.g. "Elite Roofing Denver CO" => businessName "Elite Roofing", location "Denver, CO"). Pass includeReviews true when the user asks for reviews/customer pain.'),
1161
1428
  inputSchema: MapsPlaceIntelInputSchema,
1429
+ outputSchema: MapsPlaceIntelOutputSchema,
1162
1430
  annotations: liveWebToolAnnotations("Google Maps Business Profile Details")
1163
1431
  }, async (input) => formatMapsPlaceIntel(await executor2.mapsPlaceIntel(input), input));
1164
1432
  server2.registerTool("maps_search", {
@@ -1172,6 +1440,7 @@ function buildPaaExtractorMcpServer(executor2, options = {}) {
1172
1440
  title: "MCP Scraper Credits & Costs",
1173
1441
  description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",
1174
1442
  inputSchema: CreditsInfoInputSchema,
1443
+ outputSchema: CreditsInfoOutputSchema,
1175
1444
  annotations: {
1176
1445
  title: "MCP Scraper Credits & Costs",
1177
1446
  readOnlyHint: true,
@@ -1186,10 +1455,10 @@ function buildPaaExtractorMcpServer(executor2, options = {}) {
1186
1455
  // bin/mcp-stdio-server.ts
1187
1456
  function readApiKeyFile() {
1188
1457
  const explicitPath = process.env.MCP_SCRAPER_KEY_PATH?.trim();
1189
- const paths = [explicitPath, (0, import_node_path2.join)((0, import_node_os2.homedir)(), ".mcp-scraper-key")].filter(Boolean);
1458
+ const paths = [explicitPath, (0, import_node_path3.join)((0, import_node_os2.homedir)(), ".mcp-scraper-key")].filter(Boolean);
1190
1459
  for (const path of paths) {
1191
1460
  try {
1192
- const value = (0, import_node_fs2.readFileSync)(path, "utf8").trim();
1461
+ const value = (0, import_node_fs3.readFileSync)(path, "utf8").trim();
1193
1462
  if (value) return value;
1194
1463
  } catch {
1195
1464
  }