mcp-scraper 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,14 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
+ browserServiceApiKey,
3
4
  harvest
4
- } from "../chunk-LUBDFS67.js";
5
+ } from "../chunk-TM22BLWP.js";
5
6
  import "../chunk-ZMOWIBMK.js";
6
7
 
7
8
  // src/cli.ts
8
9
  import { Command } from "commander";
9
10
  var program = new Command();
10
- program.name("paa-harvest").description("Recursively extract Google People Also Ask questions").requiredOption("-q, --query <query>", "Seed query").option("-l, --location <location>", 'Location name (e.g. "austin" or "Austin,Texas,United States")').option("--gl <gl>", "Google country code", "us").option("--hl <hl>", "Google language code", "en").option("-d, --depth <depth>", "BFS depth (1-30)", "3").option("-m, --max-questions <n>", "Max questions to harvest", "100").option("-o, --output <dir>", "Output directory", "./paa-output").option("-f, --format <format>", "Output format: json, csv, or both", "both").option("--headless", "Run browser in headless mode", false).option("--profile <dir>", "Persistent browser profile directory").option("--proxy <url>", "Proxy server URL").option("--kernel-api-key <key>", "Kernel.sh API key (or set KERNEL_API_KEY env var)").action(async (opts) => {
11
+ program.name("paa-harvest").description("Recursively extract Google People Also Ask questions").requiredOption("-q, --query <query>", "Seed query").option("-l, --location <location>", 'Location name (e.g. "austin" or "Austin,Texas,United States")').option("--gl <gl>", "Google country code", "us").option("--hl <hl>", "Google language code", "en").option("-d, --depth <depth>", "BFS depth (1-30)", "3").option("-m, --max-questions <n>", "Max questions to harvest", "100").option("-o, --output <dir>", "Output directory", "./paa-output").option("-f, --format <format>", "Output format: json, csv, or both", "both").option("--headless", "Run browser in headless mode", false).option("--profile <dir>", "Persistent browser profile directory").option("--proxy <url>", "Proxy server URL").option("--kernel-api-key <key>", "Browser service API key (or set BROWSER_SERVICE_API_KEY env var)").action(async (opts) => {
11
12
  try {
12
13
  const result = await harvest({
13
14
  query: opts.query,
@@ -21,7 +22,7 @@ program.name("paa-harvest").description("Recursively extract Google People Also
21
22
  headless: opts.headless,
22
23
  profileDir: opts.profile,
23
24
  proxy: opts.proxy,
24
- kernelApiKey: opts.kernelApiKey ?? process.env.KERNEL_API_KEY
25
+ kernelApiKey: opts.kernelApiKey ?? browserServiceApiKey()
25
26
  });
26
27
  console.log(JSON.stringify({ totalQuestions: result.totalQuestions, outputDir: result.stats.seed }));
27
28
  } catch (err) {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/cli.ts","../../bin/paa-harvest.ts"],"sourcesContent":["import { Command } from 'commander'\nimport { harvest } from './harvest.js'\n\nconst program = new Command()\n\nprogram\n .name('paa-harvest')\n .description('Recursively extract Google People Also Ask questions')\n .requiredOption('-q, --query <query>', 'Seed query')\n .option('-l, --location <location>', 'Location name (e.g. \"austin\" or \"Austin,Texas,United States\")')\n .option('--gl <gl>', 'Google country code', 'us')\n .option('--hl <hl>', 'Google language code', 'en')\n .option('-d, --depth <depth>', 'BFS depth (1-30)', '3')\n .option('-m, --max-questions <n>', 'Max questions to harvest', '100')\n .option('-o, --output <dir>', 'Output directory', './paa-output')\n .option('-f, --format <format>', 'Output format: json, csv, or both', 'both')\n .option('--headless', 'Run browser in headless mode', false)\n .option('--profile <dir>', 'Persistent browser profile directory')\n .option('--proxy <url>', 'Proxy server URL')\n .option('--kernel-api-key <key>', 'Kernel.sh API key (or set KERNEL_API_KEY env var)')\n .action(async (opts) => {\n try {\n const result = await harvest({\n query: opts.query,\n location: opts.location,\n gl: opts.gl,\n hl: opts.hl,\n depth: parseInt(opts.depth, 10),\n maxQuestions: parseInt(opts.maxQuestions, 10),\n outputDir: opts.output,\n format: opts.format,\n headless: opts.headless,\n profileDir: opts.profile,\n proxy: opts.proxy,\n kernelApiKey: opts.kernelApiKey ?? process.env.KERNEL_API_KEY,\n })\n console.log(JSON.stringify({ totalQuestions: result.totalQuestions, outputDir: result.stats.seed }))\n } catch (err) {\n console.error(err instanceof Error ? err.message : String(err))\n process.exit(1)\n }\n })\n\nexport async function runCli(): Promise<void> {\n await program.parseAsync()\n}\n","#!/usr/bin/env node\nimport { runCli } from '../src/cli.js'\nrunCli()\n"],"mappings":";;;;;;;AAAA,SAAS,eAAe;AAGxB,IAAM,UAAU,IAAI,QAAQ;AAE5B,QACG,KAAK,aAAa,EAClB,YAAY,sDAAsD,EAClE,eAAe,uBAAuB,YAAY,EAClD,OAAO,6BAA6B,+DAA+D,EACnG,OAAO,aAAa,uBAAuB,IAAI,EAC/C,OAAO,aAAa,wBAAwB,IAAI,EAChD,OAAO,uBAAuB,oBAAoB,GAAG,EACrD,OAAO,2BAA2B,4BAA4B,KAAK,EACnE,OAAO,sBAAsB,oBAAoB,cAAc,EAC/D,OAAO,yBAAyB,qCAAqC,MAAM,EAC3E,OAAO,cAAc,gCAAgC,KAAK,EAC1D,OAAO,mBAAmB,sCAAsC,EAChE,OAAO,iBAAiB,kBAAkB,EAC1C,OAAO,0BAA0B,mDAAmD,EACpF,OAAO,OAAO,SAAS;AACtB,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ;AAAA,MAC3B,OAAO,KAAK;AAAA,MACZ,UAAU,KAAK;AAAA,MACf,IAAI,KAAK;AAAA,MACT,IAAI,KAAK;AAAA,MACT,OAAO,SAAS,KAAK,OAAO,EAAE;AAAA,MAC9B,cAAc,SAAS,KAAK,cAAc,EAAE;AAAA,MAC5C,WAAW,KAAK;AAAA,MAChB,QAAQ,KAAK;AAAA,MACb,UAAU,KAAK;AAAA,MACf,YAAY,KAAK;AAAA,MACjB,OAAO,KAAK;AAAA,MACZ,cAAc,KAAK,gBAAgB,QAAQ,IAAI;AAAA,IACjD,CAAC;AACD,YAAQ,IAAI,KAAK,UAAU,EAAE,gBAAgB,OAAO,gBAAgB,WAAW,OAAO,MAAM,KAAK,CAAC,CAAC;AAAA,EACrG,SAAS,KAAK;AACZ,YAAQ,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAC9D,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF,CAAC;AAEH,eAAsB,SAAwB;AAC5C,QAAM,QAAQ,WAAW;AAC3B;;;AC3CA,OAAO;","names":[]}
1
+ {"version":3,"sources":["../../src/cli.ts","../../bin/paa-harvest.ts"],"sourcesContent":["import { Command } from 'commander'\nimport { browserServiceApiKey } from './lib/browser-service-env.js'\nimport { harvest } from './harvest.js'\n\nconst program = new Command()\n\nprogram\n .name('paa-harvest')\n .description('Recursively extract Google People Also Ask questions')\n .requiredOption('-q, --query <query>', 'Seed query')\n .option('-l, --location <location>', 'Location name (e.g. \"austin\" or \"Austin,Texas,United States\")')\n .option('--gl <gl>', 'Google country code', 'us')\n .option('--hl <hl>', 'Google language code', 'en')\n .option('-d, --depth <depth>', 'BFS depth (1-30)', '3')\n .option('-m, --max-questions <n>', 'Max questions to harvest', '100')\n .option('-o, --output <dir>', 'Output directory', './paa-output')\n .option('-f, --format <format>', 'Output format: json, csv, or both', 'both')\n .option('--headless', 'Run browser in headless mode', false)\n .option('--profile <dir>', 'Persistent browser profile directory')\n .option('--proxy <url>', 'Proxy server URL')\n .option('--kernel-api-key <key>', 'Browser service API key (or set BROWSER_SERVICE_API_KEY env var)')\n .action(async (opts) => {\n try {\n const result = await harvest({\n query: opts.query,\n location: opts.location,\n gl: opts.gl,\n hl: opts.hl,\n depth: parseInt(opts.depth, 10),\n maxQuestions: parseInt(opts.maxQuestions, 10),\n outputDir: opts.output,\n format: opts.format,\n headless: opts.headless,\n profileDir: opts.profile,\n proxy: opts.proxy,\n kernelApiKey: opts.kernelApiKey ?? browserServiceApiKey(),\n })\n console.log(JSON.stringify({ totalQuestions: result.totalQuestions, outputDir: result.stats.seed }))\n } catch (err) {\n console.error(err instanceof Error ? err.message : String(err))\n process.exit(1)\n }\n })\n\nexport async function runCli(): Promise<void> {\n await program.parseAsync()\n}\n","#!/usr/bin/env node\nimport { runCli } from '../src/cli.js'\nrunCli()\n"],"mappings":";;;;;;;;AAAA,SAAS,eAAe;AAIxB,IAAM,UAAU,IAAI,QAAQ;AAE5B,QACG,KAAK,aAAa,EAClB,YAAY,sDAAsD,EAClE,eAAe,uBAAuB,YAAY,EAClD,OAAO,6BAA6B,+DAA+D,EACnG,OAAO,aAAa,uBAAuB,IAAI,EAC/C,OAAO,aAAa,wBAAwB,IAAI,EAChD,OAAO,uBAAuB,oBAAoB,GAAG,EACrD,OAAO,2BAA2B,4BAA4B,KAAK,EACnE,OAAO,sBAAsB,oBAAoB,cAAc,EAC/D,OAAO,yBAAyB,qCAAqC,MAAM,EAC3E,OAAO,cAAc,gCAAgC,KAAK,EAC1D,OAAO,mBAAmB,sCAAsC,EAChE,OAAO,iBAAiB,kBAAkB,EAC1C,OAAO,0BAA0B,kEAAkE,EACnG,OAAO,OAAO,SAAS;AACtB,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ;AAAA,MAC3B,OAAO,KAAK;AAAA,MACZ,UAAU,KAAK;AAAA,MACf,IAAI,KAAK;AAAA,MACT,IAAI,KAAK;AAAA,MACT,OAAO,SAAS,KAAK,OAAO,EAAE;AAAA,MAC9B,cAAc,SAAS,KAAK,cAAc,EAAE;AAAA,MAC5C,WAAW,KAAK;AAAA,MAChB,QAAQ,KAAK;AAAA,MACb,UAAU,KAAK;AAAA,MACf,YAAY,KAAK;AAAA,MACjB,OAAO,KAAK;AAAA,MACZ,cAAc,KAAK,gBAAgB,qBAAqB;AAAA,IAC1D,CAAC;AACD,YAAQ,IAAI,KAAK,UAAU,EAAE,gBAAgB,OAAO,gBAAgB,WAAW,OAAO,MAAM,KAAK,CAAC,CAAC;AAAA,EACrG,SAAS,KAAK;AACZ,YAAQ,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAC9D,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF,CAAC;AAEH,eAAsB,SAAwB;AAC5C,QAAM,QAAQ,WAAW;AAC3B;;;AC5CA,OAAO;","names":[]}
@@ -20,7 +20,7 @@ function harvestTimeoutBudget(maxQuestions, serpOnly = false) {
20
20
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
21
21
 
22
22
  // src/version.ts
23
- var PACKAGE_VERSION = "0.1.7";
23
+ var PACKAGE_VERSION = "0.1.8";
24
24
 
25
25
  // src/mcp/mcp-tool-schemas.ts
26
26
  import { z } from "zod";
@@ -116,6 +116,120 @@ var MapsSearchOutputSchema = {
116
116
  })),
117
117
  durationMs: z.number().int().min(0)
118
118
  };
119
+ var OrganicResultOutput = z.object({
120
+ position: z.number().int(),
121
+ title: z.string(),
122
+ url: z.string(),
123
+ domain: z.string(),
124
+ snippet: NullableString
125
+ });
126
+ var AiOverviewOutput = z.object({
127
+ detected: z.boolean(),
128
+ text: NullableString
129
+ }).nullable();
130
+ var EntityIdsOutput = z.object({
131
+ kgIds: z.array(z.string()),
132
+ cids: z.array(z.string()),
133
+ gcids: z.array(z.string())
134
+ }).nullable();
135
+ var HarvestPaaOutputSchema = {
136
+ query: z.string(),
137
+ location: NullableString,
138
+ questionCount: z.number().int().min(0),
139
+ completionStatus: NullableString,
140
+ questions: z.array(z.object({
141
+ question: z.string(),
142
+ answer: NullableString,
143
+ sourceTitle: NullableString,
144
+ sourceSite: NullableString
145
+ })),
146
+ organicResults: z.array(OrganicResultOutput),
147
+ aiOverview: AiOverviewOutput,
148
+ entityIds: EntityIdsOutput,
149
+ durationMs: z.number().min(0).nullable()
150
+ };
151
+ var SearchSerpOutputSchema = {
152
+ query: z.string(),
153
+ location: NullableString,
154
+ organicResults: z.array(OrganicResultOutput),
155
+ localPack: z.array(z.object({
156
+ position: z.number().int(),
157
+ name: z.string(),
158
+ rating: NullableString,
159
+ reviewCount: NullableString,
160
+ websiteUrl: NullableString
161
+ })),
162
+ aiOverview: AiOverviewOutput,
163
+ entityIds: EntityIdsOutput
164
+ };
165
+ var ExtractUrlOutputSchema = {
166
+ url: z.string(),
167
+ title: NullableString,
168
+ headings: z.array(z.object({
169
+ level: z.number().int(),
170
+ text: z.string()
171
+ })),
172
+ schemaBlockCount: z.number().int().min(0),
173
+ entityName: NullableString,
174
+ entityTypes: z.array(z.string()),
175
+ napScore: z.number().nullable(),
176
+ missingSchemaFields: z.array(z.string()),
177
+ screenshotSaved: NullableString
178
+ };
179
+ var ExtractSiteOutputSchema = {
180
+ url: z.string(),
181
+ pageCount: z.number().int().min(0),
182
+ pages: z.array(z.object({
183
+ url: z.string(),
184
+ title: NullableString,
185
+ schemaTypes: z.array(z.string())
186
+ })),
187
+ durationMs: z.number().min(0)
188
+ };
189
+ var MapsPlaceIntelOutputSchema = {
190
+ name: z.string(),
191
+ rating: NullableString,
192
+ reviewCount: NullableString,
193
+ category: NullableString,
194
+ address: NullableString,
195
+ phone: NullableString,
196
+ website: NullableString,
197
+ hoursSummary: NullableString,
198
+ bookingUrl: NullableString,
199
+ kgmid: NullableString,
200
+ cidDecimal: NullableString,
201
+ cidUrl: NullableString,
202
+ lat: z.number().nullable(),
203
+ lng: z.number().nullable(),
204
+ reviewsStatus: z.string(),
205
+ reviewsCollected: z.number().int().min(0),
206
+ reviewTopics: z.array(z.object({
207
+ label: z.string(),
208
+ count: z.string()
209
+ }))
210
+ };
211
+ var CreditsInfoOutputSchema = {
212
+ balanceCredits: z.number().nullable(),
213
+ matchedCost: z.object({
214
+ label: z.string(),
215
+ credits: z.number(),
216
+ unit: z.string(),
217
+ notes: NullableString
218
+ }).nullable(),
219
+ costs: z.array(z.object({
220
+ key: z.string(),
221
+ label: z.string(),
222
+ credits: z.number(),
223
+ unit: z.string(),
224
+ notes: NullableString
225
+ })),
226
+ ledger: z.array(z.object({
227
+ createdAt: z.string(),
228
+ operation: z.string(),
229
+ credits: z.number(),
230
+ description: NullableString
231
+ }))
232
+ };
119
233
  var MapSiteUrlsOutputSchema = {
120
234
  startUrl: z.string(),
121
235
  totalFound: z.number().int().min(0),
@@ -322,7 +436,7 @@ function debugSection(debug) {
322
436
  if (!debug || typeof debug !== "object") return "";
323
437
  const request = debug.request ?? {};
324
438
  const browser = debug.browser ?? {};
325
- const kernel = browser.kernel ?? {};
439
+ const kernel = browser.browserRuntime ?? browser.kernel ?? {};
326
440
  const network = browser.networkLocation ?? {};
327
441
  const nav = browser.serpNavigation ?? {};
328
442
  const proxyResolution = kernel.proxyResolution ?? {};
@@ -348,12 +462,14 @@ function errorAttemptsSection(body) {
348
462
  const lines = attempts.slice(0, 5).map((attempt) => {
349
463
  const debug = attempt.debug ?? {};
350
464
  const browser = debug.browser ?? {};
351
- const kernel = browser.kernel ?? {};
465
+ const kernel = browser.browserRuntime ?? browser.kernel ?? {};
352
466
  const proxyResolution = kernel.proxyResolution ?? {};
353
467
  const network = browser.networkLocation ?? {};
354
468
  const nav = browser.serpNavigation ?? {};
355
469
  const geo = [network.ip, network.city, network.region].filter(Boolean).join(" / ") || "geo unknown";
356
- return `- Attempt ${attempt.attempt_number ?? "?"}: ${attempt.outcome ?? attempt.status ?? "unknown"} \xB7 session ${attempt.kernel_session_id ?? kernel.sessionId ?? "unknown"} \xB7 proxy ${debug.request?.proxyMode ?? kernel.proxyMode ?? "unknown"}${proxyResolution.source ? `/${proxyResolution.source}` : ""} \xB7 ${geo} \xB7 CAPTCHA ${nav.captchaDetected === true ? "yes" : nav.captchaDetected === false ? "no" : "unknown"} \xB7 deleted ${attempt.kernel_delete_succeeded === true ? "yes" : attempt.kernel_delete_succeeded === false ? "no" : "unknown"}`;
470
+ const sessionId = attempt.browser_session_id ?? attempt.kernel_session_id ?? kernel.sessionId ?? "unknown";
471
+ const cleanupSucceeded = attempt.session_cleanup_succeeded ?? attempt.kernel_delete_succeeded;
472
+ return `- Attempt ${attempt.attempt_number ?? "?"}: ${attempt.outcome ?? attempt.status ?? "unknown"} \xB7 session ${sessionId} \xB7 proxy ${debug.request?.proxyMode ?? kernel.proxyMode ?? "unknown"}${proxyResolution.source ? `/${proxyResolution.source}` : ""} \xB7 ${geo} \xB7 CAPTCHA ${nav.captchaDetected === true ? "yes" : nav.captchaDetected === false ? "no" : "unknown"} \xB7 cleanup ${cleanupSucceeded === true ? "yes" : cleanupSucceeded === false ? "no" : "unknown"}`;
357
473
  });
358
474
  return `
359
475
 
@@ -400,7 +516,31 @@ ${serpRows}` : "";
400
516
  const full = `# PAA Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
401
517
 
402
518
  ${paaTable}${serpTable}${entityIdsSection(entityIds)}${aiSection}${statsLine}${debugSection(diagnostics?.debug)}${tips}`;
403
- return oneBlock(full);
519
+ return {
520
+ ...oneBlock(full),
521
+ structuredContent: {
522
+ query: input.query,
523
+ location: input.location ?? null,
524
+ questionCount: flat.length,
525
+ completionStatus: diagnostics?.completionStatus ?? null,
526
+ questions: flat.map((r) => ({
527
+ question: String(r.question ?? ""),
528
+ answer: r.answer ?? null,
529
+ sourceTitle: r.source_title ?? null,
530
+ sourceSite: r.source_site ?? null
531
+ })),
532
+ organicResults: organic.map((r) => ({
533
+ position: Number(r.position) || 0,
534
+ title: String(r.title ?? ""),
535
+ url: String(r.url ?? ""),
536
+ domain: String(r.domain ?? ""),
537
+ snippet: r.snippet ?? null
538
+ })),
539
+ aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
540
+ entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null,
541
+ durationMs: durationMs ?? null
542
+ }
543
+ };
404
544
  }
405
545
  function formatSearchSerp(raw, input) {
406
546
  const parsed = parseData(raw);
@@ -438,7 +578,29 @@ ${localRows}` : "";
438
578
  const full = `# SERP Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
439
579
 
440
580
  ${serpTable}${localSection}${entityIdsSection(entityIds)}${aiSection}${debugSection(diagnostics?.debug)}${tips}`;
441
- return oneBlock(full);
581
+ return {
582
+ ...oneBlock(full),
583
+ structuredContent: {
584
+ query: input.query,
585
+ location: input.location ?? null,
586
+ organicResults: organic.map((r) => ({
587
+ position: Number(r.position) || 0,
588
+ title: String(r.title ?? ""),
589
+ url: String(r.url ?? ""),
590
+ domain: String(r.domain ?? ""),
591
+ snippet: r.snippet ?? null
592
+ })),
593
+ localPack: localPack.map((b) => ({
594
+ position: Number(b.position) || 0,
595
+ name: String(b.name ?? ""),
596
+ rating: b.rating ?? null,
597
+ reviewCount: b.reviewCount ?? null,
598
+ websiteUrl: b.websiteUrl ?? null
599
+ })),
600
+ aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
601
+ entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null
602
+ }
603
+ };
442
604
  }
443
605
  function formatExtractUrl(raw, input) {
444
606
  const parsed = parseData(raw);
@@ -507,15 +669,27 @@ ${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
507
669
  **${title}**
508
670
  ${headingSection}${kpoSection}${brandingSection}${bodySection}${screenshotSection}${mediaSection}${tips}`;
509
671
  const textResult = oneBlock(full);
672
+ const structuredContent = {
673
+ url,
674
+ title: d.title ?? null,
675
+ headings: headings.map((h) => ({ level: Number(h.level) || 0, text: String(h.text ?? "") })),
676
+ schemaBlockCount: schemaCount,
677
+ entityName: kpo?.entityName ?? null,
678
+ entityTypes: kpo?.type ?? [],
679
+ napScore: kpo?.napScore ?? null,
680
+ missingSchemaFields: kpo?.missingFields ?? [],
681
+ screenshotSaved: screenshotPath ?? null
682
+ };
510
683
  if (screenshotMeta?.base64) {
511
684
  return {
512
685
  content: [
513
686
  ...textResult.content,
514
687
  { type: "image", data: screenshotMeta.base64, mimeType: "image/png" }
515
- ]
688
+ ],
689
+ structuredContent
516
690
  };
517
691
  }
518
- return textResult;
692
+ return { ...textResult, structuredContent };
519
693
  }
520
694
  function formatMapSiteUrls(raw, input) {
521
695
  const parsed = parseData(raw);
@@ -585,7 +759,19 @@ ${pageRows}`,
585
759
  - Map URLs first: use \`map_site_urls\`
586
760
  - Inspect a single page: use \`extract_url\``
587
761
  ].join("\n");
588
- return oneBlock(full);
762
+ return {
763
+ ...oneBlock(full),
764
+ structuredContent: {
765
+ url: input.url,
766
+ pageCount: pages.length,
767
+ pages: pages.map((p) => ({
768
+ url: String(p.url ?? ""),
769
+ title: p.title ?? null,
770
+ schemaTypes: p.kpo?.type ?? []
771
+ })),
772
+ durationMs: d.durationMs ?? 0
773
+ }
774
+ };
589
775
  }
590
776
  function formatYoutubeHarvest(raw, input) {
591
777
  const parsed = parseData(raw);
@@ -782,7 +968,26 @@ ${costRows}` : "",
782
968
  |------|-----------|---------|-------------|
783
969
  ${ledgerRows}` : ""
784
970
  ].filter(Boolean).join("\n");
785
- return oneBlock(full);
971
+ return {
972
+ ...oneBlock(full),
973
+ structuredContent: {
974
+ balanceCredits: typeof balance === "number" ? balance : null,
975
+ matchedCost: matched ? { label: matched.label, credits: matched.credits, unit: matched.unit, notes: matched.notes ?? null } : null,
976
+ costs: costs.map((c) => ({
977
+ key: c.key,
978
+ label: c.label,
979
+ credits: c.credits,
980
+ unit: c.unit,
981
+ notes: c.notes ?? null
982
+ })),
983
+ ledger: ledger.map((row) => ({
984
+ createdAt: String(row.created_at ?? ""),
985
+ operation: String(row.operation ?? ""),
986
+ credits: row.amount_mc / 1e3,
987
+ description: row.description ?? null
988
+ }))
989
+ }
990
+ };
786
991
  }
787
992
  function formatMapsSearch(raw, input) {
788
993
  const parsed = parseData(raw);
@@ -931,7 +1136,28 @@ ${entitySection}` : null,
931
1136
  ---
932
1137
  *Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
933
1138
  ].filter(Boolean).join("\n");
934
- return oneBlock(full);
1139
+ return {
1140
+ ...oneBlock(full),
1141
+ structuredContent: {
1142
+ name,
1143
+ rating: rating ?? null,
1144
+ reviewCount: reviewCount ?? null,
1145
+ category: category ?? null,
1146
+ address: address ?? null,
1147
+ phone: phone ?? null,
1148
+ website: website ?? null,
1149
+ hoursSummary: hoursSummary ?? null,
1150
+ bookingUrl: bookingUrl ?? null,
1151
+ kgmid: kgmid ?? null,
1152
+ cidDecimal: cidDecimal ?? null,
1153
+ cidUrl: cidUrl ?? null,
1154
+ lat: lat ?? null,
1155
+ lng: lng ?? null,
1156
+ reviewsStatus,
1157
+ reviewsCollected: reviews.length,
1158
+ reviewTopics: topics.map((t) => ({ label: String(t.label ?? ""), count: String(t.count ?? "") }))
1159
+ }
1160
+ };
935
1161
  }
936
1162
  function formatFacebookAdTranscribe(raw, input) {
937
1163
  const parsed = parseData(raw);
@@ -983,18 +1209,21 @@ function buildPaaExtractorMcpServer(executor, options = {}) {
983
1209
  title: "Google PAA + SERP Harvest",
984
1210
  description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-150 for "full", "deep", "all", or comprehensive research. Credits are charged by extracted question; unused request hold is refunded.'),
985
1211
  inputSchema: HarvestPaaInputSchema,
1212
+ outputSchema: HarvestPaaOutputSchema,
986
1213
  annotations: liveWebToolAnnotations("Google PAA + SERP Harvest")
987
1214
  }, async (input) => formatHarvestPaa(await executor.harvestPaa(input), input));
988
1215
  server.registerTool("search_serp", {
989
1216
  title: "Google SERP Lookup",
990
1217
  description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request."),
991
1218
  inputSchema: SearchSerpInputSchema,
1219
+ outputSchema: SearchSerpOutputSchema,
992
1220
  annotations: liveWebToolAnnotations("Google SERP Lookup")
993
1221
  }, async (input) => formatSearchSerp(await executor.searchSerp(input), input));
994
1222
  server.registerTool("extract_url", {
995
1223
  title: "Single URL Extract",
996
1224
  description: withReportNote("Extract structured data from one public URL: page content as Markdown, heading structure, JSON-LD schema, entity details, NAP score, metadata, and missing schema fields. Use when the user provides a single URL or asks to inspect/scrape one page."),
997
1225
  inputSchema: ExtractUrlInputSchema,
1226
+ outputSchema: ExtractUrlOutputSchema,
998
1227
  annotations: liveWebToolAnnotations("Single URL Extract")
999
1228
  }, async (input) => formatExtractUrl(await executor.extractUrl(input), input));
1000
1229
  server.registerTool("map_site_urls", {
@@ -1008,6 +1237,7 @@ function buildPaaExtractorMcpServer(executor, options = {}) {
1008
1237
  title: "Multi-Page Site Extract",
1009
1238
  description: withReportNote("Run multi-page extraction across a public website. Returns per-page titles, H1s, metadata, headings, schema/entity data, canonical URLs, and content. Use for website audits, competitor audits, and full-site extraction."),
1010
1239
  inputSchema: ExtractSiteInputSchema,
1240
+ outputSchema: ExtractSiteOutputSchema,
1011
1241
  annotations: liveWebToolAnnotations("Multi-Page Site Extract")
1012
1242
  }, async (input) => formatExtractSite(await executor.extractSite(input), input));
1013
1243
  server.registerTool("youtube_harvest", {
@@ -1047,6 +1277,7 @@ function buildPaaExtractorMcpServer(executor, options = {}) {
1047
1277
  title: "Google Maps Business Profile Details",
1048
1278
  description: withReportNote('Extract Google Maps business intelligence for one known/named business: rating, review count, category, address, phone, website, hours, booking URL, review histogram, review topics, about attributes, entity IDs, and optional review cards. Do not use this for category searches, local market prospect lists, or requests for multiple GMB/GBP profiles; use maps_search first for those. Split business name from location (e.g. "Elite Roofing Denver CO" => businessName "Elite Roofing", location "Denver, CO"). Pass includeReviews true when the user asks for reviews/customer pain.'),
1049
1279
  inputSchema: MapsPlaceIntelInputSchema,
1280
+ outputSchema: MapsPlaceIntelOutputSchema,
1050
1281
  annotations: liveWebToolAnnotations("Google Maps Business Profile Details")
1051
1282
  }, async (input) => formatMapsPlaceIntel(await executor.mapsPlaceIntel(input), input));
1052
1283
  server.registerTool("maps_search", {
@@ -1060,6 +1291,7 @@ function buildPaaExtractorMcpServer(executor, options = {}) {
1060
1291
  title: "MCP Scraper Credits & Costs",
1061
1292
  description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",
1062
1293
  inputSchema: CreditsInfoInputSchema,
1294
+ outputSchema: CreditsInfoOutputSchema,
1063
1295
  annotations: {
1064
1296
  title: "MCP Scraper Credits & Costs",
1065
1297
  readOnlyHint: true,
@@ -1183,4 +1415,4 @@ export {
1183
1415
  buildPaaExtractorMcpServer,
1184
1416
  HttpMcpToolExecutor
1185
1417
  };
1186
- //# sourceMappingURL=chunk-3OIRNUF5.js.map
1418
+ //# sourceMappingURL=chunk-RE6HCRYC.js.map