mcp-scraper 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/README.md +13 -2
  2. package/dist/bin/api-server.cjs +957 -243
  3. package/dist/bin/api-server.cjs.map +1 -1
  4. package/dist/bin/api-server.js +2 -2
  5. package/dist/bin/mcp-stdio-server.cjs +540 -158
  6. package/dist/bin/mcp-stdio-server.cjs.map +1 -1
  7. package/dist/bin/mcp-stdio-server.js +2 -1
  8. package/dist/bin/mcp-stdio-server.js.map +1 -1
  9. package/dist/bin/paa-harvest.cjs +36 -5
  10. package/dist/bin/paa-harvest.cjs.map +1 -1
  11. package/dist/bin/paa-harvest.js +5 -3
  12. package/dist/bin/paa-harvest.js.map +1 -1
  13. package/dist/{chunk-6TWZS2FQ.js → chunk-RE6HCRYC.js} +543 -159
  14. package/dist/chunk-RE6HCRYC.js.map +1 -0
  15. package/dist/{chunk-W4P2U5VF.js → chunk-TM22BLWP.js} +46 -34
  16. package/dist/chunk-TM22BLWP.js.map +1 -0
  17. package/dist/{chunk-7HB7NDOY.js → chunk-ZK456YXN.js} +12 -2
  18. package/dist/chunk-ZK456YXN.js.map +1 -0
  19. package/dist/chunk-ZMOWIBMK.js +36 -0
  20. package/dist/chunk-ZMOWIBMK.js.map +1 -0
  21. package/dist/index.cjs +34 -3
  22. package/dist/index.cjs.map +1 -1
  23. package/dist/index.js +2 -1
  24. package/dist/index.js.map +1 -1
  25. package/dist/{server-2Y27U4TO.js → server-QXVVTKJP.js} +311 -48
  26. package/dist/server-QXVVTKJP.js.map +1 -0
  27. package/dist/{worker-UT4ZQU2T.js → worker-AUCXFHEL.js} +6 -4
  28. package/dist/worker-AUCXFHEL.js.map +1 -0
  29. package/docs/adr/0001-in-page-graphql-interception-for-anti-bot-scraping.md +58 -0
  30. package/docs/adr/README.md +11 -0
  31. package/docs/mcp-tool-quality-spec.md +238 -0
  32. package/package.json +5 -4
  33. package/dist/chunk-6TWZS2FQ.js.map +0 -1
  34. package/dist/chunk-7HB7NDOY.js.map +0 -1
  35. package/dist/chunk-W4P2U5VF.js.map +0 -1
  36. package/dist/server-2Y27U4TO.js.map +0 -1
  37. package/dist/worker-UT4ZQU2T.js.map +0 -1
@@ -50,6 +50,109 @@ var init_harvest_timeout = __esm({
50
50
  }
51
51
  });
52
52
 
53
+ // src/lib/browser-service-env.ts
54
/**
 * Resolve the browser-backend API key from the environment.
 *
 * `BROWSER_SERVICE_API_KEY` takes precedence; `KERNEL_API_KEY` is the legacy
 * fallback. A variable that is set but blank (for example `KEY=` in a .env
 * file) is treated as unset, so it can no longer shadow a valid legacy value.
 *
 * @returns {string | undefined} The trimmed key, or `undefined` when neither
 *   variable holds a non-blank value.
 */
function browserServiceApiKey() {
  for (const name of ["BROWSER_SERVICE_API_KEY", "KERNEL_API_KEY"]) {
    const value = process.env[name]?.trim();
    if (value) return value;
  }
  return void 0;
}
58
/**
 * Resolve the configured proxy id from the environment.
 *
 * `BROWSER_SERVICE_PROXY_ID` takes precedence; `KERNEL_PROXY_ID` is the legacy
 * fallback. A variable that is set but blank is treated as unset, so it can
 * no longer shadow a valid legacy value.
 *
 * @returns {string | undefined} The trimmed proxy id, or `undefined` when
 *   neither variable holds a non-blank value.
 */
function browserServiceProxyId() {
  for (const name of ["BROWSER_SERVICE_PROXY_ID", "KERNEL_PROXY_ID"]) {
    const value = process.env[name]?.trim();
    if (value) return value;
  }
  return void 0;
}
62
// Module initializer for src/lib/browser-service-env.ts. The module body has
// no state to set up, so this only registers the module with the bundler's
// `__esm` wrapper (defined outside this view).
var init_browser_service_env = __esm({
  "src/lib/browser-service-env.ts": function () {
    "use strict";
  }
});
67
+
68
+ // src/errors.ts
69
/**
 * Remove references to the browser vendor from a user-facing message.
 *
 * Rewrites, case-insensitively and in this order:
 *   "kernel.sh sessions" / "kernel sessions" -> "sessions"
 *   "kernel.sh session"  / "kernel session"  -> "this session"
 *   "kernel.sh" / standalone "kernel"        -> "the service"
 * Runs of spaces are then collapsed and the result is trimmed.
 *
 * The plural patterns require the trailing "s". Previously they used
 * `sessions?`, which also consumed the singular form, so the singular
 * "this session" replacements could never fire.
 *
 * @param {string} message Raw message that may mention the vendor.
 * @returns {string} The message with vendor names replaced.
 */
function sanitizeVendorName(message) {
  return message
    .replace(/kernel\.sh\s+sessions/gi, "sessions")
    .replace(/kernel\.sh\s+session/gi, "this session")
    .replace(/kernel\.sh/gi, "the service")
    .replace(/kernel\s+sessions/gi, "sessions")
    .replace(/kernel\s+session/gi, "this session")
    .replace(/\bkernel\b/gi, "the service")
    .replace(/ +/g, " ")
    .trim();
}
72
// Error types from src/errors.ts. The bindings are declared here and
// assigned when init_errors() runs.
var RECAPTCHA_INSTRUCTIONS, CaptchaError, ExtractionError, RequestAbortedError;
var init_errors = __esm({
  "src/errors.ts": function () {
    "use strict";
    // Default remediation text attached to CAPTCHA errors.
    RECAPTCHA_INSTRUCTIONS = "Google returned a CAPTCHA. Run with --headless=false to re-warm the browser profile, then retry.";

    // Thrown when a CAPTCHA/block page is detected; carries the remediation
    // instructions both in the message and as a property.
    CaptchaError = class extends Error {
      instructions;
      name = "CaptchaError";
      constructor(instructions) {
        super(`CAPTCHA detected. ${instructions}`);
        this.instructions = instructions;
      }
    };

    // Extraction failure that keeps the underlying error on `cause`.
    ExtractionError = class extends Error {
      cause;
      name = "ExtractionError";
      constructor(message, cause) {
        super(message);
        this.cause = cause;
      }
    };

    // Raised when a request is aborted; the message has a default.
    RequestAbortedError = class extends Error {
      name = "RequestAbortedError";
      constructor(message = "Request aborted before harvest completed") {
        super(message);
      }
    };
  }
});
101
+
102
+ // src/api/outbound-sanitize.ts
103
/**
 * Recursively copy a diagnostics value, renaming vendor-specific keys and
 * scrubbing vendor names from error/message strings.
 *
 * - Strings are passed through sanitizeVendorName only when the key they were
 *   stored under matches SANITIZED_VALUE_KEYS and the text mentions "kernel".
 * - Arrays are mapped element-wise; elements inherit the array's parent key.
 * - Plain objects are rebuilt with keys renamed via KEY_RENAMES. The value
 *   check uses the ORIGINAL key, not the renamed one.
 * - Every other value is returned unchanged.
 *
 * Only own entries of KEY_RENAMES are honoured. A plain `KEY_RENAMES[key]`
 * lookup also resolves inherited members, so a key such as "constructor" or
 * "toString" used to be renamed to a stringified function. Building the copy
 * with Object.fromEntries additionally keeps a "__proto__" key as ordinary
 * data instead of changing the copy's prototype.
 *
 * @param {unknown} value Value to sanitize.
 * @param {string} [parentKey=""] Key under which `value` was stored.
 * @returns {unknown} A sanitized copy (primitives are returned as-is).
 */
function sanitizeOutboundDiagnostics(value, parentKey = "") {
  if (typeof value === "string") {
    const needsScrub = SANITIZED_VALUE_KEYS.test(parentKey) && /kernel/i.test(value);
    return needsScrub ? sanitizeVendorName(value) : value;
  }
  if (Array.isArray(value)) {
    return value.map((item) => sanitizeOutboundDiagnostics(item, parentKey));
  }
  if (value !== null && typeof value === "object") {
    return Object.fromEntries(
      Object.entries(value).map(([key, val]) => {
        const renamed = Object.hasOwn(KEY_RENAMES, key) ? KEY_RENAMES[key] : key;
        return [renamed, sanitizeOutboundDiagnostics(val, key)];
      })
    );
  }
  return value;
}
121
/**
 * Sanitize every entry of an attempts list.
 *
 * Each entry is passed to sanitizeOutboundDiagnostics on its own, so the
 * array index that `map` supplies never leaks in as the parent key.
 *
 * @param {Array<unknown>} attempts Attempt records to sanitize.
 * @returns {Array<unknown>} A new array of sanitized records.
 */
function sanitizeAttempts(attempts) {
  const sanitizeOne = (attempt) => sanitizeOutboundDiagnostics(attempt);
  return attempts.map(sanitizeOne);
}
124
/**
 * Return a harvest result whose `diagnostics.debug` payload is sanitized.
 *
 * When the result has no truthy `diagnostics.debug`, the original value is
 * returned untouched (same reference). Otherwise shallow copies of the result
 * and its diagnostics are returned; the input is not mutated.
 *
 * @param {object | null | undefined} result Harvest result.
 * @returns {object | null | undefined} The original or a sanitized copy.
 */
function sanitizeHarvestResult(result) {
  const diagnostics = result?.diagnostics;
  const debug = diagnostics?.debug;
  if (!debug) {
    return result;
  }
  const cleanedDiagnostics = {
    ...diagnostics,
    debug: sanitizeOutboundDiagnostics(debug)
  };
  return { ...result, diagnostics: cleanedDiagnostics };
}
135
// Lookup tables for src/api/outbound-sanitize.ts, assigned when
// init_outbound_sanitize() runs.
var KEY_RENAMES, SANITIZED_VALUE_KEYS;
var init_outbound_sanitize = __esm({
  "src/api/outbound-sanitize.ts": function () {
    "use strict";
    init_errors();
    // String values are scrubbed only when stored under a key matching this.
    SANITIZED_VALUE_KEYS = /error|message/i;
    // Vendor-specific diagnostic keys -> neutral names (snake_case and
    // camelCase variants).
    KEY_RENAMES = {
      kernel: "browserRuntime",
      kernel_session_id: "browser_session_id",
      kernel_delete_started: "session_cleanup_started",
      kernel_delete_succeeded: "session_cleanup_succeeded",
      kernel_delete_error: "session_cleanup_error",
      kernelSessionId: "browserSessionId",
      kernelDeleteStarted: "sessionCleanupStarted",
      kernelDeleteSucceeded: "sessionCleanupSucceeded",
      kernelDeleteError: "sessionCleanupError",
      kernelProxyId: "proxyId"
    };
  }
});
155
+
53
156
  // src/blog/registry.ts
54
157
  var posts;
55
158
  var init_registry = __esm({
@@ -3425,7 +3528,7 @@ var init_url_utils = __esm({
3425
3528
 
3426
3529
  // src/api/kernel-fetch.ts
3427
3530
  async function fetchWithKernel(url) {
3428
- const apiKey = process.env.KERNEL_API_KEY;
3531
+ const apiKey = browserServiceApiKey();
3429
3532
  if (!apiKey) throw new Error("Browser backend API key not set");
3430
3533
  const client = new import_sdk.default({ apiKey });
3431
3534
  const kb = await client.browsers.create({ stealth: true, timeout_seconds: 60 });
@@ -3450,6 +3553,7 @@ var init_kernel_fetch = __esm({
3450
3553
  "src/api/kernel-fetch.ts"() {
3451
3554
  "use strict";
3452
3555
  import_sdk = __toESM(require("@onkernel/sdk"), 1);
3556
+ init_browser_service_env();
3453
3557
  import_playwright = require("playwright");
3454
3558
  }
3455
3559
  });
@@ -3468,9 +3572,9 @@ async function extractKpo(opts) {
3468
3572
  redirect: "manual"
3469
3573
  });
3470
3574
  if (res.status >= 300 && res.status < 400) {
3471
- const location = res.headers.get("location");
3472
- if (!location) return null;
3473
- const next = new URL(location, target).href;
3575
+ const location2 = res.headers.get("location");
3576
+ if (!location2) return null;
3577
+ const next = new URL(location2, target).href;
3474
3578
  const checkedRedirect = await validatePublicHttpUrl(next, { field: "redirect URL" });
3475
3579
  if (checkedRedirect.error || !checkedRedirect.parsed) return null;
3476
3580
  target = checkedRedirect.parsed.href;
@@ -8216,6 +8320,7 @@ var init_rates = __esm({
8216
8320
  yt_channel: 50,
8217
8321
  yt_transcription: 200,
8218
8322
  fb_ad: 50,
8323
+ maps_search: 2e3,
8219
8324
  maps_place: 2e3,
8220
8325
  maps_review: 50,
8221
8326
  fb_search: 50,
@@ -8277,6 +8382,14 @@ var init_rates = __esm({
8277
8382
  credits: mcToCredits(MC_COSTS.fb_ad),
8278
8383
  unit: "per call"
8279
8384
  },
8385
+ {
8386
+ key: "maps_search",
8387
+ label: "Maps business search",
8388
+ aliases: ["maps_search", "google maps search", "gmb search", "gbp search", "business profiles"],
8389
+ credits: mcToCredits(MC_COSTS.maps_search),
8390
+ unit: "per search",
8391
+ notes: "Returns up to 50 Google Maps business/profile candidates. Use maps_place_intel to hydrate selected businesses."
8392
+ },
8280
8393
  {
8281
8394
  key: "maps_place",
8282
8395
  label: "Maps business lookup",
@@ -8338,6 +8451,7 @@ var init_rates = __esm({
8338
8451
  TRANSCRIPTION_REFUND: "transcription_refund",
8339
8452
  YT_CHANNEL: "yt_channel",
8340
8453
  FB_AD: "fb_ad",
8454
+ MAPS_SEARCH: "maps_search",
8341
8455
  MAPS_PLACE: "maps_place",
8342
8456
  MAPS_REVIEW: "maps_review",
8343
8457
  MAPS_REVIEW_REFUND: "maps_review_refund",
@@ -8484,40 +8598,6 @@ var init_selectors = __esm({
8484
8598
  }
8485
8599
  });
8486
8600
 
8487
- // src/errors.ts
8488
- function sanitizeVendorName(message) {
8489
- return message.replace(/kernel\.sh\s+sessions?/gi, "sessions").replace(/kernel\.sh\s+session/gi, "this session").replace(/kernel\.sh/gi, "the service").replace(/kernel\s+sessions?/gi, "sessions").replace(/kernel\s+session/gi, "this session").replace(/\bkernel\b/gi, "the service").replace(/ +/g, " ").trim();
8490
- }
8491
- var RECAPTCHA_INSTRUCTIONS, CaptchaError, ExtractionError, RequestAbortedError;
8492
- var init_errors = __esm({
8493
- "src/errors.ts"() {
8494
- "use strict";
8495
- RECAPTCHA_INSTRUCTIONS = "Google returned a CAPTCHA. Run with --headless=false to re-warm the browser profile, then retry.";
8496
- CaptchaError = class extends Error {
8497
- constructor(instructions) {
8498
- super(`CAPTCHA detected. ${instructions}`);
8499
- this.instructions = instructions;
8500
- }
8501
- instructions;
8502
- name = "CaptchaError";
8503
- };
8504
- ExtractionError = class extends Error {
8505
- constructor(message, cause) {
8506
- super(message);
8507
- this.cause = cause;
8508
- }
8509
- cause;
8510
- name = "ExtractionError";
8511
- };
8512
- RequestAbortedError = class extends Error {
8513
- name = "RequestAbortedError";
8514
- constructor(message = "Request aborted before harvest completed") {
8515
- super(message);
8516
- }
8517
- };
8518
- }
8519
- });
8520
-
8521
8601
  // src/driver/BrowserDriver.ts
8522
8602
  function positiveIntFromEnv(name, fallback) {
8523
8603
  const raw = process.env[name];
@@ -9499,7 +9579,7 @@ async function writeOutputs(result, outputDir) {
9499
9579
  }
9500
9580
  }
9501
9581
  async function ytHarvest(rawOptions) {
9502
- const kernelApiKey = process.env.KERNEL_API_KEY;
9582
+ const kernelApiKey = browserServiceApiKey();
9503
9583
  if (!kernelApiKey) {
9504
9584
  throw new Error("A browser backend API key is required \u2014 YouTube harvesting requires a stealth session.");
9505
9585
  }
@@ -9533,6 +9613,7 @@ var init_youtube_harvest = __esm({
9533
9613
  "src/youtube/youtube-harvest.ts"() {
9534
9614
  "use strict";
9535
9615
  import_node_fs2 = require("fs");
9616
+ init_browser_service_env();
9536
9617
  import_node_path4 = __toESM(require("path"), 1);
9537
9618
  import_papaparse = __toESM(require("papaparse"), 1);
9538
9619
  init_schemas2();
@@ -9610,7 +9691,7 @@ function parseTimedtextXml(xml) {
9610
9691
  return results;
9611
9692
  }
9612
9693
  async function fetchViaKernelInnertube(videoId) {
9613
- const kernelApiKey = process.env.KERNEL_API_KEY;
9694
+ const kernelApiKey = browserServiceApiKey();
9614
9695
  if (!kernelApiKey) return null;
9615
9696
  const driver = new BrowserDriver();
9616
9697
  const start = Date.now();
@@ -9753,7 +9834,7 @@ async function attemptKernelWhisper(videoId, kernelApiKey, falKey, start) {
9753
9834
  }
9754
9835
  }
9755
9836
  async function fetchViaKernelWhisper(videoId) {
9756
- const kernelApiKey = process.env.KERNEL_API_KEY;
9837
+ const kernelApiKey = browserServiceApiKey();
9757
9838
  const falKey = process.env.FAL_KEY;
9758
9839
  if (!kernelApiKey || !falKey) return null;
9759
9840
  const start = Date.now();
@@ -9793,6 +9874,7 @@ var init_CaptionFetcher = __esm({
9793
9874
  "src/youtube/CaptionFetcher.ts"() {
9794
9875
  "use strict";
9795
9876
  init_BrowserDriver();
9877
+ init_browser_service_env();
9796
9878
  import_client2 = require("@fal-ai/client");
9797
9879
  WHISPER_RECORD_SECONDS = 90;
9798
9880
  }
@@ -10034,6 +10116,7 @@ var init_screenshot_routes = __esm({
10034
10116
  "src/api/screenshot-routes.ts"() {
10035
10117
  "use strict";
10036
10118
  import_hono3 = require("hono");
10119
+ init_browser_service_env();
10037
10120
  import_zod14 = require("zod");
10038
10121
  init_screenshot();
10039
10122
  init_api_auth();
@@ -10068,7 +10151,7 @@ var init_screenshot_routes = __esm({
10068
10151
  }
10069
10152
  const device2 = body.device === "mobile" ? "mobile" : "desktop";
10070
10153
  try {
10071
- const buf = await captureScreenshot(parsedFallback.href, process.env.KERNEL_API_KEY?.trim(), device2);
10154
+ const buf = await captureScreenshot(parsedFallback.href, browserServiceApiKey(), device2);
10072
10155
  return new Response(new Uint8Array(buf), {
10073
10156
  status: 200,
10074
10157
  headers: {
@@ -10084,7 +10167,7 @@ var init_screenshot_routes = __esm({
10084
10167
  }
10085
10168
  const device = body.device === "mobile" ? "mobile" : "desktop";
10086
10169
  try {
10087
- const buf = await captureScreenshot(urlCheck.parsed.href, process.env.KERNEL_API_KEY?.trim(), device);
10170
+ const buf = await captureScreenshot(urlCheck.parsed.href, browserServiceApiKey(), device);
10088
10171
  return new Response(new Uint8Array(buf), {
10089
10172
  status: 200,
10090
10173
  headers: {
@@ -11034,9 +11117,9 @@ function proxyName(country, state, city) {
11034
11117
  function zipProxyName(zip) {
11035
11118
  return `mcp-serp-residential-us-zip-${zip}`;
11036
11119
  }
11037
- function parseKernelLocationProxyTarget(location, gl) {
11038
- if (!location || gl.toLowerCase() !== "us") return null;
11039
- const canonicalLocation = normalizeLocation(location);
11120
+ function parseKernelLocationProxyTarget(location2, gl) {
11121
+ if (!location2 || gl.toLowerCase() !== "us") return null;
11122
+ const canonicalLocation = normalizeLocation(location2);
11040
11123
  let parts = canonicalLocation.split(",").map((part) => part.trim()).filter(Boolean);
11041
11124
  if (parts.length > 1 && isUnitedStates(parts[parts.length - 1])) {
11042
11125
  parts = parts.slice(0, -1);
@@ -11369,29 +11452,30 @@ function buildPageIntelUrl(body, country) {
11369
11452
  return `https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=${country}&q=${encodeURIComponent(body.query.trim())}&search_type=keyword_unordered`;
11370
11453
  }
11371
11454
  function kernelLaunchOpts() {
11372
- return { headless: true, kernelApiKey: process.env.KERNEL_API_KEY?.trim(), kernelProxyId: process.env.KERNEL_PROXY_ID?.trim(), viewport: { width: 1280, height: 900 }, locale: "en-US" };
11455
+ return { headless: true, kernelApiKey: browserServiceApiKey(), kernelProxyId: browserServiceProxyId(), viewport: { width: 1280, height: 900 }, locale: "en-US" };
11373
11456
  }
11374
11457
  async function kernelLaunchOptsResidential() {
11375
- let proxyId = process.env.KERNEL_PROXY_ID?.trim();
11458
+ let proxyId = browserServiceProxyId();
11376
11459
  try {
11377
11460
  const resolution2 = await resolveKernelProxyId({
11378
- kernelApiKey: process.env.KERNEL_API_KEY?.trim(),
11461
+ kernelApiKey: browserServiceApiKey(),
11379
11462
  proxyMode: "location",
11380
- configuredKernelProxyId: process.env.KERNEL_PROXY_ID?.trim(),
11463
+ configuredKernelProxyId: browserServiceProxyId(),
11381
11464
  location: "New York, NY",
11382
11465
  gl: "us"
11383
11466
  });
11384
11467
  if (resolution2.kernelProxyId) proxyId = resolution2.kernelProxyId;
11385
11468
  } catch {
11386
- proxyId = process.env.KERNEL_PROXY_ID?.trim();
11469
+ proxyId = browserServiceProxyId();
11387
11470
  }
11388
- return { headless: true, kernelApiKey: process.env.KERNEL_API_KEY?.trim(), kernelProxyId: proxyId, viewport: { width: 1280, height: 900 }, locale: "en-US" };
11471
+ return { headless: true, kernelApiKey: browserServiceApiKey(), kernelProxyId: proxyId, viewport: { width: 1280, height: 900 }, locale: "en-US" };
11389
11472
  }
11390
11473
  var import_hono4, import_zod15, import_client3, FacebookAdBodySchema, FacebookPageIntelBodySchema, FacebookTranscribeBodySchema, FacebookSearchBodySchema, FacebookMediaBodySchema, facebookAdApp, ALLOWED_MEDIA_HOSTS;
11391
11474
  var init_facebook_ad_routes = __esm({
11392
11475
  "src/api/facebook-ad-routes.ts"() {
11393
11476
  "use strict";
11394
11477
  import_hono4 = require("hono");
11478
+ init_browser_service_env();
11395
11479
  import_zod15 = require("zod");
11396
11480
  init_db();
11397
11481
  init_rates();
@@ -11687,7 +11771,7 @@ var init_facebook_ad_routes = __esm({
11687
11771
  });
11688
11772
 
11689
11773
  // src/schemas.ts
11690
- var import_zod16, HarvestOptionsSchema, MapsPlaceOptionsSchema, RawPAAItemSchema, RawMapsOverviewSchema, RawMapsHoursRowSchema, RawMapsReviewStatsSchema, RawMapsReviewCardSchema, RawMapsAboutAttributeSchema;
11774
+ var import_zod16, HarvestOptionsSchema, MapsPlaceOptionsSchema, MapsSearchOptionsSchema, RawPAAItemSchema, RawMapsOverviewSchema, RawMapsHoursRowSchema, RawMapsReviewStatsSchema, RawMapsReviewCardSchema, RawMapsAboutAttributeSchema;
11691
11775
  var init_schemas3 = __esm({
11692
11776
  "src/schemas.ts"() {
11693
11777
  "use strict";
@@ -11725,6 +11809,16 @@ var init_schemas3 = __esm({
11725
11809
  kernelProxyId: import_zod16.z.string().optional(),
11726
11810
  headless: import_zod16.z.boolean().default(true)
11727
11811
  });
11812
+ MapsSearchOptionsSchema = import_zod16.z.object({
11813
+ query: import_zod16.z.string().min(1),
11814
+ location: import_zod16.z.string().optional(),
11815
+ gl: import_zod16.z.string().length(2).default("us"),
11816
+ hl: import_zod16.z.string().length(2).default("en"),
11817
+ maxResults: import_zod16.z.number().int().min(1).max(50).default(10),
11818
+ kernelApiKey: import_zod16.z.string().optional(),
11819
+ kernelProxyId: import_zod16.z.string().optional(),
11820
+ headless: import_zod16.z.boolean().default(true)
11821
+ });
11728
11822
  RawPAAItemSchema = import_zod16.z.object({
11729
11823
  question: import_zod16.z.string().min(1),
11730
11824
  answer: import_zod16.z.string().optional(),
@@ -11785,8 +11879,8 @@ var init_MapsNavigator = __esm({
11785
11879
  this.page = page;
11786
11880
  }
11787
11881
  page;
11788
- async navigateToPlacePage(businessName, location) {
11789
- const query = `${businessName} ${location}`;
11882
+ async navigateToPlacePage(businessName, location2) {
11883
+ const query = `${businessName} ${location2}`;
11790
11884
  const searchUrl = `https://www.google.com/maps/search/${encodeURIComponent(query)}`;
11791
11885
  await this.page.goto(searchUrl, { waitUntil: "domcontentloaded", timeout: 45e3 });
11792
11886
  const onPlacePage = await this.page.evaluate(() => /\/maps\/place\//.test(window.location.href));
@@ -12230,7 +12324,172 @@ var init_MapsExtractor = __esm({
12230
12324
  }
12231
12325
  });
12232
12326
 
12327
+ // src/extractor/MapsSearchExtractor.ts
12328
// Google Maps search-results extractor (src/extractor/MapsSearchExtractor.ts).
// The bindings are declared here and assigned when init_MapsSearchExtractor()
// runs.
var MAPS_SEARCH_SCROLL_BUDGET_MS, MAPS_SEARCH_SCROLL_STEP_MS, MAPS_SEARCH_MAX_NO_GROWTH_ROUNDS, MapsSearchExtractor;
var init_MapsSearchExtractor = __esm({
  "src/extractor/MapsSearchExtractor.ts"() {
    "use strict";
    init_errors();
    // Upper bound on the total time spent scrolling the results feed.
    MAPS_SEARCH_SCROLL_BUDGET_MS = 6e4;
    // Pause after each scroll before the feed is read again.
    MAPS_SEARCH_SCROLL_STEP_MS = 1200;
    // Give up after this many consecutive rounds that add no new result.
    MAPS_SEARCH_MAX_NO_GROWTH_ROUNDS = 4;
    MapsSearchExtractor = class {
      /**
       * @param {object} driver Browser driver; this class calls its
       *   `launch(config)`, `getPage()` and `close()` methods.
       */
      constructor(driver) {
        this.driver = driver;
      }
      driver;
      /**
       * Run one Maps search and collect up to `options.maxResults` results.
       *
       * @param {object} options Reads `query`, `location`, `gl`, `hl`,
       *   `maxResults`, `headless`, `kernelApiKey` and `kernelProxyId`.
       * @returns {Promise<object>} Search summary including the `results` list.
       * @throws {CaptchaError} When the loaded page looks like a block page.
       */
      async extract(options) {
        const startMs = Date.now();
        const searchQuery = [options.query, options.location].filter(Boolean).join(" ");
        const searchUrl = `https://www.google.com/maps/search/${encodeURIComponent(searchQuery)}?hl=${encodeURIComponent(options.hl)}`;
        const config = {
          headless: options.headless,
          kernelApiKey: options.kernelApiKey,
          kernelProxyId: options.kernelProxyId,
          viewport: { width: 1280, height: 900 },
          locale: `${options.hl}-${options.gl.toUpperCase()}`
        };
        try {
          await this.driver.launch(config);
          const page = this.driver.getPage();
          await page.goto(searchUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
          await page.waitForTimeout(3e3);
          const blocked = await this.detectBlock(page);
          if (blocked) throw new CaptchaError(RECAPTCHA_INSTRUCTIONS);
          const results = await this.collectResults(page, options.maxResults);
          return {
            query: options.query,
            location: options.location ?? null,
            searchQuery,
            searchUrl,
            extractedAt: (/* @__PURE__ */ new Date()).toISOString(),
            requestedMaxResults: options.maxResults,
            resultCount: results.length,
            results,
            durationMs: Date.now() - startMs
          };
        } finally {
          // Always release the browser, on success and on failure.
          await this.driver.close();
        }
      }
      /**
       * Report whether the current page looks like a CAPTCHA or block page,
       * judged from the first 2000 characters of body text and from the URL.
       *
       * @param {object} page Page handle exposing `evaluate`.
       * @returns {Promise<boolean>}
       */
      async detectBlock(page) {
        return page.evaluate(() => {
          const text = document.body.innerText.slice(0, 2e3);
          return /unusual traffic|captcha|recaptcha|about this page/i.test(text) || /\/sorry\//.test(location.href);
        });
      }
      /**
       * Scroll the results feed and accumulate de-duplicated results until
       * `maxResults` is reached, the time budget is spent, or several
       * consecutive rounds add nothing new.
       *
       * @param {object} page Page handle exposing `evaluate` and `waitForTimeout`.
       * @param {number} maxResults Maximum number of results to return.
       * @returns {Promise<object[]>} Results with `position` renumbered from 1
       *   in discovery order.
       */
      async collectResults(page, maxResults) {
        const seen = /* @__PURE__ */ new Map();
        const started = Date.now();
        let noGrowthRounds = 0;
        while (Date.now() - started < MAPS_SEARCH_SCROLL_BUDGET_MS) {
          const before = seen.size;
          const batch = await this.extractVisibleResults(page);
          for (const result of batch) {
            const key = this.resultKey(result);
            if (!seen.has(key)) seen.set(key, { ...result, position: seen.size + 1 });
            if (seen.size >= maxResults) break;
          }
          if (seen.size >= maxResults) break;
          if (seen.size === before) noGrowthRounds += 1;
          else noGrowthRounds = 0;
          if (noGrowthRounds >= MAPS_SEARCH_MAX_NO_GROWTH_ROUNDS) break;
          await page.evaluate(() => {
            const feed = document.querySelector('[role="feed"]');
            if (feed) {
              feed.scrollTop = feed.scrollHeight;
            } else {
              window.scrollTo(0, document.body.scrollHeight);
            }
          });
          await page.waitForTimeout(MAPS_SEARCH_SCROLL_STEP_MS);
        }
        return [...seen.values()].slice(0, maxResults);
      }
      /**
       * De-duplication key for a result: the decimal CID, else the place URL
       * without its query string, else the name.
       *
       * `||` is used on purpose. `String.replace` never returns null or
       * undefined, so with `??` the name fallback was unreachable and a
       * result whose stripped URL was empty was keyed as "".
       *
       * @param {{cidDecimal?: string|null, placeUrl?: string, name: string}} result
       * @returns {string}
       */
      resultKey(result) {
        return result.cidDecimal || result.placeUrl?.replace(/[?&].*$/, "") || result.name;
      }
      /**
       * Read the result cards currently present in the DOM.
       *
       * The callback runs through `page.evaluate`, so every helper it needs
       * is defined inside it.
       *
       * @param {object} page Page handle exposing `evaluate`.
       * @returns {Promise<object[]>} One record per distinct place link.
       */
      async extractVisibleResults(page) {
        return page.evaluate(() => {
          const RATING_RE = /^\d(?:\.\d)?$/;
          const REVIEW_COUNT_RE = /^\(?[\d,]+\)?$/;
          const NON_CATEGORY_RE = /^\d(?:\.\d)?$|^\(?[\d,]+\)?$|open|closed|directions|website/i;
          const ADDRESS_RE = /\b[A-Z]{2}\s+\d{5}\b|\b(?:St|Street|Ave|Avenue|Rd|Road|Blvd|Drive|Dr)\b/i;
          function normalizeText(value) {
            const text = value?.replace(/\s+/g, " ").trim() ?? "";
            return text || null;
          }
          // Pull the "0x...:0x..." feature id out of a place URL; the second
          // half is also returned in decimal form.
          function cidFromUrl(url) {
            const fid = url.match(/!1s(0x[0-9a-f]+):(0x[0-9a-f]+)/i);
            if (!fid) return { cid: null, cidDecimal: null };
            let cidDecimal = null;
            try {
              cidDecimal = BigInt(fid[2]).toString();
            } catch {
              // Leave cidDecimal null when the hex part cannot be converted.
            }
            return { cid: `${fid[1]}:${fid[2]}`, cidDecimal };
          }
          // Distinct direct-text snippets (2 to 139 characters) of every
          // div/span in the card, in document order.
          function textParts(card) {
            if (!card) return [];
            const parts = [];
            card.querySelectorAll("div, span").forEach((el2) => {
              const text = Array.from(el2.childNodes).filter((node) => node.nodeType === 3).map((node) => node.textContent?.trim() ?? "").filter((text2) => text2.length > 1 && text2.length < 140).join(" ");
              if (text && !parts.includes(text)) parts.push(text);
            });
            return parts;
          }
          const out = [];
          const seen = /* @__PURE__ */ new Set();
          const anchors = Array.from(document.querySelectorAll('a[href*="/maps/place/"]'));
          for (const anchor of anchors) {
            const placeUrl = anchor.href;
            const stableUrl = placeUrl.replace(/[?&].*$/, "");
            if (seen.has(stableUrl)) continue;
            seen.add(stableUrl);
            const card = anchor.closest('.Nv2PK, [role="article"], .bfdHYd') ?? anchor.parentElement;
            const parts = textParts(card);
            const aria = normalizeText(anchor.getAttribute("aria-label"));
            const heading = normalizeText(card?.querySelector('.qBF1Pd, .fontHeadlineSmall, [role="heading"]')?.textContent);
            const name = aria ?? heading ?? parts[0] ?? stableUrl;
            const links = Array.from(card?.querySelectorAll("a[href]") ?? []);
            const websiteUrl = links.find((link) => link.href.startsWith("http") && !link.href.includes("google."))?.href ?? null;
            const directionsUrl = links.find((link) => /google\.[^/]+\/maps\/dir|\/dir\//i.test(link.href))?.href ?? null;
            const ratingIndex = parts.findIndex((part) => RATING_RE.test(part));
            const rating = ratingIndex >= 0 ? parts[ratingIndex] : null;
            // An integer rating such as "5" also matches the review-count
            // pattern, so the rating token is skipped here.
            const reviewCountRaw = parts.find((part, index) => index !== ratingIndex && REVIEW_COUNT_RE.test(part)) ?? null;
            // The card text includes the business name; skip it so that the
            // name is not reported as the category.
            const category = parts.find((part) => part !== name && part !== heading && !NON_CATEGORY_RE.test(part)) ?? null;
            const address = parts.find((part) => ADDRESS_RE.test(part)) ?? null;
            const { cid, cidDecimal } = cidFromUrl(placeUrl);
            out.push({
              position: out.length + 1,
              name,
              placeUrl,
              cid,
              cidDecimal,
              rating,
              reviewCount: reviewCountRaw ? reviewCountRaw.replace(/[()]/g, "") : null,
              category,
              address,
              websiteUrl,
              directionsUrl,
              metadata: parts.slice(0, 20)
            });
          }
          return out;
        });
      }
    };
  }
});
12483
+
12233
12484
  // src/api/maps-routes.ts
12485
/**
 * Build the JSON error response shared by the Maps routes.
 *
 * Messages containing "CAPTCHA" or "blocked" are reported as retryable
 * 503 responses with error code "captcha_or_blocked"; everything else is a
 * non-retryable 500 carrying the caller's `errorCode`. The message text is
 * passed through sanitizeVendorName before it is returned.
 *
 * @param {object} c Request context exposing `json(body, status)`.
 * @param {string} msg Raw error message.
 * @param {string} errorCode Error code used for non-blocked failures.
 * @returns {*} Whatever `c.json` returns.
 */
function mapsErrorResponse(c, msg, errorCode) {
  const isBlocked = ["CAPTCHA", "blocked"].some((needle) => msg.includes(needle));
  const status = isBlocked ? 503 : 500;
  const payload = {
    error: sanitizeVendorName(msg),
    error_code: isBlocked ? "captcha_or_blocked" : errorCode,
    retryable: isBlocked
  };
  return c.json(payload, status);
}
12234
12493
  var import_hono5, mapsApp;
12235
12494
  var init_maps_routes = __esm({
12236
12495
  "src/api/maps-routes.ts"() {
@@ -12239,10 +12498,59 @@ var init_maps_routes = __esm({
12239
12498
  init_db();
12240
12499
  init_rates();
12241
12500
  init_MapsExtractor();
12501
+ init_MapsSearchExtractor();
12242
12502
  init_BrowserDriver();
12243
12503
  init_schemas3();
12244
12504
  init_api_auth();
12505
+ init_errors();
12245
12506
  mapsApp = new import_hono5.Hono();
12507
// POST /search — run a Google Maps business search for the authenticated user.
// The cost is debited before the search runs and credited back if it fails.
mapsApp.post("/search", createApiKeyAuth(), async (c) => {
  const user = c.get("user");
  // An unparseable body is treated as empty, so schema validation reports
  // the problem.
  const body = await c.req.json().catch(() => ({}));
  const parsed = MapsSearchOptionsSchema.safeParse({
    // Use the shared resolver, as the other routes do, so that
    // BROWSER_SERVICE_API_KEY (with KERNEL_API_KEY as fallback) is honoured
    // and the value is trimmed. Fields in the request body still take
    // precedence.
    kernelApiKey: browserServiceApiKey(),
    ...body
  });
  if (!parsed.success) {
    return c.json({ error: parsed.error.issues[0]?.message ?? "Invalid request" }, 400);
  }
  // "query location" label used for the ledger entry and the failure log.
  const searchLabel = [parsed.data.query, parsed.data.location].filter(Boolean).join(" ");
  const { ok, balance_mc } = await debitMc(
    user.id,
    MC_COSTS.maps_search,
    LedgerOperation.MAPS_SEARCH,
    searchLabel
  );
  if (!ok) return c.json(insufficientBalanceResponse(balance_mc, MC_COSTS.maps_search), 402);
  const driver = new BrowserDriver();
  const extractor = new MapsSearchExtractor(driver);
  try {
    const result = await extractor.extract(parsed.data);
    await logRequestEvent({
      userId: user.id,
      source: "maps_search",
      status: "done",
      query: result.searchQuery,
      location: parsed.data.location,
      resultCount: result.resultCount,
      result
    });
    return c.json(result);
  } catch (err) {
    // Refund the debit before logging and responding.
    await creditMc(user.id, MC_COSTS.maps_search, LedgerOperation.REFUND, "failed maps_search call");
    const msg = err instanceof Error ? err.message : String(err);
    await logRequestEvent({
      userId: user.id,
      source: "maps_search",
      status: "failed",
      query: searchLabel,
      location: parsed.data.location,
      error: msg
    });
    return mapsErrorResponse(c, msg, "maps_search_failed");
  } finally {
    await driver.close();
  }
});
12246
12554
  mapsApp.post("/place", createApiKeyAuth(), async (c) => {
12247
12555
  const user = c.get("user");
12248
12556
  const body = await c.req.json().catch(() => ({}));
@@ -12309,10 +12617,7 @@ var init_maps_routes = __esm({
12309
12617
  location: parsed.data.location,
12310
12618
  error: msg
12311
12619
  });
12312
- if (msg.includes("CAPTCHA") || msg.includes("blocked")) {
12313
- return c.json({ error: msg }, 503);
12314
- }
12315
- return c.json({ error: msg }, 500);
12620
+ return mapsErrorResponse(c, msg, "maps_place_failed");
12316
12621
  } finally {
12317
12622
  await driver.close();
12318
12623
  }
@@ -12670,8 +12975,19 @@ function addCandidate(candidates, city, region, example) {
12670
12975
  }
12671
12976
  candidates.set(key, { city: normalizedCity, regionCode, count: 1, examples: [example] });
12672
12977
  }
12978
/**
 * Percent-decode SERP text without throwing on malformed input.
 *
 * Tries a plain decodeURIComponent first. If that fails, stray "%" signs
 * (those not followed by two hex digits) are escaped as "%25" and decoding is
 * tried again. If that also fails, the input is returned unchanged.
 *
 * @param {string} text Possibly percent-encoded text.
 * @returns {string} Decoded text, or the original text when undecodable.
 */
function decodeSerpText(text) {
  // Candidates are built lazily so that a failure while preparing the second
  // one is handled exactly like a decode failure.
  const candidates = [
    () => text,
    () => text.replace(/%(?![0-9a-fA-F]{2})/g, "%25")
  ];
  for (const candidate of candidates) {
    try {
      return decodeURIComponent(candidate());
    } catch {
      // Fall through to the next, more lenient candidate.
    }
  }
  return text;
}
12673
12989
  function scanText(candidates, text) {
12674
- const normalized = decodeURIComponent(text).replace(/[+/|_-]+/g, " ");
12990
+ const normalized = decodeSerpText(text).replace(/[+/|_-]+/g, " ");
12675
12991
  for (const match of normalized.matchAll(CITY_STATE_RE)) {
12676
12992
  addCandidate(candidates, match[1] ?? "", match[2] ?? "", normalized.slice(0, 180));
12677
12993
  }
@@ -14079,8 +14395,8 @@ async function harvest(rawOptions) {
14079
14395
  const onAttemptEvent = getAttemptLogSink(rawOptions);
14080
14396
  const requestedProxyMode = raw.proxyMode;
14081
14397
  const proxyMode = requestedProxyMode === "none" ? "none" : requestedProxyMode === "configured" ? "configured" : "location";
14082
- const kernelApiKey = typeof raw.kernelApiKey === "string" ? raw.kernelApiKey.trim() : process.env.KERNEL_API_KEY?.trim();
14083
- const configuredKernelProxyId = typeof raw.kernelProxyId === "string" ? raw.kernelProxyId.trim() : process.env.KERNEL_PROXY_ID?.trim();
14398
+ const kernelApiKey = typeof raw.kernelApiKey === "string" ? raw.kernelApiKey.trim() : browserServiceApiKey();
14399
+ const configuredKernelProxyId = typeof raw.kernelProxyId === "string" ? raw.kernelProxyId.trim() : browserServiceProxyId();
14084
14400
  const proxyOpts = {
14085
14401
  kernelApiKey,
14086
14402
  proxyMode,
@@ -14267,6 +14583,7 @@ var init_harvest = __esm({
14267
14583
  "src/harvest.ts"() {
14268
14584
  "use strict";
14269
14585
  init_schemas3();
14586
+ init_browser_service_env();
14270
14587
  init_BrowserDriver();
14271
14588
  init_PAAExtractor();
14272
14589
  init_OutputSerializer();
@@ -14691,8 +15008,8 @@ async function captureSerpIntelligenceSnapshot(rawInput, runtimeOptions = {}) {
14691
15008
  debug,
14692
15009
  serpOnly: true,
14693
15010
  headless: runtimeOptions.headless ?? true,
14694
- kernelApiKey: runtimeOptions.kernelApiKey ?? process.env.KERNEL_API_KEY?.trim(),
14695
- kernelProxyId: runtimeOptions.kernelProxyId ?? process.env.KERNEL_PROXY_ID?.trim(),
15011
+ kernelApiKey: runtimeOptions.kernelApiKey ?? browserServiceApiKey(),
15012
+ kernelProxyId: runtimeOptions.kernelProxyId ?? browserServiceProxyId(),
14696
15013
  format: "json",
14697
15014
  outputDir: runtimeOptions.outputDir ?? "/tmp/serp-intelligence-output",
14698
15015
  signal: runtimeOptions.signal,
@@ -14703,7 +15020,7 @@ async function captureSerpIntelligenceSnapshot(rawInput, runtimeOptions = {}) {
14703
15020
  const pageSnapshotLimit = normalizePageSnapshotLimit(parsedInput);
14704
15021
  const pageSnapshotTargets = collectPageSnapshotTargets(harvestResult, pageSnapshotLimit);
14705
15022
  const pageSnapshotArtifacts = pageSnapshotTargets.length > 0 ? (await capturePageSnapshotsFn(pageSnapshotTargets, {
14706
- kernelApiKey: runtimeOptions.kernelApiKey ?? process.env.KERNEL_API_KEY?.trim(),
15023
+ kernelApiKey: runtimeOptions.kernelApiKey ?? browserServiceApiKey(),
14707
15024
  timeoutMs: runtimeOptions.pageSnapshotTimeoutMs,
14708
15025
  maxConcurrency: runtimeOptions.pageSnapshotMaxConcurrency,
14709
15026
  debug,
@@ -14725,6 +15042,7 @@ var init_serp_capture_service = __esm({
14725
15042
  "src/serp-intelligence/serp-capture-service.ts"() {
14726
15043
  "use strict";
14727
15044
  init_harvest();
15045
+ init_browser_service_env();
14728
15046
  init_harvest_problems();
14729
15047
  init_page_snapshot_extractor();
14730
15048
  init_schemas4();
@@ -14829,6 +15147,7 @@ var init_serp_intelligence_routes = __esm({
14829
15147
  "src/api/serp-intelligence-routes.ts"() {
14830
15148
  "use strict";
14831
15149
  import_hono6 = require("hono");
15150
+ init_browser_service_env();
14832
15151
  init_page_snapshot_extractor();
14833
15152
  init_serp_capture_service();
14834
15153
  init_schemas4();
@@ -14861,8 +15180,8 @@ var init_serp_intelligence_routes = __esm({
14861
15180
  if (!ok) return c.json(insufficientBalanceResponse(balance_mc, cost), 402);
14862
15181
  try {
14863
15182
  const result = await captureSerpIntelligenceSnapshot(parsed.data, {
14864
- kernelApiKey: process.env.KERNEL_API_KEY?.trim(),
14865
- kernelProxyId: process.env.KERNEL_PROXY_ID?.trim(),
15183
+ kernelApiKey: browserServiceApiKey(),
15184
+ kernelProxyId: browserServiceProxyId(),
14866
15185
  signal: c.req.raw.signal,
14867
15186
  billing: { creditsUsed: cost / 1e3 }
14868
15187
  });
@@ -14917,7 +15236,7 @@ var init_serp_intelligence_routes = __esm({
14917
15236
  if (!ok) return c.json(insufficientBalanceResponse(balance_mc, cost), 402);
14918
15237
  try {
14919
15238
  const result = await capturePageSnapshots(targets, {
14920
- kernelApiKey: process.env.KERNEL_API_KEY?.trim(),
15239
+ kernelApiKey: browserServiceApiKey(),
14921
15240
  timeoutMs: parsed.data.timeoutMs,
14922
15241
  maxConcurrency: parsed.data.maxConcurrency,
14923
15242
  debug: parsed.data.debug
@@ -14952,8 +15271,17 @@ var init_serp_intelligence_routes = __esm({
14952
15271
  }
14953
15272
  });
14954
15273
 
15274
+ // src/version.ts
15275
+ var PACKAGE_VERSION;
15276
+ var init_version = __esm({
15277
+ "src/version.ts"() {
15278
+ "use strict";
15279
+ PACKAGE_VERSION = "0.1.8";
15280
+ }
15281
+ });
15282
+
14955
15283
  // src/mcp/mcp-tool-schemas.ts
14956
- var import_zod19, HarvestPaaInputSchema, ExtractUrlInputSchema, MapSiteUrlsInputSchema, ExtractSiteInputSchema, YoutubeHarvestInputSchema, YoutubeTranscribeInputSchema, FacebookPageIntelInputSchema, FacebookAdSearchInputSchema, FacebookAdTranscribeInputSchema, MapsPlaceIntelInputSchema, CreditsInfoInputSchema, SearchSerpInputSchema, CaptureSerpSnapshotInputSchema, ScreenshotInputSchema, CaptureSerpPageSnapshotsInputSchema;
15284
+ var import_zod19, HarvestPaaInputSchema, ExtractUrlInputSchema, MapSiteUrlsInputSchema, ExtractSiteInputSchema, YoutubeHarvestInputSchema, YoutubeTranscribeInputSchema, FacebookPageIntelInputSchema, FacebookAdSearchInputSchema, FacebookAdTranscribeInputSchema, MapsPlaceIntelInputSchema, MapsSearchInputSchema, NullableString, MapsSearchOutputSchema, OrganicResultOutput, AiOverviewOutput, EntityIdsOutput, HarvestPaaOutputSchema, SearchSerpOutputSchema, ExtractUrlOutputSchema, ExtractSiteOutputSchema, MapsPlaceIntelOutputSchema, CreditsInfoOutputSchema, MapSiteUrlsOutputSchema, YoutubeHarvestOutputSchema, FacebookAdSearchOutputSchema, FacebookPageIntelOutputSchema, CreditsInfoInputSchema, SearchSerpInputSchema, CaptureSerpSnapshotInputSchema, ScreenshotInputSchema, CaptureSerpPageSnapshotsInputSchema;
14957
15285
  var init_mcp_tool_schemas = __esm({
14958
15286
  "src/mcp/mcp-tool-schemas.ts"() {
14959
15287
  "use strict";
@@ -15018,6 +15346,207 @@ var init_mcp_tool_schemas = __esm({
15018
15346
  includeReviews: import_zod19.z.boolean().default(false).describe("Whether to fetch individual review cards"),
15019
15347
  maxReviews: import_zod19.z.number().int().min(1).max(500).default(50).describe("Max review cards to return (requires includeReviews: true)")
15020
15348
  };
15349
+ MapsSearchInputSchema = {
15350
+ query: import_zod19.z.string().min(1).describe('Business category, niche, keyword, or search term. If the user says "roofers in Denver CO", use query="roofers" and location="Denver, CO". Do not put the location here when it can be separated.'),
15351
+ location: import_zod19.z.string().optional().describe('City, region, country, or service area for the Maps search, e.g. "Denver, CO". Infer from the user request when present.'),
15352
+ gl: import_zod19.z.string().length(2).default("us").describe("Google country code inferred from location."),
15353
+ hl: import_zod19.z.string().length(2).default("en").describe("Language inferred from user request."),
15354
+ maxResults: import_zod19.z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more.")
15355
+ };
15356
+ NullableString = import_zod19.z.string().nullable();
15357
+ MapsSearchOutputSchema = {
15358
+ query: import_zod19.z.string(),
15359
+ location: import_zod19.z.string().nullable(),
15360
+ searchQuery: import_zod19.z.string(),
15361
+ searchUrl: import_zod19.z.string().url(),
15362
+ extractedAt: import_zod19.z.string(),
15363
+ requestedMaxResults: import_zod19.z.number().int().min(1).max(50),
15364
+ resultCount: import_zod19.z.number().int().min(0).max(50),
15365
+ results: import_zod19.z.array(import_zod19.z.object({
15366
+ position: import_zod19.z.number().int().min(1),
15367
+ name: import_zod19.z.string(),
15368
+ placeUrl: import_zod19.z.string().url(),
15369
+ cid: NullableString,
15370
+ cidDecimal: NullableString,
15371
+ rating: NullableString,
15372
+ reviewCount: NullableString,
15373
+ category: NullableString,
15374
+ address: NullableString,
15375
+ websiteUrl: NullableString,
15376
+ directionsUrl: NullableString,
15377
+ metadata: import_zod19.z.array(import_zod19.z.string())
15378
+ })),
15379
+ durationMs: import_zod19.z.number().int().min(0)
15380
+ };
15381
+ OrganicResultOutput = import_zod19.z.object({
15382
+ position: import_zod19.z.number().int(),
15383
+ title: import_zod19.z.string(),
15384
+ url: import_zod19.z.string(),
15385
+ domain: import_zod19.z.string(),
15386
+ snippet: NullableString
15387
+ });
15388
+ AiOverviewOutput = import_zod19.z.object({
15389
+ detected: import_zod19.z.boolean(),
15390
+ text: NullableString
15391
+ }).nullable();
15392
+ EntityIdsOutput = import_zod19.z.object({
15393
+ kgIds: import_zod19.z.array(import_zod19.z.string()),
15394
+ cids: import_zod19.z.array(import_zod19.z.string()),
15395
+ gcids: import_zod19.z.array(import_zod19.z.string())
15396
+ }).nullable();
15397
+ HarvestPaaOutputSchema = {
15398
+ query: import_zod19.z.string(),
15399
+ location: NullableString,
15400
+ questionCount: import_zod19.z.number().int().min(0),
15401
+ completionStatus: NullableString,
15402
+ questions: import_zod19.z.array(import_zod19.z.object({
15403
+ question: import_zod19.z.string(),
15404
+ answer: NullableString,
15405
+ sourceTitle: NullableString,
15406
+ sourceSite: NullableString
15407
+ })),
15408
+ organicResults: import_zod19.z.array(OrganicResultOutput),
15409
+ aiOverview: AiOverviewOutput,
15410
+ entityIds: EntityIdsOutput,
15411
+ durationMs: import_zod19.z.number().min(0).nullable()
15412
+ };
15413
+ SearchSerpOutputSchema = {
15414
+ query: import_zod19.z.string(),
15415
+ location: NullableString,
15416
+ organicResults: import_zod19.z.array(OrganicResultOutput),
15417
+ localPack: import_zod19.z.array(import_zod19.z.object({
15418
+ position: import_zod19.z.number().int(),
15419
+ name: import_zod19.z.string(),
15420
+ rating: NullableString,
15421
+ reviewCount: NullableString,
15422
+ websiteUrl: NullableString
15423
+ })),
15424
+ aiOverview: AiOverviewOutput,
15425
+ entityIds: EntityIdsOutput
15426
+ };
15427
+ ExtractUrlOutputSchema = {
15428
+ url: import_zod19.z.string(),
15429
+ title: NullableString,
15430
+ headings: import_zod19.z.array(import_zod19.z.object({
15431
+ level: import_zod19.z.number().int(),
15432
+ text: import_zod19.z.string()
15433
+ })),
15434
+ schemaBlockCount: import_zod19.z.number().int().min(0),
15435
+ entityName: NullableString,
15436
+ entityTypes: import_zod19.z.array(import_zod19.z.string()),
15437
+ napScore: import_zod19.z.number().nullable(),
15438
+ missingSchemaFields: import_zod19.z.array(import_zod19.z.string()),
15439
+ screenshotSaved: NullableString
15440
+ };
15441
+ ExtractSiteOutputSchema = {
15442
+ url: import_zod19.z.string(),
15443
+ pageCount: import_zod19.z.number().int().min(0),
15444
+ pages: import_zod19.z.array(import_zod19.z.object({
15445
+ url: import_zod19.z.string(),
15446
+ title: NullableString,
15447
+ schemaTypes: import_zod19.z.array(import_zod19.z.string())
15448
+ })),
15449
+ durationMs: import_zod19.z.number().min(0)
15450
+ };
15451
+ MapsPlaceIntelOutputSchema = {
15452
+ name: import_zod19.z.string(),
15453
+ rating: NullableString,
15454
+ reviewCount: NullableString,
15455
+ category: NullableString,
15456
+ address: NullableString,
15457
+ phone: NullableString,
15458
+ website: NullableString,
15459
+ hoursSummary: NullableString,
15460
+ bookingUrl: NullableString,
15461
+ kgmid: NullableString,
15462
+ cidDecimal: NullableString,
15463
+ cidUrl: NullableString,
15464
+ lat: import_zod19.z.number().nullable(),
15465
+ lng: import_zod19.z.number().nullable(),
15466
+ reviewsStatus: import_zod19.z.string(),
15467
+ reviewsCollected: import_zod19.z.number().int().min(0),
15468
+ reviewTopics: import_zod19.z.array(import_zod19.z.object({
15469
+ label: import_zod19.z.string(),
15470
+ count: import_zod19.z.string()
15471
+ }))
15472
+ };
15473
+ CreditsInfoOutputSchema = {
15474
+ balanceCredits: import_zod19.z.number().nullable(),
15475
+ matchedCost: import_zod19.z.object({
15476
+ label: import_zod19.z.string(),
15477
+ credits: import_zod19.z.number(),
15478
+ unit: import_zod19.z.string(),
15479
+ notes: NullableString
15480
+ }).nullable(),
15481
+ costs: import_zod19.z.array(import_zod19.z.object({
15482
+ key: import_zod19.z.string(),
15483
+ label: import_zod19.z.string(),
15484
+ credits: import_zod19.z.number(),
15485
+ unit: import_zod19.z.string(),
15486
+ notes: NullableString
15487
+ })),
15488
+ ledger: import_zod19.z.array(import_zod19.z.object({
15489
+ createdAt: import_zod19.z.string(),
15490
+ operation: import_zod19.z.string(),
15491
+ credits: import_zod19.z.number(),
15492
+ description: NullableString
15493
+ }))
15494
+ };
15495
+ MapSiteUrlsOutputSchema = {
15496
+ startUrl: import_zod19.z.string(),
15497
+ totalFound: import_zod19.z.number().int().min(0),
15498
+ truncated: import_zod19.z.boolean(),
15499
+ okCount: import_zod19.z.number().int().min(0),
15500
+ redirectCount: import_zod19.z.number().int().min(0),
15501
+ brokenCount: import_zod19.z.number().int().min(0),
15502
+ urls: import_zod19.z.array(import_zod19.z.object({
15503
+ url: import_zod19.z.string(),
15504
+ status: import_zod19.z.number().int().nullable()
15505
+ })),
15506
+ durationMs: import_zod19.z.number().min(0)
15507
+ };
15508
+ YoutubeHarvestOutputSchema = {
15509
+ mode: import_zod19.z.string(),
15510
+ videoCount: import_zod19.z.number().int().min(0),
15511
+ channel: import_zod19.z.object({
15512
+ title: NullableString,
15513
+ subscriberCount: NullableString
15514
+ }).nullable(),
15515
+ videos: import_zod19.z.array(import_zod19.z.object({
15516
+ videoId: import_zod19.z.string(),
15517
+ title: import_zod19.z.string(),
15518
+ channelName: NullableString,
15519
+ views: NullableString,
15520
+ duration: NullableString,
15521
+ url: NullableString
15522
+ }))
15523
+ };
15524
+ FacebookAdSearchOutputSchema = {
15525
+ query: import_zod19.z.string(),
15526
+ advertiserCount: import_zod19.z.number().int().min(0),
15527
+ advertisers: import_zod19.z.array(import_zod19.z.object({
15528
+ name: NullableString,
15529
+ adCount: import_zod19.z.number().int().nullable(),
15530
+ libraryId: NullableString
15531
+ }))
15532
+ };
15533
+ FacebookPageIntelOutputSchema = {
15534
+ advertiserName: NullableString,
15535
+ totalAds: import_zod19.z.number().int().min(0),
15536
+ activeCount: import_zod19.z.number().int().min(0),
15537
+ videoCount: import_zod19.z.number().int().min(0),
15538
+ imageCount: import_zod19.z.number().int().min(0),
15539
+ ads: import_zod19.z.array(import_zod19.z.object({
15540
+ libraryId: NullableString,
15541
+ status: NullableString,
15542
+ creativeType: NullableString,
15543
+ headline: NullableString,
15544
+ cta: NullableString,
15545
+ startDate: NullableString,
15546
+ videoUrl: NullableString,
15547
+ variations: import_zod19.z.number().int().nullable()
15548
+ }))
15549
+ };
15021
15550
  CreditsInfoInputSchema = {
15022
15551
  item: import_zod19.z.string().optional().describe('Optional tool, action, or feature to look up, e.g. "maps reviews", "extract_url", or "YouTube transcription"'),
15023
15552
  includeLedger: import_zod19.z.boolean().default(false).describe("Whether to include recent credit ledger entries")
@@ -15066,6 +15595,14 @@ var init_mcp_tool_schemas = __esm({
15066
15595
  });
15067
15596
 
15068
15597
  // src/mcp/mcp-response-formatter.ts
15598
+ function configureReportSaving(enabled) {
15599
+ reportSavingEnabled = enabled;
15600
+ }
15601
+ function sanitizeVendorText(text) {
15602
+ return sanitizeVendorName(
15603
+ text.replace(/kernel_session_id/gi, "browser_session_id").replace(/kernel_delete_succeeded/gi, "session_cleanup_succeeded").replace(/kernel_delete_started/gi, "session_cleanup_started").replace(/kernel_delete_error/gi, "session_cleanup_error").replace(/kernelSessionId/g, "browserSessionId").replace(/kernelProxyId/g, "proxyId").replace(/KERNEL_API_KEY/g, "BROWSER_SERVICE_API_KEY").replace(/"kernel"\s*:/gi, '"browserRuntime":')
15604
+ );
15605
+ }
15069
15606
  function slugifyReportName(input) {
15070
15607
  return input.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80) || "mcp-scraper-report";
15071
15608
  }
@@ -15077,7 +15614,7 @@ function outputBaseDir() {
15077
15614
  return process.env.MCP_SCRAPER_OUTPUT_DIR?.trim() || (0, import_node_path6.join)((0, import_node_os3.homedir)(), "Downloads", "mcp-scraper");
15078
15615
  }
15079
15616
  function saveFullReport(full) {
15080
- if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
15617
+ if (!reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
15081
15618
  const outDir = outputBaseDir();
15082
15619
  try {
15083
15620
  (0, import_node_fs4.mkdirSync)(outDir, { recursive: true });
@@ -15090,7 +15627,7 @@ function saveFullReport(full) {
15090
15627
  }
15091
15628
  }
15092
15629
  function persistScreenshotLocally(base64, url) {
15093
- if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
15630
+ if (!reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
15094
15631
  try {
15095
15632
  const dir = (0, import_node_path6.join)(outputBaseDir(), "screenshots");
15096
15633
  (0, import_node_fs4.mkdirSync)(dir, { recursive: true });
@@ -15130,11 +15667,11 @@ function parseData(raw) {
15130
15667
  const text = first?.type === "text" ? first.text : "";
15131
15668
  try {
15132
15669
  const parsed = JSON.parse(text || "{}");
15133
- if (raw.isError || parsed.error || parsed.error_code) return { error: formatStructuredError(parsed, text) };
15670
+ if (raw.isError || parsed.error || parsed.error_code) return { error: sanitizeVendorText(formatStructuredError(parsed, text)) };
15134
15671
  const data = parsed.result ?? parsed;
15135
15672
  return { data };
15136
15673
  } catch {
15137
- if (raw.isError) return { error: text || "Tool error" };
15674
+ if (raw.isError) return { error: sanitizeVendorText(text || "Tool error") };
15138
15675
  return { error: "Failed to parse tool response" };
15139
15676
  }
15140
15677
  }
@@ -15148,15 +15685,6 @@ function entityIdsSection(ids) {
15148
15685
  ## Entity IDs
15149
15686
  ${lines.join("\n")}` : "";
15150
15687
  }
15151
- function entityIdsSummaryLine(ids) {
15152
- if (!ids) return "";
15153
- const parts = [];
15154
- if (ids.kgIds?.length) parts.push(`KG MID: ${ids.kgIds[0]}`);
15155
- if (ids.cids?.length) parts.push(`CID: ${ids.cids[0]}`);
15156
- if (ids.gcids?.length) parts.push(`GCID: ${ids.gcids[0]}`);
15157
- return parts.length ? `
15158
- **Entity IDs:** ${parts.join(" \xB7 ")}` : "";
15159
- }
15160
15688
  function truncate(s, max) {
15161
15689
  if (!s) return "";
15162
15690
  return s.length > max ? s.slice(0, max) + "\u2026" : s;
@@ -15168,7 +15696,7 @@ function debugSection(debug) {
15168
15696
  if (!debug || typeof debug !== "object") return "";
15169
15697
  const request = debug.request ?? {};
15170
15698
  const browser = debug.browser ?? {};
15171
- const kernel = browser.kernel ?? {};
15699
+ const kernel = browser.browserRuntime ?? browser.kernel ?? {};
15172
15700
  const network = browser.networkLocation ?? {};
15173
15701
  const nav = browser.serpNavigation ?? {};
15174
15702
  const proxyResolution = kernel.proxyResolution ?? {};
@@ -15186,7 +15714,7 @@ function debugSection(debug) {
15186
15714
  if (locationEvidence) {
15187
15715
  lines.push(`- Location evidence: ${locationEvidence.status}${locationEvidence.expected ? ` \xB7 expected ${locationEvidence.expected.city}${locationEvidence.expected.regionCode ? `, ${locationEvidence.expected.regionCode}` : ""}` : ""}${candidates ? ` \xB7 candidates ${candidates}` : ""}`);
15188
15716
  }
15189
- return lines.join("\n");
15717
+ return sanitizeVendorText(lines.join("\n"));
15190
15718
  }
15191
15719
  function errorAttemptsSection(body) {
15192
15720
  const attempts = Array.isArray(body.attempts) ? body.attempts : [];
@@ -15194,12 +15722,14 @@ function errorAttemptsSection(body) {
15194
15722
  const lines = attempts.slice(0, 5).map((attempt) => {
15195
15723
  const debug = attempt.debug ?? {};
15196
15724
  const browser = debug.browser ?? {};
15197
- const kernel = browser.kernel ?? {};
15725
+ const kernel = browser.browserRuntime ?? browser.kernel ?? {};
15198
15726
  const proxyResolution = kernel.proxyResolution ?? {};
15199
15727
  const network = browser.networkLocation ?? {};
15200
15728
  const nav = browser.serpNavigation ?? {};
15201
15729
  const geo = [network.ip, network.city, network.region].filter(Boolean).join(" / ") || "geo unknown";
15202
- return `- Attempt ${attempt.attempt_number ?? "?"}: ${attempt.outcome ?? attempt.status ?? "unknown"} \xB7 session ${attempt.kernel_session_id ?? kernel.sessionId ?? "unknown"} \xB7 proxy ${debug.request?.proxyMode ?? kernel.proxyMode ?? "unknown"}${proxyResolution.source ? `/${proxyResolution.source}` : ""} \xB7 ${geo} \xB7 CAPTCHA ${nav.captchaDetected === true ? "yes" : nav.captchaDetected === false ? "no" : "unknown"} \xB7 deleted ${attempt.kernel_delete_succeeded === true ? "yes" : attempt.kernel_delete_succeeded === false ? "no" : "unknown"}`;
15730
+ const sessionId = attempt.browser_session_id ?? attempt.kernel_session_id ?? kernel.sessionId ?? "unknown";
15731
+ const cleanupSucceeded2 = attempt.session_cleanup_succeeded ?? attempt.kernel_delete_succeeded;
15732
+ return `- Attempt ${attempt.attempt_number ?? "?"}: ${attempt.outcome ?? attempt.status ?? "unknown"} \xB7 session ${sessionId} \xB7 proxy ${debug.request?.proxyMode ?? kernel.proxyMode ?? "unknown"}${proxyResolution.source ? `/${proxyResolution.source}` : ""} \xB7 ${geo} \xB7 CAPTCHA ${nav.captchaDetected === true ? "yes" : nav.captchaDetected === false ? "no" : "unknown"} \xB7 cleanup ${cleanupSucceeded2 === true ? "yes" : cleanupSucceeded2 === false ? "no" : "unknown"}`;
15203
15733
  });
15204
15734
  return `
15205
15735
 
@@ -15240,27 +15770,37 @@ ${serpRows}` : "";
15240
15770
  const tips = `
15241
15771
  ---
15242
15772
  \u{1F4A1} **Tips**
15243
- - Max questions: \`maxQuestions: 150\` (current: ${input.maxQuestions ?? 30})
15773
+ - Max questions: \`maxQuestions: 200\` (current: ${input.maxQuestions ?? 30})
15244
15774
  - Organic results only: use \`search_serp\`
15245
15775
  - Dig into a result: use \`extract_url\` on any organic URL`;
15246
15776
  const full = `# PAA Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
15247
15777
 
15248
15778
  ${paaTable}${serpTable}${entityIdsSection(entityIds)}${aiSection}${statsLine}${debugSection(diagnostics?.debug)}${tips}`;
15249
- const topQ = flat.slice(0, 10).map((r, i) => `${i + 1}. ${r.question}`).join("\n");
15250
- const topO = organic.slice(0, 5).map((r) => `${r.position}. [${r.title}](${r.url}) \u2014 ${r.domain}`).join("\n");
15251
- const summary = [
15252
- `**PAA: "${input.query}"** \u2014 ${flat.length} questions extracted`,
15253
- topQ ? `
15254
- **Top questions:**
15255
- ${topQ}` : "",
15256
- organic.length ? `
15257
- **Top organic results:**
15258
- ${topO}` : "",
15259
- entityIdsSummaryLine(entityIds),
15260
- `
15261
- \u{1F4A1} \`maxQuestions\` up to 150 \xB7Use \`extract_url\` to dig into any result`
15262
- ].filter(Boolean).join("\n");
15263
- return oneBlock(full);
15779
+ return {
15780
+ ...oneBlock(full),
15781
+ structuredContent: {
15782
+ query: input.query,
15783
+ location: input.location ?? null,
15784
+ questionCount: flat.length,
15785
+ completionStatus: diagnostics?.completionStatus ?? null,
15786
+ questions: flat.map((r) => ({
15787
+ question: String(r.question ?? ""),
15788
+ answer: r.answer ?? null,
15789
+ sourceTitle: r.source_title ?? null,
15790
+ sourceSite: r.source_site ?? null
15791
+ })),
15792
+ organicResults: organic.map((r) => ({
15793
+ position: Number(r.position) || 0,
15794
+ title: String(r.title ?? ""),
15795
+ url: String(r.url ?? ""),
15796
+ domain: String(r.domain ?? ""),
15797
+ snippet: r.snippet ?? null
15798
+ })),
15799
+ aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
15800
+ entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null,
15801
+ durationMs: durationMs ?? null
15802
+ }
15803
+ };
15264
15804
  }
15265
15805
  function formatSearchSerp(raw, input) {
15266
15806
  const parsed = parseData(raw);
@@ -15298,19 +15838,29 @@ ${localRows}` : "";
15298
15838
  const full = `# SERP Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
15299
15839
 
15300
15840
  ${serpTable}${localSection}${entityIdsSection(entityIds)}${aiSection}${debugSection(diagnostics?.debug)}${tips}`;
15301
- const topO = organic.slice(0, 5).map((r) => `${r.position}. [${r.title}](${r.url}) \u2014 ${r.domain}`).join("\n");
15302
- const summary = [
15303
- `**SERP: "${input.query}"** \u2014 ${organic.length} organic results`,
15304
- topO ? `
15305
- **Top results:**
15306
- ${topO}` : "",
15307
- localPack.length ? `
15308
- **Local Pack:** ${localPack.map((b) => b.name).join(", ")}` : "",
15309
- entityIdsSummaryLine(entityIds),
15310
- `
15311
- \u{1F4A1} Use \`harvest_paa\` for questions \xB7 \`extract_url\` to scrape any result`
15312
- ].filter(Boolean).join("\n");
15313
- return oneBlock(full);
15841
+ return {
15842
+ ...oneBlock(full),
15843
+ structuredContent: {
15844
+ query: input.query,
15845
+ location: input.location ?? null,
15846
+ organicResults: organic.map((r) => ({
15847
+ position: Number(r.position) || 0,
15848
+ title: String(r.title ?? ""),
15849
+ url: String(r.url ?? ""),
15850
+ domain: String(r.domain ?? ""),
15851
+ snippet: r.snippet ?? null
15852
+ })),
15853
+ localPack: localPack.map((b) => ({
15854
+ position: Number(b.position) || 0,
15855
+ name: String(b.name ?? ""),
15856
+ rating: b.rating ?? null,
15857
+ reviewCount: b.reviewCount ?? null,
15858
+ websiteUrl: b.websiteUrl ?? null
15859
+ })),
15860
+ aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
15861
+ entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null
15862
+ }
15863
+ };
15314
15864
  }
15315
15865
  function formatExtractUrl(raw, input) {
15316
15866
  const parsed = parseData(raw);
@@ -15379,15 +15929,27 @@ ${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
15379
15929
  **${title}**
15380
15930
  ${headingSection}${kpoSection}${brandingSection}${bodySection}${screenshotSection}${mediaSection}${tips}`;
15381
15931
  const textResult = oneBlock(full);
15932
+ const structuredContent = {
15933
+ url,
15934
+ title: d.title ?? null,
15935
+ headings: headings.map((h) => ({ level: Number(h.level) || 0, text: String(h.text ?? "") })),
15936
+ schemaBlockCount: schemaCount,
15937
+ entityName: kpo?.entityName ?? null,
15938
+ entityTypes: kpo?.type ?? [],
15939
+ napScore: kpo?.napScore ?? null,
15940
+ missingSchemaFields: kpo?.missingFields ?? [],
15941
+ screenshotSaved: screenshotPath ?? null
15942
+ };
15382
15943
  if (screenshotMeta?.base64) {
15383
15944
  return {
15384
15945
  content: [
15385
15946
  ...textResult.content,
15386
15947
  { type: "image", data: screenshotMeta.base64, mimeType: "image/png" }
15387
- ]
15948
+ ],
15949
+ structuredContent
15388
15950
  };
15389
15951
  }
15390
- return textResult;
15952
+ return { ...textResult, structuredContent };
15391
15953
  }
15392
15954
  function formatMapSiteUrls(raw, input) {
15393
15955
  const parsed = parseData(raw);
@@ -15420,15 +15982,19 @@ ${broken.map((u) => `- ${u.url} (${u.status})`).join("\n")}` : "",
15420
15982
  - Extract content from all pages: use \`extract_site\`
15421
15983
  - Scrape a single page: use \`extract_url\``
15422
15984
  ].filter(Boolean).join("\n");
15423
- const summary = [
15424
- `**URL Map: ${input.url}**`,
15425
- `${d.totalFound} URLs \u2014 ${ok.length} OK \xB7 ${broken.length} broken \xB7 ${redirects.length} redirects`,
15426
- broken.length ? `
15427
- **Broken URLs:** ${broken.slice(0, 3).map((u) => u.url).join(", ")}` : "",
15428
- `
15429
- \u{1F4A1} Use \`extract_site\` to extract content from all pages`
15430
- ].filter(Boolean).join("\n");
15431
- return oneBlock(full);
15985
+ return {
15986
+ ...oneBlock(full),
15987
+ structuredContent: {
15988
+ startUrl: d.startUrl ?? input.url,
15989
+ totalFound: d.totalFound ?? urls.length,
15990
+ truncated: d.truncated === true,
15991
+ okCount: ok.length,
15992
+ redirectCount: redirects.length,
15993
+ brokenCount: broken.length,
15994
+ urls: urls.map((u) => ({ url: u.url, status: u.status ?? null })),
15995
+ durationMs: d.durationMs ?? 0
15996
+ }
15997
+ };
15432
15998
  }
15433
15999
  function formatExtractSite(raw, input) {
15434
16000
  const parsed = parseData(raw);
@@ -15453,14 +16019,19 @@ ${pageRows}`,
15453
16019
  - Map URLs first: use \`map_site_urls\`
15454
16020
  - Inspect a single page: use \`extract_url\``
15455
16021
  ].join("\n");
15456
- const summary = [
15457
- `**Site Extract: ${input.url}** \u2014 ${pages.length} pages`,
15458
- pages.slice(0, 5).map((p) => `- ${p.title ?? p.url}`).join("\n"),
15459
- pages.length > 5 ? `- \u2026 and ${pages.length - 5} more` : "",
15460
- `
15461
- \u{1F4A1} Use \`extract_url\` to inspect any individual page`
15462
- ].filter(Boolean).join("\n");
15463
- return oneBlock(full);
16022
+ return {
16023
+ ...oneBlock(full),
16024
+ structuredContent: {
16025
+ url: input.url,
16026
+ pageCount: pages.length,
16027
+ pages: pages.map((p) => ({
16028
+ url: String(p.url ?? ""),
16029
+ title: p.title ?? null,
16030
+ schemaTypes: p.kpo?.type ?? []
16031
+ })),
16032
+ durationMs: d.durationMs ?? 0
16033
+ }
16034
+ };
15464
16035
  }
15465
16036
  function formatYoutubeHarvest(raw, input) {
15466
16037
  const parsed = parseData(raw);
@@ -15490,16 +16061,22 @@ ${videoRows}`,
15490
16061
  - Transcribe a video: use \`youtube_transcribe\` with the \`videoId\` above
15491
16062
  - Switch mode: \`mode: "channel"\` with \`channelHandle\` or \`mode: "search"\` with \`query\``
15492
16063
  ].filter(Boolean).join("\n");
15493
- const top5 = videos.slice(0, 5).map((v, i) => `${i + 1}. ${v.title} (\`${v.videoId}\`)`).join("\n");
15494
- const summary = [
15495
- `**YouTube: ${label}** \u2014 ${videos.length} videos`,
15496
- `
15497
- **Top videos:**
15498
- ${top5}`,
15499
- `
15500
- \u{1F4A1} Transcribe any video: \`youtube_transcribe\` with its videoId`
15501
- ].join("\n");
15502
- return oneBlock(full);
16064
+ return {
16065
+ ...oneBlock(full),
16066
+ structuredContent: {
16067
+ mode: input.mode,
16068
+ videoCount: videos.length,
16069
+ channel: d.channelMeta ? { title: d.channelMeta.title ?? null, subscriberCount: d.channelMeta.subscriberCount ?? null } : null,
16070
+ videos: videos.map((v) => ({
16071
+ videoId: String(v.videoId ?? ""),
16072
+ title: String(v.title ?? ""),
16073
+ channelName: v.channelName ?? null,
16074
+ views: v.views ?? null,
16075
+ duration: v.duration ?? null,
16076
+ url: v.url ?? null
16077
+ }))
16078
+ }
16079
+ };
15503
16080
  }
15504
16081
  function formatYoutubeTranscribe(raw, input) {
15505
16082
  const parsed = parseData(raw);
@@ -15529,14 +16106,6 @@ ${chunkRows}` : "",
15529
16106
  ---
15530
16107
  \u{1F4A1} Harvest more from this channel: use \`youtube_harvest\` with \`mode: "channel"\``
15531
16108
  ].filter(Boolean).join("\n");
15532
- const summary = [
15533
- `**YouTube Transcript: \`${input.videoId}\`** \u2014 ${text.split(" ").length} words \xB7 ${durSec}s`,
15534
- `
15535
- **Preview:**
15536
- > ${truncate(text, 300)}`,
15537
- `
15538
- \u{1F4A1} Full transcript in artifact above`
15539
- ].join("\n");
15540
16109
  return oneBlock(full);
15541
16110
  }
15542
16111
  function formatFacebookPageIntel(raw, input) {
@@ -15565,19 +16134,26 @@ ${adBlocks}`,
15565
16134
  - Transcribe video ads: use \`facebook_ad_transcribe\` with the \`videoUrl\` above
15566
16135
  - Find other advertisers: use \`facebook_ad_search\``
15567
16136
  ].filter(Boolean).join("\n");
15568
- const activeAds = ads.filter((a) => a.status?.toLowerCase() === "active").slice(0, 5);
15569
- const adSummary = activeAds.map((a, i) => `${i + 1}. ${truncate(a.headline ?? a.primaryText, 80)} (${a.creativeType ?? "\u2014"})`).join("\n");
15570
- const videoCount = ads.filter((a) => a.videoUrl).length;
15571
- const summary = [
15572
- `**Facebook Ads: ${advertiser}** \u2014 ${s.totalAds} ads (${s.activeCount} active)`,
15573
- adSummary ? `
15574
- **Active ads:**
15575
- ${adSummary}` : "",
15576
- `**Creative mix:** ${s.videoCount} video \xB7 ${s.imageCount} image`,
15577
- videoCount ? `
15578
- \u{1F4A1} ${videoCount} video ads \u2014 transcribe with \`facebook_ad_transcribe\` using the videoUrl` : ""
15579
- ].filter(Boolean).join("\n");
15580
- return oneBlock(full);
16137
+ return {
16138
+ ...oneBlock(full),
16139
+ structuredContent: {
16140
+ advertiserName: d.advertiserName ?? null,
16141
+ totalAds: s.totalAds ?? 0,
16142
+ activeCount: s.activeCount ?? 0,
16143
+ videoCount: s.videoCount ?? 0,
16144
+ imageCount: s.imageCount ?? 0,
16145
+ ads: ads.map((ad) => ({
16146
+ libraryId: ad.libraryId ?? null,
16147
+ status: ad.status ?? null,
16148
+ creativeType: ad.creativeType ?? null,
16149
+ headline: ad.headline ?? null,
16150
+ cta: ad.cta ?? null,
16151
+ startDate: ad.startDate ?? null,
16152
+ videoUrl: ad.videoUrl ?? null,
16153
+ variations: typeof ad.variations === "number" ? ad.variations : null
16154
+ }))
16155
+ }
16156
+ };
15581
16157
  }
15582
16158
  function formatFacebookAdSearch(raw, input) {
15583
16159
  const parsed = parseData(raw);
@@ -15601,15 +16177,18 @@ ${rows}`,
15601
16177
  - Scan all ads: use \`facebook_page_intel\` with \`libraryId\`
15602
16178
  - Or pass the advertiser name as \`query\` in \`facebook_page_intel\``
15603
16179
  ].join("\n");
15604
- const summary = [
15605
- `**Facebook Ad Search: "${input.query}"** \u2014 ${advertisers.length} advertisers`,
15606
- advertisers.slice(0, 5).map(
15607
- (a, i) => `${i + 1}. ${a.name}${a.adCount ? ` (${a.adCount} ads)` : ""} \u2014 \`${a.libraryId ?? "\u2014"}\``
15608
- ).join("\n"),
15609
- `
15610
- \u{1F4A1} Scan ads with \`facebook_page_intel\` using \`libraryId\``
15611
- ].filter(Boolean).join("\n");
15612
- return oneBlock(full);
16180
+ return {
16181
+ ...oneBlock(full),
16182
+ structuredContent: {
16183
+ query: input.query,
16184
+ advertiserCount: advertisers.length,
16185
+ advertisers: advertisers.map((a) => ({
16186
+ name: a.pageName ?? a.name ?? null,
16187
+ adCount: typeof a.adCount === "number" ? a.adCount : null,
16188
+ libraryId: a.sampleLibraryId ?? a.libraryId ?? null
16189
+ }))
16190
+ }
16191
+ };
15613
16192
  }
15614
16193
  function formatCreditsInfo(raw, input) {
15615
16194
  const parsed = parseData(raw);
@@ -15649,14 +16228,75 @@ ${costRows}` : "",
15649
16228
  |------|-----------|---------|-------------|
15650
16229
  ${ledgerRows}` : ""
15651
16230
  ].filter(Boolean).join("\n");
15652
- const summary = [
15653
- `**Credit balance:** ${balance ?? "unknown"} credits`,
15654
- matched ? `
15655
- **${matched.label}:** ${matched.credits} credits ${matched.unit}` : null,
15656
- input.includeLedger && ledger.length ? `
15657
- Recent ledger entries included in the full report.` : null
16231
+ return {
16232
+ ...oneBlock(full),
16233
+ structuredContent: {
16234
+ balanceCredits: typeof balance === "number" ? balance : null,
16235
+ matchedCost: matched ? { label: matched.label, credits: matched.credits, unit: matched.unit, notes: matched.notes ?? null } : null,
16236
+ costs: costs.map((c) => ({
16237
+ key: c.key,
16238
+ label: c.label,
16239
+ credits: c.credits,
16240
+ unit: c.unit,
16241
+ notes: c.notes ?? null
16242
+ })),
16243
+ ledger: ledger.map((row) => ({
16244
+ createdAt: String(row.created_at ?? ""),
16245
+ operation: String(row.operation ?? ""),
16246
+ credits: row.amount_mc / 1e3,
16247
+ description: row.description ?? null
16248
+ }))
16249
+ }
16250
+ };
16251
+ }
16252
+ function formatMapsSearch(raw, input) {
16253
+ const parsed = parseData(raw);
16254
+ if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
16255
+ const d = parsed.data;
16256
+ const results = d.results ?? [];
16257
+ const searchQuery = d.searchQuery ?? [input.query, input.location].filter(Boolean).join(" ");
16258
+ const requestedMax = d.requestedMaxResults ?? input.maxResults ?? 10;
16259
+ const durationMs = d.durationMs;
16260
+ const rows = results.map((r) => {
16261
+ const rating = [r.rating, r.reviewCount ? `(${r.reviewCount})` : null].filter(Boolean).join(" ");
16262
+ return `| ${r.position} | ${cell(r.name)} | ${cell(r.category)} | ${cell(rating)} | ${cell(r.address)} | ${r.cidDecimal ? `\`${r.cidDecimal}\`` : "\u2014"} | ${r.websiteUrl ? `[site](${r.websiteUrl})` : "\u2014"} | [maps](${r.placeUrl}) |`;
16263
+ }).join("\n");
16264
+ const metadataSection = results.length ? `
16265
+ ## Candidate Metadata
16266
+ ${results.map((r) => {
16267
+ const meta = r.metadata?.length ? r.metadata.slice(0, 8).map((m) => ` - ${m}`).join("\n") : " - none";
16268
+ return `### ${r.position}. ${r.name}
16269
+ ${meta}`;
16270
+ }).join("\n\n")}` : "";
16271
+ const full = [
16272
+ `# Google Maps Search: "${searchQuery}"`,
16273
+ `**Returned:** ${results.length} profile candidate${results.length === 1 ? "" : "s"} \xB7 **Requested max:** ${requestedMax} \xB7 **Limit:** 50`,
16274
+ `
16275
+ ## Results
16276
+ | # | Name | Category | Rating | Address | CID | Website | Maps |
16277
+ |---|------|----------|--------|---------|-----|---------|------|
16278
+ ${rows}`,
16279
+ metadataSection,
16280
+ `
16281
+ ---
16282
+ \u{1F4A1} **Next step:** use \`maps_place_intel\` with a selected business name and location to hydrate full hours, phone, review topics, and optional review cards.`,
16283
+ durationMs != null ? `
16284
+ *Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
15658
16285
  ].filter(Boolean).join("\n");
15659
- return oneBlock(full);
16286
+ return {
16287
+ ...oneBlock(full),
16288
+ structuredContent: {
16289
+ query: d.query,
16290
+ location: d.location ?? null,
16291
+ searchQuery: d.searchQuery,
16292
+ searchUrl: d.searchUrl,
16293
+ extractedAt: d.extractedAt,
16294
+ requestedMaxResults: requestedMax,
16295
+ resultCount: results.length,
16296
+ results,
16297
+ durationMs: durationMs ?? 0
16298
+ }
16299
+ };
15660
16300
  }
15661
16301
  function formatMapsPlaceIntel(raw, input) {
15662
16302
  const parsed = parseData(raw);
@@ -15756,20 +16396,28 @@ ${entitySection}` : null,
15756
16396
  ---
15757
16397
  *Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
15758
16398
  ].filter(Boolean).join("\n");
15759
- const summary = [
15760
- `**${name}** \u2014 ${category ?? "Business"} \xB7 ${ratingLine || "No rating"}`,
15761
- address ? `\u{1F4CD} ${address}` : null,
15762
- phone ? `\u{1F4DE} ${phone}` : null,
15763
- hoursSummary ? `\u{1F550} ${hoursSummary}` : null,
15764
- website ? `\u{1F310} ${website}` : null,
15765
- reviewsStatus === "collected" && reviews.length ? `
15766
- \u{1F4AC} ${reviews.length} reviews fetched \u2014 full list in artifact above` : null,
15767
- reviewsStatus === "unavailable" ? `
15768
- \u26A0\uFE0F Reviews could not be retrieved this run` : null,
15769
- reviewsStatus === "none_exist" ? `
15770
- \u{1F4AC} No reviews on Google Maps` : null
15771
- ].filter(Boolean).join("\n");
15772
- return oneBlock(full);
16399
+ return {
16400
+ ...oneBlock(full),
16401
+ structuredContent: {
16402
+ name,
16403
+ rating: rating ?? null,
16404
+ reviewCount: reviewCount ?? null,
16405
+ category: category ?? null,
16406
+ address: address ?? null,
16407
+ phone: phone ?? null,
16408
+ website: website ?? null,
16409
+ hoursSummary: hoursSummary ?? null,
16410
+ bookingUrl: bookingUrl ?? null,
16411
+ kgmid: kgmid ?? null,
16412
+ cidDecimal: cidDecimal ?? null,
16413
+ cidUrl: cidUrl ?? null,
16414
+ lat: lat ?? null,
16415
+ lng: lng ?? null,
16416
+ reviewsStatus,
16417
+ reviewsCollected: reviews.length,
16418
+ reviewTopics: topics.map((t) => ({ label: String(t.label ?? ""), count: String(t.count ?? "") }))
16419
+ }
16420
+ };
15773
16421
  }
15774
16422
  function formatFacebookAdTranscribe(raw, input) {
15775
16423
  const parsed = parseData(raw);
@@ -15799,76 +16447,129 @@ ${chunkRows}` : "",
15799
16447
  ---
15800
16448
  \u{1F4A1} Get more ads from this advertiser: use \`facebook_page_intel\``
15801
16449
  ].filter(Boolean).join("\n");
15802
- const summary = [
15803
- `**Facebook Ad Transcript** \u2014 ${text.split(" ").length} words \xB7 ${durSec}s`,
15804
- `
15805
- **Preview:**
15806
- > ${truncate(text, 300)}`,
15807
- `
15808
- \u{1F4A1} Full transcript in artifact above`
15809
- ].join("\n");
15810
16450
  return oneBlock(full);
15811
16451
  }
15812
- var import_node_fs4, import_node_os3, import_node_path6;
16452
+ var import_node_fs4, import_node_os3, import_node_path6, reportSavingEnabled;
15813
16453
  var init_mcp_response_formatter = __esm({
15814
16454
  "src/mcp/mcp-response-formatter.ts"() {
15815
16455
  "use strict";
15816
16456
  import_node_fs4 = require("fs");
15817
16457
  import_node_os3 = require("os");
15818
16458
  import_node_path6 = require("path");
16459
+ init_errors();
16460
+ reportSavingEnabled = true;
15819
16461
  }
15820
16462
  });
15821
16463
 
15822
16464
  // src/mcp/paa-mcp-server.ts
15823
- function buildPaaExtractorMcpServer(executor) {
15824
- const server = new import_mcp.McpServer({ name: "mcp-scraper", version: "1.0.0" });
16465
+ function liveWebToolAnnotations(title) {
16466
+ return {
16467
+ title,
16468
+ readOnlyHint: true,
16469
+ destructiveHint: false,
16470
+ idempotentHint: false,
16471
+ openWorldHint: true
16472
+ };
16473
+ }
16474
+ function buildPaaExtractorMcpServer(executor, options = {}) {
16475
+ const savesReports = options.savesReportsLocally !== false;
16476
+ const reportNote = savesReports ? " Saves a full Markdown report locally." : " Reports are returned inline; no files are saved on this hosted endpoint.";
16477
+ const withReportNote = (description) => `${description}${reportNote}`;
16478
+ const server = new import_mcp.McpServer({ name: "mcp-scraper", version: PACKAGE_VERSION });
15825
16479
  server.registerTool("harvest_paa", {
15826
- description: 'Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-150 for "full", "deep", "all", or comprehensive research. Credits are charged by extracted question; unused request hold is refunded. Saves a full Markdown report locally.',
15827
- inputSchema: HarvestPaaInputSchema
16480
+ title: "Google PAA + SERP Harvest",
16481
+ description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-150 for "full", "deep", "all", or comprehensive research. Credits are charged by extracted question; unused request hold is refunded.'),
16482
+ inputSchema: HarvestPaaInputSchema,
16483
+ outputSchema: HarvestPaaOutputSchema,
16484
+ annotations: liveWebToolAnnotations("Google PAA + SERP Harvest")
15828
16485
  }, async (input) => formatHarvestPaa(await executor.harvestPaa(input), input));
15829
16486
  server.registerTool("search_serp", {
15830
- description: "Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request. Saves a full Markdown report locally.",
15831
- inputSchema: SearchSerpInputSchema
16487
+ title: "Google SERP Lookup",
16488
+ description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request."),
16489
+ inputSchema: SearchSerpInputSchema,
16490
+ outputSchema: SearchSerpOutputSchema,
16491
+ annotations: liveWebToolAnnotations("Google SERP Lookup")
15832
16492
  }, async (input) => formatSearchSerp(await executor.searchSerp(input), input));
15833
16493
  server.registerTool("extract_url", {
15834
- description: "Extract structured data from one public URL: page content as Markdown, heading structure, JSON-LD schema, entity details, NAP score, metadata, and missing schema fields. Use when the user provides a single URL or asks to inspect/scrape one page. Saves a full Markdown report locally.",
15835
- inputSchema: ExtractUrlInputSchema
16494
+ title: "Single URL Extract",
16495
+ description: withReportNote("Extract structured data from one public URL: page content as Markdown, heading structure, JSON-LD schema, entity details, NAP score, metadata, and missing schema fields. Use when the user provides a single URL or asks to inspect/scrape one page."),
16496
+ inputSchema: ExtractUrlInputSchema,
16497
+ outputSchema: ExtractUrlOutputSchema,
16498
+ annotations: liveWebToolAnnotations("Single URL Extract")
15836
16499
  }, async (input) => formatExtractUrl(await executor.extractUrl(input), input));
15837
16500
  server.registerTool("map_site_urls", {
15838
- description: "Map/crawl a public website to build a URL inventory with HTTP status codes, broken links, redirects, and site scope. Use before extract_site for audits or when the user asks for a sitemap/URL inventory. Saves a full Markdown report locally.",
15839
- inputSchema: MapSiteUrlsInputSchema
16501
+ title: "Site URL Map",
16502
+ description: withReportNote("Map/crawl a public website to build a URL inventory with HTTP status codes, broken links, redirects, and site scope. Use before extract_site for audits or when the user asks for a sitemap/URL inventory."),
16503
+ inputSchema: MapSiteUrlsInputSchema,
16504
+ outputSchema: MapSiteUrlsOutputSchema,
16505
+ annotations: liveWebToolAnnotations("Site URL Map")
15840
16506
  }, async (input) => formatMapSiteUrls(await executor.mapSiteUrls(input), input));
15841
16507
  server.registerTool("extract_site", {
15842
- description: "Run multi-page extraction across a public website. Returns per-page titles, H1s, metadata, headings, schema/entity data, canonical URLs, and content. Use for website audits, competitor audits, and full-site extraction. Saves a full Markdown report locally.",
15843
- inputSchema: ExtractSiteInputSchema
16508
+ title: "Multi-Page Site Extract",
16509
+ description: withReportNote("Run multi-page extraction across a public website. Returns per-page titles, H1s, metadata, headings, schema/entity data, canonical URLs, and content. Use for website audits, competitor audits, and full-site extraction."),
16510
+ inputSchema: ExtractSiteInputSchema,
16511
+ outputSchema: ExtractSiteOutputSchema,
16512
+ annotations: liveWebToolAnnotations("Multi-Page Site Extract")
15844
16513
  }, async (input) => formatExtractSite(await executor.extractSite(input), input));
15845
16514
  server.registerTool("youtube_harvest", {
15846
- description: 'Harvest YouTube video metadata by search query or channel handle/ID/URL. Use mode "search" for keyword/topic requests and mode "channel" for @handles, channel IDs, or channel URLs. Returns titles, views, dates, durations, URLs, thumbnails, and videoIds for follow-up transcription. Saves a full Markdown report locally.',
15847
- inputSchema: YoutubeHarvestInputSchema
16515
+ title: "YouTube Video Harvest",
16516
+ description: withReportNote('Harvest YouTube video metadata by search query or channel handle/ID/URL. Use mode "search" for keyword/topic requests and mode "channel" for @handles, channel IDs, or channel URLs. Returns titles, views, dates, durations, URLs, thumbnails, and videoIds for follow-up transcription.'),
16517
+ inputSchema: YoutubeHarvestInputSchema,
16518
+ outputSchema: YoutubeHarvestOutputSchema,
16519
+ annotations: liveWebToolAnnotations("YouTube Video Harvest")
15848
16520
  }, async (input) => formatYoutubeHarvest(await executor.youtubeHarvest(input), input));
15849
16521
  server.registerTool("youtube_transcribe", {
15850
- description: "Fetch and transcribe captions from a YouTube video. Returns full transcript, timestamped chunks, and word count. Pass a videoId from youtube_harvest results or infer it from a YouTube URL if the user provided one. Saves a full Markdown report locally.",
15851
- inputSchema: YoutubeTranscribeInputSchema
16522
+ title: "YouTube Transcription",
16523
+ description: withReportNote("Fetch and transcribe captions from a YouTube video. Returns full transcript, timestamped chunks, and word count. Pass a videoId from youtube_harvest results or infer it from a YouTube URL if the user provided one."),
16524
+ inputSchema: YoutubeTranscribeInputSchema,
16525
+ annotations: liveWebToolAnnotations("YouTube Transcription")
15852
16526
  }, async (input) => formatYoutubeTranscribe(await executor.youtubeTranscribe(input), input));
15853
16527
  server.registerTool("facebook_page_intel", {
15854
- description: "Harvest ads from a Facebook advertiser. Returns ad copy, headlines, CTAs, creative type, status, landing URLs, and video URLs ready for transcription. Accepts pageId, libraryId, or a brand/advertiser name as query. Use after facebook_ad_search when possible. Saves a full Markdown report locally.",
15855
- inputSchema: FacebookPageIntelInputSchema
16528
+ title: "Facebook Advertiser Ad Intel",
16529
+ description: withReportNote("Harvest ads from a Facebook advertiser. Returns ad copy, headlines, CTAs, creative type, status, landing URLs, and video URLs ready for transcription. Accepts pageId, libraryId, or a brand/advertiser name as query. Use after facebook_ad_search when possible."),
16530
+ inputSchema: FacebookPageIntelInputSchema,
16531
+ outputSchema: FacebookPageIntelOutputSchema,
16532
+ annotations: liveWebToolAnnotations("Facebook Advertiser Ad Intel")
15856
16533
  }, async (input) => formatFacebookPageIntel(await executor.facebookPageIntel(input), input));
15857
16534
  server.registerTool("facebook_ad_search", {
15858
- description: "Search Facebook Ad Library by brand, advertiser, competitor, niche, or keyword. Returns advertisers with ad counts and library IDs. Use to discover competitors, then pass libraryId to facebook_page_intel. Saves a full Markdown report locally.",
15859
- inputSchema: FacebookAdSearchInputSchema
16535
+ title: "Facebook Ad Library Search",
16536
+ description: withReportNote("Search Facebook Ad Library by brand, advertiser, competitor, niche, or keyword. Returns advertisers with ad counts and library IDs. Use to discover competitors, then pass libraryId to facebook_page_intel."),
16537
+ inputSchema: FacebookAdSearchInputSchema,
16538
+ outputSchema: FacebookAdSearchOutputSchema,
16539
+ annotations: liveWebToolAnnotations("Facebook Ad Library Search")
15860
16540
  }, async (input) => formatFacebookAdSearch(await executor.facebookAdSearch(input), input));
15861
16541
  server.registerTool("facebook_ad_transcribe", {
16542
+ title: "Facebook Ad Transcription",
15862
16543
  description: "Transcribe audio from a Facebook ad video. Returns full transcript and timestamped chunks. Use the videoUrl value from facebook_page_intel results.",
15863
- inputSchema: FacebookAdTranscribeInputSchema
16544
+ inputSchema: FacebookAdTranscribeInputSchema,
16545
+ annotations: liveWebToolAnnotations("Facebook Ad Transcription")
15864
16546
  }, async (input) => formatFacebookAdTranscribe(await executor.facebookAdTranscribe(input), input));
15865
16547
  server.registerTool("maps_place_intel", {
15866
- description: 'Extract Google Maps business intelligence for a named business: rating, review count, category, address, phone, website, hours, booking URL, review histogram, review topics, about attributes, entity IDs, and optional review cards. Split business name from location (e.g. "Elite Roofing Denver CO" => businessName "Elite Roofing", location "Denver, CO"). Pass includeReviews true when the user asks for reviews/customer pain. Saves a full Markdown report locally.',
15867
- inputSchema: MapsPlaceIntelInputSchema
16548
+ title: "Google Maps Business Profile Details",
16549
+ description: withReportNote('Extract Google Maps business intelligence for one known/named business: rating, review count, category, address, phone, website, hours, booking URL, review histogram, review topics, about attributes, entity IDs, and optional review cards. Do not use this for category searches, local market prospect lists, or requests for multiple GMB/GBP profiles; use maps_search first for those. Split business name from location (e.g. "Elite Roofing Denver CO" => businessName "Elite Roofing", location "Denver, CO"). Pass includeReviews true when the user asks for reviews/customer pain.'),
16550
+ inputSchema: MapsPlaceIntelInputSchema,
16551
+ outputSchema: MapsPlaceIntelOutputSchema,
16552
+ annotations: liveWebToolAnnotations("Google Maps Business Profile Details")
15868
16553
  }, async (input) => formatMapsPlaceIntel(await executor.mapsPlaceIntel(input), input));
16554
+ server.registerTool("maps_search", {
16555
+ title: "Google Maps Business Search",
16556
+ description: withReportNote('Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.'),
16557
+ inputSchema: MapsSearchInputSchema,
16558
+ outputSchema: MapsSearchOutputSchema,
16559
+ annotations: liveWebToolAnnotations("Google Maps Business Search")
16560
+ }, async (input) => formatMapsSearch(await executor.mapsSearch(input), input));
15869
16561
  server.registerTool("credits_info", {
16562
+ title: "MCP Scraper Credits & Costs",
15870
16563
  description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",
15871
- inputSchema: CreditsInfoInputSchema
16564
+ inputSchema: CreditsInfoInputSchema,
16565
+ outputSchema: CreditsInfoOutputSchema,
16566
+ annotations: {
16567
+ title: "MCP Scraper Credits & Costs",
16568
+ readOnlyHint: true,
16569
+ destructiveHint: false,
16570
+ idempotentHint: true,
16571
+ openWorldHint: false
16572
+ }
15872
16573
  }, async (input) => formatCreditsInfo(await executor.creditsInfo(input), input));
15873
16574
  return server;
15874
16575
  }
@@ -15877,6 +16578,7 @@ var init_paa_mcp_server = __esm({
15877
16578
  "src/mcp/paa-mcp-server.ts"() {
15878
16579
  "use strict";
15879
16580
  import_mcp = require("@modelcontextprotocol/sdk/server/mcp.js");
16581
+ init_version();
15880
16582
  init_mcp_tool_schemas();
15881
16583
  init_mcp_response_formatter();
15882
16584
  }
@@ -15976,6 +16678,9 @@ var init_http_mcp_tool_executor = __esm({
15976
16678
  mapsPlaceIntel(input) {
15977
16679
  return this.call("/maps/place", input);
15978
16680
  }
16681
+ mapsSearch(input) {
16682
+ return this.call("/maps/search", input);
16683
+ }
15979
16684
  creditsInfo(input) {
15980
16685
  return this.call("/billing/credits", input);
15981
16686
  }
@@ -16015,15 +16720,18 @@ async function requireMcpCallerKey(c) {
16015
16720
  return callerKey;
16016
16721
  }
16017
16722
  function registerSerpIntelligenceCaptureTools(server, executor) {
16018
- const serpExecutor = executor;
16019
16723
  server.registerTool("capture_serp_snapshot", {
16724
+ title: "SERP Intelligence Snapshot",
16020
16725
  description: "Capture a structured SERP Intelligence Google snapshot through POST /serp-intelligence/capture, the same product capture path used by Phoenix. Split query from location, infer gl/hl, use proxyMode location for localized residential proxy evidence, configured for the static residential proxy, and none only for direct-network debugging. Set debug true when investigating location evidence, proxy behavior, CAPTCHA, or capture reliability.",
16021
- inputSchema: CaptureSerpSnapshotInputSchema
16022
- }, async (input) => serpExecutor.captureSerpSnapshot ? serpExecutor.captureSerpSnapshot(input) : Promise.resolve({ content: [{ type: "text", text: "{}" }], isError: true }));
16726
+ inputSchema: CaptureSerpSnapshotInputSchema,
16727
+ annotations: liveWebToolAnnotations("SERP Intelligence Snapshot")
16728
+ }, async (input) => executor.captureSerpSnapshot(input));
16023
16729
  server.registerTool("capture_serp_page_snapshots", {
16730
+ title: "SERP Intelligence Page Snapshots",
16024
16731
  description: "Capture public ranking-page evidence through POST /serp-intelligence/page-snapshots, the same product page snapshot path used by Phoenix. Provide urls for simple captures or targets when preserving organic, AI citation, local-pack, configured target, or site-subject source metadata. Private IPs, localhost, file URLs, and internal URLs are rejected by the service. Use timeoutMs for slow pages and debug true for sanitized proxy/browser diagnostics.",
16025
- inputSchema: CaptureSerpPageSnapshotsInputSchema
16026
- }, async (input) => serpExecutor.captureSerpPageSnapshots ? serpExecutor.captureSerpPageSnapshots(input) : Promise.resolve({ content: [{ type: "text", text: "{}" }], isError: true }));
16732
+ inputSchema: CaptureSerpPageSnapshotsInputSchema,
16733
+ annotations: liveWebToolAnnotations("SERP Intelligence Page Snapshots")
16734
+ }, async (input) => executor.captureSerpPageSnapshots(input));
16027
16735
  }
16028
16736
  var import_hono7, import_webStandardStreamableHttp, mcpApp;
16029
16737
  var init_mcp_routes = __esm({
@@ -16033,8 +16741,10 @@ var init_mcp_routes = __esm({
16033
16741
  import_webStandardStreamableHttp = require("@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js");
16034
16742
  init_paa_mcp_server();
16035
16743
  init_http_mcp_tool_executor();
16744
+ init_mcp_response_formatter();
16036
16745
  init_db();
16037
16746
  init_mcp_tool_schemas();
16747
+ configureReportSaving(false);
16038
16748
  mcpApp = new import_hono7.Hono();
16039
16749
  mcpApp.all("/", async (c) => {
16040
16750
  try {
@@ -16047,7 +16757,7 @@ var init_mcp_routes = __esm({
16047
16757
  sessionIdGenerator: void 0,
16048
16758
  enableJsonResponse: true
16049
16759
  });
16050
- const server = buildPaaExtractorMcpServer(executor);
16760
+ const server = buildPaaExtractorMcpServer(executor, { savesReportsLocally: false });
16051
16761
  registerSerpIntelligenceCaptureTools(server, executor);
16052
16762
  await server.connect(transport);
16053
16763
  return transport.handleRequest(c.req.raw);
@@ -16432,7 +17142,7 @@ async function processJob(job) {
16432
17142
  const opts = typeof job.options === "string" ? JSON.parse(job.options) : job.options;
16433
17143
  const result = await harvest({
16434
17144
  ...opts,
16435
- kernelApiKey: process.env.KERNEL_API_KEY,
17145
+ kernelApiKey: browserServiceApiKey(),
16436
17146
  headless: true,
16437
17147
  format: "json",
16438
17148
  outputDir: "/tmp/paa-output-api",
@@ -16497,6 +17207,7 @@ var init_worker = __esm({
16497
17207
  "src/api/worker.ts"() {
16498
17208
  "use strict";
16499
17209
  init_db();
17210
+ init_browser_service_env();
16500
17211
  init_harvest();
16501
17212
  init_webhook();
16502
17213
  init_rates();
@@ -16599,6 +17310,8 @@ var init_server = __esm({
16599
17310
  "src/api/server.ts"() {
16600
17311
  "use strict";
16601
17312
  init_harvest_timeout();
17313
+ init_browser_service_env();
17314
+ init_outbound_sanitize();
16602
17315
  init_registry();
16603
17316
  init_template();
16604
17317
  init_og();
@@ -16915,7 +17628,7 @@ var init_server = __esm({
16915
17628
  try {
16916
17629
  const result = await harvest({
16917
17630
  ...options,
16918
- kernelApiKey: process.env.KERNEL_API_KEY?.trim(),
17631
+ kernelApiKey: browserServiceApiKey(),
16919
17632
  headless: true,
16920
17633
  format: "json",
16921
17634
  outputDir: "/tmp/paa-output-api",
@@ -16930,7 +17643,7 @@ var init_server = __esm({
16930
17643
  if (diff > 0) await creditMc(user.id, diff, LedgerOperation.PAA_REFUND, "overestimate refund");
16931
17644
  else if (diff < 0) await debitMc(user.id, -diff, LedgerOperation.PAA, options.query);
16932
17645
  }
16933
- return c.json({ job_id: jobId, status: "done", result, attempts });
17646
+ return c.json({ job_id: jobId, status: "done", result: sanitizeHarvestResult(result), attempts: sanitizeAttempts(attempts) });
16934
17647
  } catch (err) {
16935
17648
  const problem = classifyHarvestProblem(err);
16936
17649
  const response = harvestProblemResponse(problem);
@@ -16938,18 +17651,19 @@ var init_server = __esm({
16938
17651
  if (problem.terminalStatus === "cancelled" || c.req.raw.signal.aborted) {
16939
17652
  await cancelJob(jobId, serializeHarvestProblem(problem));
16940
17653
  await creditMc(user.id, syncCost, LedgerOperation.REFUND, "cancelled call");
16941
- return c.json({ job_id: jobId, status: "cancelled", ...response, attempts }, problem.httpStatus);
17654
+ return c.json({ job_id: jobId, status: "cancelled", ...response, attempts: sanitizeAttempts(attempts) }, problem.httpStatus);
16942
17655
  }
16943
17656
  await failJob(jobId, serializeHarvestProblem(problem));
16944
17657
  await creditMc(user.id, syncCost, LedgerOperation.REFUND, "failed call");
16945
- return c.json({ job_id: jobId, status: "failed", ...response, attempts }, problem.httpStatus);
17658
+ return c.json({ job_id: jobId, status: "failed", ...response, attempts: sanitizeAttempts(attempts) }, problem.httpStatus);
16946
17659
  }
16947
17660
  });
16948
17661
  app.get("/jobs/:id", auth, async (c) => {
16949
17662
  const job = await getJob(c.req.param("id"), c.get("user").id);
16950
17663
  if (!job) return c.json({ error: "Job not found" }, 404);
16951
17664
  const attempts = await listHarvestAttempts(job.id, c.get("user").id);
16952
- return c.json({ ...job, attempts });
17665
+ const safeResult = job.result && typeof job.result === "object" ? sanitizeHarvestResult(job.result) : job.result;
17666
+ return c.json({ ...job, result: safeResult, attempts: sanitizeAttempts(attempts) });
16953
17667
  });
16954
17668
  app.get("/jobs", auth, async (c) => {
16955
17669
  return c.json(await listJobs(c.get("user").id));
@@ -17048,7 +17762,7 @@ var init_server = __esm({
17048
17762
  const { ok: euOk, balance_mc: euBal } = await debitMc(user.id, MC_COSTS.page_scrape, LedgerOperation.EXTRACT_URL, new URL(canonicalUrl).hostname);
17049
17763
  if (!euOk) return c.json(insufficientBalanceResponse(euBal, MC_COSTS.page_scrape), 402);
17050
17764
  try {
17051
- const kernelApiKey = process.env.KERNEL_API_KEY?.trim();
17765
+ const kernelApiKey = browserServiceApiKey();
17052
17766
  const device = screenshotDevice === "mobile" ? "mobile" : "desktop";
17053
17767
  const [result, pageData] = await Promise.all([
17054
17768
  extractKpo({ url: canonicalUrl, kernelApiKey }),
@@ -17086,7 +17800,7 @@ var init_server = __esm({
17086
17800
  startUrl: parsed.href,
17087
17801
  maxUrls: Math.min(2e3, Math.max(1, body.maxUrls ?? 500)),
17088
17802
  concurrency: Math.min(20, Math.max(1, body.concurrency ?? 12)),
17089
- kernelApiKey: body.browserFallback ?? body.kernelFallback ? process.env.KERNEL_API_KEY : void 0
17803
+ kernelApiKey: body.browserFallback ?? body.kernelFallback ? browserServiceApiKey() : void 0
17090
17804
  });
17091
17805
  await logRequestEvent({
17092
17806
  userId: user.id,
@@ -17126,7 +17840,7 @@ var init_server = __esm({
17126
17840
  const result = await extractSite({
17127
17841
  startUrl: parsed.href,
17128
17842
  maxPages: Math.min(200, Math.max(1, body.maxPages ?? 100)),
17129
- kernelApiKey: body.browserFallback ?? body.kernelFallback ? process.env.KERNEL_API_KEY : void 0
17843
+ kernelApiKey: body.browserFallback ?? body.kernelFallback ? browserServiceApiKey() : void 0
17130
17844
  });
17131
17845
  const pageCount = result.pages?.length ?? 1;
17132
17846
  const actualSiteMc = pageCount * MC_COSTS.page_scrape;