mcp-scraper 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/README.md +13 -2
  2. package/dist/bin/api-server.cjs +957 -243
  3. package/dist/bin/api-server.cjs.map +1 -1
  4. package/dist/bin/api-server.js +2 -2
  5. package/dist/bin/mcp-stdio-server.cjs +540 -158
  6. package/dist/bin/mcp-stdio-server.cjs.map +1 -1
  7. package/dist/bin/mcp-stdio-server.js +2 -1
  8. package/dist/bin/mcp-stdio-server.js.map +1 -1
  9. package/dist/bin/paa-harvest.cjs +36 -5
  10. package/dist/bin/paa-harvest.cjs.map +1 -1
  11. package/dist/bin/paa-harvest.js +5 -3
  12. package/dist/bin/paa-harvest.js.map +1 -1
  13. package/dist/{chunk-6TWZS2FQ.js → chunk-RE6HCRYC.js} +543 -159
  14. package/dist/chunk-RE6HCRYC.js.map +1 -0
  15. package/dist/{chunk-W4P2U5VF.js → chunk-TM22BLWP.js} +46 -34
  16. package/dist/chunk-TM22BLWP.js.map +1 -0
  17. package/dist/{chunk-7HB7NDOY.js → chunk-ZK456YXN.js} +12 -2
  18. package/dist/chunk-ZK456YXN.js.map +1 -0
  19. package/dist/chunk-ZMOWIBMK.js +36 -0
  20. package/dist/chunk-ZMOWIBMK.js.map +1 -0
  21. package/dist/index.cjs +34 -3
  22. package/dist/index.cjs.map +1 -1
  23. package/dist/index.js +2 -1
  24. package/dist/index.js.map +1 -1
  25. package/dist/{server-2Y27U4TO.js → server-QXVVTKJP.js} +311 -48
  26. package/dist/server-QXVVTKJP.js.map +1 -0
  27. package/dist/{worker-UT4ZQU2T.js → worker-AUCXFHEL.js} +6 -4
  28. package/dist/worker-AUCXFHEL.js.map +1 -0
  29. package/docs/adr/0001-in-page-graphql-interception-for-anti-bot-scraping.md +58 -0
  30. package/docs/adr/README.md +11 -0
  31. package/docs/mcp-tool-quality-spec.md +238 -0
  32. package/package.json +5 -4
  33. package/dist/chunk-6TWZS2FQ.js.map +0 -1
  34. package/dist/chunk-7HB7NDOY.js.map +0 -1
  35. package/dist/chunk-W4P2U5VF.js.map +0 -1
  36. package/dist/server-2Y27U4TO.js.map +0 -1
  37. package/dist/worker-UT4ZQU2T.js.map +0 -1
@@ -3,8 +3,10 @@ import {
3
3
  CaptureSerpSnapshotInputSchema,
4
4
  HttpMcpToolExecutor,
5
5
  buildPaaExtractorMcpServer,
6
- harvestTimeoutBudget
7
- } from "./chunk-6TWZS2FQ.js";
6
+ configureReportSaving,
7
+ harvestTimeoutBudget,
8
+ liveWebToolAnnotations
9
+ } from "./chunk-RE6HCRYC.js";
8
10
  import {
9
11
  BALANCE_PACK_LABELS,
10
12
  BALANCE_PRICE_IDS,
@@ -20,20 +22,27 @@ import {
20
22
  harvestProblemResponse,
21
23
  insufficientBalanceResponse,
22
24
  serializeHarvestProblem
23
- } from "./chunk-7HB7NDOY.js";
25
+ } from "./chunk-ZK456YXN.js";
24
26
  import {
25
27
  BrowserDriver,
26
- CaptchaError,
27
28
  MapsPlaceOptionsSchema,
29
+ MapsSearchOptionsSchema,
28
30
  MapsSelectors,
29
31
  RawMapsAboutAttributeSchema,
30
32
  RawMapsHoursRowSchema,
31
33
  RawMapsOverviewSchema,
32
34
  RawMapsReviewStatsSchema,
35
+ browserServiceApiKey,
36
+ browserServiceProxyId,
33
37
  buildYouTubeChannelVideosUrl,
34
38
  harvest,
35
39
  resolveKernelProxyId
36
- } from "./chunk-W4P2U5VF.js";
40
+ } from "./chunk-TM22BLWP.js";
41
+ import {
42
+ CaptchaError,
43
+ RECAPTCHA_INSTRUCTIONS,
44
+ sanitizeVendorName
45
+ } from "./chunk-ZMOWIBMK.js";
37
46
  import {
38
47
  SiteAuditJobRowSchema,
39
48
  cancelJob,
@@ -77,6 +86,53 @@ import {
77
86
  verifyPassword
78
87
  } from "./chunk-D4CJBZBY.js";
79
88
 
89
+ // src/api/outbound-sanitize.ts
90
+ var KEY_RENAMES = {
91
+ kernel: "browserRuntime",
92
+ kernel_session_id: "browser_session_id",
93
+ kernel_delete_started: "session_cleanup_started",
94
+ kernel_delete_succeeded: "session_cleanup_succeeded",
95
+ kernel_delete_error: "session_cleanup_error",
96
+ kernelSessionId: "browserSessionId",
97
+ kernelDeleteStarted: "sessionCleanupStarted",
98
+ kernelDeleteSucceeded: "sessionCleanupSucceeded",
99
+ kernelDeleteError: "sessionCleanupError",
100
+ kernelProxyId: "proxyId"
101
+ };
102
+ var SANITIZED_VALUE_KEYS = /error|message/i;
103
+ function sanitizeOutboundDiagnostics(value, parentKey = "") {
104
+ if (typeof value === "string") {
105
+ if (SANITIZED_VALUE_KEYS.test(parentKey) && /kernel/i.test(value)) {
106
+ return sanitizeVendorName(value);
107
+ }
108
+ return value;
109
+ }
110
+ if (Array.isArray(value)) return value.map((v) => sanitizeOutboundDiagnostics(v, parentKey));
111
+ if (value !== null && typeof value === "object") {
112
+ const out = {};
113
+ for (const [key, val] of Object.entries(value)) {
114
+ const renamed = KEY_RENAMES[key] ?? key;
115
+ out[renamed] = sanitizeOutboundDiagnostics(val, key);
116
+ }
117
+ return out;
118
+ }
119
+ return value;
120
+ }
121
+ function sanitizeAttempts(attempts) {
122
+ return attempts.map((a) => sanitizeOutboundDiagnostics(a));
123
+ }
124
+ function sanitizeHarvestResult(result) {
125
+ const diagnostics = result?.diagnostics;
126
+ if (!diagnostics?.debug) return result;
127
+ return {
128
+ ...result,
129
+ diagnostics: {
130
+ ...diagnostics,
131
+ debug: sanitizeOutboundDiagnostics(diagnostics.debug)
132
+ }
133
+ };
134
+ }
135
+
80
136
  // src/blog/registry.ts
81
137
  var posts = [
82
138
  {
@@ -3439,7 +3495,7 @@ import TurndownService from "turndown";
3439
3495
  import Kernel from "@onkernel/sdk";
3440
3496
  import { chromium } from "playwright";
3441
3497
  async function fetchWithKernel(url) {
3442
- const apiKey = process.env.KERNEL_API_KEY;
3498
+ const apiKey = browserServiceApiKey();
3443
3499
  if (!apiKey) throw new Error("Browser backend API key not set");
3444
3500
  const client = new Kernel({ apiKey });
3445
3501
  const kb = await client.browsers.create({ stealth: true, timeout_seconds: 60 });
@@ -3474,9 +3530,9 @@ async function extractKpo(opts) {
3474
3530
  redirect: "manual"
3475
3531
  });
3476
3532
  if (res.status >= 300 && res.status < 400) {
3477
- const location = res.headers.get("location");
3478
- if (!location) return null;
3479
- const next = new URL(location, target).href;
3533
+ const location2 = res.headers.get("location");
3534
+ if (!location2) return null;
3535
+ const next = new URL(location2, target).href;
3480
3536
  const checkedRedirect = await validatePublicHttpUrl(next, { field: "redirect URL" });
3481
3537
  if (checkedRedirect.error || !checkedRedirect.parsed) return null;
3482
3538
  target = checkedRedirect.parsed.href;
@@ -7606,7 +7662,7 @@ async function writeOutputs(result, outputDir) {
7606
7662
  }
7607
7663
  }
7608
7664
  async function ytHarvest(rawOptions) {
7609
- const kernelApiKey = process.env.KERNEL_API_KEY;
7665
+ const kernelApiKey = browserServiceApiKey();
7610
7666
  if (!kernelApiKey) {
7611
7667
  throw new Error("A browser backend API key is required \u2014 YouTube harvesting requires a stealth session.");
7612
7668
  }
@@ -7701,7 +7757,7 @@ function parseTimedtextXml(xml) {
7701
7757
  return results;
7702
7758
  }
7703
7759
  async function fetchViaKernelInnertube(videoId) {
7704
- const kernelApiKey = process.env.KERNEL_API_KEY;
7760
+ const kernelApiKey = browserServiceApiKey();
7705
7761
  if (!kernelApiKey) return null;
7706
7762
  const driver = new BrowserDriver();
7707
7763
  const start = Date.now();
@@ -7845,7 +7901,7 @@ async function attemptKernelWhisper(videoId, kernelApiKey, falKey, start) {
7845
7901
  }
7846
7902
  }
7847
7903
  async function fetchViaKernelWhisper(videoId) {
7848
- const kernelApiKey = process.env.KERNEL_API_KEY;
7904
+ const kernelApiKey = browserServiceApiKey();
7849
7905
  const falKey = process.env.FAL_KEY;
7850
7906
  if (!kernelApiKey || !falKey) return null;
7851
7907
  const start = Date.now();
@@ -8116,7 +8172,7 @@ screenshotApp.post("/", async (c) => {
8116
8172
  }
8117
8173
  const device2 = body.device === "mobile" ? "mobile" : "desktop";
8118
8174
  try {
8119
- const buf = await captureScreenshot(parsedFallback.href, process.env.KERNEL_API_KEY?.trim(), device2);
8175
+ const buf = await captureScreenshot(parsedFallback.href, browserServiceApiKey(), device2);
8120
8176
  return new Response(new Uint8Array(buf), {
8121
8177
  status: 200,
8122
8178
  headers: {
@@ -8132,7 +8188,7 @@ screenshotApp.post("/", async (c) => {
8132
8188
  }
8133
8189
  const device = body.device === "mobile" ? "mobile" : "desktop";
8134
8190
  try {
8135
- const buf = await captureScreenshot(urlCheck.parsed.href, process.env.KERNEL_API_KEY?.trim(), device);
8191
+ const buf = await captureScreenshot(urlCheck.parsed.href, browserServiceApiKey(), device);
8136
8192
  return new Response(new Uint8Array(buf), {
8137
8193
  status: 200,
8138
8194
  headers: {
@@ -8959,23 +9015,23 @@ function buildPageIntelUrl(body, country) {
8959
9015
  return `https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=${country}&q=${encodeURIComponent(body.query.trim())}&search_type=keyword_unordered`;
8960
9016
  }
8961
9017
  function kernelLaunchOpts() {
8962
- return { headless: true, kernelApiKey: process.env.KERNEL_API_KEY?.trim(), kernelProxyId: process.env.KERNEL_PROXY_ID?.trim(), viewport: { width: 1280, height: 900 }, locale: "en-US" };
9018
+ return { headless: true, kernelApiKey: browserServiceApiKey(), kernelProxyId: browserServiceProxyId(), viewport: { width: 1280, height: 900 }, locale: "en-US" };
8963
9019
  }
8964
9020
  async function kernelLaunchOptsResidential() {
8965
- let proxyId = process.env.KERNEL_PROXY_ID?.trim();
9021
+ let proxyId = browserServiceProxyId();
8966
9022
  try {
8967
9023
  const resolution = await resolveKernelProxyId({
8968
- kernelApiKey: process.env.KERNEL_API_KEY?.trim(),
9024
+ kernelApiKey: browserServiceApiKey(),
8969
9025
  proxyMode: "location",
8970
- configuredKernelProxyId: process.env.KERNEL_PROXY_ID?.trim(),
9026
+ configuredKernelProxyId: browserServiceProxyId(),
8971
9027
  location: "New York, NY",
8972
9028
  gl: "us"
8973
9029
  });
8974
9030
  if (resolution.kernelProxyId) proxyId = resolution.kernelProxyId;
8975
9031
  } catch {
8976
- proxyId = process.env.KERNEL_PROXY_ID?.trim();
9032
+ proxyId = browserServiceProxyId();
8977
9033
  }
8978
- return { headless: true, kernelApiKey: process.env.KERNEL_API_KEY?.trim(), kernelProxyId: proxyId, viewport: { width: 1280, height: 900 }, locale: "en-US" };
9034
+ return { headless: true, kernelApiKey: browserServiceApiKey(), kernelProxyId: proxyId, viewport: { width: 1280, height: 900 }, locale: "en-US" };
8979
9035
  }
8980
9036
  var facebookAdApp = new Hono4();
8981
9037
  facebookAdApp.post("/ad", createApiKeyAuth(), async (c) => {
@@ -9242,8 +9298,8 @@ var MapsNavigator = class {
9242
9298
  this.page = page;
9243
9299
  }
9244
9300
  page;
9245
- async navigateToPlacePage(businessName, location) {
9246
- const query = `${businessName} ${location}`;
9301
+ async navigateToPlacePage(businessName, location2) {
9302
+ const query = `${businessName} ${location2}`;
9247
9303
  const searchUrl = `https://www.google.com/maps/search/${encodeURIComponent(query)}`;
9248
9304
  await this.page.goto(searchUrl, { waitUntil: "domcontentloaded", timeout: 45e3 });
9249
9305
  const onPlacePage = await this.page.evaluate(() => /\/maps\/place\//.test(window.location.href));
@@ -9668,8 +9724,213 @@ var MapsExtractor = class {
9668
9724
  }
9669
9725
  };
9670
9726
 
9727
+ // src/extractor/MapsSearchExtractor.ts
9728
+ var MAPS_SEARCH_SCROLL_BUDGET_MS = 6e4;
9729
+ var MAPS_SEARCH_SCROLL_STEP_MS = 1200;
9730
+ var MAPS_SEARCH_MAX_NO_GROWTH_ROUNDS = 4;
9731
+ var MapsSearchExtractor = class {
9732
+ constructor(driver) {
9733
+ this.driver = driver;
9734
+ }
9735
+ driver;
9736
+ async extract(options) {
9737
+ const startMs = Date.now();
9738
+ const searchQuery = [options.query, options.location].filter(Boolean).join(" ");
9739
+ const searchUrl = `https://www.google.com/maps/search/${encodeURIComponent(searchQuery)}?hl=${encodeURIComponent(options.hl)}`;
9740
+ const config = {
9741
+ headless: options.headless,
9742
+ kernelApiKey: options.kernelApiKey,
9743
+ kernelProxyId: options.kernelProxyId,
9744
+ viewport: { width: 1280, height: 900 },
9745
+ locale: `${options.hl}-${options.gl.toUpperCase()}`
9746
+ };
9747
+ try {
9748
+ await this.driver.launch(config);
9749
+ const page = this.driver.getPage();
9750
+ await page.goto(searchUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
9751
+ await page.waitForTimeout(3e3);
9752
+ const blocked = await this.detectBlock(page);
9753
+ if (blocked) throw new CaptchaError(RECAPTCHA_INSTRUCTIONS);
9754
+ const results = await this.collectResults(page, options.maxResults);
9755
+ return {
9756
+ query: options.query,
9757
+ location: options.location ?? null,
9758
+ searchQuery,
9759
+ searchUrl,
9760
+ extractedAt: (/* @__PURE__ */ new Date()).toISOString(),
9761
+ requestedMaxResults: options.maxResults,
9762
+ resultCount: results.length,
9763
+ results,
9764
+ durationMs: Date.now() - startMs
9765
+ };
9766
+ } finally {
9767
+ await this.driver.close();
9768
+ }
9769
+ }
9770
+ async detectBlock(page) {
9771
+ return page.evaluate(() => {
9772
+ const text = document.body.innerText.slice(0, 2e3);
9773
+ return /unusual traffic|captcha|recaptcha|about this page/i.test(text) || /\/sorry\//.test(location.href);
9774
+ });
9775
+ }
9776
+ async collectResults(page, maxResults) {
9777
+ const seen = /* @__PURE__ */ new Map();
9778
+ const started = Date.now();
9779
+ let noGrowthRounds = 0;
9780
+ while (Date.now() - started < MAPS_SEARCH_SCROLL_BUDGET_MS) {
9781
+ const before = seen.size;
9782
+ const batch = await this.extractVisibleResults(page);
9783
+ for (const result of batch) {
9784
+ const key = this.resultKey(result);
9785
+ if (!seen.has(key)) seen.set(key, { ...result, position: seen.size + 1 });
9786
+ if (seen.size >= maxResults) break;
9787
+ }
9788
+ if (seen.size >= maxResults) break;
9789
+ if (seen.size === before) noGrowthRounds += 1;
9790
+ else noGrowthRounds = 0;
9791
+ if (noGrowthRounds >= MAPS_SEARCH_MAX_NO_GROWTH_ROUNDS) break;
9792
+ await page.evaluate(() => {
9793
+ const feed = document.querySelector('[role="feed"]');
9794
+ if (feed) {
9795
+ feed.scrollTop = feed.scrollHeight;
9796
+ } else {
9797
+ window.scrollTo(0, document.body.scrollHeight);
9798
+ }
9799
+ });
9800
+ await page.waitForTimeout(MAPS_SEARCH_SCROLL_STEP_MS);
9801
+ }
9802
+ return [...seen.values()].slice(0, maxResults);
9803
+ }
9804
+ resultKey(result) {
9805
+ return result.cidDecimal ?? result.placeUrl.replace(/[?&].*$/, "") ?? result.name;
9806
+ }
9807
+ async extractVisibleResults(page) {
9808
+ return page.evaluate(() => {
9809
+ function normalizeText(value) {
9810
+ const text = value?.replace(/\s+/g, " ").trim() ?? "";
9811
+ return text || null;
9812
+ }
9813
+ function cidFromUrl(url) {
9814
+ const fid = url.match(/!1s(0x[0-9a-f]+):(0x[0-9a-f]+)/i);
9815
+ if (!fid) return { cid: null, cidDecimal: null };
9816
+ let cidDecimal = null;
9817
+ try {
9818
+ cidDecimal = BigInt(fid[2]).toString();
9819
+ } catch {
9820
+ }
9821
+ return { cid: `${fid[1]}:${fid[2]}`, cidDecimal };
9822
+ }
9823
+ function textParts(card) {
9824
+ if (!card) return [];
9825
+ const parts = [];
9826
+ card.querySelectorAll("div, span").forEach((el2) => {
9827
+ const text = Array.from(el2.childNodes).filter((node) => node.nodeType === 3).map((node) => node.textContent?.trim() ?? "").filter((text2) => text2.length > 1 && text2.length < 140).join(" ");
9828
+ if (text && !parts.includes(text)) parts.push(text);
9829
+ });
9830
+ return parts;
9831
+ }
9832
+ function firstMatching(parts, pattern) {
9833
+ const value = parts.find((part) => pattern.test(part));
9834
+ return value ?? null;
9835
+ }
9836
+ const out = [];
9837
+ const seen = /* @__PURE__ */ new Set();
9838
+ const anchors = Array.from(document.querySelectorAll('a[href*="/maps/place/"]'));
9839
+ for (const anchor of anchors) {
9840
+ const placeUrl = anchor.href;
9841
+ const stableUrl = placeUrl.replace(/[?&].*$/, "");
9842
+ if (seen.has(stableUrl)) continue;
9843
+ seen.add(stableUrl);
9844
+ const card = anchor.closest('.Nv2PK, [role="article"], .bfdHYd') ?? anchor.parentElement;
9845
+ const parts = textParts(card);
9846
+ const aria = normalizeText(anchor.getAttribute("aria-label"));
9847
+ const heading = normalizeText(card?.querySelector('.qBF1Pd, .fontHeadlineSmall, [role="heading"]')?.textContent);
9848
+ const name = aria ?? heading ?? parts[0] ?? stableUrl;
9849
+ const links = Array.from(card?.querySelectorAll("a[href]") ?? []);
9850
+ const websiteUrl = links.find((link) => link.href.startsWith("http") && !link.href.includes("google."))?.href ?? null;
9851
+ const directionsUrl = links.find((link) => /google\.[^/]+\/maps\/dir|\/dir\//i.test(link.href))?.href ?? null;
9852
+ const rating = firstMatching(parts, /^\d(?:\.\d)?$/);
9853
+ const reviewCountRaw = firstMatching(parts, /^\(?[\d,]+\)?$/);
9854
+ const category = parts.find((part) => !/^\d(?:\.\d)?$|^\(?[\d,]+\)?$|open|closed|directions|website/i.test(part)) ?? null;
9855
+ const address = parts.find((part) => /\b[A-Z]{2}\s+\d{5}\b|\b(?:St|Street|Ave|Avenue|Rd|Road|Blvd|Drive|Dr)\b/i.test(part)) ?? null;
9856
+ const { cid, cidDecimal } = cidFromUrl(placeUrl);
9857
+ out.push({
9858
+ position: out.length + 1,
9859
+ name,
9860
+ placeUrl,
9861
+ cid,
9862
+ cidDecimal,
9863
+ rating,
9864
+ reviewCount: reviewCountRaw ? reviewCountRaw.replace(/[()]/g, "") : null,
9865
+ category,
9866
+ address,
9867
+ websiteUrl,
9868
+ directionsUrl,
9869
+ metadata: parts.slice(0, 20)
9870
+ });
9871
+ }
9872
+ return out;
9873
+ });
9874
+ }
9875
+ };
9876
+
9671
9877
  // src/api/maps-routes.ts
9878
+ function mapsErrorResponse(c, msg, errorCode) {
9879
+ const blocked = msg.includes("CAPTCHA") || msg.includes("blocked");
9880
+ return c.json({
9881
+ error: sanitizeVendorName(msg),
9882
+ error_code: blocked ? "captcha_or_blocked" : errorCode,
9883
+ retryable: blocked
9884
+ }, blocked ? 503 : 500);
9885
+ }
9672
9886
  var mapsApp = new Hono5();
9887
+ mapsApp.post("/search", createApiKeyAuth(), async (c) => {
9888
+ const user = c.get("user");
9889
+ const body = await c.req.json().catch(() => ({}));
9890
+ const parsed = MapsSearchOptionsSchema.safeParse({
9891
+ kernelApiKey: process.env.KERNEL_API_KEY,
9892
+ ...body
9893
+ });
9894
+ if (!parsed.success) {
9895
+ return c.json({ error: parsed.error.issues[0]?.message ?? "Invalid request" }, 400);
9896
+ }
9897
+ const { ok, balance_mc } = await debitMc(
9898
+ user.id,
9899
+ MC_COSTS.maps_search,
9900
+ LedgerOperation.MAPS_SEARCH,
9901
+ [parsed.data.query, parsed.data.location].filter(Boolean).join(" ")
9902
+ );
9903
+ if (!ok) return c.json(insufficientBalanceResponse(balance_mc, MC_COSTS.maps_search), 402);
9904
+ const driver = new BrowserDriver();
9905
+ const extractor = new MapsSearchExtractor(driver);
9906
+ try {
9907
+ const result = await extractor.extract(parsed.data);
9908
+ await logRequestEvent({
9909
+ userId: user.id,
9910
+ source: "maps_search",
9911
+ status: "done",
9912
+ query: result.searchQuery,
9913
+ location: parsed.data.location,
9914
+ resultCount: result.resultCount,
9915
+ result
9916
+ });
9917
+ return c.json(result);
9918
+ } catch (err) {
9919
+ await creditMc(user.id, MC_COSTS.maps_search, LedgerOperation.REFUND, "failed maps_search call");
9920
+ const msg = err instanceof Error ? err.message : String(err);
9921
+ await logRequestEvent({
9922
+ userId: user.id,
9923
+ source: "maps_search",
9924
+ status: "failed",
9925
+ query: [parsed.data.query, parsed.data.location].filter(Boolean).join(" "),
9926
+ location: parsed.data.location,
9927
+ error: msg
9928
+ });
9929
+ return mapsErrorResponse(c, msg, "maps_search_failed");
9930
+ } finally {
9931
+ await driver.close();
9932
+ }
9933
+ });
9673
9934
  mapsApp.post("/place", createApiKeyAuth(), async (c) => {
9674
9935
  const user = c.get("user");
9675
9936
  const body = await c.req.json().catch(() => ({}));
@@ -9736,10 +9997,7 @@ mapsApp.post("/place", createApiKeyAuth(), async (c) => {
9736
9997
  location: parsed.data.location,
9737
9998
  error: msg
9738
9999
  });
9739
- if (msg.includes("CAPTCHA") || msg.includes("blocked")) {
9740
- return c.json({ error: msg }, 503);
9741
- }
9742
- return c.json({ error: msg }, 500);
10000
+ return mapsErrorResponse(c, msg, "maps_place_failed");
9743
10001
  } finally {
9744
10002
  await driver.close();
9745
10003
  }
@@ -10419,8 +10677,8 @@ async function captureSerpIntelligenceSnapshot(rawInput, runtimeOptions = {}) {
10419
10677
  debug,
10420
10678
  serpOnly: true,
10421
10679
  headless: runtimeOptions.headless ?? true,
10422
- kernelApiKey: runtimeOptions.kernelApiKey ?? process.env.KERNEL_API_KEY?.trim(),
10423
- kernelProxyId: runtimeOptions.kernelProxyId ?? process.env.KERNEL_PROXY_ID?.trim(),
10680
+ kernelApiKey: runtimeOptions.kernelApiKey ?? browserServiceApiKey(),
10681
+ kernelProxyId: runtimeOptions.kernelProxyId ?? browserServiceProxyId(),
10424
10682
  format: "json",
10425
10683
  outputDir: runtimeOptions.outputDir ?? "/tmp/serp-intelligence-output",
10426
10684
  signal: runtimeOptions.signal,
@@ -10431,7 +10689,7 @@ async function captureSerpIntelligenceSnapshot(rawInput, runtimeOptions = {}) {
10431
10689
  const pageSnapshotLimit = normalizePageSnapshotLimit(parsedInput);
10432
10690
  const pageSnapshotTargets = collectPageSnapshotTargets(harvestResult, pageSnapshotLimit);
10433
10691
  const pageSnapshotArtifacts = pageSnapshotTargets.length > 0 ? (await capturePageSnapshotsFn(pageSnapshotTargets, {
10434
- kernelApiKey: runtimeOptions.kernelApiKey ?? process.env.KERNEL_API_KEY?.trim(),
10692
+ kernelApiKey: runtimeOptions.kernelApiKey ?? browserServiceApiKey(),
10435
10693
  timeoutMs: runtimeOptions.pageSnapshotTimeoutMs,
10436
10694
  maxConcurrency: runtimeOptions.pageSnapshotMaxConcurrency,
10437
10695
  debug,
@@ -10539,8 +10797,8 @@ serpIntelligenceApp.post("/capture", async (c) => {
10539
10797
  if (!ok) return c.json(insufficientBalanceResponse(balance_mc, cost), 402);
10540
10798
  try {
10541
10799
  const result = await captureSerpIntelligenceSnapshot(parsed.data, {
10542
- kernelApiKey: process.env.KERNEL_API_KEY?.trim(),
10543
- kernelProxyId: process.env.KERNEL_PROXY_ID?.trim(),
10800
+ kernelApiKey: browserServiceApiKey(),
10801
+ kernelProxyId: browserServiceProxyId(),
10544
10802
  signal: c.req.raw.signal,
10545
10803
  billing: { creditsUsed: cost / 1e3 }
10546
10804
  });
@@ -10595,7 +10853,7 @@ serpIntelligenceApp.post("/page-snapshots", async (c) => {
10595
10853
  if (!ok) return c.json(insufficientBalanceResponse(balance_mc, cost), 402);
10596
10854
  try {
10597
10855
  const result = await capturePageSnapshots(targets, {
10598
- kernelApiKey: process.env.KERNEL_API_KEY?.trim(),
10856
+ kernelApiKey: browserServiceApiKey(),
10599
10857
  timeoutMs: parsed.data.timeoutMs,
10600
10858
  maxConcurrency: parsed.data.maxConcurrency,
10601
10859
  debug: parsed.data.debug
@@ -10631,6 +10889,7 @@ serpIntelligenceApp.post("/page-snapshots", async (c) => {
10631
10889
  // src/mcp/mcp-routes.ts
10632
10890
  import { Hono as Hono7 } from "hono";
10633
10891
  import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
10892
+ configureReportSaving(false);
10634
10893
  function mcpAuthError() {
10635
10894
  const body = JSON.stringify({
10636
10895
  jsonrpc: "2.0",
@@ -10657,15 +10916,18 @@ async function requireMcpCallerKey(c) {
10657
10916
  }
10658
10917
  var mcpApp = new Hono7();
10659
10918
  function registerSerpIntelligenceCaptureTools(server, executor) {
10660
- const serpExecutor = executor;
10661
10919
  server.registerTool("capture_serp_snapshot", {
10920
+ title: "SERP Intelligence Snapshot",
10662
10921
  description: "Capture a structured SERP Intelligence Google snapshot through POST /serp-intelligence/capture, the same product capture path used by Phoenix. Split query from location, infer gl/hl, use proxyMode location for localized residential proxy evidence, configured for the static residential proxy, and none only for direct-network debugging. Set debug true when investigating location evidence, proxy behavior, CAPTCHA, or capture reliability.",
10663
- inputSchema: CaptureSerpSnapshotInputSchema
10664
- }, async (input) => serpExecutor.captureSerpSnapshot ? serpExecutor.captureSerpSnapshot(input) : Promise.resolve({ content: [{ type: "text", text: "{}" }], isError: true }));
10922
+ inputSchema: CaptureSerpSnapshotInputSchema,
10923
+ annotations: liveWebToolAnnotations("SERP Intelligence Snapshot")
10924
+ }, async (input) => executor.captureSerpSnapshot(input));
10665
10925
  server.registerTool("capture_serp_page_snapshots", {
10926
+ title: "SERP Intelligence Page Snapshots",
10666
10927
  description: "Capture public ranking-page evidence through POST /serp-intelligence/page-snapshots, the same product page snapshot path used by Phoenix. Provide urls for simple captures or targets when preserving organic, AI citation, local-pack, configured target, or site-subject source metadata. Private IPs, localhost, file URLs, and internal URLs are rejected by the service. Use timeoutMs for slow pages and debug true for sanitized proxy/browser diagnostics.",
10667
- inputSchema: CaptureSerpPageSnapshotsInputSchema
10668
- }, async (input) => serpExecutor.captureSerpPageSnapshots ? serpExecutor.captureSerpPageSnapshots(input) : Promise.resolve({ content: [{ type: "text", text: "{}" }], isError: true }));
10928
+ inputSchema: CaptureSerpPageSnapshotsInputSchema,
10929
+ annotations: liveWebToolAnnotations("SERP Intelligence Page Snapshots")
10930
+ }, async (input) => executor.captureSerpPageSnapshots(input));
10669
10931
  }
10670
10932
  mcpApp.all("/", async (c) => {
10671
10933
  try {
@@ -10678,7 +10940,7 @@ mcpApp.all("/", async (c) => {
10678
10940
  sessionIdGenerator: void 0,
10679
10941
  enableJsonResponse: true
10680
10942
  });
10681
- const server = buildPaaExtractorMcpServer(executor);
10943
+ const server = buildPaaExtractorMcpServer(executor, { savesReportsLocally: false });
10682
10944
  registerSerpIntelligenceCaptureTools(server, executor);
10683
10945
  await server.connect(transport);
10684
10946
  return transport.handleRequest(c.req.raw);
@@ -11304,7 +11566,7 @@ app.post("/harvest/sync", auth, async (c) => {
11304
11566
  try {
11305
11567
  const result = await harvest({
11306
11568
  ...options,
11307
- kernelApiKey: process.env.KERNEL_API_KEY?.trim(),
11569
+ kernelApiKey: browserServiceApiKey(),
11308
11570
  headless: true,
11309
11571
  format: "json",
11310
11572
  outputDir: "/tmp/paa-output-api",
@@ -11319,7 +11581,7 @@ app.post("/harvest/sync", auth, async (c) => {
11319
11581
  if (diff > 0) await creditMc(user.id, diff, LedgerOperation.PAA_REFUND, "overestimate refund");
11320
11582
  else if (diff < 0) await debitMc(user.id, -diff, LedgerOperation.PAA, options.query);
11321
11583
  }
11322
- return c.json({ job_id: jobId, status: "done", result, attempts });
11584
+ return c.json({ job_id: jobId, status: "done", result: sanitizeHarvestResult(result), attempts: sanitizeAttempts(attempts) });
11323
11585
  } catch (err) {
11324
11586
  const problem = classifyHarvestProblem(err);
11325
11587
  const response = harvestProblemResponse(problem);
@@ -11327,18 +11589,19 @@ app.post("/harvest/sync", auth, async (c) => {
11327
11589
  if (problem.terminalStatus === "cancelled" || c.req.raw.signal.aborted) {
11328
11590
  await cancelJob(jobId, serializeHarvestProblem(problem));
11329
11591
  await creditMc(user.id, syncCost, LedgerOperation.REFUND, "cancelled call");
11330
- return c.json({ job_id: jobId, status: "cancelled", ...response, attempts }, problem.httpStatus);
11592
+ return c.json({ job_id: jobId, status: "cancelled", ...response, attempts: sanitizeAttempts(attempts) }, problem.httpStatus);
11331
11593
  }
11332
11594
  await failJob(jobId, serializeHarvestProblem(problem));
11333
11595
  await creditMc(user.id, syncCost, LedgerOperation.REFUND, "failed call");
11334
- return c.json({ job_id: jobId, status: "failed", ...response, attempts }, problem.httpStatus);
11596
+ return c.json({ job_id: jobId, status: "failed", ...response, attempts: sanitizeAttempts(attempts) }, problem.httpStatus);
11335
11597
  }
11336
11598
  });
11337
11599
  app.get("/jobs/:id", auth, async (c) => {
11338
11600
  const job = await getJob(c.req.param("id"), c.get("user").id);
11339
11601
  if (!job) return c.json({ error: "Job not found" }, 404);
11340
11602
  const attempts = await listHarvestAttempts(job.id, c.get("user").id);
11341
- return c.json({ ...job, attempts });
11603
+ const safeResult = job.result && typeof job.result === "object" ? sanitizeHarvestResult(job.result) : job.result;
11604
+ return c.json({ ...job, result: safeResult, attempts: sanitizeAttempts(attempts) });
11342
11605
  });
11343
11606
  app.get("/jobs", auth, async (c) => {
11344
11607
  return c.json(await listJobs(c.get("user").id));
@@ -11437,7 +11700,7 @@ app.post("/extract-url", auth, async (c) => {
11437
11700
  const { ok: euOk, balance_mc: euBal } = await debitMc(user.id, MC_COSTS.page_scrape, LedgerOperation.EXTRACT_URL, new URL(canonicalUrl).hostname);
11438
11701
  if (!euOk) return c.json(insufficientBalanceResponse(euBal, MC_COSTS.page_scrape), 402);
11439
11702
  try {
11440
- const kernelApiKey = process.env.KERNEL_API_KEY?.trim();
11703
+ const kernelApiKey = browserServiceApiKey();
11441
11704
  const device = screenshotDevice === "mobile" ? "mobile" : "desktop";
11442
11705
  const [result, pageData] = await Promise.all([
11443
11706
  extractKpo({ url: canonicalUrl, kernelApiKey }),
@@ -11475,7 +11738,7 @@ app.post("/map-urls", auth, async (c) => {
11475
11738
  startUrl: parsed.href,
11476
11739
  maxUrls: Math.min(2e3, Math.max(1, body.maxUrls ?? 500)),
11477
11740
  concurrency: Math.min(20, Math.max(1, body.concurrency ?? 12)),
11478
- kernelApiKey: body.browserFallback ?? body.kernelFallback ? process.env.KERNEL_API_KEY : void 0
11741
+ kernelApiKey: body.browserFallback ?? body.kernelFallback ? browserServiceApiKey() : void 0
11479
11742
  });
11480
11743
  await logRequestEvent({
11481
11744
  userId: user.id,
@@ -11515,7 +11778,7 @@ app.post("/extract-site", auth, async (c) => {
11515
11778
  const result = await extractSite({
11516
11779
  startUrl: parsed.href,
11517
11780
  maxPages: Math.min(200, Math.max(1, body.maxPages ?? 100)),
11518
- kernelApiKey: body.browserFallback ?? body.kernelFallback ? process.env.KERNEL_API_KEY : void 0
11781
+ kernelApiKey: body.browserFallback ?? body.kernelFallback ? browserServiceApiKey() : void 0
11519
11782
  });
11520
11783
  const pageCount = result.pages?.length ?? 1;
11521
11784
  const actualSiteMc = pageCount * MC_COSTS.page_scrape;
@@ -11662,7 +11925,7 @@ app.get("/cron/tick", async (c) => {
11662
11925
  if (!process.env.CRON_SECRET || secret2 !== `Bearer ${process.env.CRON_SECRET}`) {
11663
11926
  return c.json({ error: "Unauthorized" }, 401);
11664
11927
  }
11665
- const { drainQueue } = await import("./worker-UT4ZQU2T.js");
11928
+ const { drainQueue } = await import("./worker-AUCXFHEL.js");
11666
11929
  const budget = { maxJobs: 10, deadlineMs: Date.now() + 28e4 };
11667
11930
  const [results, sweepResult] = await Promise.all([
11668
11931
  drainQueue(budget),
@@ -11784,4 +12047,4 @@ app.get("/blog/:slug/", (c) => {
11784
12047
  export {
11785
12048
  app
11786
12049
  };
11787
- //# sourceMappingURL=server-2Y27U4TO.js.map
12050
+ //# sourceMappingURL=server-QXVVTKJP.js.map