mcp-scraper 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,6 +30,26 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
30
30
  mod
31
31
  ));
32
32
 
33
+ // src/harvest-timeout.ts
34
/**
 * Picks the server-side and client-side timeout budgets (in milliseconds)
 * for a harvest run.
 *
 * @param {number} maxQuestions - Requested question count. Anything that is
 *   not a finite number greater than zero is treated as 30; fractional values
 *   are truncated.
 * @param {boolean} [serpOnly=false] - When true the smallest server budget is
 *   used regardless of the question count.
 * @returns {{serverMs: number, clientMs: number}} `clientMs` is the server
 *   budget plus CLIENT_OVER_SERVER_MARGIN_MS, capped 5 s below
 *   VERCEL_FUNCTION_MAX_MS.
 */
function harvestTimeoutBudget(maxQuestions, serpOnly = false) {
  const isUsableCount = Number.isFinite(maxQuestions) && maxQuestions > 0;
  const questionCount = isUsableCount ? Math.trunc(maxQuestions) : 30;

  // Tiered server budget: larger harvests get more time.
  const pickServerMs = () => {
    if (serpOnly || questionCount <= 50) return 110000;
    if (questionCount <= 100) return 180000;
    if (questionCount <= 150) return 240000;
    return 280000;
  };

  const serverMs = pickServerMs();
  const clientCeilingMs = VERCEL_FUNCTION_MAX_MS - 5000;
  const clientMs = Math.min(serverMs + CLIENT_OVER_SERVER_MARGIN_MS, clientCeilingMs);
  return { serverMs, clientMs };
}
44
// Timing constants read by harvestTimeoutBudget; assigned when the module
// initializer below is invoked.
var VERCEL_FUNCTION_MAX_MS, CLIENT_OVER_SERVER_MARGIN_MS;
var init_harvest_timeout = __esm({
  "src/harvest-timeout.ts"() {
    "use strict";
    // 300000 ms = 5 minutes.
    VERCEL_FUNCTION_MAX_MS = 300000;
    // Extra time the client waits beyond the server budget (15 s).
    CLIENT_OVER_SERVER_MARGIN_MS = 15000;
  }
});
52
+
33
53
  // src/blog/registry.ts
34
54
  var posts;
35
55
  var init_registry = __esm({
@@ -3825,25 +3845,73 @@ function firstFont(fontFamily) {
3825
3845
  const first = fontFamily.split(",")[0].trim().replace(/['"]/g, "");
3826
3846
  return first || null;
3827
3847
  }
3848
/**
 * Returns the most frequent usable colour from a `{ hex: count }` map.
 *
 * Entries rejected by `isTransparentOrWhite`, plus the near-black values
 * "#000000" and "#020101", are ignored.
 *
 * @param {Object<string, number>} freq - Occurrence count per hex colour.
 * @returns {string|null} The hex key with the highest count, or null when no
 *   usable entry remains.
 */
function dominantColor(freq) {
  const isIgnored = (hex) => isTransparentOrWhite(hex) || hex === "#000000" || hex === "#020101";
  const ranked = Object.entries(freq)
    .filter(([hex]) => !isIgnored(hex))
    .sort(([, countA], [, countB]) => countB - countA);
  const [top] = ranked;
  return top?.[0] ?? null;
}
3828
3851
  async function extractBrandingFromPage(page) {
3829
3852
  const evalScript = `
3830
3853
  (function() {
3831
3854
  function cs(el) { return el ? window.getComputedStyle(el) : null; }
3855
+ function toHex(rgb) {
3856
+ var m = rgb && rgb.match(/rgba?\\((\\d+),\\s*(\\d+),\\s*(\\d+)/);
3857
+ if (!m) return null;
3858
+ return '#' + [m[1],m[2],m[3]].map(function(v){ return ('0'+parseInt(v).toString(16)).slice(-2); }).join('');
3859
+ }
3860
+ function isUsable(hex) {
3861
+ if (!hex) return false;
3862
+ if (hex === '#000000' || hex === '#020101' || hex === '#ffffff' || hex === '#fffffe') return false;
3863
+ var r=parseInt(hex.slice(1,3),16), g=parseInt(hex.slice(3,5),16), b=parseInt(hex.slice(5,7),16);
3864
+ return (0.2126*r + 0.7152*g + 0.0722*b) <= 230;
3865
+ }
3866
+
3832
3867
  var navEl = document.querySelector('nav, header, [role="banner"]');
3833
3868
  var bodyEl = document.body;
3834
3869
  var h1El = document.querySelector('h1');
3835
3870
  var btnEl = document.querySelector(
3836
3871
  'a.btn-primary, button.btn-primary, .btn-primary, .cta-btn,' +
3837
3872
  'a.button--primary, button.button--primary, [class*="btn-cta"],' +
3838
- '[class*="cta-button"], .wp-block-button__link, [class*="hero"] a'
3873
+ '[class*="cta-button"], .wp-block-button__link, [class*="hero"] a,' +
3874
+ '.elementor-button, .elementor-button-link,' +
3875
+ '.et_pb_button,' +
3876
+ '.fl-button,' +
3877
+ '.vc_btn,' +
3878
+ '[class*="cta"][href], [class*="get-started"], [class*="contact-btn"]'
3839
3879
  );
3840
3880
  var navStyle = cs(navEl);
3841
3881
  var bodyStyle = cs(bodyEl);
3842
3882
  var h1Style = cs(h1El);
3843
3883
  var btnStyle = cs(btnEl);
3844
- var pageHost = window.location.hostname.replace(/^www./, '');
3884
+
3885
+ var svgFreq = {};
3886
+ var svgScope = navEl || document.querySelector('header, [role="banner"]');
3887
+ if (svgScope) {
3888
+ var svgEls = svgScope.querySelectorAll('svg *, [fill], path, circle, rect, polygon, polyline');
3889
+ for (var si = 0; si < svgEls.length; si++) {
3890
+ var svgEl = svgEls[si];
3891
+ var fillComp = cs(svgEl) ? cs(svgEl).fill : null;
3892
+ var fillAttr = svgEl.getAttribute('fill');
3893
+ var fillHex = null;
3894
+ if (fillComp && fillComp !== 'none') { fillHex = toHex(fillComp); }
3895
+ else if (fillAttr && fillAttr !== 'none' && fillAttr.startsWith('#')) { fillHex = fillAttr; }
3896
+ if (fillHex && isUsable(fillHex)) { svgFreq[fillHex] = (svgFreq[fillHex] || 0) + 1; }
3897
+ }
3898
+ }
3899
+
3900
+ var navChildBgFreq = {};
3901
+ if (navEl) {
3902
+ var navChildren = navEl.querySelectorAll('li, a, button, [class*="menu-item"]');
3903
+ for (var ni = 0; ni < navChildren.length; ni++) {
3904
+ var nbg = cs(navChildren[ni]);
3905
+ if (nbg) {
3906
+ var bghex = toHex(nbg.backgroundColor);
3907
+ if (bghex && isUsable(bghex)) { navChildBgFreq[bghex] = (navChildBgFreq[bghex] || 0) + 1; }
3908
+ }
3909
+ }
3910
+ }
3911
+
3912
+ var pageHost = window.location.hostname.replace(/^www\\./, '');
3845
3913
  function isSameDomain(src) {
3846
- try { return new URL(src).hostname.replace(/^www./, '').endsWith(pageHost); } catch { return false; }
3914
+ try { return new URL(src).hostname.replace(/^www\\./, '').endsWith(pageHost); } catch { return false; }
3847
3915
  }
3848
3916
  var logoSelectors = [
3849
3917
  'header img[class*="logo"]', 'nav img[class*="logo"]',
@@ -3866,22 +3934,27 @@ async function extractBrandingFromPage(page) {
3866
3934
  'link[rel~="icon"], link[rel="shortcut icon"], link[rel="apple-touch-icon"]'
3867
3935
  );
3868
3936
  return {
3869
- navBg: navStyle ? navStyle.backgroundColor : null,
3870
- bodyBg: bodyStyle ? bodyStyle.backgroundColor : null,
3871
- bodyColor: bodyStyle ? bodyStyle.color : null,
3872
- h1Color: h1Style ? h1Style.color : null,
3873
- btnBg: btnStyle ? btnStyle.backgroundColor : null,
3874
- bodyFont: bodyStyle ? bodyStyle.fontFamily : null,
3875
- h1Font: h1Style ? h1Style.fontFamily : null,
3876
- logoSrc: logoSrc,
3877
- faviconHref: faviconEl ? faviconEl.href : null,
3937
+ navBg: navStyle ? navStyle.backgroundColor : null,
3938
+ bodyBg: bodyStyle ? bodyStyle.backgroundColor : null,
3939
+ bodyColor: bodyStyle ? bodyStyle.color : null,
3940
+ h1Color: h1Style ? h1Style.color : null,
3941
+ btnBg: btnStyle ? btnStyle.backgroundColor : null,
3942
+ bodyFont: bodyStyle ? bodyStyle.fontFamily : null,
3943
+ h1Font: h1Style ? h1Style.fontFamily : null,
3944
+ logoSrc: logoSrc,
3945
+ faviconHref: faviconEl ? faviconEl.href : null,
3946
+ svgFreq: svgFreq,
3947
+ navChildBgFreq: navChildBgFreq,
3878
3948
  };
3879
3949
  })()
3880
3950
  `;
3881
3951
  const raw = await page.evaluate(evalScript);
3882
3952
  const navBgHex = rgbToHex(raw.navBg ?? "");
3883
3953
  const bodyBgHex = rgbToHex(raw.bodyBg ?? "");
3884
- const primary = !isTransparentOrWhite(navBgHex) ? navBgHex : bodyBgHex;
3954
+ const navBgUsable = navBgHex && !isTransparentOrWhite(navBgHex) && navBgHex !== "#000000" && navBgHex !== "#020101" ? navBgHex : null;
3955
+ const svgPrimary = dominantColor(raw.svgFreq ?? {});
3956
+ const navChildBg = dominantColor(raw.navChildBgFreq ?? {});
3957
+ const primary = navBgUsable ?? svgPrimary ?? navChildBg ?? bodyBgHex;
3885
3958
  const accent = rgbToHex(raw.btnBg ?? "");
3886
3959
  const text = rgbToHex(raw.bodyColor ?? "");
3887
3960
  const heading = rgbToHex(raw.h1Color ?? "");
@@ -10700,237 +10773,833 @@ var init_FacebookAdExtractor = __esm({
10700
10773
  }
10701
10774
  });
10702
10775
 
10703
- // src/api/facebook-ad-routes.ts
10704
- function invalidRequest(message) {
10705
- return { error_code: "invalid_request", message };
10706
- }
10707
- async function detectSoftBlock(driver) {
10708
- const page = driver.getPage();
10709
- const bodyText = await page.evaluate(() => document.body?.innerText ?? "").catch(() => "");
10710
- return bodyText.length < 200 || /Log in|log in|Create new account|You must log in/.test(bodyText);
10711
- }
10712
- function buildPageIntelUrl(body, country) {
10713
- if (body.libraryId?.trim()) return `https://www.facebook.com/ads/library/?id=${FacebookAdExtractor.resolveLibraryId(body.libraryId.trim()) ?? body.libraryId.trim()}`;
10714
- if (body.pageId?.trim()) return `https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=${country}&is_targeted_country=false&media_type=all&search_type=page&view_all_page_id=${body.pageId.trim()}`;
10715
- return `https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=${country}&q=${encodeURIComponent(body.query.trim())}&search_type=keyword_unordered`;
10716
- }
10717
- function kernelLaunchOpts() {
10718
- return { headless: true, kernelApiKey: process.env.KERNEL_API_KEY?.trim(), kernelProxyId: process.env.KERNEL_PROXY_ID?.trim(), viewport: { width: 1280, height: 900 }, locale: "en-US" };
10719
- }
10720
- var import_hono4, import_zod15, import_client3, FacebookAdBodySchema, FacebookPageIntelBodySchema, FacebookTranscribeBodySchema, FacebookSearchBodySchema, FacebookMediaBodySchema, facebookAdApp, ALLOWED_MEDIA_HOSTS;
10721
- var init_facebook_ad_routes = __esm({
10722
- "src/api/facebook-ad-routes.ts"() {
10723
- "use strict";
10724
- import_hono4 = require("hono");
10725
- import_zod15 = require("zod");
10726
- init_db();
10727
- init_rates();
10728
- init_BrowserDriver();
10729
- init_FacebookAdExtractor();
10730
- import_client3 = require("@fal-ai/client");
10731
- init_api_auth();
10732
- init_url_utils();
10733
- FacebookAdBodySchema = import_zod15.z.object({
10734
- url: import_zod15.z.string().trim().optional(),
10735
- libraryId: import_zod15.z.string().trim().optional(),
10736
- openModal: import_zod15.z.boolean().optional()
10737
- }).refine((d) => !!d.url || !!d.libraryId, { message: "url or libraryId is required" });
10738
- FacebookPageIntelBodySchema = import_zod15.z.object({
10739
- pageId: import_zod15.z.string().trim().optional(),
10740
- query: import_zod15.z.string().trim().optional(),
10741
- libraryId: import_zod15.z.string().trim().optional(),
10742
- maxAds: import_zod15.z.number().int().min(1).max(200).optional(),
10743
- country: import_zod15.z.string().trim().toUpperCase().optional()
10744
- }).refine((d) => !!d.pageId || !!d.query || !!d.libraryId, {
10745
- message: "pageId, libraryId, or query is required"
10746
- });
10747
- FacebookTranscribeBodySchema = import_zod15.z.object({
10748
- videoUrl: import_zod15.z.string().trim().min(1, "videoUrl is required")
10749
- });
10750
- FacebookSearchBodySchema = import_zod15.z.object({
10751
- query: import_zod15.z.string().trim().min(1, "query is required"),
10752
- country: import_zod15.z.string().trim().toUpperCase().optional(),
10753
- maxResults: import_zod15.z.number().int().min(1).max(20).optional()
10754
- });
10755
- FacebookMediaBodySchema = import_zod15.z.object({
10756
- url: import_zod15.z.string().trim().min(1, "url is required"),
10757
- filename: import_zod15.z.string().trim().optional()
10758
- });
10759
- facebookAdApp = new import_hono4.Hono();
10760
- facebookAdApp.post("/ad", createApiKeyAuth(), async (c) => {
10761
- const raw = await c.req.json().catch(() => ({}));
10762
- const parsed = FacebookAdBodySchema.safeParse(raw);
10763
- if (!parsed.success) {
10764
- return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
10765
- }
10766
- const body = parsed.data;
10767
- const raw2 = body.url?.trim() ?? body.libraryId?.trim() ?? "";
10768
- const libraryId = FacebookAdExtractor.resolveLibraryId(raw2);
10769
- if (!libraryId) return c.json({ error: "Could not resolve a valid Facebook Ad Library ID from the provided input" }, 400);
10770
- const fbUser = c.get("user");
10771
- const { ok: adOk, balance_mc: adBal } = await debitMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD, raw2);
10772
- if (!adOk) return c.json(insufficientBalanceResponse(adBal, MC_COSTS.fb_ad), 402);
10773
- const driver = new BrowserDriver();
10774
- try {
10775
- await driver.launch(kernelLaunchOpts());
10776
- const extractor = new FacebookAdExtractor(driver);
10777
- const result = await extractor.extract(libraryId, { openModal: body.openModal !== false });
10778
- await logRequestEvent({
10779
- userId: fbUser.id,
10780
- source: "facebook_ad",
10781
- status: "done",
10782
- query: raw2,
10783
- resultCount: Array.isArray(result.variants) ? result.variants.length : null,
10784
- result
10785
- });
10786
- return c.json(result);
10787
- } catch (err) {
10788
- await creditMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD_REFUND, "failed call");
10789
- const msg = err instanceof Error ? err.message : String(err);
10790
- await logRequestEvent({ userId: fbUser.id, source: "facebook_ad", status: "failed", query: raw2, error: msg });
10791
- if (msg.toLowerCase().includes("blocked") || msg.toLowerCase().includes("captcha")) {
10792
- return c.json({ error: msg }, 503);
10793
- }
10794
- return c.json({ error: msg }, 500);
10795
- } finally {
10796
- await driver.close();
10797
- }
10798
- });
10799
- facebookAdApp.post("/page-intel", createApiKeyAuth(), async (c) => {
10800
- const raw = await c.req.json().catch(() => ({}));
10801
- const parsed = FacebookPageIntelBodySchema.safeParse(raw);
10802
- if (!parsed.success) {
10803
- return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
10804
- }
10805
- const body = parsed.data;
10806
- const maxAds = Math.min(200, Math.max(1, body.maxAds ?? 50));
10807
- const country = body.country?.trim().toUpperCase() ?? "US";
10808
- const listingUrl = buildPageIntelUrl(body, country);
10809
- const fbUser = c.get("user");
10810
- const { ok: fbOk, balance_mc: fbBal } = await debitMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD, body.pageId ?? body.query ?? body.libraryId ?? "");
10811
- if (!fbOk) return c.json(insufficientBalanceResponse(fbBal, MC_COSTS.fb_ad), 402);
10812
- const driver = new BrowserDriver();
10813
- let refunded = false;
10776
+ // src/extractor/FacebookAdGraphql.ts
10777
/**
 * Parses the text of a Facebook GraphQL response into JSON documents.
 *
 * A leading `for (;;);` guard is stripped first. If the remaining text is one
 * valid JSON document it is returned alone; otherwise the text is split on
 * newlines and every line that parses as JSON is collected (unparseable and
 * blank lines are skipped).
 *
 * @param {string} text - Raw response body.
 * @returns {Array<*>} Parsed documents, possibly empty.
 */
function parseFbGraphqlJson(text) {
  const stripped = text.replace(/^for\s*\(;;\);/, "").trim();

  // Wraps JSON.parse so callers can branch on success without try/catch.
  const tryParse = (source) => {
    try {
      return { ok: true, value: JSON.parse(source) };
    } catch {
      return { ok: false, value: undefined };
    }
  };

  const whole = tryParse(stripped);
  if (whole.ok) return [whole.value];

  const documents = [];
  for (const rawLine of stripped.split("\n")) {
    const candidate = rawLine.trim();
    if (candidate === "") continue;
    const parsedLine = tryParse(candidate);
    if (parsedLine.ok) documents.push(parsedLine.value);
  }
  return documents;
}
10796
/**
 * Flattens the `collated_results` of every search edge in an Ad Library
 * GraphQL payload into normalized records.
 *
 * The payload comes from the network, so its shape is not trusted: a
 * non-array `edges` / `collated_results` value or a null entry is skipped
 * rather than allowed to throw. This keeps one malformed node from
 * discarding every valid ad in the same payload.
 *
 * @param {*} payload - One parsed GraphQL document; any value is accepted.
 * @returns {Array<{ad_archive_id: string, page_id: string, page_name: *,
 *   is_active: boolean, collation_count: (number|null), snapshot: *}>}
 *   One record per entry that has a non-null `ad_archive_id`.
 */
function extractCollatedResults(payload) {
  const edges = payload?.data?.ad_library_main?.search_results_connection?.edges;
  if (!Array.isArray(edges)) return [];

  const results = [];
  for (const edge of edges) {
    const collated = edge?.node?.collated_results;
    if (!Array.isArray(collated)) continue;

    for (const entry of collated) {
      // null / primitive entries carry no ad data.
      if (entry === null || typeof entry !== "object") continue;
      const id = entry.ad_archive_id;
      if (id === undefined || id === null) continue;

      const snapshot = entry.snapshot ?? null;
      results.push({
        ad_archive_id: String(id),
        page_id: entry.page_id != null ? String(entry.page_id) : "",
        // Fall back to the snapshot's page name when the entry has none.
        page_name: entry.page_name ?? snapshot?.page_name ?? "",
        is_active: Boolean(entry.is_active),
        collation_count: typeof entry.collation_count === "number" ? entry.collation_count : null,
        snapshot
      });
    }
  }
  return results;
}
10819
/**
 * Loads an Ad Library URL and harvests ads from the GraphQL responses the
 * page triggers while it is scrolled.
 *
 * Only responses whose URL contains "/api/graphql" and whose request body
 * names the AD_LIBRARY_QUERY friendly name are inspected. Results are
 * de-duplicated by `ad_archive_id`.
 *
 * @param {object} page - Browser page exposing `on`/`off`, `goto`,
 *   `evaluate` and `waitForTimeout` (presumably a Playwright Page — confirm).
 * @param {string} url - Ad Library URL to open.
 * @param {number} maxResults - Stop once this many unique ads are collected.
 * @param {{captureMs?: number}} [opts] - `captureMs` bounds the scroll loop
 *   (default 30000 ms).
 * @returns {Promise<Array<object>>} At most `maxResults` collected ads.
 */
async function collectAdLibraryResults(page, url, maxResults, opts = {}) {
  const captureMs = opts.captureMs ?? 30000;
  const collected = [];
  const seenIds = /* @__PURE__ */ new Set();

  // Adds every not-yet-seen ad found in one response body.
  const ingest = (text) => {
    for (const payload of parseFbGraphqlJson(text)) {
      for (const ad of extractCollatedResults(payload)) {
        if (seenIds.has(ad.ad_archive_id)) continue;
        seenIds.add(ad.ad_archive_id);
        collected.push(ad);
      }
    }
  };

  const onResponse = (resp) => {
    if (!resp.url().includes("/api/graphql")) return;
    const postBody = resp.request().postData() ?? "";
    const friendlyName = postBody.match(/fb_api_req_friendly_name=([^&]+)/)?.[1];
    if (friendlyName !== AD_LIBRARY_QUERY) return;
    // Best effort: a body that cannot be read or parsed is ignored.
    void resp.text().then(ingest).catch(() => void 0);
  };

  page.on("response", onResponse);
  try {
    await page.goto(url, { waitUntil: "domcontentloaded", timeout: 45000 });
    const deadline = Date.now() + captureMs;
    let previousCount = -1;
    let unchangedRounds = 0;
    while (Date.now() < deadline && collected.length < maxResults) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)).catch(() => void 0);
      await page.waitForTimeout(2000);
      if (collected.length !== previousCount) {
        unchangedRounds = 0;
      } else {
        unchangedRounds += 1;
        // Two scrolls in a row without new ads: the list is treated as
        // exhausted, but only once at least one ad has arrived.
        if (unchangedRounds >= 2 && collected.length > 0) break;
      }
      previousCount = collected.length;
    }
  } finally {
    page.off("response", onResponse);
  }
  return collected.slice(0, maxResults);
}
10859
/**
 * Groups ad results by advertiser page and ranks the advertisers.
 *
 * Results without a `page_id` or `page_name` are ignored. For each page the
 * first result seen supplies the name and sample library id. `adCount` is the
 * larger of the number of results for that page and the highest positive
 * `collation_count` among them.
 *
 * @param {Array<object>} results - Records with `page_id`, `page_name`,
 *   `ad_archive_id` and `collation_count`.
 * @param {number} maxResults - Maximum number of advertisers returned.
 * @returns {Array<{pageName: string, pageId: string, sampleLibraryId: string,
 *   adCount: number}>} Advertisers sorted by descending `adCount`.
 */
function advertisersFromResults(results, maxResults) {
  const pages = /* @__PURE__ */ new Map();

  for (const result of results) {
    const { page_id: pageId, page_name: pageName } = result;
    if (!pageId || !pageName) continue;

    const rawCollation = result.collation_count;
    const collation = typeof rawCollation === "number" && rawCollation > 0 ? rawCollation : 0;

    const entry = pages.get(pageId);
    if (!entry) {
      pages.set(pageId, {
        pageName,
        pageId,
        sampleLibraryId: result.ad_archive_id,
        maxCollation: collation,
        resultCount: 1
      });
      continue;
    }
    entry.resultCount++;
    entry.maxCollation = Math.max(entry.maxCollation, collation);
  }

  const advertisers = Array.from(pages.values(), (entry) => ({
    pageName: entry.pageName,
    pageId: entry.pageId,
    sampleLibraryId: entry.sampleLibraryId,
    adCount: Math.max(entry.maxCollation, entry.resultCount)
  }));
  advertisers.sort((left, right) => right.adCount - left.adCount);
  return advertisers.slice(0, maxResults);
}
10874
// Friendly name of the GraphQL request whose responses carry Ad Library
// search results; assigned when the module initializer below is invoked.
var AD_LIBRARY_QUERY;
var init_FacebookAdGraphql = __esm({
  "src/extractor/FacebookAdGraphql.ts"() {
    "use strict";
    AD_LIBRARY_QUERY = "AdLibrarySearchPaginationQuery";
  }
});
10881
+
10882
+ // src/locations.ts
10883
// Lookup table from a lower-case place name or alias to a canonical
// "City,Region,Country" string; assigned when the module initializer below is
// invoked.
var LOCATIONS;
var init_locations = __esm({
  "src/locations.ts"() {
    "use strict";
    LOCATIONS = {
      // United States
      "austin": "Austin,Texas,United States",
      "new york": "New York,New York,United States",
      "new york city": "New York,New York,United States",
      "nyc": "New York,New York,United States",
      "los angeles": "Los Angeles,California,United States",
      "la": "Los Angeles,California,United States",
      "chicago": "Chicago,Illinois,United States",
      "houston": "Houston,Texas,United States",
      "phoenix": "Phoenix,Arizona,United States",
      "philadelphia": "Philadelphia,Pennsylvania,United States",
      "philly": "Philadelphia,Pennsylvania,United States",
      "san antonio": "San Antonio,Texas,United States",
      "dallas": "Dallas,Texas,United States",
      "miami": "Miami,Florida,United States",
      "seattle": "Seattle,Washington,United States",
      "denver": "Denver,Colorado,United States",
      "loveland": "Loveland,Colorado,United States",
      "loveland co": "Loveland,Colorado,United States",
      "fort collins": "Fort Collins,Colorado,United States",
      "boulder": "Boulder,Colorado,United States",
      "colorado springs": "Colorado Springs,Colorado,United States",
      "boston": "Boston,Massachusetts,United States",
      "atlanta": "Atlanta,Georgia,United States",
      "san francisco": "San Francisco,California,United States",
      "sf": "San Francisco,California,United States",
      "portland": "Portland,Oregon,United States",
      "las vegas": "Las Vegas,Nevada,United States",
      "minneapolis": "Minneapolis,Minnesota,United States",
      "detroit": "Detroit,Michigan,United States",
      "nashville": "Nashville,Tennessee,United States",
      "charlotte": "Charlotte,North Carolina,United States",
      "orlando": "Orlando,Florida,United States",
      "san diego": "San Diego,California,United States",
      "baltimore": "Baltimore,Maryland,United States",
      "sacramento": "Sacramento,California,United States",
      "columbus": "Columbus,Ohio,United States",
      "indianapolis": "Indianapolis,Indiana,United States",
      "san jose": "San Jose,California,United States",
      "fort worth": "Fort Worth,Texas,United States",
      "jacksonville": "Jacksonville,Florida,United States",
      "memphis": "Memphis,Tennessee,United States",
      "louisville": "Louisville,Kentucky,United States",
      "raleigh": "Raleigh,North Carolina,United States",
      "richmond": "Richmond,Virginia,United States",
      "salt lake city": "Salt Lake City,Utah,United States",
      // Canada
      "toronto": "Toronto,Ontario,Canada",
      "vancouver": "Vancouver,British Columbia,Canada",
      "montreal": "Montreal,Quebec,Canada",
      "calgary": "Calgary,Alberta,Canada",
      "ottawa": "Ottawa,Ontario,Canada",
      // United Kingdom
      "london": "London,England,United Kingdom",
      "manchester": "Manchester,England,United Kingdom",
      "birmingham": "Birmingham,England,United Kingdom",
      "edinburgh": "Edinburgh,Scotland,United Kingdom",
      "glasgow": "Glasgow,Scotland,United Kingdom",
      "leeds": "Leeds,England,United Kingdom",
      // Australia
      "sydney": "Sydney,New South Wales,Australia",
      "melbourne": "Melbourne,Victoria,Australia",
      "brisbane": "Brisbane,Queensland,Australia",
      "perth": "Perth,Western Australia,Australia",
      "adelaide": "Adelaide,South Australia,Australia",
      // Ireland
      "dublin": "Dublin,Leinster,Ireland"
    };
  }
});
10953
+
10954
+ // src/uule.ts
10955
/**
 * Encodes a non-negative integer as a base-128 varint: seven payload bits per
 * byte, least-significant group first, with the high bit (0x80) set on every
 * byte except the last.
 *
 * Plain arithmetic is used instead of bitwise operators because JavaScript's
 * `&` and `>>>` coerce their operands to 32 bits, which would silently
 * truncate values of 2^32 and above.
 *
 * @param {number} value - Non-negative safe integer to encode.
 * @returns {number[]} The encoded bytes (each 0-255), at least one.
 * @throws {RangeError} If `value` is negative or not a safe integer.
 */
function encodeVarint(value) {
  if (!Number.isSafeInteger(value) || value < 0) {
    throw new RangeError(`encodeVarint expects a non-negative safe integer, got ${String(value)}`);
  }
  const bytes = [];
  let remaining = value;
  do {
    let byte = remaining % 128;
    remaining = Math.floor(remaining / 128);
    // Continuation bit: more groups follow.
    if (remaining > 0) byte += 128;
    bytes.push(byte);
  } while (remaining > 0);
  return bytes;
}
10966
/**
 * Builds a `w+`-prefixed, base64-encoded location token from a place name.
 *
 * The encoded payload is a fixed five-byte header, the varint-encoded byte
 * length of the UTF-8 name, and then the name bytes themselves.
 * NOTE(review): the header bytes look like protobuf field tags for Google's
 * `uule` parameter — confirm against the consumer.
 *
 * @param {string} name - Location name to encode.
 * @returns {string} The token, e.g. "w+CAIQICI...".
 */
function encodeUule(name) {
  const nameBytes = Buffer.from(name, "utf8");
  const header = Buffer.from([8, 2, 16, 32, 34]);
  const lengthPrefix = Buffer.from(encodeVarint(nameBytes.length));
  const encoded = Buffer.concat([header, lengthPrefix, nameBytes]).toString("base64");
  return `w+${encoded}`;
}
10975
/**
 * Maps a free-form location string to its canonical LOCATIONS entry.
 *
 * Lookups are tried in order on the lower-cased, trimmed input:
 *   1. the whole string,
 *   2. the part before the first comma,
 *   3. the string with a trailing two-letter token (e.g. a state code) removed.
 * Only the table's own keys are consulted, so inputs such as "constructor" or
 * "__proto__" cannot resolve to inherited Object.prototype members.
 *
 * @param {string} input - Location as typed by the caller.
 * @returns {string} The canonical "City,Region,Country" string, or `input`
 *   unchanged when nothing matches.
 */
function normalizeLocation(input) {
  const lookup = (key) => Object.prototype.hasOwnProperty.call(LOCATIONS, key) ? LOCATIONS[key] : void 0;

  const raw = input.toLowerCase().trim();
  const candidates = [
    raw,
    raw.split(",")[0].trim(),
    raw.replace(/\s+[a-z]{2}$/, "").trim()
  ];
  for (const candidate of candidates) {
    const match = lookup(candidate);
    if (match) return match;
  }
  return input;
}
10984
// Module initializer for src/uule.ts: its only work is to initialize the
// locations module that normalizeLocation reads from.
var init_uule = __esm({
  "src/uule.ts"() {
    "use strict";
    init_locations();
  }
});
10990
+
10991
+ // src/kernel-proxy-resolver.ts
10992
/**
 * Returns the last six characters of a proxy id.
 *
 * @param {string|null|undefined} proxyId - Full proxy identifier.
 * @returns {string|null} The trailing six characters (the whole id when it is
 *   shorter), or null when `proxyId` is empty or missing.
 */
function proxyIdSuffix2(proxyId) {
  if (!proxyId) return null;
  return proxyId.slice(-6);
}
10995
/**
 * Packages a proxy id together with a description of how it was resolved.
 *
 * @param {*} source - Stored as `resolution.source`.
 * @param {*} proxyMode - Stored as `resolution.proxyMode`.
 * @param {string|null|undefined} proxyId - Resolved proxy id, if any.
 * @param {*} target - Stored as `resolution.target`.
 * @param {*} error - Stored as `resolution.error`.
 * @returns {{kernelProxyId: *, resolution: object}} The id plus its details;
 *   only the id's presence and last six characters are copied into the
 *   details object.
 */
function resolution(source, proxyMode, proxyId, target, error) {
  const details = {
    source,
    proxyMode,
    proxyIdPresent: Boolean(proxyId),
    proxyIdSuffix: proxyIdSuffix2(proxyId),
    target,
    error
  };
  return { kernelProxyId: proxyId, resolution: details };
}
11008
/**
 * Canonicalizes a state name for table lookup: trimmed, lower-cased, and with
 * every run of whitespace collapsed to a single space.
 *
 * @param {string} value - State name as supplied.
 * @returns {string} The normalized name.
 */
function normalizeStateName(value) {
  const lowered = value.trim().toLowerCase();
  return lowered.replace(/\s+/g, " ");
}
11011
/**
 * Canonicalizes a country name for comparison: trimmed, lower-cased, periods
 * removed, and every run of whitespace collapsed to a single space.
 *
 * @param {string} value - Country name or abbreviation as supplied.
 * @returns {string} The normalized name.
 */
function normalizeCountryName(value) {
  const lowered = value.trim().toLowerCase();
  const withoutDots = lowered.replace(/\./g, "");
  return withoutDots.replace(/\s+/g, " ");
}
11014
/**
 * Tells whether a country string refers to the United States.
 *
 * @param {string|null|undefined} country - Country name or abbreviation.
 * @returns {boolean} True for a missing/empty value, and for any spelling that
 *   normalizes to "united states", "united states of america", "usa" or "us".
 */
function isUnitedStates(country) {
  if (!country) return true;
  const aliases = ["united states", "united states of america", "usa", "us"];
  return aliases.includes(normalizeCountryName(country));
}
11019
/**
 * Resolves a region string to a two-letter state code.
 *
 * A two-letter alphabetic input is returned upper-cased as-is; it is not
 * checked against the state table. Anything else is normalized and looked up
 * in US_STATE_CODES, consulting only the table's own keys so that inputs such
 * as "constructor" cannot resolve to inherited Object.prototype members.
 *
 * @param {string} region - State name or two-letter abbreviation.
 * @returns {string|null} The state code, or null when the name is unknown.
 */
function stateCodeFor(region) {
  const trimmed = region.trim();
  if (/^[A-Za-z]{2}$/.test(trimmed)) return trimmed.toUpperCase();
  const key = normalizeStateName(trimmed);
  if (!Object.prototype.hasOwnProperty.call(US_STATE_CODES, key)) return null;
  return US_STATE_CODES[key] ?? null;
}
11024
/**
 * Derives candidate identifier spellings for a city name.
 *
 * The name is NFKD-normalized, stripped of non-ASCII characters, lower-cased
 * and split into alphanumeric words. Two spellings are produced — words
 * joined with "_" and words concatenated — with duplicates and empty results
 * removed.
 *
 * @param {string} city - Human-readable city name.
 * @returns {string[]} Unique candidates, underscored form first; empty when
 *   the name contains no ASCII letters or digits.
 */
function kernelCityIdentifierCandidates(city) {
  const asciiLower = city.normalize("NFKD").replace(/[^\x00-\x7F]/g, "").toLowerCase();
  const tokens = asciiLower.split(/[^a-z0-9]+/).filter((token) => token.length > 0);

  const variants = /* @__PURE__ */ new Set();
  for (const variant of [tokens.join("_"), tokens.join("")]) {
    if (variant) variants.add(variant);
  }
  return [...variants];
}
11031
/**
 * Builds the name of a residential proxy for a country/state and optional
 * city.
 *
 * @param {string} country - Country code; lower-cased in the name.
 * @param {string} state - State code; lower-cased in the name.
 * @param {string} [city] - City identifier, appended verbatim when truthy.
 * @returns {string} "mcp-serp-residential-<country>-<state>[-<city>]".
 */
function proxyName(country, state, city) {
  const base = `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}`;
  if (!city) return base;
  return `${base}-${city}`;
}
11034
/**
 * Builds the name of a ZIP-level residential proxy.
 *
 * @param {string} zip - ZIP code to embed in the name.
 * @returns {string} "mcp-serp-residential-us-zip-<zip>".
 */
function zipProxyName(zip) {
  const prefix = "mcp-serp-residential-us-zip-";
  return prefix.concat(zip);
}
11037
/**
 * Turns a free-form location into a state- or city-level proxy target.
 *
 * Only handled when `gl` is "us" (case-insensitive). The location is
 * canonicalized with `normalizeLocation`, split on commas, and a trailing
 * United States component is dropped. A single remaining component is read
 * as a state; otherwise the first two components are read as city and region.
 *
 * @param {string|null|undefined} location - Location text.
 * @param {string} gl - Country code of the search.
 * @returns {object|null} A target with `canonicalLocation`, `level`
 *   ("state" or "city"), `country`, `state`, `city`, `cityCandidates`,
 *   `proxyName` and `config`; null when the location is missing, not US, or
 *   cannot be resolved to a state (and, at city level, a city identifier).
 */
function parseKernelLocationProxyTarget(location, gl) {
  if (!location) return null;
  if (gl.toLowerCase() !== "us") return null;

  const canonicalLocation = normalizeLocation(location);
  const segments = canonicalLocation
    .split(",")
    .map((segment) => segment.trim())
    .filter((segment) => segment !== "");

  // Drop a trailing country component such as "United States".
  const hasCountrySuffix = segments.length > 1 && isUnitedStates(segments[segments.length - 1]);
  const parts = hasCountrySuffix ? segments.slice(0, -1) : segments;

  if (parts.length === 0) return null;

  if (parts.length === 1) {
    const stateOnly = stateCodeFor(parts[0]);
    if (!stateOnly) return null;
    return {
      canonicalLocation,
      level: "state",
      country: "US",
      state: stateOnly,
      city: "",
      cityCandidates: [],
      proxyName: proxyName("US", stateOnly),
      config: { country: "US", state: stateOnly }
    };
  }

  const cityName = parts[0];
  const regionName = parts[1];
  const state = stateCodeFor(regionName);
  if (!state) return null;

  const cityCandidates = kernelCityIdentifierCandidates(cityName);
  const primaryCity = cityCandidates[0];
  if (!primaryCity) return null;

  return {
    canonicalLocation,
    level: "city",
    country: "US",
    state,
    city: primaryCity,
    cityCandidates,
    proxyName: proxyName("US", state, primaryCity),
    config: { country: "US", state, city: primaryCity }
  };
}
11083
/**
 * Builds the lookup key used by the city-center ZIP table.
 *
 * @param {{ city: string, state: string }} target - Proxy target.
 * @returns {string} Key of the form "city|STATE".
 */
function cityZipKey(target) {
  const { city, state } = target;
  return `${city}|${state}`;
}
11086
/**
 * Picks the ZIP code to use for a proxy target.
 *
 * @param {{ city: string, state: string }} target - Proxy target.
 * @param {string} [explicitZip] - Caller-supplied ZIP; used only when it is
 *   exactly five digits.
 * @returns {string | null} The explicit ZIP, else the table entry for the
 *   target's city/state key, else null.
 */
function knownZipFor(target, explicitZip) {
  const hasValidExplicitZip = Boolean(explicitZip) && /^\d{5}$/.test(explicitZip);
  if (hasValidExplicitZip) return explicitZip;
  const fromTable = US_CITY_CENTER_ZIPS[cityZipKey(target)];
  return fromTable ?? null;
}
11090
/**
 * Derives a ZIP-level target from an existing target.
 *
 * All fields of `target` are copied; `level`, `zip`, `proxyName` and
 * `config` are then overridden for the given ZIP.
 *
 * @param {object} target - Source target (reads `country` and `state`).
 * @param {string} zip - ZIP code for the new target.
 * @returns {object} New target object with level "zip".
 */
function zipTarget(target, zip) {
  const { country, state } = target;
  const overrides = {
    level: "zip",
    zip,
    proxyName: zipProxyName(zip),
    config: { country, state, zip }
  };
  return Object.assign({}, target, overrides);
}
11103
/**
 * Checks whether an existing proxy's config corresponds to a target.
 *
 * Country is compared case-insensitively on the config side. For ZIP targets
 * only the ZIP must also match. Otherwise the state must match
 * (case-insensitively on the config side) and the city must equal `city`
 * when one is given, or be absent from the config when it is not.
 *
 * @param {object | undefined | null} config - Proxy config to test.
 * @param {object} target - Target (reads `level`, `country`, `state`, `zip`).
 * @param {string} [city] - City identifier the config must carry.
 * @returns {boolean} True when the config matches.
 */
function configMatches(config, target, city) {
  const configCountry = config?.country?.toUpperCase();
  if (configCountry !== target.country) return false;
  if (target.level === "zip") return config?.zip === target.zip;
  if (config?.state?.toUpperCase() !== target.state) return false;
  return city ? config?.city === city : !config?.city;
}
11109
/**
 * Finds a reusable proxy for exactly the given target.
 *
 * A proxy qualifies when it is residential, not marked "unavailable", has an
 * id, and either carries the target's proxy name or has a matching config.
 * The city is only part of the config match for city-level targets.
 *
 * @param {object[]} proxies - Proxies to search.
 * @param {object} target - Target (reads `level`, `city`, `proxyName`).
 * @returns {object | null} First qualifying proxy, or null.
 */
function findExistingTargetProxy(proxies, target) {
  const cityFilter = target.level === "city" ? target.city : void 0;
  const qualifies = (proxy) => {
    if (proxy.type !== "residential") return false;
    if (proxy.status === "unavailable" || !proxy.id) return false;
    return proxy.name === target.proxyName || configMatches(proxy.config, target, cityFilter);
  };
  const match = proxies.find(qualifies);
  return match ?? null;
}
11112
/**
 * Finds a reusable city-level proxy for any of the target's city candidates.
 *
 * Candidates are tried in order; for each one the first residential,
 * not-"unavailable" proxy with an id that matches by name or by config wins.
 *
 * @param {object[]} proxies - Proxies to search.
 * @param {object} target - Target (reads `country`, `state`, `cityCandidates`).
 * @returns {object | null} Matching proxy, or null when no candidate matches.
 */
function findExistingProxy(proxies, target) {
  const isUsable = (proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id);
  for (const candidateCity of target.cityCandidates) {
    const expectedName = proxyName(target.country, target.state, candidateCity);
    const match = proxies.find(
      (proxy) => isUsable(proxy) && (proxy.name === expectedName || configMatches(proxy.config, target, candidateCity))
    );
    if (match) return match;
  }
  return null;
}
11120
/**
 * Derives a state-level target from an existing target.
 *
 * All fields of `target` are copied; `level`, `proxyName` and `config` are
 * then overridden so they refer to the state only.
 *
 * @param {object} target - Source target (reads `country` and `state`).
 * @returns {object} New target object with level "state".
 */
function stateTarget(target) {
  const { country, state } = target;
  const overrides = {
    level: "state",
    proxyName: proxyName(country, state),
    config: { country, state }
  };
  return Object.assign({}, target, overrides);
}
11131
/**
 * Finds a reusable state-level proxy for the target's country and state.
 *
 * A proxy qualifies when it is residential, not marked "unavailable", has an
 * id, and either carries the state proxy name or has a config matching the
 * target with no city.
 *
 * @param {object[]} proxies - Proxies to search.
 * @param {object} target - Target (reads `country` and `state`).
 * @returns {object | null} First qualifying proxy, or null.
 */
function findExistingStateProxy(proxies, target) {
  const expectedName = proxyName(target.country, target.state);
  const qualifies = (proxy) => {
    if (proxy.type !== "residential") return false;
    if (proxy.status === "unavailable" || !proxy.id) return false;
    return proxy.name === expectedName || configMatches(proxy.config, target);
  };
  const match = proxies.find(qualifies);
  return match ?? null;
}
11135
/**
 * Returns the target to use when a lookup is retried.
 *
 * The retry target is always the state-level form of `target`.
 *
 * @param {object} target - Original target.
 * @param {number} attemptIndex - Retry index; not read by this function.
 * @returns {object} State-level target.
 */
function escalatedTargetLevel(target, attemptIndex) {
  const escalated = stateTarget(target);
  return escalated;
}
11138
/**
 * Extracts a printable message from a caught value.
 *
 * @param {unknown} err - Caught value.
 * @returns {string} `err.message` for Error instances, otherwise `String(err)`.
 */
function errorText2(err) {
  if (err instanceof Error) return err.message;
  return String(err);
}
11141
/**
 * Resolves which Kernel proxy id a request should use.
 *
 * Behaviour by `options.proxyMode`:
 * - "none": reports source "disabled" with no proxy id.
 * - "configured": reports source "configured_fallback" with the configured id.
 * - anything else: tries to reuse or create a location-specific residential
 *   proxy, falling back to the configured id when that is not possible.
 *
 * On the first attempt (`attemptIndex` 0 or unset) the order is: ZIP-level
 * proxy (reuse, then create) when a ZIP is known, existing city-level proxy,
 * new city-level proxy per city candidate, existing state-level proxy, new
 * state-level proxy. On later attempts only a state-level proxy is created;
 * that path does not list existing proxies first.
 *
 * Errors collected along the way are joined with " | ". When nothing was
 * collected the reported error is null (previously a state-level success with
 * no prior errors reported an empty string).
 *
 * @param {object} options - Reads `proxyMode`, `configuredKernelProxyId`,
 *   `location`, `gl`, `kernelApiKey`, `attemptIndex` and `proxyZip`.
 * @returns {Promise<object>} The value built by `resolution(...)`.
 */
async function resolveKernelProxyId(options) {
  const { proxyMode, configuredKernelProxyId } = options;
  if (proxyMode === "none") {
    return resolution("disabled", proxyMode, void 0, null, null);
  }
  if (proxyMode === "configured") {
    return resolution("configured_fallback", proxyMode, configuredKernelProxyId, null, null);
  }
  const target = parseKernelLocationProxyTarget(options.location, options.gl);
  if (!target || !options.kernelApiKey) {
    return resolution("configured_fallback", proxyMode, configuredKernelProxyId, target, target ? null : "location could not be normalized to a US city/state proxy target");
  }
  const kernel = new import_sdk5.default({ apiKey: options.kernelApiKey });

  // Joined error text, or null when nothing went wrong so far.
  const joinErrors = (errors) => errors.join(" | ") || null;

  // Creates one residential proxy. Returns its id, or null after recording
  // the failure under `label` in `errors`.
  const tryCreate = async (name, config, label, errors) => {
    try {
      const created = await kernel.proxies.create({ type: "residential", name, config });
      if (created.id) return created.id;
      errors.push(`${label}: Kernel did not return a proxy id`);
    } catch (err) {
      errors.push(`${label}: ${errorText2(err)}`);
    }
    return null;
  };

  try {
    const attemptIndex = options.attemptIndex ?? 0;
    if (attemptIndex >= 1) {
      // Retry path: go straight to a state-level proxy.
      const escalatedTarget = escalatedTargetLevel(target, attemptIndex);
      const escalationErrors = [];
      const escalatedId = await tryCreate(escalatedTarget.proxyName, escalatedTarget.config, escalatedTarget.state, escalationErrors);
      if (escalatedId) {
        return resolution("location_created", proxyMode, escalatedId, escalatedTarget, null);
      }
      return resolution("configured_fallback", proxyMode, configuredKernelProxyId, escalatedTarget, joinErrors(escalationErrors));
    }

    const proxies = await kernel.proxies.list();
    const zip = knownZipFor(target, options.proxyZip);
    const createErrors = [];

    if (zip) {
      const targetZip = zipTarget(target, zip);
      const existingZip = findExistingTargetProxy(proxies, targetZip);
      if (existingZip?.id) {
        return resolution("location_reused", proxyMode, existingZip.id, targetZip, null);
      }
      // The create request carries only country and zip (no state).
      const zipId = await tryCreate(targetZip.proxyName, { country: targetZip.country, zip }, zip, createErrors);
      if (zipId) {
        return resolution("location_created", proxyMode, zipId, targetZip, null);
      }
    }

    const existing = findExistingProxy(proxies, target);
    if (existing?.id) {
      return resolution("location_reused", proxyMode, existing.id, target, joinErrors(createErrors));
    }

    for (const city of target.cityCandidates) {
      const name = proxyName(target.country, target.state, city);
      const cityId = await tryCreate(name, { country: target.country, state: target.state, city }, city, createErrors);
      if (cityId) {
        const createdTarget = {
          ...target,
          level: "city",
          city,
          proxyName: name,
          config: { country: target.country, state: target.state, city }
        };
        return resolution("location_created", proxyMode, cityId, createdTarget, null);
      }
    }

    const fallbackTarget = stateTarget(target);
    const existingState = findExistingStateProxy(proxies, fallbackTarget);
    if (existingState?.id) {
      return resolution("location_reused", proxyMode, existingState.id, fallbackTarget, joinErrors(createErrors));
    }
    // Captured before the attempt so a success reports only earlier failures.
    const priorErrors = joinErrors(createErrors);
    const stateId = await tryCreate(fallbackTarget.proxyName, fallbackTarget.config, fallbackTarget.state, createErrors);
    if (stateId) {
      return resolution("location_created", proxyMode, stateId, fallbackTarget, priorErrors);
    }
    return resolution("configured_fallback", proxyMode, configuredKernelProxyId, target, joinErrors(createErrors));
  } catch (err) {
    // Any unexpected failure (e.g. listing proxies) degrades to the configured proxy.
    return resolution("configured_fallback", proxyMode, configuredKernelProxyId, target, errorText2(err));
  }
}
11255
// Module-level bindings for src/kernel-proxy-resolver.ts; assigned when the
// initializer below runs.
var import_sdk5, US_STATE_CODES, US_CITY_CENTER_ZIPS;
var init_kernel_proxy_resolver = __esm({
  "src/kernel-proxy-resolver.ts"() {
    "use strict";
    import_sdk5 = __toESM(require("@onkernel/sdk"), 1);
    init_uule();
    // Normalized (lower-case, single-spaced) state name -> two-letter code.
    // "district of columbia" is included so the spelled-out name resolves the
    // same way the two-letter "DC" input does.
    US_STATE_CODES = {
      alabama: "AL",
      alaska: "AK",
      arizona: "AZ",
      arkansas: "AR",
      california: "CA",
      colorado: "CO",
      connecticut: "CT",
      delaware: "DE",
      "district of columbia": "DC",
      florida: "FL",
      georgia: "GA",
      hawaii: "HI",
      idaho: "ID",
      illinois: "IL",
      indiana: "IN",
      iowa: "IA",
      kansas: "KS",
      kentucky: "KY",
      louisiana: "LA",
      maine: "ME",
      maryland: "MD",
      massachusetts: "MA",
      michigan: "MI",
      minnesota: "MN",
      mississippi: "MS",
      missouri: "MO",
      montana: "MT",
      nebraska: "NE",
      nevada: "NV",
      "new hampshire": "NH",
      "new jersey": "NJ",
      "new mexico": "NM",
      "new york": "NY",
      "north carolina": "NC",
      "north dakota": "ND",
      ohio: "OH",
      oklahoma: "OK",
      oregon: "OR",
      pennsylvania: "PA",
      "rhode island": "RI",
      "south carolina": "SC",
      "south dakota": "SD",
      tennessee: "TN",
      texas: "TX",
      utah: "UT",
      vermont: "VT",
      virginia: "VA",
      washington: "WA",
      "west virginia": "WV",
      wisconsin: "WI",
      wyoming: "WY"
    };
    // "city_identifier|STATE" -> ZIP code used for ZIP-level proxies.
    US_CITY_CENTER_ZIPS = {
      "atlanta|GA": "30303",
      "austin|TX": "78701",
      "baltimore|MD": "21201",
      "boston|MA": "02108",
      "boulder|CO": "80302",
      "charlotte|NC": "28202",
      "chicago|IL": "60601",
      "colorado_springs|CO": "80903",
      "columbus|OH": "43215",
      "dallas|TX": "75201",
      "denver|CO": "80202",
      "detroit|MI": "48226",
      "fort_collins|CO": "80524",
      "fort_worth|TX": "76102",
      "houston|TX": "77002",
      "indianapolis|IN": "46204",
      "jacksonville|FL": "32202",
      "las_vegas|NV": "89101",
      "los_angeles|CA": "90012",
      "louisville|KY": "40202",
      "loveland|CO": "80537",
      "memphis|TN": "38103",
      "miami|FL": "33131",
      "minneapolis|MN": "55401",
      "nashville|TN": "37203",
      "new_york|NY": "10001",
      "orlando|FL": "32801",
      "philadelphia|PA": "19103",
      "phoenix|AZ": "85004",
      "portland|OR": "97205",
      "raleigh|NC": "27601",
      "richmond|VA": "23219",
      "sacramento|CA": "95814",
      "salt_lake_city|UT": "84101",
      "san_antonio|TX": "78205",
      "san_diego|CA": "92101",
      "san_francisco|CA": "94103",
      "san_jose|CA": "95113",
      "seattle|WA": "98101"
    };
  }
});
11356
+
11357
+ // src/api/facebook-ad-routes.ts
11358
/**
 * Builds the JSON body returned for a rejected request.
 *
 * @param {string} message - Human-readable reason.
 * @returns {{ error_code: string, message: string }} Error payload with
 *   `error_code` fixed to "invalid_request".
 */
function invalidRequest(message) {
  const payload = { error_code: "invalid_request", message };
  return payload;
}
11361
/**
 * Heuristically detects a login wall or empty page on the driver's current page.
 *
 * Reads the page body's inner text (treated as empty when the evaluation
 * rejects) and reports a soft block when it is shorter than 200 characters
 * or contains one of the login prompts matched below.
 *
 * @param {{ getPage: Function }} driver - Browser driver exposing `getPage()`.
 * @returns {Promise<boolean>} True when the page looks soft-blocked.
 */
async function detectSoftBlock(driver) {
  const page = driver.getPage();
  const pending = page.evaluate(() => document.body?.innerText ?? "");
  let bodyText;
  try {
    bodyText = await pending;
  } catch {
    // A failed evaluation is treated like an empty page.
    bodyText = "";
  }
  if (bodyText.length < 200) return true;
  return /Log in|log in|Create new account|You must log in/.test(bodyText);
}
11366
/**
 * Builds the Facebook Ad Library URL for a page-intel request.
 *
 * Precedence: `libraryId` (single-ad URL), then `pageId` (all ads of a page),
 * then `query` (keyword search). Every caller-supplied value is
 * percent-encoded before being placed in the query string, so characters such
 * as "&" or "=" in `pageId`, `country` or an unresolved `libraryId` cannot
 * inject extra parameters. A missing `query` is treated as an empty string.
 *
 * @param {{ libraryId?: string, pageId?: string, query?: string }} body - Request body.
 * @param {string} country - Country code for the listing.
 * @returns {string} Ad Library URL.
 */
function buildPageIntelUrl(body, country) {
  const base = "https://www.facebook.com/ads/library/";
  const countryParam = encodeURIComponent(country);
  const libraryId = body.libraryId?.trim();
  if (libraryId) {
    // Fall back to the raw input when the extractor cannot resolve an id.
    const resolved = FacebookAdExtractor.resolveLibraryId(libraryId) ?? libraryId;
    return `${base}?id=${encodeURIComponent(resolved)}`;
  }
  const pageId = body.pageId?.trim();
  if (pageId) {
    return `${base}?active_status=all&ad_type=all&country=${countryParam}&is_targeted_country=false&media_type=all&search_type=page&view_all_page_id=${encodeURIComponent(pageId)}`;
  }
  const query = body.query?.trim() ?? "";
  return `${base}?active_status=all&ad_type=all&country=${countryParam}&q=${encodeURIComponent(query)}&search_type=keyword_unordered`;
}
11371
/**
 * Builds browser launch options using the proxy id configured in the environment.
 *
 * @returns {object} Launch options: headless, 1280x900 viewport, "en-US"
 *   locale, plus the trimmed KERNEL_API_KEY and KERNEL_PROXY_ID environment
 *   values (undefined when unset).
 */
function kernelLaunchOpts() {
  const kernelApiKey = process.env.KERNEL_API_KEY?.trim();
  const kernelProxyId = process.env.KERNEL_PROXY_ID?.trim();
  return {
    headless: true,
    kernelApiKey,
    kernelProxyId,
    viewport: { width: 1280, height: 900 },
    locale: "en-US"
  };
}
11374
/**
 * Builds browser launch options that prefer a location-resolved residential proxy.
 *
 * Asks `resolveKernelProxyId` for a proxy for the fixed location
 * "New York, NY" (gl "us"). The resolved id is used when one is returned;
 * otherwise, or when resolution throws, the KERNEL_PROXY_ID environment
 * value is used. Resolution failures are deliberately not propagated.
 *
 * @returns {Promise<object>} Launch options: headless, 1280x900 viewport,
 *   "en-US" locale, the trimmed KERNEL_API_KEY, and the chosen proxy id.
 */
async function kernelLaunchOptsResidential() {
  const configuredProxyId = process.env.KERNEL_PROXY_ID?.trim();
  let chosenProxyId = configuredProxyId;
  try {
    const request = {
      kernelApiKey: process.env.KERNEL_API_KEY?.trim(),
      proxyMode: "location",
      configuredKernelProxyId: process.env.KERNEL_PROXY_ID?.trim(),
      location: "New York, NY",
      gl: "us"
    };
    const { kernelProxyId: resolvedId } = await resolveKernelProxyId(request);
    if (resolvedId) chosenProxyId = resolvedId;
  } catch {
    // Best effort: fall back to the configured proxy id.
    chosenProxyId = process.env.KERNEL_PROXY_ID?.trim();
  }
  return {
    headless: true,
    kernelApiKey: process.env.KERNEL_API_KEY?.trim(),
    kernelProxyId: chosenProxyId,
    viewport: { width: 1280, height: 900 },
    locale: "en-US"
  };
}
11390
+ var import_hono4, import_zod15, import_client3, FacebookAdBodySchema, FacebookPageIntelBodySchema, FacebookTranscribeBodySchema, FacebookSearchBodySchema, FacebookMediaBodySchema, facebookAdApp, ALLOWED_MEDIA_HOSTS;
11391
+ var init_facebook_ad_routes = __esm({
11392
+ "src/api/facebook-ad-routes.ts"() {
11393
+ "use strict";
11394
+ import_hono4 = require("hono");
11395
+ import_zod15 = require("zod");
11396
+ init_db();
11397
+ init_rates();
11398
+ init_BrowserDriver();
11399
+ init_FacebookAdExtractor();
11400
+ init_FacebookAdGraphql();
11401
+ init_kernel_proxy_resolver();
11402
+ import_client3 = require("@fal-ai/client");
11403
+ init_api_auth();
11404
+ init_url_utils();
11405
+ FacebookAdBodySchema = import_zod15.z.object({
11406
+ url: import_zod15.z.string().trim().optional(),
11407
+ libraryId: import_zod15.z.string().trim().optional(),
11408
+ openModal: import_zod15.z.boolean().optional()
11409
+ }).refine((d) => !!d.url || !!d.libraryId, { message: "url or libraryId is required" });
11410
+ FacebookPageIntelBodySchema = import_zod15.z.object({
11411
+ pageId: import_zod15.z.string().trim().optional(),
11412
+ query: import_zod15.z.string().trim().optional(),
11413
+ libraryId: import_zod15.z.string().trim().optional(),
11414
+ maxAds: import_zod15.z.number().int().min(1).max(200).optional(),
11415
+ country: import_zod15.z.string().trim().toUpperCase().optional()
11416
+ }).refine((d) => !!d.pageId || !!d.query || !!d.libraryId, {
11417
+ message: "pageId, libraryId, or query is required"
11418
+ });
11419
+ FacebookTranscribeBodySchema = import_zod15.z.object({
11420
+ videoUrl: import_zod15.z.string().trim().min(1, "videoUrl is required")
11421
+ });
11422
+ FacebookSearchBodySchema = import_zod15.z.object({
11423
+ query: import_zod15.z.string().trim().min(1, "query is required"),
11424
+ country: import_zod15.z.string().trim().toUpperCase().optional(),
11425
+ maxResults: import_zod15.z.number().int().min(1).max(20).optional()
11426
+ });
11427
+ FacebookMediaBodySchema = import_zod15.z.object({
11428
+ url: import_zod15.z.string().trim().min(1, "url is required"),
11429
+ filename: import_zod15.z.string().trim().optional()
11430
+ });
11431
+ facebookAdApp = new import_hono4.Hono();
11432
+ facebookAdApp.post("/ad", createApiKeyAuth(), async (c) => {
11433
+ const raw = await c.req.json().catch(() => ({}));
11434
+ const parsed = FacebookAdBodySchema.safeParse(raw);
11435
+ if (!parsed.success) {
11436
+ return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
11437
+ }
11438
+ const body = parsed.data;
11439
+ const raw2 = body.url?.trim() ?? body.libraryId?.trim() ?? "";
11440
+ const libraryId = FacebookAdExtractor.resolveLibraryId(raw2);
11441
+ if (!libraryId) return c.json({ error: "Could not resolve a valid Facebook Ad Library ID from the provided input" }, 400);
11442
+ const fbUser = c.get("user");
11443
+ const { ok: adOk, balance_mc: adBal } = await debitMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD, raw2);
11444
+ if (!adOk) return c.json(insufficientBalanceResponse(adBal, MC_COSTS.fb_ad), 402);
11445
+ const driver = new BrowserDriver();
11446
+ try {
11447
+ await driver.launch(kernelLaunchOpts());
11448
+ const extractor = new FacebookAdExtractor(driver);
11449
+ const result = await extractor.extract(libraryId, { openModal: body.openModal !== false });
11450
+ await logRequestEvent({
11451
+ userId: fbUser.id,
11452
+ source: "facebook_ad",
11453
+ status: "done",
11454
+ query: raw2,
11455
+ resultCount: Array.isArray(result.variants) ? result.variants.length : null,
11456
+ result
11457
+ });
11458
+ return c.json(result);
11459
+ } catch (err) {
11460
+ await creditMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD_REFUND, "failed call");
11461
+ const msg = err instanceof Error ? err.message : String(err);
11462
+ await logRequestEvent({ userId: fbUser.id, source: "facebook_ad", status: "failed", query: raw2, error: msg });
11463
+ if (msg.toLowerCase().includes("blocked") || msg.toLowerCase().includes("captcha")) {
11464
+ return c.json({ error: msg }, 503);
11465
+ }
11466
+ return c.json({ error: msg }, 500);
11467
+ } finally {
11468
+ await driver.close();
11469
+ }
11470
+ });
11471
+ facebookAdApp.post("/page-intel", createApiKeyAuth(), async (c) => {
11472
+ const raw = await c.req.json().catch(() => ({}));
11473
+ const parsed = FacebookPageIntelBodySchema.safeParse(raw);
11474
+ if (!parsed.success) {
11475
+ return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
11476
+ }
11477
+ const body = parsed.data;
11478
+ const maxAds = Math.min(200, Math.max(1, body.maxAds ?? 50));
11479
+ const country = body.country?.trim().toUpperCase() ?? "US";
11480
+ const listingUrl = buildPageIntelUrl(body, country);
11481
+ const fbUser = c.get("user");
11482
+ const { ok: fbOk, balance_mc: fbBal } = await debitMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD, body.pageId ?? body.query ?? body.libraryId ?? "");
11483
+ if (!fbOk) return c.json(insufficientBalanceResponse(fbBal, MC_COSTS.fb_ad), 402);
11484
+ const driver = new BrowserDriver();
11485
+ let refunded = false;
11486
+ try {
11487
+ await driver.launch(await kernelLaunchOptsResidential());
11488
+ await driver.navigateTo(listingUrl);
11489
+ const extractor = new FacebookAdExtractor(driver);
11490
+ const result = await extractor.extractPageIntel(listingUrl, maxAds);
11491
+ if (result.ads.length === 0 && await detectSoftBlock(driver)) {
11492
+ await creditMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD_REFUND, "soft-block empty result");
11493
+ refunded = true;
11494
+ await logRequestEvent({ userId: fbUser.id, source: "facebook_page_intel", status: "failed", query: body.pageId ?? body.query ?? body.libraryId ?? "", error: "soft-block: empty result refunded" });
11495
+ return c.json({ error: "soft-block: no ads returned (refunded)" }, 503);
11496
+ }
11497
+ await logRequestEvent({ userId: fbUser.id, source: "facebook_page_intel", status: "done", query: body.pageId ?? body.query ?? body.libraryId ?? "", resultCount: result.ads.length, result });
11498
+ return c.json(result);
11499
+ } catch (err) {
11500
+ const msg = err instanceof Error ? err.message : String(err);
11501
+ if (!refunded) await creditMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD_REFUND, "failed call");
11502
+ await logRequestEvent({ userId: fbUser.id, source: "facebook_page_intel", status: "failed", query: body.pageId ?? body.query ?? body.libraryId ?? "", error: msg });
11503
+ if (msg.toLowerCase().includes("blocked") || msg.toLowerCase().includes("captcha")) {
11504
+ return c.json({ error: msg }, 503);
11505
+ }
11506
+ return c.json({ error: msg }, 500);
11507
+ } finally {
11508
+ await driver.close();
11509
+ }
11510
+ });
11511
+ facebookAdApp.post("/transcribe", createApiKeyAuth(), async (c) => {
11512
+ const raw = await c.req.json().catch(() => ({}));
11513
+ const parsed = FacebookTranscribeBodySchema.safeParse(raw);
11514
+ if (!parsed.success) {
11515
+ return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
11516
+ }
11517
+ const body = parsed.data;
11518
+ const urlCheck = await validatePublicHttpUrl(body.videoUrl, { field: "videoUrl", requireHttps: false });
11519
+ if (urlCheck.error) {
11520
+ return c.json(invalidRequest(urlCheck.error), 400);
11521
+ }
11522
+ const videoUrl = urlCheck.parsed.href;
11523
+ const fbUser = c.get("user");
11524
+ const { ok, balance_mc } = await debitMc(fbUser.id, MC_COSTS.fb_transcribe, LedgerOperation.FB_TRANSCRIBE, videoUrl);
11525
+ if (!ok) return c.json(insufficientBalanceResponse(balance_mc, MC_COSTS.fb_transcribe), 402);
11526
+ import_client3.fal.config({ credentials: process.env.FAL_KEY });
11527
+ try {
11528
+ const startMs = Date.now();
11529
+ const result = await import_client3.fal.subscribe("fal-ai/wizper", {
11530
+ input: { audio_url: videoUrl, task: "transcribe", language: "en" },
11531
+ logs: false,
11532
+ pollInterval: 3e3
11533
+ });
11534
+ const data = result.data;
11535
+ const text = data.text ?? "";
11536
+ const chunks = data.chunks ?? [];
11537
+ const durationMs = Date.now() - startMs;
11538
+ const fmtTs2 = (s) => `${Math.floor(s / 60)}:${String(Math.floor(s % 60)).padStart(2, "0")}`;
11539
+ const lines = ["# Facebook Ad Transcript", "", `*Transcribed in ${(durationMs / 1e3).toFixed(1)}s*`, "", "## Full Text", "", text, ""];
11540
+ if (chunks.length) {
11541
+ lines.push("## Timestamped Segments", "");
11542
+ for (const ch of chunks) {
11543
+ lines.push(`**[${fmtTs2(ch.timestamp[0])} \u2192 ${fmtTs2(ch.timestamp[1])}]** ${ch.text.trim()}`, "");
11544
+ }
11545
+ }
11546
+ await logRequestEvent({ userId: fbUser.id, source: "facebook_transcribe", status: "done", query: videoUrl, resultCount: chunks.length, result: { text, chunks, durationMs } });
11547
+ return c.json({ text, chunks, durationMs, markdown: lines.join("\n") });
11548
+ } catch (err) {
11549
+ const msg = err instanceof Error ? err.message : String(err);
11550
+ await creditMc(fbUser.id, MC_COSTS.fb_transcribe, LedgerOperation.FB_TRANSCRIBE_REFUND, "failed call");
11551
+ await logRequestEvent({ userId: fbUser.id, source: "facebook_transcribe", status: "failed", query: videoUrl, error: msg });
11552
+ return c.json({ error: msg }, 500);
11553
+ }
11554
+ });
11555
+ facebookAdApp.post("/search", createApiKeyAuth(), async (c) => {
11556
+ const raw = await c.req.json().catch(() => ({}));
11557
+ const parsed = FacebookSearchBodySchema.safeParse(raw);
11558
+ if (!parsed.success) {
11559
+ return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
11560
+ }
11561
+ const body = parsed.data;
11562
+ const country = body.country?.trim().toUpperCase() ?? "US";
11563
+ const maxResults = Math.min(20, Math.max(1, body.maxResults ?? 10));
11564
+ const searchUrl = `https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=${country}&q=${encodeURIComponent(body.query.trim())}&search_type=keyword_unordered`;
11565
+ const fbUser = c.get("user");
11566
+ const { ok, balance_mc } = await debitMc(fbUser.id, MC_COSTS.fb_search, LedgerOperation.FB_SEARCH, body.query.trim());
11567
+ if (!ok) return c.json(insufficientBalanceResponse(balance_mc, MC_COSTS.fb_search), 402);
11568
+ const driver = new BrowserDriver();
11569
+ let searchRefunded = false;
11570
+ try {
11571
+ await driver.launch(await kernelLaunchOptsResidential());
11572
+ const page = driver.getPage();
11573
+ const collated = await collectAdLibraryResults(page, searchUrl, Math.max(maxResults * 4, 40));
11574
+ const gqlAdvertisers = advertisersFromResults(collated, maxResults);
11575
+ if (gqlAdvertisers.length > 0) {
11576
+ const results2 = gqlAdvertisers.map((a) => ({ name: a.pageName, pageName: a.pageName, pageId: a.pageId, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
11577
+ const searchResult2 = { query: body.query.trim(), searchUrl, results: results2, via: "graphql" };
11578
+ await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results2.length, result: searchResult2 });
11579
+ return c.json(searchResult2);
11580
+ }
11581
+ await page.waitForTimeout(1500);
11582
+ for (let scroll = 0; scroll < 3; scroll++) {
11583
+ await page.evaluate(() => {
11584
+ if (document.body) window.scrollTo(0, document.body.scrollHeight);
11585
+ });
11586
+ await page.waitForTimeout(1e3);
11587
+ }
11588
+ const rawBodyText = await page.evaluate(() => document.body?.innerText ?? "");
11589
+ if (rawBodyText.length < 200 || /Log in|You must log in|Create new account/.test(rawBodyText)) {
11590
+ await creditMc(fbUser.id, MC_COSTS.fb_search, LedgerOperation.FB_SEARCH_REFUND, "soft-block empty result");
11591
+ searchRefunded = true;
11592
+ await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "failed", query: body.query.trim(), error: "soft-block: empty result refunded" });
11593
+ return c.json({ error: "soft-block: no results returned (refunded)" }, 503);
11594
+ }
11595
+ const bodyText = rawBodyText.replace(/​/g, " ").replace(/\s+/g, " ");
11596
+ const adChunks = [];
11597
+ const splitRe = /(?=(?:Active|Inactive)\s+Library ID[:\s]+\d{10,20})/g;
11598
+ let last = 0;
11599
+ let m;
11600
+ while ((m = splitRe.exec(bodyText)) !== null) {
11601
+ if (m.index > last) adChunks.push(bodyText.slice(last, m.index));
11602
+ last = m.index;
10934
11603
  if (splitRe.lastIndex === m.index) splitRe.lastIndex++;
10935
11604
  }
10936
11605
  if (last < bodyText.length) adChunks.push(bodyText.slice(last));
@@ -10953,7 +11622,7 @@ var init_facebook_ad_routes = __esm({
10953
11622
  advertiserMap.set(pageName, { pageName, sampleLibraryId: libraryId, adCount: 1 });
10954
11623
  }
10955
11624
  }
10956
- const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults);
11625
+ const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults).map((a) => ({ name: a.pageName, pageName: a.pageName, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
10957
11626
  const searchResult = { query: body.query.trim(), searchUrl, results };
10958
11627
  await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results.length, result: searchResult });
10959
11628
  return c.json(searchResult);
@@ -11927,153 +12596,44 @@ async function capturePageSnapshot(target, options = {}) {
11927
12596
  }
11928
12597
  }
11929
12598
  async function capturePageSnapshots(targets, options = {}) {
11930
- const timeoutMs = normalizeTimeoutMs(options.timeoutMs);
11931
- const maxConcurrency = normalizeMaxConcurrency(options.maxConcurrency);
11932
- const limit = (0, import_p_limit3.default)(maxConcurrency);
11933
- const pageSnapshotArtifacts = await Promise.all(
11934
- targets.map((target) => limit(() => capturePageSnapshot(target, { ...options, timeoutMs })))
11935
- );
11936
- const attempts = pageSnapshotArtifacts.map((artifact, index) => ({
11937
- attemptNumber: index + 1,
11938
- outcome: artifact.status === "captured" ? "page_captured" : "page_failed",
11939
- startedAt: artifact.diagnostics.requestedAt,
11940
- completedAt: artifact.diagnostics.completedAt,
11941
- durationMs: artifact.diagnostics.durationMs,
11942
- ...artifact.error ? { problemCode: artifact.error.code, message: artifact.error.message } : {}
11943
- }));
11944
- const capturedCount = pageSnapshotArtifacts.filter((artifact) => artifact.status === "captured").length;
11945
- return {
11946
- pageSnapshotArtifacts,
11947
- attempts,
11948
- diagnostics: {
11949
- requestedCount: targets.length,
11950
- capturedCount,
11951
- failedCount: targets.length - capturedCount,
11952
- maxConcurrency,
11953
- timeoutMs
11954
- }
11955
- };
11956
- }
11957
- var import_node_crypto2, import_p_limit3, DEFAULT_TIMEOUT_MS, DEFAULT_MAX_CONCURRENCY, DEFAULT_MAX_CONTENT_CHARS;
11958
- var init_page_snapshot_extractor = __esm({
11959
- "src/serp-intelligence/page-snapshot-extractor.ts"() {
11960
- "use strict";
11961
- import_node_crypto2 = require("crypto");
11962
- import_p_limit3 = __toESM(require("p-limit"), 1);
11963
- init_kpo_extractor();
11964
- init_url_utils();
11965
- DEFAULT_TIMEOUT_MS = 15e3;
11966
- DEFAULT_MAX_CONCURRENCY = 2;
11967
- DEFAULT_MAX_CONTENT_CHARS = 25e4;
11968
- }
11969
- });
11970
-
11971
- // src/locations.ts
11972
- var LOCATIONS;
11973
- var init_locations = __esm({
11974
- "src/locations.ts"() {
11975
- "use strict";
11976
- LOCATIONS = {
11977
- "austin": "Austin,Texas,United States",
11978
- "new york": "New York,New York,United States",
11979
- "new york city": "New York,New York,United States",
11980
- "nyc": "New York,New York,United States",
11981
- "los angeles": "Los Angeles,California,United States",
11982
- "la": "Los Angeles,California,United States",
11983
- "chicago": "Chicago,Illinois,United States",
11984
- "houston": "Houston,Texas,United States",
11985
- "phoenix": "Phoenix,Arizona,United States",
11986
- "philadelphia": "Philadelphia,Pennsylvania,United States",
11987
- "philly": "Philadelphia,Pennsylvania,United States",
11988
- "san antonio": "San Antonio,Texas,United States",
11989
- "dallas": "Dallas,Texas,United States",
11990
- "miami": "Miami,Florida,United States",
11991
- "seattle": "Seattle,Washington,United States",
11992
- "denver": "Denver,Colorado,United States",
11993
- "loveland": "Loveland,Colorado,United States",
11994
- "loveland co": "Loveland,Colorado,United States",
11995
- "fort collins": "Fort Collins,Colorado,United States",
11996
- "boulder": "Boulder,Colorado,United States",
11997
- "colorado springs": "Colorado Springs,Colorado,United States",
11998
- "boston": "Boston,Massachusetts,United States",
11999
- "atlanta": "Atlanta,Georgia,United States",
12000
- "san francisco": "San Francisco,California,United States",
12001
- "sf": "San Francisco,California,United States",
12002
- "portland": "Portland,Oregon,United States",
12003
- "las vegas": "Las Vegas,Nevada,United States",
12004
- "minneapolis": "Minneapolis,Minnesota,United States",
12005
- "detroit": "Detroit,Michigan,United States",
12006
- "nashville": "Nashville,Tennessee,United States",
12007
- "charlotte": "Charlotte,North Carolina,United States",
12008
- "orlando": "Orlando,Florida,United States",
12009
- "san diego": "San Diego,California,United States",
12010
- "baltimore": "Baltimore,Maryland,United States",
12011
- "sacramento": "Sacramento,California,United States",
12012
- "columbus": "Columbus,Ohio,United States",
12013
- "indianapolis": "Indianapolis,Indiana,United States",
12014
- "san jose": "San Jose,California,United States",
12015
- "fort worth": "Fort Worth,Texas,United States",
12016
- "jacksonville": "Jacksonville,Florida,United States",
12017
- "memphis": "Memphis,Tennessee,United States",
12018
- "louisville": "Louisville,Kentucky,United States",
12019
- "raleigh": "Raleigh,North Carolina,United States",
12020
- "richmond": "Richmond,Virginia,United States",
12021
- "salt lake city": "Salt Lake City,Utah,United States",
12022
- "toronto": "Toronto,Ontario,Canada",
12023
- "vancouver": "Vancouver,British Columbia,Canada",
12024
- "montreal": "Montreal,Quebec,Canada",
12025
- "calgary": "Calgary,Alberta,Canada",
12026
- "ottawa": "Ottawa,Ontario,Canada",
12027
- "london": "London,England,United Kingdom",
12028
- "manchester": "Manchester,England,United Kingdom",
12029
- "birmingham": "Birmingham,England,United Kingdom",
12030
- "edinburgh": "Edinburgh,Scotland,United Kingdom",
12031
- "glasgow": "Glasgow,Scotland,United Kingdom",
12032
- "leeds": "Leeds,England,United Kingdom",
12033
- "sydney": "Sydney,New South Wales,Australia",
12034
- "melbourne": "Melbourne,Victoria,Australia",
12035
- "brisbane": "Brisbane,Queensland,Australia",
12036
- "perth": "Perth,Western Australia,Australia",
12037
- "adelaide": "Adelaide,South Australia,Australia",
12038
- "dublin": "Dublin,Leinster,Ireland"
12039
- };
12040
- }
12041
- });
12042
-
12043
- // src/uule.ts
12044
- function encodeVarint(value) {
12045
- const bytes = [];
12046
- let remaining = value;
12047
- do {
12048
- let byte = remaining & 127;
12049
- remaining >>>= 7;
12050
- if (remaining > 0) byte |= 128;
12051
- bytes.push(byte);
12052
- } while (remaining > 0);
12053
- return bytes;
12054
- }
12055
- function encodeUule(name) {
12056
- const locationBytes = Buffer.from(name, "utf8");
12057
- const payload = Buffer.concat([
12058
- Buffer.from([8, 2, 16, 32, 34]),
12059
- Buffer.from(encodeVarint(locationBytes.length)),
12060
- locationBytes
12061
- ]);
12062
- return `w+${payload.toString("base64")}`;
12063
- }
12064
- function normalizeLocation(input) {
12065
- const raw = input.toLowerCase().trim();
12066
- if (LOCATIONS[raw]) return LOCATIONS[raw];
12067
- const beforeComma = raw.split(",")[0].trim();
12068
- if (beforeComma !== raw && LOCATIONS[beforeComma]) return LOCATIONS[beforeComma];
12069
- const withoutState = raw.replace(/\s+[a-z]{2}$/, "").trim();
12070
- if (withoutState !== raw && LOCATIONS[withoutState]) return LOCATIONS[withoutState];
12071
- return input;
12599
+ const timeoutMs = normalizeTimeoutMs(options.timeoutMs);
12600
+ const maxConcurrency = normalizeMaxConcurrency(options.maxConcurrency);
12601
+ const limit = (0, import_p_limit3.default)(maxConcurrency);
12602
+ const pageSnapshotArtifacts = await Promise.all(
12603
+ targets.map((target) => limit(() => capturePageSnapshot(target, { ...options, timeoutMs })))
12604
+ );
12605
+ const attempts = pageSnapshotArtifacts.map((artifact, index) => ({
12606
+ attemptNumber: index + 1,
12607
+ outcome: artifact.status === "captured" ? "page_captured" : "page_failed",
12608
+ startedAt: artifact.diagnostics.requestedAt,
12609
+ completedAt: artifact.diagnostics.completedAt,
12610
+ durationMs: artifact.diagnostics.durationMs,
12611
+ ...artifact.error ? { problemCode: artifact.error.code, message: artifact.error.message } : {}
12612
+ }));
12613
+ const capturedCount = pageSnapshotArtifacts.filter((artifact) => artifact.status === "captured").length;
12614
+ return {
12615
+ pageSnapshotArtifacts,
12616
+ attempts,
12617
+ diagnostics: {
12618
+ requestedCount: targets.length,
12619
+ capturedCount,
12620
+ failedCount: targets.length - capturedCount,
12621
+ maxConcurrency,
12622
+ timeoutMs
12623
+ }
12624
+ };
12072
12625
  }
12073
- var init_uule = __esm({
12074
- "src/uule.ts"() {
12626
+ var import_node_crypto2, import_p_limit3, DEFAULT_TIMEOUT_MS, DEFAULT_MAX_CONCURRENCY, DEFAULT_MAX_CONTENT_CHARS;
12627
+ var init_page_snapshot_extractor = __esm({
12628
+ "src/serp-intelligence/page-snapshot-extractor.ts"() {
12075
12629
  "use strict";
12076
- init_locations();
12630
+ import_node_crypto2 = require("crypto");
12631
+ import_p_limit3 = __toESM(require("p-limit"), 1);
12632
+ init_kpo_extractor();
12633
+ init_url_utils();
12634
+ DEFAULT_TIMEOUT_MS = 15e3;
12635
+ DEFAULT_MAX_CONCURRENCY = 2;
12636
+ DEFAULT_MAX_CONTENT_CHARS = 25e4;
12077
12637
  }
12078
12638
  });
12079
12639
 
@@ -13364,425 +13924,59 @@ var init_OutputSerializer = __esm({
13364
13924
  await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
13365
13925
  const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13366
13926
  const rows = citations.map((c, i) => ({
13367
- seed_query: seed,
13368
- response_text: i === 0 ? text ?? "" : "",
13369
- citation_text: c.text,
13370
- citation_href: c.href
13371
- }));
13372
- const csv = import_papaparse2.default.unparse(rows, { header: true });
13373
- const filename = `${slug}-ai-mode-${Date.now()}.csv`;
13374
- const fullPath = import_node_path5.default.join(outputDir, filename);
13375
- await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13376
- return fullPath;
13377
- }
13378
- async writeWhatPeopleSayingCSV(cards, seed, outputDir) {
13379
- await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
13380
- const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13381
- const rows = cards.map((c) => ({ seed_query: seed, ...c }));
13382
- const csv = import_papaparse2.default.unparse(rows, { header: true });
13383
- const filename = `${slug}-what-people-saying-${Date.now()}.csv`;
13384
- const fullPath = import_node_path5.default.join(outputDir, filename);
13385
- await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13386
- return fullPath;
13387
- }
13388
- };
13389
- }
13390
- });
13391
-
13392
- // src/output/ProgressReporter.ts
13393
- var ProgressReporter;
13394
- var init_ProgressReporter = __esm({
13395
- "src/output/ProgressReporter.ts"() {
13396
- "use strict";
13397
- ProgressReporter = class {
13398
- onQuestion(node) {
13399
- process.stdout.write(JSON.stringify({ event: "question", depth: node.depth, question: node.question }) + "\n");
13400
- }
13401
- onDepth(depth) {
13402
- process.stdout.write(JSON.stringify({ event: "depth", depth }) + "\n");
13403
- }
13404
- onVideos(videos) {
13405
- for (const v of videos) {
13406
- process.stdout.write(JSON.stringify({ event: "video", type: v.type, platform: v.platform, duration: v.duration, title: v.title, channel: v.channel, url: v.url }) + "\n");
13407
- }
13408
- }
13409
- onForums(forums) {
13410
- for (const f of forums) {
13411
- process.stdout.write(JSON.stringify({ event: "forum", title: f.title, source: f.source, url: f.url }) + "\n");
13412
- }
13413
- }
13414
- onComplete(stats) {
13415
- process.stdout.write(JSON.stringify({ event: "complete", ...stats }) + "\n");
13416
- }
13417
- onError(err) {
13418
- process.stderr.write(JSON.stringify({ event: "error", type: err.constructor.name, message: err.message }) + "\n");
13419
- }
13420
- };
13421
- }
13422
- });
13423
-
13424
- // src/kernel-proxy-resolver.ts
13425
- function proxyIdSuffix2(proxyId) {
13426
- return proxyId ? proxyId.slice(-6) : null;
13427
- }
13428
- function resolution(source, proxyMode, proxyId, target, error) {
13429
- return {
13430
- kernelProxyId: proxyId,
13431
- resolution: {
13432
- source,
13433
- proxyMode,
13434
- proxyIdPresent: Boolean(proxyId),
13435
- proxyIdSuffix: proxyIdSuffix2(proxyId),
13436
- target,
13437
- error
13438
- }
13439
- };
13440
- }
13441
- function normalizeStateName(value) {
13442
- return value.trim().toLowerCase().replace(/\s+/g, " ");
13443
- }
13444
- function normalizeCountryName(value) {
13445
- return value.trim().toLowerCase().replace(/\./g, "").replace(/\s+/g, " ");
13446
- }
13447
- function isUnitedStates(country) {
13448
- if (!country) return true;
13449
- const normalized = normalizeCountryName(country);
13450
- return normalized === "united states" || normalized === "united states of america" || normalized === "usa" || normalized === "us";
13451
- }
13452
- function stateCodeFor(region) {
13453
- const trimmed = region.trim();
13454
- if (/^[A-Za-z]{2}$/.test(trimmed)) return trimmed.toUpperCase();
13455
- return US_STATE_CODES[normalizeStateName(trimmed)] ?? null;
13456
- }
13457
- function kernelCityIdentifierCandidates(city) {
13458
- const ascii = city.normalize("NFKD").replace(/[^\x00-\x7F]/g, "").toLowerCase();
13459
- const words = ascii.split(/[^a-z0-9]+/).filter(Boolean);
13460
- const underscored = words.join("_");
13461
- const compact = words.join("");
13462
- return Array.from(new Set([underscored, compact].filter(Boolean)));
13463
- }
13464
- function proxyName(country, state, city) {
13465
- return city ? `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}-${city}` : `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}`;
13466
- }
13467
- function zipProxyName(zip) {
13468
- return `mcp-serp-residential-us-zip-${zip}`;
13469
- }
13470
- function parseKernelLocationProxyTarget(location, gl) {
13471
- if (!location || gl.toLowerCase() !== "us") return null;
13472
- const canonicalLocation = normalizeLocation(location);
13473
- let parts = canonicalLocation.split(",").map((part) => part.trim()).filter(Boolean);
13474
- if (parts.length > 1 && isUnitedStates(parts[parts.length - 1])) {
13475
- parts = parts.slice(0, -1);
13476
- }
13477
- if (parts.length === 1) {
13478
- const stateOnly = stateCodeFor(parts[0]);
13479
- if (!stateOnly) return null;
13480
- return {
13481
- canonicalLocation,
13482
- level: "state",
13483
- country: "US",
13484
- state: stateOnly,
13485
- city: "",
13486
- cityCandidates: [],
13487
- proxyName: proxyName("US", stateOnly),
13488
- config: {
13489
- country: "US",
13490
- state: stateOnly
13491
- }
13492
- };
13493
- }
13494
- const [city = "", region = ""] = parts;
13495
- if (!city || !region) return null;
13496
- const state = stateCodeFor(region);
13497
- if (!state) return null;
13498
- const cityCandidates = kernelCityIdentifierCandidates(city);
13499
- const primaryCity = cityCandidates[0];
13500
- if (!primaryCity) return null;
13501
- return {
13502
- canonicalLocation,
13503
- level: "city",
13504
- country: "US",
13505
- state,
13506
- city: primaryCity,
13507
- cityCandidates,
13508
- proxyName: proxyName("US", state, primaryCity),
13509
- config: {
13510
- country: "US",
13511
- state,
13512
- city: primaryCity
13513
- }
13514
- };
13515
- }
13516
- function cityZipKey(target) {
13517
- return `${target.city}|${target.state}`;
13518
- }
13519
- function knownZipFor(target, explicitZip) {
13520
- if (explicitZip && /^\d{5}$/.test(explicitZip)) return explicitZip;
13521
- return US_CITY_CENTER_ZIPS[cityZipKey(target)] ?? null;
13522
- }
13523
- function zipTarget(target, zip) {
13524
- return {
13525
- ...target,
13526
- level: "zip",
13527
- zip,
13528
- proxyName: zipProxyName(zip),
13529
- config: {
13530
- country: target.country,
13531
- state: target.state,
13532
- zip
13533
- }
13534
- };
13535
- }
13536
- function configMatches(config, target, city) {
13537
- if (target.level === "zip") {
13538
- return config?.country?.toUpperCase() === target.country && config?.zip === target.zip;
13539
- }
13540
- return config?.country?.toUpperCase() === target.country && config?.state?.toUpperCase() === target.state && (city ? config?.city === city : !config?.city);
13541
- }
13542
- function findExistingTargetProxy(proxies, target) {
13543
- return proxies.find((proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id) && (proxy.name === target.proxyName || configMatches(proxy.config, target, target.level === "city" ? target.city : void 0))) ?? null;
13544
- }
13545
- function findExistingProxy(proxies, target) {
13546
- for (const city of target.cityCandidates) {
13547
- const name = proxyName(target.country, target.state, city);
13548
- const found = proxies.find((proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id) && (proxy.name === name || configMatches(proxy.config, target, city)));
13549
- if (found) return found;
13550
- }
13551
- return null;
13552
- }
13553
- function stateTarget(target) {
13554
- return {
13555
- ...target,
13556
- level: "state",
13557
- proxyName: proxyName(target.country, target.state),
13558
- config: {
13559
- country: target.country,
13560
- state: target.state
13561
- }
13562
- };
13563
- }
13564
- function findExistingStateProxy(proxies, target) {
13565
- const name = proxyName(target.country, target.state);
13566
- return proxies.find((proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id) && (proxy.name === name || configMatches(proxy.config, target))) ?? null;
13567
- }
13568
- function escalatedTargetLevel(target, attemptIndex) {
13569
- return stateTarget(target);
13570
- }
13571
- function errorText2(err) {
13572
- return err instanceof Error ? err.message : String(err);
13573
- }
13574
- async function resolveKernelProxyId(options) {
13575
- if (options.proxyMode === "none") {
13576
- return resolution("disabled", options.proxyMode, void 0, null, null);
13577
- }
13578
- if (options.proxyMode === "configured") {
13579
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, null, null);
13580
- }
13581
- const target = parseKernelLocationProxyTarget(options.location, options.gl);
13582
- if (!target || !options.kernelApiKey) {
13583
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, target ? null : "location could not be normalized to a US city/state proxy target");
13927
+ seed_query: seed,
13928
+ response_text: i === 0 ? text ?? "" : "",
13929
+ citation_text: c.text,
13930
+ citation_href: c.href
13931
+ }));
13932
+ const csv = import_papaparse2.default.unparse(rows, { header: true });
13933
+ const filename = `${slug}-ai-mode-${Date.now()}.csv`;
13934
+ const fullPath = import_node_path5.default.join(outputDir, filename);
13935
+ await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13936
+ return fullPath;
13937
+ }
13938
+ async writeWhatPeopleSayingCSV(cards, seed, outputDir) {
13939
+ await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
13940
+ const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13941
+ const rows = cards.map((c) => ({ seed_query: seed, ...c }));
13942
+ const csv = import_papaparse2.default.unparse(rows, { header: true });
13943
+ const filename = `${slug}-what-people-saying-${Date.now()}.csv`;
13944
+ const fullPath = import_node_path5.default.join(outputDir, filename);
13945
+ await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13946
+ return fullPath;
13947
+ }
13948
+ };
13584
13949
  }
13585
- const kernel = new import_sdk5.default({ apiKey: options.kernelApiKey });
13586
- try {
13587
- const attemptIndex = options.attemptIndex ?? 0;
13588
- if (attemptIndex >= 1) {
13589
- const escalatedTarget = escalatedTargetLevel(target, attemptIndex);
13590
- const createErrors2 = [];
13591
- try {
13592
- const created = await kernel.proxies.create({
13593
- type: "residential",
13594
- name: escalatedTarget.proxyName,
13595
- config: escalatedTarget.config
13596
- });
13597
- if (created.id) {
13598
- return resolution("location_created", options.proxyMode, created.id, escalatedTarget, null);
13599
- }
13600
- createErrors2.push(`${escalatedTarget.state}: Kernel did not return a proxy id`);
13601
- } catch (err) {
13602
- createErrors2.push(`${escalatedTarget.state}: ${errorText2(err)}`);
13950
+ });
13951
+
13952
+ // src/output/ProgressReporter.ts
13953
+ var ProgressReporter;
13954
+ var init_ProgressReporter = __esm({
13955
+ "src/output/ProgressReporter.ts"() {
13956
+ "use strict";
13957
+ ProgressReporter = class {
13958
+ onQuestion(node) {
13959
+ process.stdout.write(JSON.stringify({ event: "question", depth: node.depth, question: node.question }) + "\n");
13603
13960
  }
13604
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, escalatedTarget, createErrors2.join(" | "));
13605
- }
13606
- const proxies = await kernel.proxies.list();
13607
- const zip = knownZipFor(target, options.proxyZip);
13608
- const createErrors = [];
13609
- if (zip) {
13610
- const targetZip = zipTarget(target, zip);
13611
- const existingZip = findExistingTargetProxy(proxies, targetZip);
13612
- if (existingZip?.id) {
13613
- return resolution("location_reused", options.proxyMode, existingZip.id, targetZip, null);
13961
+ onDepth(depth) {
13962
+ process.stdout.write(JSON.stringify({ event: "depth", depth }) + "\n");
13614
13963
  }
13615
- try {
13616
- const created = await kernel.proxies.create({
13617
- type: "residential",
13618
- name: targetZip.proxyName,
13619
- config: {
13620
- country: targetZip.country,
13621
- zip
13622
- }
13623
- });
13624
- if (created.id) {
13625
- return resolution("location_created", options.proxyMode, created.id, targetZip, null);
13964
+ onVideos(videos) {
13965
+ for (const v of videos) {
13966
+ process.stdout.write(JSON.stringify({ event: "video", type: v.type, platform: v.platform, duration: v.duration, title: v.title, channel: v.channel, url: v.url }) + "\n");
13626
13967
  }
13627
- createErrors.push(`${zip}: Kernel did not return a proxy id`);
13628
- } catch (err) {
13629
- createErrors.push(`${zip}: ${errorText2(err)}`);
13630
13968
  }
13631
- }
13632
- const existing = findExistingProxy(proxies, target);
13633
- if (existing?.id) {
13634
- return resolution("location_reused", options.proxyMode, existing.id, target, createErrors.join(" | ") || null);
13635
- }
13636
- for (const city of target.cityCandidates) {
13637
- try {
13638
- const created = await kernel.proxies.create({
13639
- type: "residential",
13640
- name: proxyName(target.country, target.state, city),
13641
- config: {
13642
- country: target.country,
13643
- state: target.state,
13644
- city
13645
- }
13646
- });
13647
- if (created.id) {
13648
- return resolution("location_created", options.proxyMode, created.id, {
13649
- ...target,
13650
- level: "city",
13651
- city,
13652
- proxyName: proxyName(target.country, target.state, city),
13653
- config: {
13654
- country: target.country,
13655
- state: target.state,
13656
- city
13657
- }
13658
- }, null);
13969
+ onForums(forums) {
13970
+ for (const f of forums) {
13971
+ process.stdout.write(JSON.stringify({ event: "forum", title: f.title, source: f.source, url: f.url }) + "\n");
13659
13972
  }
13660
- createErrors.push(`${city}: Kernel did not return a proxy id`);
13661
- } catch (err) {
13662
- createErrors.push(`${city}: ${errorText2(err)}`);
13663
13973
  }
13664
- }
13665
- const fallbackTarget = stateTarget(target);
13666
- const existingState = findExistingStateProxy(proxies, fallbackTarget);
13667
- if (existingState?.id) {
13668
- return resolution("location_reused", options.proxyMode, existingState.id, fallbackTarget, createErrors.join(" | "));
13669
- }
13670
- try {
13671
- const created = await kernel.proxies.create({
13672
- type: "residential",
13673
- name: fallbackTarget.proxyName,
13674
- config: fallbackTarget.config
13675
- });
13676
- if (created.id) {
13677
- return resolution("location_created", options.proxyMode, created.id, fallbackTarget, createErrors.join(" | "));
13974
+ onComplete(stats) {
13975
+ process.stdout.write(JSON.stringify({ event: "complete", ...stats }) + "\n");
13976
+ }
13977
+ onError(err) {
13978
+ process.stderr.write(JSON.stringify({ event: "error", type: err.constructor.name, message: err.message }) + "\n");
13678
13979
  }
13679
- createErrors.push(`${fallbackTarget.state}: Kernel did not return a proxy id`);
13680
- } catch (err) {
13681
- createErrors.push(`${fallbackTarget.state}: ${errorText2(err)}`);
13682
- }
13683
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, createErrors.join(" | "));
13684
- } catch (err) {
13685
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, errorText2(err));
13686
- }
13687
- }
13688
- var import_sdk5, US_STATE_CODES, US_CITY_CENTER_ZIPS;
13689
- var init_kernel_proxy_resolver = __esm({
13690
- "src/kernel-proxy-resolver.ts"() {
13691
- "use strict";
13692
- import_sdk5 = __toESM(require("@onkernel/sdk"), 1);
13693
- init_uule();
13694
- US_STATE_CODES = {
13695
- alabama: "AL",
13696
- alaska: "AK",
13697
- arizona: "AZ",
13698
- arkansas: "AR",
13699
- california: "CA",
13700
- colorado: "CO",
13701
- connecticut: "CT",
13702
- delaware: "DE",
13703
- florida: "FL",
13704
- georgia: "GA",
13705
- hawaii: "HI",
13706
- idaho: "ID",
13707
- illinois: "IL",
13708
- indiana: "IN",
13709
- iowa: "IA",
13710
- kansas: "KS",
13711
- kentucky: "KY",
13712
- louisiana: "LA",
13713
- maine: "ME",
13714
- maryland: "MD",
13715
- massachusetts: "MA",
13716
- michigan: "MI",
13717
- minnesota: "MN",
13718
- mississippi: "MS",
13719
- missouri: "MO",
13720
- montana: "MT",
13721
- nebraska: "NE",
13722
- nevada: "NV",
13723
- "new hampshire": "NH",
13724
- "new jersey": "NJ",
13725
- "new mexico": "NM",
13726
- "new york": "NY",
13727
- "north carolina": "NC",
13728
- "north dakota": "ND",
13729
- ohio: "OH",
13730
- oklahoma: "OK",
13731
- oregon: "OR",
13732
- pennsylvania: "PA",
13733
- "rhode island": "RI",
13734
- "south carolina": "SC",
13735
- "south dakota": "SD",
13736
- tennessee: "TN",
13737
- texas: "TX",
13738
- utah: "UT",
13739
- vermont: "VT",
13740
- virginia: "VA",
13741
- washington: "WA",
13742
- "west virginia": "WV",
13743
- wisconsin: "WI",
13744
- wyoming: "WY"
13745
- };
13746
- US_CITY_CENTER_ZIPS = {
13747
- "atlanta|GA": "30303",
13748
- "austin|TX": "78701",
13749
- "baltimore|MD": "21201",
13750
- "boston|MA": "02108",
13751
- "boulder|CO": "80302",
13752
- "charlotte|NC": "28202",
13753
- "chicago|IL": "60601",
13754
- "colorado_springs|CO": "80903",
13755
- "columbus|OH": "43215",
13756
- "dallas|TX": "75201",
13757
- "denver|CO": "80202",
13758
- "detroit|MI": "48226",
13759
- "fort_collins|CO": "80524",
13760
- "fort_worth|TX": "76102",
13761
- "houston|TX": "77002",
13762
- "indianapolis|IN": "46204",
13763
- "jacksonville|FL": "32202",
13764
- "las_vegas|NV": "89101",
13765
- "los_angeles|CA": "90012",
13766
- "louisville|KY": "40202",
13767
- "loveland|CO": "80537",
13768
- "memphis|TN": "38103",
13769
- "miami|FL": "33131",
13770
- "minneapolis|MN": "55401",
13771
- "nashville|TN": "37203",
13772
- "new_york|NY": "10001",
13773
- "orlando|FL": "32801",
13774
- "philadelphia|PA": "19103",
13775
- "phoenix|AZ": "85004",
13776
- "portland|OR": "97205",
13777
- "raleigh|NC": "27601",
13778
- "richmond|VA": "23219",
13779
- "sacramento|CA": "95814",
13780
- "salt_lake_city|UT": "84101",
13781
- "san_antonio|TX": "78205",
13782
- "san_diego|CA": "92101",
13783
- "san_francisco|CA": "94103",
13784
- "san_jose|CA": "95113",
13785
- "seattle|WA": "98101"
13786
13980
  };
13787
13981
  }
13788
13982
  });
@@ -14879,9 +15073,12 @@ function reportTitle(full) {
14879
15073
  const title = full.split("\n").find((line) => line.startsWith("# "));
14880
15074
  return title?.replace(/^#\s+/, "").trim() || "MCP Scraper Report";
14881
15075
  }
15076
+ function outputBaseDir() {
15077
+ return process.env.MCP_SCRAPER_OUTPUT_DIR?.trim() || (0, import_node_path6.join)((0, import_node_os3.homedir)(), "Downloads", "mcp-scraper");
15078
+ }
14882
15079
  function saveFullReport(full) {
14883
15080
  if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
14884
- const outDir = process.env.MCP_SCRAPER_OUTPUT_DIR?.trim() || (0, import_node_path6.join)((0, import_node_os3.homedir)(), "Downloads", "mcp-scraper");
15081
+ const outDir = outputBaseDir();
14885
15082
  try {
14886
15083
  (0, import_node_fs4.mkdirSync)(outDir, { recursive: true });
14887
15084
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
@@ -14892,6 +15089,20 @@ function saveFullReport(full) {
14892
15089
  return null;
14893
15090
  }
14894
15091
  }
15092
+ function persistScreenshotLocally(base64, url) {
15093
+ if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
15094
+ try {
15095
+ const dir = (0, import_node_path6.join)(outputBaseDir(), "screenshots");
15096
+ (0, import_node_fs4.mkdirSync)(dir, { recursive: true });
15097
+ const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
15098
+ const slug = url.replace(/^https?:\/\//, "").replace(/[^a-z0-9]+/gi, "-").replace(/^-+|-+$/g, "").slice(0, 60);
15099
+ const filePath = (0, import_node_path6.join)(dir, `${stamp}-${slug}.png`);
15100
+ (0, import_node_fs4.writeFileSync)(filePath, Buffer.from(base64, "base64"));
15101
+ return filePath;
15102
+ } catch {
15103
+ return null;
15104
+ }
15105
+ }
14895
15106
  function oneBlock(content) {
14896
15107
  const filePath = saveFullReport(content);
14897
15108
  const text = filePath ? `${content}
@@ -15112,6 +15323,7 @@ function formatExtractUrl(raw, input) {
15112
15323
  const bodyMd = d.bodyMarkdown ?? "";
15113
15324
  const schema = d.schema;
15114
15325
  const screenshotMeta = d.screenshot;
15326
+ const screenshotPath = screenshotMeta?.base64 ? persistScreenshotLocally(screenshotMeta.base64, url) : null;
15115
15327
  const branding = d.branding;
15116
15328
  const media = d.media;
15117
15329
  const h1Lines = headings.filter((h) => h.level === 1).map((h) => `- ${h.text}`).join("\n");
@@ -15138,7 +15350,7 @@ ${[h1Lines, h2Lines].filter(Boolean).join("\n")}` : "";
15138
15350
  ${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
15139
15351
  const screenshotSection = screenshotMeta ? `
15140
15352
  ## Screenshot
15141
- - **File:** ${screenshotMeta.savedPath}
15353
+ - **File:** ${screenshotPath ?? "(returned inline only \u2014 disk write unavailable in this environment)"}
15142
15354
  - **Size:** ${(screenshotMeta.sizeBytes / 1024).toFixed(1)} KB
15143
15355
  - **Device:** ${screenshotMeta.device}` : "";
15144
15356
  const brandingSection = branding ? [
@@ -15167,17 +15379,13 @@ ${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
15167
15379
  **${title}**
15168
15380
  ${headingSection}${kpoSection}${brandingSection}${bodySection}${screenshotSection}${mediaSection}${tips}`;
15169
15381
  const textResult = oneBlock(full);
15170
- if (screenshotMeta?.savedPath) {
15171
- try {
15172
- const imgBuf = (0, import_node_fs4.readFileSync)(screenshotMeta.savedPath);
15173
- return {
15174
- content: [
15175
- ...textResult.content,
15176
- { type: "image", data: imgBuf.toString("base64"), mimeType: "image/png" }
15177
- ]
15178
- };
15179
- } catch {
15180
- }
15382
+ if (screenshotMeta?.base64) {
15383
+ return {
15384
+ content: [
15385
+ ...textResult.content,
15386
+ { type: "image", data: screenshotMeta.base64, mimeType: "image/png" }
15387
+ ]
15388
+ };
15181
15389
  }
15182
15390
  return textResult;
15183
15391
  }
@@ -15377,7 +15585,7 @@ function formatFacebookAdSearch(raw, input) {
15377
15585
  const d = parsed.data;
15378
15586
  const advertisers = d.results ?? d.advertisers ?? [];
15379
15587
  const rows = advertisers.map(
15380
- (a, i) => `| ${i + 1} | ${cell(a.name)} | ${a.adCount ?? "\u2014"} | \`${a.libraryId ?? "\u2014"}\` |`
15588
+ (a, i) => `| ${i + 1} | ${cell(a.pageName ?? a.name)} | ${a.adCount ?? "\u2014"} | \`${a.sampleLibraryId ?? a.libraryId ?? "\u2014"}\` |`
15381
15589
  ).join("\n");
15382
15590
  const full = [
15383
15591
  `# Facebook Ad Library Search: "${input.query}"`,
@@ -15679,16 +15887,20 @@ var HttpMcpToolExecutor;
15679
15887
  var init_http_mcp_tool_executor = __esm({
15680
15888
  "src/mcp/http-mcp-tool-executor.ts"() {
15681
15889
  "use strict";
15890
+ init_harvest_timeout();
15682
15891
  HttpMcpToolExecutor = class {
15683
15892
  baseUrl;
15684
15893
  apiKey;
15685
15894
  timeoutMs;
15895
+ httpTimeoutOverrideMs;
15686
15896
  serpIntelligenceTimeoutMs;
15687
15897
  constructor(baseUrl, apiKey) {
15688
15898
  this.baseUrl = baseUrl.replace(/\/$/, "");
15689
15899
  this.apiKey = apiKey;
15690
- const configuredTimeoutMs = Number(process.env.MCP_SCRAPER_HTTP_TIMEOUT_MS ?? 11e4);
15691
- this.timeoutMs = Number.isFinite(configuredTimeoutMs) && configuredTimeoutMs > 0 ? configuredTimeoutMs : 11e4;
15900
+ const rawOverride = process.env.MCP_SCRAPER_HTTP_TIMEOUT_MS;
15901
+ const parsedOverride = rawOverride === void 0 ? NaN : Number(rawOverride);
15902
+ this.httpTimeoutOverrideMs = Number.isFinite(parsedOverride) && parsedOverride > 0 ? parsedOverride : null;
15903
+ this.timeoutMs = this.httpTimeoutOverrideMs ?? 11e4;
15692
15904
  const configuredSerpIntelligenceTimeoutMs = Number(process.env.MCP_SCRAPER_SERP_INTELLIGENCE_HTTP_TIMEOUT_MS ?? this.timeoutMs);
15693
15905
  this.serpIntelligenceTimeoutMs = Number.isFinite(configuredSerpIntelligenceTimeoutMs) && configuredSerpIntelligenceTimeoutMs > 0 ? configuredSerpIntelligenceTimeoutMs : this.timeoutMs;
15694
15906
  }
@@ -15730,10 +15942,12 @@ var init_http_mcp_tool_executor = __esm({
15730
15942
  }
15731
15943
  }
15732
15944
  harvestPaa(input) {
15733
- return this.call("/harvest/sync", input);
15945
+ const timeoutMs = this.httpTimeoutOverrideMs ?? harvestTimeoutBudget(input.maxQuestions ?? 30).clientMs;
15946
+ return this.call("/harvest/sync", input, timeoutMs);
15734
15947
  }
15735
15948
  searchSerp(input) {
15736
- return this.call("/harvest/sync", { ...input, serpOnly: true });
15949
+ const timeoutMs = this.httpTimeoutOverrideMs ?? harvestTimeoutBudget(0, true).clientMs;
15950
+ return this.call("/harvest/sync", { ...input, serpOnly: true }, timeoutMs);
15737
15951
  }
15738
15952
  extractUrl(input) {
15739
15953
  return this.call("/extract-url", input);
@@ -16380,18 +16594,16 @@ async function checkHarvestLimits(userId, email, extraSlots = 0) {
16380
16594
  if (active >= limit) return { error: `You have ${active} job${active !== 1 ? "s" : ""} running. Your account allows ${limit} concurrent job${limit !== 1 ? "s" : ""}. Wait for one to finish or add a concurrency slot at mcpscraper.dev/billing.` };
16381
16595
  return null;
16382
16596
  }
16383
- var import_resend, import_node_fs5, import_node_os4, import_node_path7, import_hono9, import_hono10, import_factory6, import_cookie, import_stripe2, secureCookies, isProduction2, sessionCookieOptions, requireAllowedOrigin, auth, adminAuth, sessionAuth, app, STRIPE_API_VERSION, BYPASS_EMAILS, SYNC_HARVEST_TIMEOUT_MS;
16597
+ var import_resend, import_hono9, import_hono10, import_factory6, import_cookie, import_stripe2, secureCookies, isProduction2, sessionCookieOptions, requireAllowedOrigin, auth, adminAuth, sessionAuth, app, STRIPE_API_VERSION, BYPASS_EMAILS, SYNC_HARVEST_TIMEOUT_OVERRIDE_MS;
16384
16598
  var init_server = __esm({
16385
16599
  "src/api/server.ts"() {
16386
16600
  "use strict";
16601
+ init_harvest_timeout();
16387
16602
  init_registry();
16388
16603
  init_template();
16389
16604
  init_og();
16390
16605
  import_resend = require("resend");
16391
16606
  init_url_utils();
16392
- import_node_fs5 = require("fs");
16393
- import_node_os4 = require("os");
16394
- import_node_path7 = require("path");
16395
16607
  init_kpo_extractor();
16396
16608
  init_screenshot();
16397
16609
  init_media_extractor();
@@ -16621,7 +16833,11 @@ var init_server = __esm({
16621
16833
  BYPASS_EMAILS = new Set(
16622
16834
  (process.env.HARVEST_LIMIT_BYPASS_EMAILS ?? "").split(",").map((e) => e.trim()).filter(Boolean)
16623
16835
  );
16624
- SYNC_HARVEST_TIMEOUT_MS = Number(process.env.SYNC_HARVEST_TIMEOUT_MS ?? 105e3);
16836
+ SYNC_HARVEST_TIMEOUT_OVERRIDE_MS = (() => {
16837
+ const raw = process.env.SYNC_HARVEST_TIMEOUT_MS;
16838
+ const parsed = raw === void 0 ? NaN : Number(raw);
16839
+ return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
16840
+ })();
16625
16841
  app.post("/harvest", auth, async (c) => {
16626
16842
  const user = c.get("user");
16627
16843
  const raw = await c.req.json().catch(() => ({}));
@@ -16691,9 +16907,10 @@ var init_server = __esm({
16691
16907
  if (!syncOk) return c.json(insufficientBalanceResponse(syncBal, syncCost), 402);
16692
16908
  const jobId = await createRunningJob(user.id, options.query, options);
16693
16909
  const recordAttempt = createHarvestAttemptRecorder(jobId, user.id);
16910
+ const syncTimeoutMs = SYNC_HARVEST_TIMEOUT_OVERRIDE_MS ?? harvestTimeoutBudget(options.maxQuestions, options.serpOnly).serverMs;
16694
16911
  const syncSignal = combineAbortSignals([
16695
16912
  c.req.raw.signal,
16696
- AbortSignal.timeout(Number.isFinite(SYNC_HARVEST_TIMEOUT_MS) && SYNC_HARVEST_TIMEOUT_MS > 0 ? SYNC_HARVEST_TIMEOUT_MS : 105e3)
16913
+ AbortSignal.timeout(syncTimeoutMs)
16697
16914
  ]);
16698
16915
  try {
16699
16916
  const result = await harvest({
@@ -16841,13 +17058,7 @@ var init_server = __esm({
16841
17058
  const brandingData = pageData?.branding ?? null;
16842
17059
  let screenshotMeta = null;
16843
17060
  if (screenshotBuf) {
16844
- const outDir = (0, import_node_path7.join)((0, import_node_os4.homedir)(), "Downloads", "mcp-scraper", "screenshots");
16845
- (0, import_node_fs5.mkdirSync)(outDir, { recursive: true });
16846
- const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
16847
- const slug = canonicalUrl.replace(/^https?:\/\//, "").replace(/[^a-z0-9]+/gi, "-").replace(/^-+|-+$/g, "").slice(0, 60);
16848
- const filePath = (0, import_node_path7.join)(outDir, `${stamp}-${slug}.png`);
16849
- (0, import_node_fs5.writeFileSync)(filePath, screenshotBuf);
16850
- screenshotMeta = { savedPath: filePath, sizeBytes: screenshotBuf.length, device };
17061
+ screenshotMeta = { base64: screenshotBuf.toString("base64"), sizeBytes: screenshotBuf.length, device };
16851
17062
  }
16852
17063
  const mediaMeta = downloadMedia ? await harvestPageMedia(result.bodyHtml, canonicalUrl, { types: mediaTypes ?? ["image", "video", "audio"] }) : null;
16853
17064
  await logRequestEvent({ userId: user.id, source: "extract_url", status: "done", query: canonicalUrl, resultCount: result.headings.length, result });
@@ -17185,10 +17396,10 @@ var init_server = __esm({
17185
17396
  });
17186
17397
 
17187
17398
  // bin/api-server.ts
17188
- var import_node_fs6 = require("fs");
17399
+ var import_node_fs5 = require("fs");
17189
17400
  function loadDotEnv() {
17190
17401
  try {
17191
- for (const line of (0, import_node_fs6.readFileSync)(".env", "utf8").split("\n")) {
17402
+ for (const line of (0, import_node_fs5.readFileSync)(".env", "utf8").split("\n")) {
17192
17403
  const eq = line.indexOf("=");
17193
17404
  if (eq < 1 || line.trimStart().startsWith("#")) continue;
17194
17405
  const k = line.slice(0, eq).trim();