mcp-scraper 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/api-server.cjs +1040 -829
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/mcp-stdio-server.cjs +51 -18
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +1 -1
- package/dist/bin/paa-harvest.js +1 -1
- package/dist/{chunk-JQKZWEON.js → chunk-4OHPDEZM.js} +54 -20
- package/dist/chunk-4OHPDEZM.js.map +1 -0
- package/dist/{chunk-Y74EXABN.js → chunk-7HB7NDOY.js} +2 -2
- package/dist/{chunk-HERFK7W6.js → chunk-W4P2U5VF.js} +2 -1
- package/dist/index.js +1 -1
- package/dist/{server-6CHHLOII.js → server-V5XMVRYE.js} +209 -47
- package/dist/server-V5XMVRYE.js.map +1 -0
- package/dist/{worker-D4D2YQTA.js → worker-UT4ZQU2T.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-JQKZWEON.js.map +0 -1
- package/dist/server-6CHHLOII.js.map +0 -1
- /package/dist/{chunk-Y74EXABN.js.map → chunk-7HB7NDOY.js.map} +0 -0
- /package/dist/{chunk-HERFK7W6.js.map → chunk-W4P2U5VF.js.map} +0 -0
- /package/dist/{worker-D4D2YQTA.js.map → worker-UT4ZQU2T.js.map} +0 -0
package/dist/bin/api-server.cjs
CHANGED
|
@@ -30,6 +30,26 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
30
30
|
mod
|
|
31
31
|
));
|
|
32
32
|
|
|
33
|
+
// src/harvest-timeout.ts
|
|
34
|
+
/**
 * Chooses the server-side and client-side timeout budgets for a harvest run.
 *
 * The server budget grows in tiers with the requested question count; a
 * SERP-only run always gets the smallest tier. The client budget is the server
 * budget plus CLIENT_OVER_SERVER_MARGIN_MS, capped at 5 seconds below
 * VERCEL_FUNCTION_MAX_MS.
 *
 * @param {number} maxQuestions - Requested question count; non-finite or
 *   non-positive values fall back to 30. Fractions are truncated.
 * @param {boolean} [serpOnly=false] - When true, the smallest tier is used.
 * @returns {{ serverMs: number, clientMs: number }} Budgets in milliseconds.
 */
function harvestTimeoutBudget(maxQuestions, serpOnly = false) {
  const hasUsableCount = Number.isFinite(maxQuestions) && maxQuestions > 0;
  const requested = hasUsableCount ? Math.trunc(maxQuestions) : 30;

  const pickServerMs = () => {
    if (serpOnly || requested <= 50) return 110000;
    if (requested <= 100) return 180000;
    if (requested <= 150) return 240000;
    return 280000;
  };

  const serverMs = pickServerMs();
  // Never let the client wait longer than the function ceiling minus 5 s.
  const clientCeilingMs = VERCEL_FUNCTION_MAX_MS - 5000;
  const clientMs = Math.min(serverMs + CLIENT_OVER_SERVER_MARGIN_MS, clientCeilingMs);
  return { serverMs, clientMs };
}
|
|
44
|
+
var VERCEL_FUNCTION_MAX_MS, CLIENT_OVER_SERVER_MARGIN_MS;
|
|
45
|
+
var init_harvest_timeout = __esm({
|
|
46
|
+
"src/harvest-timeout.ts"() {
|
|
47
|
+
"use strict";
|
|
48
|
+
VERCEL_FUNCTION_MAX_MS = 3e5;
|
|
49
|
+
CLIENT_OVER_SERVER_MARGIN_MS = 15e3;
|
|
50
|
+
}
|
|
51
|
+
});
|
|
52
|
+
|
|
33
53
|
// src/blog/registry.ts
|
|
34
54
|
var posts;
|
|
35
55
|
var init_registry = __esm({
|
|
@@ -3825,25 +3845,73 @@ function firstFont(fontFamily) {
|
|
|
3825
3845
|
const first = fontFamily.split(",")[0].trim().replace(/['"]/g, "");
|
|
3826
3846
|
return first || null;
|
|
3827
3847
|
}
|
|
3848
|
+
/**
 * Returns the most frequent colour in a hex -> count table, ignoring colours
 * rejected by isTransparentOrWhite and the two near-black values "#000000"
 * and "#020101".
 *
 * @param {Record<string, number>} freq - Occurrence count per hex colour.
 * @returns {string|null} The hex key with the highest count, or null when no
 *   colour survives the filter.
 */
function dominantColor(freq) {
  const isIgnored = (hex) => isTransparentOrWhite(hex) || hex === "#000000" || hex === "#020101";
  const ranked = Object.entries(freq)
    .filter(([hex]) => !isIgnored(hex))
    .sort(([, countA], [, countB]) => countB - countA);
  const [top] = ranked;
  return top?.[0] ?? null;
}
|
|
3828
3851
|
async function extractBrandingFromPage(page) {
|
|
3829
3852
|
const evalScript = `
|
|
3830
3853
|
(function() {
|
|
3831
3854
|
function cs(el) { return el ? window.getComputedStyle(el) : null; }
|
|
3855
|
+
function toHex(rgb) {
|
|
3856
|
+
var m = rgb && rgb.match(/rgba?\\((\\d+),\\s*(\\d+),\\s*(\\d+)/);
|
|
3857
|
+
if (!m) return null;
|
|
3858
|
+
return '#' + [m[1],m[2],m[3]].map(function(v){ return ('0'+parseInt(v).toString(16)).slice(-2); }).join('');
|
|
3859
|
+
}
|
|
3860
|
+
function isUsable(hex) {
|
|
3861
|
+
if (!hex) return false;
|
|
3862
|
+
if (hex === '#000000' || hex === '#020101' || hex === '#ffffff' || hex === '#fffffe') return false;
|
|
3863
|
+
var r=parseInt(hex.slice(1,3),16), g=parseInt(hex.slice(3,5),16), b=parseInt(hex.slice(5,7),16);
|
|
3864
|
+
return (0.2126*r + 0.7152*g + 0.0722*b) <= 230;
|
|
3865
|
+
}
|
|
3866
|
+
|
|
3832
3867
|
var navEl = document.querySelector('nav, header, [role="banner"]');
|
|
3833
3868
|
var bodyEl = document.body;
|
|
3834
3869
|
var h1El = document.querySelector('h1');
|
|
3835
3870
|
var btnEl = document.querySelector(
|
|
3836
3871
|
'a.btn-primary, button.btn-primary, .btn-primary, .cta-btn,' +
|
|
3837
3872
|
'a.button--primary, button.button--primary, [class*="btn-cta"],' +
|
|
3838
|
-
'[class*="cta-button"], .wp-block-button__link, [class*="hero"] a'
|
|
3873
|
+
'[class*="cta-button"], .wp-block-button__link, [class*="hero"] a,' +
|
|
3874
|
+
'.elementor-button, .elementor-button-link,' +
|
|
3875
|
+
'.et_pb_button,' +
|
|
3876
|
+
'.fl-button,' +
|
|
3877
|
+
'.vc_btn,' +
|
|
3878
|
+
'[class*="cta"][href], [class*="get-started"], [class*="contact-btn"]'
|
|
3839
3879
|
);
|
|
3840
3880
|
var navStyle = cs(navEl);
|
|
3841
3881
|
var bodyStyle = cs(bodyEl);
|
|
3842
3882
|
var h1Style = cs(h1El);
|
|
3843
3883
|
var btnStyle = cs(btnEl);
|
|
3844
|
-
|
|
3884
|
+
|
|
3885
|
+
var svgFreq = {};
|
|
3886
|
+
var svgScope = navEl || document.querySelector('header, [role="banner"]');
|
|
3887
|
+
if (svgScope) {
|
|
3888
|
+
var svgEls = svgScope.querySelectorAll('svg *, [fill], path, circle, rect, polygon, polyline');
|
|
3889
|
+
for (var si = 0; si < svgEls.length; si++) {
|
|
3890
|
+
var svgEl = svgEls[si];
|
|
3891
|
+
var fillComp = cs(svgEl) ? cs(svgEl).fill : null;
|
|
3892
|
+
var fillAttr = svgEl.getAttribute('fill');
|
|
3893
|
+
var fillHex = null;
|
|
3894
|
+
if (fillComp && fillComp !== 'none') { fillHex = toHex(fillComp); }
|
|
3895
|
+
else if (fillAttr && fillAttr !== 'none' && fillAttr.startsWith('#')) { fillHex = fillAttr; }
|
|
3896
|
+
if (fillHex && isUsable(fillHex)) { svgFreq[fillHex] = (svgFreq[fillHex] || 0) + 1; }
|
|
3897
|
+
}
|
|
3898
|
+
}
|
|
3899
|
+
|
|
3900
|
+
var navChildBgFreq = {};
|
|
3901
|
+
if (navEl) {
|
|
3902
|
+
var navChildren = navEl.querySelectorAll('li, a, button, [class*="menu-item"]');
|
|
3903
|
+
for (var ni = 0; ni < navChildren.length; ni++) {
|
|
3904
|
+
var nbg = cs(navChildren[ni]);
|
|
3905
|
+
if (nbg) {
|
|
3906
|
+
var bghex = toHex(nbg.backgroundColor);
|
|
3907
|
+
if (bghex && isUsable(bghex)) { navChildBgFreq[bghex] = (navChildBgFreq[bghex] || 0) + 1; }
|
|
3908
|
+
}
|
|
3909
|
+
}
|
|
3910
|
+
}
|
|
3911
|
+
|
|
3912
|
+
var pageHost = window.location.hostname.replace(/^www\\./, '');
|
|
3845
3913
|
function isSameDomain(src) {
|
|
3846
|
-
try { return new URL(src).hostname.replace(/^www
|
|
3914
|
+
try { return new URL(src).hostname.replace(/^www\\./, '').endsWith(pageHost); } catch { return false; }
|
|
3847
3915
|
}
|
|
3848
3916
|
var logoSelectors = [
|
|
3849
3917
|
'header img[class*="logo"]', 'nav img[class*="logo"]',
|
|
@@ -3866,22 +3934,27 @@ async function extractBrandingFromPage(page) {
|
|
|
3866
3934
|
'link[rel~="icon"], link[rel="shortcut icon"], link[rel="apple-touch-icon"]'
|
|
3867
3935
|
);
|
|
3868
3936
|
return {
|
|
3869
|
-
navBg:
|
|
3870
|
-
bodyBg:
|
|
3871
|
-
bodyColor:
|
|
3872
|
-
h1Color:
|
|
3873
|
-
btnBg:
|
|
3874
|
-
bodyFont:
|
|
3875
|
-
h1Font:
|
|
3876
|
-
logoSrc:
|
|
3877
|
-
faviconHref:
|
|
3937
|
+
navBg: navStyle ? navStyle.backgroundColor : null,
|
|
3938
|
+
bodyBg: bodyStyle ? bodyStyle.backgroundColor : null,
|
|
3939
|
+
bodyColor: bodyStyle ? bodyStyle.color : null,
|
|
3940
|
+
h1Color: h1Style ? h1Style.color : null,
|
|
3941
|
+
btnBg: btnStyle ? btnStyle.backgroundColor : null,
|
|
3942
|
+
bodyFont: bodyStyle ? bodyStyle.fontFamily : null,
|
|
3943
|
+
h1Font: h1Style ? h1Style.fontFamily : null,
|
|
3944
|
+
logoSrc: logoSrc,
|
|
3945
|
+
faviconHref: faviconEl ? faviconEl.href : null,
|
|
3946
|
+
svgFreq: svgFreq,
|
|
3947
|
+
navChildBgFreq: navChildBgFreq,
|
|
3878
3948
|
};
|
|
3879
3949
|
})()
|
|
3880
3950
|
`;
|
|
3881
3951
|
const raw = await page.evaluate(evalScript);
|
|
3882
3952
|
const navBgHex = rgbToHex(raw.navBg ?? "");
|
|
3883
3953
|
const bodyBgHex = rgbToHex(raw.bodyBg ?? "");
|
|
3884
|
-
const
|
|
3954
|
+
const navBgUsable = navBgHex && !isTransparentOrWhite(navBgHex) && navBgHex !== "#000000" && navBgHex !== "#020101" ? navBgHex : null;
|
|
3955
|
+
const svgPrimary = dominantColor(raw.svgFreq ?? {});
|
|
3956
|
+
const navChildBg = dominantColor(raw.navChildBgFreq ?? {});
|
|
3957
|
+
const primary = navBgUsable ?? svgPrimary ?? navChildBg ?? bodyBgHex;
|
|
3885
3958
|
const accent = rgbToHex(raw.btnBg ?? "");
|
|
3886
3959
|
const text = rgbToHex(raw.bodyColor ?? "");
|
|
3887
3960
|
const heading = rgbToHex(raw.h1Color ?? "");
|
|
@@ -10700,237 +10773,833 @@ var init_FacebookAdExtractor = __esm({
|
|
|
10700
10773
|
}
|
|
10701
10774
|
});
|
|
10702
10775
|
|
|
10703
|
-
// src/
|
|
10704
|
-
function
|
|
10705
|
-
|
|
10706
|
-
|
|
10707
|
-
|
|
10708
|
-
|
|
10709
|
-
|
|
10710
|
-
|
|
10711
|
-
|
|
10712
|
-
|
|
10713
|
-
|
|
10714
|
-
if (body.pageId?.trim()) return `https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=${country}&is_targeted_country=false&media_type=all&search_type=page&view_all_page_id=${body.pageId.trim()}`;
|
|
10715
|
-
return `https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=${country}&q=${encodeURIComponent(body.query.trim())}&search_type=keyword_unordered`;
|
|
10716
|
-
}
|
|
10717
|
-
function kernelLaunchOpts() {
|
|
10718
|
-
return { headless: true, kernelApiKey: process.env.KERNEL_API_KEY?.trim(), kernelProxyId: process.env.KERNEL_PROXY_ID?.trim(), viewport: { width: 1280, height: 900 }, locale: "en-US" };
|
|
10719
|
-
}
|
|
10720
|
-
var import_hono4, import_zod15, import_client3, FacebookAdBodySchema, FacebookPageIntelBodySchema, FacebookTranscribeBodySchema, FacebookSearchBodySchema, FacebookMediaBodySchema, facebookAdApp, ALLOWED_MEDIA_HOSTS;
|
|
10721
|
-
var init_facebook_ad_routes = __esm({
|
|
10722
|
-
"src/api/facebook-ad-routes.ts"() {
|
|
10723
|
-
"use strict";
|
|
10724
|
-
import_hono4 = require("hono");
|
|
10725
|
-
import_zod15 = require("zod");
|
|
10726
|
-
init_db();
|
|
10727
|
-
init_rates();
|
|
10728
|
-
init_BrowserDriver();
|
|
10729
|
-
init_FacebookAdExtractor();
|
|
10730
|
-
import_client3 = require("@fal-ai/client");
|
|
10731
|
-
init_api_auth();
|
|
10732
|
-
init_url_utils();
|
|
10733
|
-
FacebookAdBodySchema = import_zod15.z.object({
|
|
10734
|
-
url: import_zod15.z.string().trim().optional(),
|
|
10735
|
-
libraryId: import_zod15.z.string().trim().optional(),
|
|
10736
|
-
openModal: import_zod15.z.boolean().optional()
|
|
10737
|
-
}).refine((d) => !!d.url || !!d.libraryId, { message: "url or libraryId is required" });
|
|
10738
|
-
FacebookPageIntelBodySchema = import_zod15.z.object({
|
|
10739
|
-
pageId: import_zod15.z.string().trim().optional(),
|
|
10740
|
-
query: import_zod15.z.string().trim().optional(),
|
|
10741
|
-
libraryId: import_zod15.z.string().trim().optional(),
|
|
10742
|
-
maxAds: import_zod15.z.number().int().min(1).max(200).optional(),
|
|
10743
|
-
country: import_zod15.z.string().trim().toUpperCase().optional()
|
|
10744
|
-
}).refine((d) => !!d.pageId || !!d.query || !!d.libraryId, {
|
|
10745
|
-
message: "pageId, libraryId, or query is required"
|
|
10746
|
-
});
|
|
10747
|
-
FacebookTranscribeBodySchema = import_zod15.z.object({
|
|
10748
|
-
videoUrl: import_zod15.z.string().trim().min(1, "videoUrl is required")
|
|
10749
|
-
});
|
|
10750
|
-
FacebookSearchBodySchema = import_zod15.z.object({
|
|
10751
|
-
query: import_zod15.z.string().trim().min(1, "query is required"),
|
|
10752
|
-
country: import_zod15.z.string().trim().toUpperCase().optional(),
|
|
10753
|
-
maxResults: import_zod15.z.number().int().min(1).max(20).optional()
|
|
10754
|
-
});
|
|
10755
|
-
FacebookMediaBodySchema = import_zod15.z.object({
|
|
10756
|
-
url: import_zod15.z.string().trim().min(1, "url is required"),
|
|
10757
|
-
filename: import_zod15.z.string().trim().optional()
|
|
10758
|
-
});
|
|
10759
|
-
facebookAdApp = new import_hono4.Hono();
|
|
10760
|
-
facebookAdApp.post("/ad", createApiKeyAuth(), async (c) => {
|
|
10761
|
-
const raw = await c.req.json().catch(() => ({}));
|
|
10762
|
-
const parsed = FacebookAdBodySchema.safeParse(raw);
|
|
10763
|
-
if (!parsed.success) {
|
|
10764
|
-
return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
|
|
10765
|
-
}
|
|
10766
|
-
const body = parsed.data;
|
|
10767
|
-
const raw2 = body.url?.trim() ?? body.libraryId?.trim() ?? "";
|
|
10768
|
-
const libraryId = FacebookAdExtractor.resolveLibraryId(raw2);
|
|
10769
|
-
if (!libraryId) return c.json({ error: "Could not resolve a valid Facebook Ad Library ID from the provided input" }, 400);
|
|
10770
|
-
const fbUser = c.get("user");
|
|
10771
|
-
const { ok: adOk, balance_mc: adBal } = await debitMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD, raw2);
|
|
10772
|
-
if (!adOk) return c.json(insufficientBalanceResponse(adBal, MC_COSTS.fb_ad), 402);
|
|
10773
|
-
const driver = new BrowserDriver();
|
|
10774
|
-
try {
|
|
10775
|
-
await driver.launch(kernelLaunchOpts());
|
|
10776
|
-
const extractor = new FacebookAdExtractor(driver);
|
|
10777
|
-
const result = await extractor.extract(libraryId, { openModal: body.openModal !== false });
|
|
10778
|
-
await logRequestEvent({
|
|
10779
|
-
userId: fbUser.id,
|
|
10780
|
-
source: "facebook_ad",
|
|
10781
|
-
status: "done",
|
|
10782
|
-
query: raw2,
|
|
10783
|
-
resultCount: Array.isArray(result.variants) ? result.variants.length : null,
|
|
10784
|
-
result
|
|
10785
|
-
});
|
|
10786
|
-
return c.json(result);
|
|
10787
|
-
} catch (err) {
|
|
10788
|
-
await creditMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD_REFUND, "failed call");
|
|
10789
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
10790
|
-
await logRequestEvent({ userId: fbUser.id, source: "facebook_ad", status: "failed", query: raw2, error: msg });
|
|
10791
|
-
if (msg.toLowerCase().includes("blocked") || msg.toLowerCase().includes("captcha")) {
|
|
10792
|
-
return c.json({ error: msg }, 503);
|
|
10793
|
-
}
|
|
10794
|
-
return c.json({ error: msg }, 500);
|
|
10795
|
-
} finally {
|
|
10796
|
-
await driver.close();
|
|
10797
|
-
}
|
|
10798
|
-
});
|
|
10799
|
-
facebookAdApp.post("/page-intel", createApiKeyAuth(), async (c) => {
|
|
10800
|
-
const raw = await c.req.json().catch(() => ({}));
|
|
10801
|
-
const parsed = FacebookPageIntelBodySchema.safeParse(raw);
|
|
10802
|
-
if (!parsed.success) {
|
|
10803
|
-
return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
|
|
10804
|
-
}
|
|
10805
|
-
const body = parsed.data;
|
|
10806
|
-
const maxAds = Math.min(200, Math.max(1, body.maxAds ?? 50));
|
|
10807
|
-
const country = body.country?.trim().toUpperCase() ?? "US";
|
|
10808
|
-
const listingUrl = buildPageIntelUrl(body, country);
|
|
10809
|
-
const fbUser = c.get("user");
|
|
10810
|
-
const { ok: fbOk, balance_mc: fbBal } = await debitMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD, body.pageId ?? body.query ?? body.libraryId ?? "");
|
|
10811
|
-
if (!fbOk) return c.json(insufficientBalanceResponse(fbBal, MC_COSTS.fb_ad), 402);
|
|
10812
|
-
const driver = new BrowserDriver();
|
|
10813
|
-
let refunded = false;
|
|
10776
|
+
// src/extractor/FacebookAdGraphql.ts
|
|
10777
|
+
function parseFbGraphqlJson(text) {
|
|
10778
|
+
const out = [];
|
|
10779
|
+
const body = text.replace(/^for\s*\(;;\);/, "").trim();
|
|
10780
|
+
try {
|
|
10781
|
+
out.push(JSON.parse(body));
|
|
10782
|
+
return out;
|
|
10783
|
+
} catch {
|
|
10784
|
+
for (const line of body.split("\n")) {
|
|
10785
|
+
const trimmed = line.trim();
|
|
10786
|
+
if (!trimmed) continue;
|
|
10814
10787
|
try {
|
|
10815
|
-
|
|
10816
|
-
|
|
10817
|
-
|
|
10818
|
-
const result = await extractor.extractPageIntel(listingUrl, maxAds);
|
|
10819
|
-
if (result.ads.length === 0 && await detectSoftBlock(driver)) {
|
|
10820
|
-
await creditMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD_REFUND, "soft-block empty result");
|
|
10821
|
-
refunded = true;
|
|
10822
|
-
await logRequestEvent({ userId: fbUser.id, source: "facebook_page_intel", status: "failed", query: body.pageId ?? body.query ?? body.libraryId ?? "", error: "soft-block: empty result refunded" });
|
|
10823
|
-
return c.json({ error: "soft-block: no ads returned (refunded)" }, 503);
|
|
10824
|
-
}
|
|
10825
|
-
await logRequestEvent({ userId: fbUser.id, source: "facebook_page_intel", status: "done", query: body.pageId ?? body.query ?? body.libraryId ?? "", resultCount: result.ads.length, result });
|
|
10826
|
-
return c.json(result);
|
|
10827
|
-
} catch (err) {
|
|
10828
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
10829
|
-
if (!refunded) await creditMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD_REFUND, "failed call");
|
|
10830
|
-
await logRequestEvent({ userId: fbUser.id, source: "facebook_page_intel", status: "failed", query: body.pageId ?? body.query ?? body.libraryId ?? "", error: msg });
|
|
10831
|
-
if (msg.toLowerCase().includes("blocked") || msg.toLowerCase().includes("captcha")) {
|
|
10832
|
-
return c.json({ error: msg }, 503);
|
|
10833
|
-
}
|
|
10834
|
-
return c.json({ error: msg }, 500);
|
|
10835
|
-
} finally {
|
|
10836
|
-
await driver.close();
|
|
10837
|
-
}
|
|
10838
|
-
});
|
|
10839
|
-
facebookAdApp.post("/transcribe", createApiKeyAuth(), async (c) => {
|
|
10840
|
-
const raw = await c.req.json().catch(() => ({}));
|
|
10841
|
-
const parsed = FacebookTranscribeBodySchema.safeParse(raw);
|
|
10842
|
-
if (!parsed.success) {
|
|
10843
|
-
return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
|
|
10844
|
-
}
|
|
10845
|
-
const body = parsed.data;
|
|
10846
|
-
const urlCheck = await validatePublicHttpUrl(body.videoUrl, { field: "videoUrl", requireHttps: false });
|
|
10847
|
-
if (urlCheck.error) {
|
|
10848
|
-
return c.json(invalidRequest(urlCheck.error), 400);
|
|
10788
|
+
out.push(JSON.parse(trimmed));
|
|
10789
|
+
} catch {
|
|
10790
|
+
continue;
|
|
10849
10791
|
}
|
|
10850
|
-
|
|
10851
|
-
|
|
10852
|
-
|
|
10853
|
-
|
|
10854
|
-
|
|
10855
|
-
|
|
10856
|
-
|
|
10857
|
-
|
|
10858
|
-
|
|
10859
|
-
|
|
10860
|
-
|
|
10861
|
-
|
|
10862
|
-
|
|
10863
|
-
|
|
10864
|
-
|
|
10865
|
-
|
|
10866
|
-
|
|
10867
|
-
|
|
10868
|
-
|
|
10869
|
-
|
|
10870
|
-
|
|
10871
|
-
|
|
10872
|
-
|
|
10792
|
+
}
|
|
10793
|
+
return out;
|
|
10794
|
+
}
|
|
10795
|
+
}
|
|
10796
|
+
/**
 * Flattens the `collated_results` of every edge found at
 * `payload.data.ad_library_main.search_results_connection.edges` into a list
 * of normalized ad records.
 *
 * Malformed input is tolerated: a missing or non-array `edges` /
 * `collated_results` value yields no records for that level, and entries that
 * are null, not objects, or lack `ad_archive_id` are skipped. One bad entry
 * therefore no longer discards the rest of the response.
 *
 * @param {unknown} payload - One parsed GraphQL response object.
 * @returns {Array<{ad_archive_id: string, page_id: string, page_name: string,
 *   is_active: boolean, collation_count: number|null, snapshot: object|null}>}
 */
function extractCollatedResults(payload) {
  const edges = payload?.data?.ad_library_main?.search_results_connection?.edges;
  if (!Array.isArray(edges)) return [];

  const results = [];
  for (const edge of edges) {
    const collated = edge?.node?.collated_results;
    if (!Array.isArray(collated)) continue;

    for (const raw of collated) {
      // Guard against null / primitive entries before reading properties.
      if (raw === null || typeof raw !== "object") continue;
      const id = raw.ad_archive_id;
      if (id === undefined || id === null) continue;

      const snapshot = raw.snapshot ?? null;
      results.push({
        ad_archive_id: String(id),
        page_id: raw.page_id != null ? String(raw.page_id) : "",
        // Fall back to the snapshot's page name when the record has none.
        page_name: raw.page_name ?? snapshot?.page_name ?? "",
        is_active: Boolean(raw.is_active),
        collation_count: typeof raw.collation_count === "number" ? raw.collation_count : null,
        snapshot
      });
    }
  }
  return results;
}
|
|
10819
|
+
async function collectAdLibraryResults(page, url, maxResults, opts = {}) {
|
|
10820
|
+
const captureMs = opts.captureMs ?? 3e4;
|
|
10821
|
+
const collected = [];
|
|
10822
|
+
const seen = /* @__PURE__ */ new Set();
|
|
10823
|
+
const handler = (resp) => {
|
|
10824
|
+
if (!resp.url().includes("/api/graphql")) return;
|
|
10825
|
+
const friendlyName = (resp.request().postData() ?? "").match(/fb_api_req_friendly_name=([^&]+)/)?.[1];
|
|
10826
|
+
if (friendlyName !== AD_LIBRARY_QUERY) return;
|
|
10827
|
+
void resp.text().then((text) => {
|
|
10828
|
+
for (const payload of parseFbGraphqlJson(text)) {
|
|
10829
|
+
for (const result of extractCollatedResults(payload)) {
|
|
10830
|
+
if (seen.has(result.ad_archive_id)) continue;
|
|
10831
|
+
seen.add(result.ad_archive_id);
|
|
10832
|
+
collected.push(result);
|
|
10873
10833
|
}
|
|
10874
|
-
await logRequestEvent({ userId: fbUser.id, source: "facebook_transcribe", status: "done", query: videoUrl, resultCount: chunks.length, result: { text, chunks, durationMs } });
|
|
10875
|
-
return c.json({ text, chunks, durationMs, markdown: lines.join("\n") });
|
|
10876
|
-
} catch (err) {
|
|
10877
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
10878
|
-
await creditMc(fbUser.id, MC_COSTS.fb_transcribe, LedgerOperation.FB_TRANSCRIBE_REFUND, "failed call");
|
|
10879
|
-
await logRequestEvent({ userId: fbUser.id, source: "facebook_transcribe", status: "failed", query: videoUrl, error: msg });
|
|
10880
|
-
return c.json({ error: msg }, 500);
|
|
10881
10834
|
}
|
|
10882
|
-
});
|
|
10883
|
-
|
|
10884
|
-
|
|
10885
|
-
|
|
10886
|
-
|
|
10887
|
-
|
|
10835
|
+
}).catch(() => void 0);
|
|
10836
|
+
};
|
|
10837
|
+
page.on("response", handler);
|
|
10838
|
+
try {
|
|
10839
|
+
await page.goto(url, { waitUntil: "domcontentloaded", timeout: 45e3 });
|
|
10840
|
+
const deadline = Date.now() + captureMs;
|
|
10841
|
+
let lastCount = -1;
|
|
10842
|
+
let stableRounds = 0;
|
|
10843
|
+
while (Date.now() < deadline && collected.length < maxResults) {
|
|
10844
|
+
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)).catch(() => void 0);
|
|
10845
|
+
await page.waitForTimeout(2e3);
|
|
10846
|
+
if (collected.length === lastCount) {
|
|
10847
|
+
stableRounds++;
|
|
10848
|
+
if (stableRounds >= 2 && collected.length > 0) break;
|
|
10849
|
+
} else {
|
|
10850
|
+
stableRounds = 0;
|
|
10888
10851
|
}
|
|
10889
|
-
|
|
10890
|
-
|
|
10891
|
-
|
|
10892
|
-
|
|
10893
|
-
|
|
10894
|
-
|
|
10895
|
-
|
|
10896
|
-
|
|
10897
|
-
|
|
10898
|
-
|
|
10899
|
-
|
|
10900
|
-
|
|
10901
|
-
|
|
10902
|
-
|
|
10903
|
-
|
|
10904
|
-
|
|
10905
|
-
|
|
10906
|
-
|
|
10907
|
-
|
|
10908
|
-
|
|
10909
|
-
|
|
10910
|
-
|
|
10911
|
-
|
|
10912
|
-
|
|
10913
|
-
|
|
10914
|
-
|
|
10915
|
-
|
|
10916
|
-
|
|
10917
|
-
|
|
10918
|
-
|
|
10919
|
-
|
|
10920
|
-
|
|
10921
|
-
|
|
10922
|
-
|
|
10923
|
-
|
|
10924
|
-
|
|
10925
|
-
|
|
10926
|
-
|
|
10927
|
-
|
|
10928
|
-
|
|
10929
|
-
|
|
10930
|
-
|
|
10931
|
-
|
|
10932
|
-
|
|
10933
|
-
|
|
10852
|
+
lastCount = collected.length;
|
|
10853
|
+
}
|
|
10854
|
+
} finally {
|
|
10855
|
+
page.off("response", handler);
|
|
10856
|
+
}
|
|
10857
|
+
return collected.slice(0, maxResults);
|
|
10858
|
+
}
|
|
10859
|
+
/**
 * Groups ad results by page and returns one advertiser summary per page,
 * ordered by descending ad count and truncated to `maxResults`.
 *
 * Results without a page id or page name are ignored. A page's `adCount` is
 * the larger of its highest positive `collation_count` and the number of
 * results seen for that page. `sampleLibraryId` is the `ad_archive_id` of the
 * first result seen for the page.
 *
 * @param {Array<object>} results - Records carrying page_id, page_name,
 *   ad_archive_id and collation_count.
 * @param {number} maxResults - Maximum number of advertisers to return.
 * @returns {Array<{pageName: string, pageId: string, sampleLibraryId: string, adCount: number}>}
 */
function advertisersFromResults(results, maxResults) {
  const pages = new Map();

  for (const item of results) {
    const { page_id: pageId, page_name: pageName } = item;
    if (!pageId || !pageName) continue;

    const rawCount = item.collation_count;
    const collation = typeof rawCount === "number" && rawCount > 0 ? rawCount : 0;

    const entry = pages.get(pageId);
    if (!entry) {
      pages.set(pageId, {
        pageName,
        pageId,
        sampleLibraryId: item.ad_archive_id,
        maxCollation: collation,
        resultCount: 1
      });
      continue;
    }
    entry.resultCount += 1;
    entry.maxCollation = Math.max(entry.maxCollation, collation);
  }

  const advertisers = [];
  for (const entry of pages.values()) {
    advertisers.push({
      pageName: entry.pageName,
      pageId: entry.pageId,
      sampleLibraryId: entry.sampleLibraryId,
      adCount: Math.max(entry.maxCollation, entry.resultCount)
    });
  }
  advertisers.sort((left, right) => right.adCount - left.adCount);
  return advertisers.slice(0, maxResults);
}
|
|
10874
|
+
var AD_LIBRARY_QUERY;
|
|
10875
|
+
var init_FacebookAdGraphql = __esm({
|
|
10876
|
+
"src/extractor/FacebookAdGraphql.ts"() {
|
|
10877
|
+
"use strict";
|
|
10878
|
+
AD_LIBRARY_QUERY = "AdLibrarySearchPaginationQuery";
|
|
10879
|
+
}
|
|
10880
|
+
});
|
|
10881
|
+
|
|
10882
|
+
// src/locations.ts
|
|
10883
|
+
var LOCATIONS;
|
|
10884
|
+
var init_locations = __esm({
|
|
10885
|
+
"src/locations.ts"() {
|
|
10886
|
+
"use strict";
|
|
10887
|
+
LOCATIONS = {
|
|
10888
|
+
"austin": "Austin,Texas,United States",
|
|
10889
|
+
"new york": "New York,New York,United States",
|
|
10890
|
+
"new york city": "New York,New York,United States",
|
|
10891
|
+
"nyc": "New York,New York,United States",
|
|
10892
|
+
"los angeles": "Los Angeles,California,United States",
|
|
10893
|
+
"la": "Los Angeles,California,United States",
|
|
10894
|
+
"chicago": "Chicago,Illinois,United States",
|
|
10895
|
+
"houston": "Houston,Texas,United States",
|
|
10896
|
+
"phoenix": "Phoenix,Arizona,United States",
|
|
10897
|
+
"philadelphia": "Philadelphia,Pennsylvania,United States",
|
|
10898
|
+
"philly": "Philadelphia,Pennsylvania,United States",
|
|
10899
|
+
"san antonio": "San Antonio,Texas,United States",
|
|
10900
|
+
"dallas": "Dallas,Texas,United States",
|
|
10901
|
+
"miami": "Miami,Florida,United States",
|
|
10902
|
+
"seattle": "Seattle,Washington,United States",
|
|
10903
|
+
"denver": "Denver,Colorado,United States",
|
|
10904
|
+
"loveland": "Loveland,Colorado,United States",
|
|
10905
|
+
"loveland co": "Loveland,Colorado,United States",
|
|
10906
|
+
"fort collins": "Fort Collins,Colorado,United States",
|
|
10907
|
+
"boulder": "Boulder,Colorado,United States",
|
|
10908
|
+
"colorado springs": "Colorado Springs,Colorado,United States",
|
|
10909
|
+
"boston": "Boston,Massachusetts,United States",
|
|
10910
|
+
"atlanta": "Atlanta,Georgia,United States",
|
|
10911
|
+
"san francisco": "San Francisco,California,United States",
|
|
10912
|
+
"sf": "San Francisco,California,United States",
|
|
10913
|
+
"portland": "Portland,Oregon,United States",
|
|
10914
|
+
"las vegas": "Las Vegas,Nevada,United States",
|
|
10915
|
+
"minneapolis": "Minneapolis,Minnesota,United States",
|
|
10916
|
+
"detroit": "Detroit,Michigan,United States",
|
|
10917
|
+
"nashville": "Nashville,Tennessee,United States",
|
|
10918
|
+
"charlotte": "Charlotte,North Carolina,United States",
|
|
10919
|
+
"orlando": "Orlando,Florida,United States",
|
|
10920
|
+
"san diego": "San Diego,California,United States",
|
|
10921
|
+
"baltimore": "Baltimore,Maryland,United States",
|
|
10922
|
+
"sacramento": "Sacramento,California,United States",
|
|
10923
|
+
"columbus": "Columbus,Ohio,United States",
|
|
10924
|
+
"indianapolis": "Indianapolis,Indiana,United States",
|
|
10925
|
+
"san jose": "San Jose,California,United States",
|
|
10926
|
+
"fort worth": "Fort Worth,Texas,United States",
|
|
10927
|
+
"jacksonville": "Jacksonville,Florida,United States",
|
|
10928
|
+
"memphis": "Memphis,Tennessee,United States",
|
|
10929
|
+
"louisville": "Louisville,Kentucky,United States",
|
|
10930
|
+
"raleigh": "Raleigh,North Carolina,United States",
|
|
10931
|
+
"richmond": "Richmond,Virginia,United States",
|
|
10932
|
+
"salt lake city": "Salt Lake City,Utah,United States",
|
|
10933
|
+
"toronto": "Toronto,Ontario,Canada",
|
|
10934
|
+
"vancouver": "Vancouver,British Columbia,Canada",
|
|
10935
|
+
"montreal": "Montreal,Quebec,Canada",
|
|
10936
|
+
"calgary": "Calgary,Alberta,Canada",
|
|
10937
|
+
"ottawa": "Ottawa,Ontario,Canada",
|
|
10938
|
+
"london": "London,England,United Kingdom",
|
|
10939
|
+
"manchester": "Manchester,England,United Kingdom",
|
|
10940
|
+
"birmingham": "Birmingham,England,United Kingdom",
|
|
10941
|
+
"edinburgh": "Edinburgh,Scotland,United Kingdom",
|
|
10942
|
+
"glasgow": "Glasgow,Scotland,United Kingdom",
|
|
10943
|
+
"leeds": "Leeds,England,United Kingdom",
|
|
10944
|
+
"sydney": "Sydney,New South Wales,Australia",
|
|
10945
|
+
"melbourne": "Melbourne,Victoria,Australia",
|
|
10946
|
+
"brisbane": "Brisbane,Queensland,Australia",
|
|
10947
|
+
"perth": "Perth,Western Australia,Australia",
|
|
10948
|
+
"adelaide": "Adelaide,South Australia,Australia",
|
|
10949
|
+
"dublin": "Dublin,Leinster,Ireland"
|
|
10950
|
+
};
|
|
10951
|
+
}
|
|
10952
|
+
});
|
|
10953
|
+
|
|
10954
|
+
// src/uule.ts
|
|
10955
|
+
/**
 * Encodes an unsigned integer as a base-128 varint: 7 payload bits per byte,
 * least-significant group first, with the high bit set on every byte except
 * the last.
 *
 * @param {number} value - Integer to encode (handled with 32-bit bitwise ops).
 * @returns {number[]} The encoded bytes; always at least one byte.
 */
function encodeVarint(value) {
  const encoded = [];
  let rest = value;
  while (true) {
    const low7 = rest & 127;
    rest >>>= 7;
    if (rest === 0) {
      // Final group: continuation bit stays clear.
      encoded.push(low7);
      return encoded;
    }
    encoded.push(low7 | 128);
  }
}
|
|
10966
|
+
/**
 * Builds a "w+"-prefixed, base64-encoded token for a location name.
 *
 * The encoded payload is a fixed 5-byte header, followed by the
 * varint-encoded byte length of the UTF-8 name, followed by the name bytes.
 * NOTE(review): presumably this is the value of Google's `uule` location
 * parameter — confirm against the caller.
 *
 * @param {string} name - Location name to embed.
 * @returns {string} The token, e.g. "w+CAIQICI...".
 */
function encodeUule(name) {
  const nameBytes = Buffer.from(name, "utf8");
  const header = Buffer.from([8, 2, 16, 32, 34]);
  const lengthPrefix = Buffer.from(encodeVarint(nameBytes.length));
  const encoded = Buffer.concat([header, lengthPrefix, nameBytes]).toString("base64");
  return "w+" + encoded;
}
|
|
10975
|
+
/**
 * Maps a free-form location string to its canonical entry in LOCATIONS.
 *
 * Lookup is attempted, in order, on: the lower-cased trimmed input, the part
 * before the first comma, and the input with a trailing two-letter token
 * (e.g. a state abbreviation) removed. If nothing matches, the original input
 * is returned unchanged.
 *
 * Only own properties of LOCATIONS are consulted. A plain bracket lookup would
 * also resolve inherited keys such as "constructor" or "toString" and return
 * a function instead of a location string.
 *
 * @param {string} input - User-supplied location text.
 * @returns {string} The canonical location string, or `input` when unknown.
 */
function normalizeLocation(input) {
  const lookup = (key) =>
    Object.prototype.hasOwnProperty.call(LOCATIONS, key) ? LOCATIONS[key] : undefined;

  const raw = input.toLowerCase().trim();
  const exact = lookup(raw);
  if (exact) return exact;

  const beforeComma = raw.split(",")[0].trim();
  if (beforeComma !== raw) {
    const byCity = lookup(beforeComma);
    if (byCity) return byCity;
  }

  // Drop a trailing two-letter token such as "tx" in "austin tx".
  const withoutState = raw.replace(/\s+[a-z]{2}$/, "").trim();
  if (withoutState !== raw) {
    const byStripped = lookup(withoutState);
    if (byStripped) return byStripped;
  }

  return input;
}
|
|
10984
|
+
var init_uule = __esm({
|
|
10985
|
+
"src/uule.ts"() {
|
|
10986
|
+
"use strict";
|
|
10987
|
+
init_locations();
|
|
10988
|
+
}
|
|
10989
|
+
});
|
|
10990
|
+
|
|
10991
|
+
// src/kernel-proxy-resolver.ts
|
|
10992
|
+
/**
 * Returns the last six characters of a proxy id, or null when the id is
 * missing or empty.
 *
 * @param {string|null|undefined} proxyId - Proxy identifier.
 * @returns {string|null} Trailing characters of the id, or null.
 */
function proxyIdSuffix2(proxyId) {
  if (!proxyId) {
    return null;
  }
  return proxyId.slice(-6);
}
|
|
10995
|
+
/**
 * Packages a proxy id together with a description of how it was resolved.
 *
 * The description carries only a presence flag and the id's short suffix
 * (from proxyIdSuffix2), not the id itself.
 *
 * @param {*} source - Where the proxy id came from.
 * @param {*} proxyMode - Proxy mode in effect.
 * @param {string|null|undefined} proxyId - Resolved proxy id, if any.
 * @param {*} target - Location target the resolution was made for.
 * @param {*} error - Error information, if resolution failed.
 * @returns {{kernelProxyId: *, resolution: object}}
 */
function resolution(source, proxyMode, proxyId, target, error) {
  const details = {
    source,
    proxyMode,
    proxyIdPresent: Boolean(proxyId),
    proxyIdSuffix: proxyIdSuffix2(proxyId),
    target,
    error
  };
  return { kernelProxyId: proxyId, resolution: details };
}
|
|
11008
|
+
/**
 * Canonicalizes a state name for lookup: trims it, lower-cases it and
 * collapses every run of whitespace to a single space.
 *
 * @param {string} value - Raw state name.
 * @returns {string} The normalized name.
 */
function normalizeStateName(value) {
  const lowered = value.trim().toLowerCase();
  return lowered.replace(/\s+/g, " ");
}
|
|
11011
|
+
/**
 * Canonicalizes a country name for comparison: trims it, lower-cases it,
 * removes every period and collapses whitespace runs to a single space.
 *
 * @param {string} value - Raw country name, e.g. "U.S.A.".
 * @returns {string} The normalized name, e.g. "usa".
 */
function normalizeCountryName(value) {
  const lowered = value.trim().toLowerCase();
  const withoutDots = lowered.replace(/\./g, "");
  return withoutDots.replace(/\s+/g, " ");
}
|
|
11014
|
+
/**
 * Reports whether a country value denotes the United States.
 *
 * A missing or empty country counts as the United States. Otherwise the value
 * is normalized with normalizeCountryName and compared against the accepted
 * spellings.
 *
 * @param {string|null|undefined} country - Country text.
 * @returns {boolean} True for an absent country or a recognized US spelling.
 */
function isUnitedStates(country) {
  if (!country) {
    return true;
  }
  const accepted = ["united states", "united states of america", "usa", "us"];
  return accepted.includes(normalizeCountryName(country));
}
|
|
11019
|
+
/**
 * Resolves a region string to a two-letter state code.
 *
 * A region that is exactly two ASCII letters is returned upper-cased as-is.
 * Otherwise the normalized name is looked up in US_STATE_CODES.
 *
 * Only own properties of US_STATE_CODES are consulted. A plain bracket lookup
 * would also resolve inherited keys such as "constructor" and hand back a
 * function instead of a code.
 *
 * @param {string} region - State abbreviation or full state name.
 * @returns {string|null} The state code, or null when the region is unknown.
 */
function stateCodeFor(region) {
  const trimmed = region.trim();
  if (/^[A-Za-z]{2}$/.test(trimmed)) return trimmed.toUpperCase();

  const key = normalizeStateName(trimmed);
  if (!Object.prototype.hasOwnProperty.call(US_STATE_CODES, key)) return null;
  return US_STATE_CODES[key] ?? null;
}
|
|
11024
|
+
/**
 * Derives candidate identifiers for a city name.
 *
 * The name is NFKD-normalized, stripped of non-ASCII characters, lower-cased
 * and split into alphanumeric words. Two forms are produced — words joined
 * with underscores and words joined with nothing — with duplicates and empty
 * strings removed.
 *
 * @param {string} city - City name, e.g. "Fort Collins".
 * @returns {string[]} Unique candidates, underscore form first.
 */
function kernelCityIdentifierCandidates(city) {
  const asciiLower = city.normalize("NFKD").replace(/[^\x00-\x7F]/g, "").toLowerCase();
  const tokens = asciiLower.split(/[^a-z0-9]+/).filter((token) => token.length > 0);

  const unique = new Set();
  for (const candidate of [tokens.join("_"), tokens.join("")]) {
    if (candidate) unique.add(candidate);
  }
  return [...unique];
}
|
|
11031
|
+
/**
 * Builds the residential proxy name for a country/state pair, with an optional
 * city suffix. Country and state are lower-cased; the city is appended as
 * given.
 *
 * @param {string} country - Country code, e.g. "US".
 * @param {string} state - State code, e.g. "TX".
 * @param {string} [city] - City identifier; omitted from the name when falsy.
 * @returns {string} e.g. "mcp-serp-residential-us-tx-austin".
 */
function proxyName(country, state, city) {
  const base = `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}`;
  if (!city) {
    return base;
  }
  return `${base}-${city}`;
}
|
|
11034
|
+
/**
 * Builds the residential proxy name for a ZIP code.
 *
 * @param {string} zip - ZIP code to embed in the name.
 * @returns {string} e.g. "mcp-serp-residential-us-zip-78701".
 */
function zipProxyName(zip) {
  const prefix = "mcp-serp-residential-us-zip-";
  return `${prefix}${zip}`;
}
|
|
11037
|
+
/**
 * Turns a location string into a US proxy target description.
 *
 * Returns null unless a location is given and `gl` is "us" (any case). The
 * location is canonicalized with normalizeLocation and split on commas; a
 * trailing United States segment is dropped. A single remaining segment is
 * treated as a state; otherwise the first two segments are treated as city
 * and region. Null is returned whenever the state, or a city identifier,
 * cannot be resolved.
 *
 * @param {string|null|undefined} location - Free-form location text.
 * @param {string} gl - Country code of the request; only read when a location
 *   is present.
 * @returns {object|null} A target with canonicalLocation, level ("state" or
 *   "city"), country, state, city, cityCandidates, proxyName and config, or
 *   null when the location cannot be mapped.
 */
function parseKernelLocationProxyTarget(location, gl) {
  if (!location) return null;
  if (gl.toLowerCase() !== "us") return null;

  const canonicalLocation = normalizeLocation(location);
  const segments = canonicalLocation
    .split(",")
    .map((segment) => segment.trim())
    .filter((segment) => segment.length > 0);

  // Drop a trailing country segment such as "United States".
  const endsWithCountry = segments.length > 1 && isUnitedStates(segments[segments.length - 1]);
  const parts = endsWithCountry ? segments.slice(0, -1) : segments;

  const country = "US";

  if (parts.length === 1) {
    const stateOnly = stateCodeFor(parts[0]);
    if (!stateOnly) return null;
    return {
      canonicalLocation,
      level: "state",
      country,
      state: stateOnly,
      city: "",
      cityCandidates: [],
      proxyName: proxyName(country, stateOnly),
      config: { country, state: stateOnly }
    };
  }

  const city = parts[0] ?? "";
  const region = parts[1] ?? "";
  if (!city || !region) return null;

  const state = stateCodeFor(region);
  if (!state) return null;

  const cityCandidates = kernelCityIdentifierCandidates(city);
  const primaryCity = cityCandidates[0];
  if (!primaryCity) return null;

  return {
    canonicalLocation,
    level: "city",
    country,
    state,
    city: primaryCity,
    cityCandidates,
    proxyName: proxyName(country, state, primaryCity),
    config: { country, state, city: primaryCity }
  };
}
|
|
11083
|
+
/**
 * Builds the "city|STATE" key used to look a target up in the ZIP table.
 *
 * @param {{city: string, state: string}} target - Proxy target.
 * @returns {string} The lookup key.
 */
function cityZipKey(target) {
  const { city, state } = target;
  return `${city}|${state}`;
}
|
|
11086
|
+
/**
 * Picks the ZIP code to use for a target.
 *
 * @param {object} target - Proxy target; keyed through cityZipKey.
 * @param {string} [explicitZip] - Caller-supplied ZIP; used only when it is
 *   exactly five digits.
 * @returns {string|null} The explicit ZIP, else the US_CITY_CENTER_ZIPS entry
 *   for the target, else null.
 */
function knownZipFor(target, explicitZip) {
  const explicitIsUsable = Boolean(explicitZip) && /^\d{5}$/.test(explicitZip);
  if (explicitIsUsable) {
    return explicitZip;
  }
  const tableZip = US_CITY_CENTER_ZIPS[cityZipKey(target)];
  return tableZip ?? null;
}
|
|
11090
|
+
/**
 * Derives a ZIP-level target from an existing target.
 *
 * @param {object} target - Source target; its fields are copied.
 * @param {string} zip - ZIP code for the new target.
 * @returns {object} A copy of `target` with `level` "zip", the ZIP, the
 *   ZIP proxy name, and a config of country, state and zip.
 */
function zipTarget(target, zip) {
  const name = zipProxyName(zip);
  const config = { country: target.country, state: target.state, zip };
  return Object.assign({}, target, { level: "zip", zip, proxyName: name, config });
}
|
|
11103
|
+
/**
 * Checks whether a proxy config describes the same place as a target.
 *
 * For a "zip" target only country and zip are compared. Otherwise country and
 * state must match, and the city must equal `city` when one is given or be
 * absent from the config when it is not.
 *
 * @param {object|undefined|null} config - Proxy configuration.
 * @param {object} target - Target to compare against.
 * @param {string} [city] - City identifier to require.
 * @returns {boolean} True when the config matches.
 */
function configMatches(config, target, city) {
  const sameCountry = config?.country?.toUpperCase() === target.country;
  if (target.level === "zip") {
    return sameCountry && config?.zip === target.zip;
  }
  if (!sameCountry) {
    return false;
  }
  if (config?.state?.toUpperCase() !== target.state) {
    return false;
  }
  return city ? config?.city === city : !config?.city;
}
|
|
11109
|
+
/**
 * Finds a usable proxy that already serves the given target.
 *
 * A proxy is usable when it is residential, not "unavailable", and has an id.
 * It matches by exact proxy name or by config; the target's city is required
 * in the config only for "city" targets.
 *
 * @param {object[]} proxies - Proxies to search.
 * @param {object} target - Target to match.
 * @returns {object|null} The first matching proxy, or null.
 */
function findExistingTargetProxy(proxies, target) {
  const requiredCity = target.level === "city" ? target.city : void 0;
  const match = proxies.find((proxy) => {
    if (proxy.type !== "residential") return false;
    if (proxy.status === "unavailable") return false;
    if (!proxy.id) return false;
    return proxy.name === target.proxyName || configMatches(proxy.config, target, requiredCity);
  });
  return match ?? null;
}
|
|
11112
|
+
/**
 * Finds a usable city-level proxy for any of the target's city candidates.
 *
 * Candidates are tried in order; for each one the first residential, not
 * "unavailable" proxy with an id that matches by name or config is returned.
 *
 * @param {object[]} proxies - Proxies to search.
 * @param {object} target - Target providing country, state and
 *   `cityCandidates`.
 * @returns {object|null} The matching proxy, or null when no candidate
 *   matches.
 */
function findExistingProxy(proxies, target) {
  const isUsable = (proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id);
  for (const candidate of target.cityCandidates) {
    const expectedName = proxyName(target.country, target.state, candidate);
    const match = proxies.find((proxy) => isUsable(proxy) && (proxy.name === expectedName || configMatches(proxy.config, target, candidate)));
    if (match) {
      return match;
    }
  }
  return null;
}
|
|
11120
|
+
/**
 * Derives a state-level target from an existing target.
 *
 * @param {object} target - Source target; its other fields are copied as-is.
 * @returns {object} A copy of `target` with `level` "state", the state proxy
 *   name, and a config holding only country and state.
 */
function stateTarget(target) {
  const { country, state } = target;
  const stateLevel = {
    level: "state",
    proxyName: proxyName(country, state),
    config: { country, state }
  };
  return { ...target, ...stateLevel };
}
|
|
11131
|
+
/**
 * Finds a usable state-level proxy for the target's country and state.
 *
 * @param {object[]} proxies - Proxies to search.
 * @param {object} target - Target providing country and state.
 * @returns {object|null} The first residential, not "unavailable" proxy with
 *   an id that matches by state proxy name or by config, or null.
 */
function findExistingStateProxy(proxies, target) {
  const stateProxyName = proxyName(target.country, target.state);
  const match = proxies.find((proxy) => {
    if (proxy.type !== "residential" || proxy.status === "unavailable" || !proxy.id) {
      return false;
    }
    return proxy.name === stateProxyName || configMatches(proxy.config, target);
  });
  return match ?? null;
}
|
|
11135
|
+
/**
 * Chooses the target used for a retry attempt.
 *
 * @param {object} target - Target of the original attempt.
 * @param {number} attemptIndex - Retry index; currently not consulted, every
 *   retry widens to the state level.
 * @returns {object} The state-level version of `target`.
 */
function escalatedTargetLevel(target, attemptIndex) {
  const widened = stateTarget(target);
  return widened;
}
|
|
11138
|
+
/**
 * Extracts a printable message from a thrown value.
 *
 * @param {unknown} err - Caught value.
 * @returns {string} `err.message` for Error instances, otherwise
 *   `String(err)`.
 */
function errorText2(err) {
  if (err instanceof Error) {
    return err.message;
  }
  return String(err);
}
|
|
11141
|
+
/**
 * Resolves which Kernel proxy to use for a request.
 *
 * Resolution order for location mode, first attempt:
 *   1. ZIP-level proxy (reuse, then create) when a ZIP is known;
 *   2. city-level proxy (reuse, then create per city candidate);
 *   3. state-level proxy (reuse, then create);
 *   4. the configured proxy id.
 * On retries (`attemptIndex >= 1`) a state-level proxy is created directly,
 * falling back to the configured proxy id.
 *
 * Every failure path resolves to a "configured_fallback" result instead of
 * rejecting, including an exception raised while parsing the location.
 * The reported error is null whenever no error was collected.
 *
 * @param {object} options
 * @param {string} options.proxyMode - "none", "configured", or a location
 *   mode.
 * @param {string} [options.configuredKernelProxyId] - Fallback proxy id.
 * @param {string} [options.kernelApiKey] - Kernel API key; without it the
 *   configured proxy is used.
 * @param {string} [options.location] - Free-form location.
 * @param {string} [options.gl] - Search market country code.
 * @param {string} [options.proxyZip] - Explicit ZIP override.
 * @param {number} [options.attemptIndex] - Retry index, default 0.
 * @returns {Promise<object>} The value built by `resolution(...)`.
 */
async function resolveKernelProxyId(options) {
  const { proxyMode, configuredKernelProxyId } = options;
  // Collected errors are reported as one string, or null when there are none,
  // so a clean success never carries an empty-string error.
  const summarize = (errors) => errors.join(" | ") || null;
  const configuredFallback = (fallbackTarget, error) => resolution("configured_fallback", proxyMode, configuredKernelProxyId, fallbackTarget, error);

  if (proxyMode === "none") {
    return resolution("disabled", proxyMode, void 0, null, null);
  }
  if (proxyMode === "configured") {
    return configuredFallback(null, null);
  }

  let target = null;
  try {
    target = parseKernelLocationProxyTarget(options.location, options.gl);
  } catch (err) {
    // Parsing caller-supplied text must not break the fallback guarantee.
    return configuredFallback(null, errorText2(err));
  }
  if (!target || !options.kernelApiKey) {
    return configuredFallback(target, target ? null : "location could not be normalized to a US city/state proxy target");
  }

  const kernel = new import_sdk5.default({ apiKey: options.kernelApiKey });
  try {
    const attemptIndex = options.attemptIndex ?? 0;
    if (attemptIndex >= 1) {
      // Retry: skip reuse and create a proxy at the widened level.
      const escalatedTarget = escalatedTargetLevel(target, attemptIndex);
      const escalationErrors = [];
      try {
        const created = await kernel.proxies.create({
          type: "residential",
          name: escalatedTarget.proxyName,
          config: escalatedTarget.config
        });
        if (created.id) {
          return resolution("location_created", proxyMode, created.id, escalatedTarget, null);
        }
        escalationErrors.push(`${escalatedTarget.state}: Kernel did not return a proxy id`);
      } catch (err) {
        escalationErrors.push(`${escalatedTarget.state}: ${errorText2(err)}`);
      }
      return configuredFallback(escalatedTarget, summarize(escalationErrors));
    }

    const proxies = await kernel.proxies.list();
    const zip = knownZipFor(target, options.proxyZip);
    const createErrors = [];

    if (zip) {
      const targetZip = zipTarget(target, zip);
      const existingZip = findExistingTargetProxy(proxies, targetZip);
      if (existingZip?.id) {
        return resolution("location_reused", proxyMode, existingZip.id, targetZip, null);
      }
      try {
        // The ZIP proxy is created with country and zip only.
        const created = await kernel.proxies.create({
          type: "residential",
          name: targetZip.proxyName,
          config: {
            country: targetZip.country,
            zip
          }
        });
        if (created.id) {
          return resolution("location_created", proxyMode, created.id, targetZip, null);
        }
        createErrors.push(`${zip}: Kernel did not return a proxy id`);
      } catch (err) {
        createErrors.push(`${zip}: ${errorText2(err)}`);
      }
    }

    const existing = findExistingProxy(proxies, target);
    if (existing?.id) {
      return resolution("location_reused", proxyMode, existing.id, target, summarize(createErrors));
    }

    for (const city of target.cityCandidates) {
      try {
        const cityConfig = { country: target.country, state: target.state, city };
        const cityProxyName = proxyName(target.country, target.state, city);
        const created = await kernel.proxies.create({
          type: "residential",
          name: cityProxyName,
          config: cityConfig
        });
        if (created.id) {
          return resolution("location_created", proxyMode, created.id, {
            ...target,
            level: "city",
            city,
            proxyName: proxyName(target.country, target.state, city),
            config: {
              country: target.country,
              state: target.state,
              city
            }
          }, null);
        }
        createErrors.push(`${city}: Kernel did not return a proxy id`);
      } catch (err) {
        createErrors.push(`${city}: ${errorText2(err)}`);
      }
    }

    const stateFallback = stateTarget(target);
    const existingState = findExistingStateProxy(proxies, stateFallback);
    if (existingState?.id) {
      return resolution("location_reused", proxyMode, existingState.id, stateFallback, summarize(createErrors));
    }
    try {
      const created = await kernel.proxies.create({
        type: "residential",
        name: stateFallback.proxyName,
        config: stateFallback.config
      });
      if (created.id) {
        return resolution("location_created", proxyMode, created.id, stateFallback, summarize(createErrors));
      }
      createErrors.push(`${stateFallback.state}: Kernel did not return a proxy id`);
    } catch (err) {
      createErrors.push(`${stateFallback.state}: ${errorText2(err)}`);
    }
    return configuredFallback(target, summarize(createErrors));
  } catch (err) {
    return configuredFallback(target, errorText2(err));
  }
}
|
|
11255
|
+
// Module-level bindings for src/kernel-proxy-resolver.ts; they stay undefined
// until init_kernel_proxy_resolver() has run.
var import_sdk5;
var US_STATE_CODES;
var US_CITY_CENTER_ZIPS;
var init_kernel_proxy_resolver = __esm({
  "src/kernel-proxy-resolver.ts"() {
    "use strict";
    import_sdk5 = __toESM(require("@onkernel/sdk"), 1);
    init_uule();
    // Normalized (lower-case, single-spaced) state name -> two-letter code.
    US_STATE_CODES = {
      "alabama": "AL",
      "alaska": "AK",
      "arizona": "AZ",
      "arkansas": "AR",
      "california": "CA",
      "colorado": "CO",
      "connecticut": "CT",
      "delaware": "DE",
      "florida": "FL",
      "georgia": "GA",
      "hawaii": "HI",
      "idaho": "ID",
      "illinois": "IL",
      "indiana": "IN",
      "iowa": "IA",
      "kansas": "KS",
      "kentucky": "KY",
      "louisiana": "LA",
      "maine": "ME",
      "maryland": "MD",
      "massachusetts": "MA",
      "michigan": "MI",
      "minnesota": "MN",
      "mississippi": "MS",
      "missouri": "MO",
      "montana": "MT",
      "nebraska": "NE",
      "nevada": "NV",
      "new hampshire": "NH",
      "new jersey": "NJ",
      "new mexico": "NM",
      "new york": "NY",
      "north carolina": "NC",
      "north dakota": "ND",
      "ohio": "OH",
      "oklahoma": "OK",
      "oregon": "OR",
      "pennsylvania": "PA",
      "rhode island": "RI",
      "south carolina": "SC",
      "south dakota": "SD",
      "tennessee": "TN",
      "texas": "TX",
      "utah": "UT",
      "vermont": "VT",
      "virginia": "VA",
      "washington": "WA",
      "west virginia": "WV",
      "wisconsin": "WI",
      "wyoming": "WY"
    };
    // "city_identifier|STATE" -> ZIP code, keyed the way cityZipKey builds it.
    US_CITY_CENTER_ZIPS = {
      "atlanta|GA": "30303",
      "austin|TX": "78701",
      "baltimore|MD": "21201",
      "boston|MA": "02108",
      "boulder|CO": "80302",
      "charlotte|NC": "28202",
      "chicago|IL": "60601",
      "colorado_springs|CO": "80903",
      "columbus|OH": "43215",
      "dallas|TX": "75201",
      "denver|CO": "80202",
      "detroit|MI": "48226",
      "fort_collins|CO": "80524",
      "fort_worth|TX": "76102",
      "houston|TX": "77002",
      "indianapolis|IN": "46204",
      "jacksonville|FL": "32202",
      "las_vegas|NV": "89101",
      "los_angeles|CA": "90012",
      "louisville|KY": "40202",
      "loveland|CO": "80537",
      "memphis|TN": "38103",
      "miami|FL": "33131",
      "minneapolis|MN": "55401",
      "nashville|TN": "37203",
      "new_york|NY": "10001",
      "orlando|FL": "32801",
      "philadelphia|PA": "19103",
      "phoenix|AZ": "85004",
      "portland|OR": "97205",
      "raleigh|NC": "27601",
      "richmond|VA": "23219",
      "sacramento|CA": "95814",
      "salt_lake_city|UT": "84101",
      "san_antonio|TX": "78205",
      "san_diego|CA": "92101",
      "san_francisco|CA": "94103",
      "san_jose|CA": "95113",
      "seattle|WA": "98101"
    };
  }
});
|
|
11356
|
+
|
|
11357
|
+
// src/api/facebook-ad-routes.ts
|
|
11358
|
+
/**
 * Builds the JSON body returned for a rejected request.
 *
 * @param {string} message - Human-readable reason.
 * @returns {{error_code: string, message: string}} The error payload.
 */
function invalidRequest(message) {
  const payload = { error_code: "invalid_request", message };
  return payload;
}
|
|
11361
|
+
/**
 * Heuristically decides whether the current page is a login wall or an
 * otherwise empty response.
 *
 * @param {object} driver - Browser driver exposing `getPage()`.
 * @returns {Promise<boolean>} True when the page's body text is shorter than
 *   200 characters or contains a login / account-creation prompt. A failure
 *   to read the page counts as empty text.
 */
async function detectSoftBlock(driver) {
  const currentPage = driver.getPage();
  // A rejected evaluation is treated as an empty page on purpose.
  const visibleText = await currentPage.evaluate(() => document.body?.innerText ?? "").catch(() => "");
  if (visibleText.length < 200) {
    return true;
  }
  return /Log in|log in|Create new account|You must log in/.test(visibleText);
}
|
|
11366
|
+
/**
 * Builds the Facebook Ad Library URL for a page-intel request.
 *
 * Precedence: `libraryId` (single ad), then `pageId` (all ads of a page),
 * then `query` (keyword search).
 *
 * Every caller-supplied value is percent-encoded before it is placed in the
 * query string, so a value containing "&" or "=" cannot add or override
 * parameters. Plain numeric ids and two-letter country codes are unchanged by
 * the encoding.
 *
 * @param {{libraryId?: string, pageId?: string, query?: string}} body -
 *   Request body.
 * @param {string} country - Country code for page and keyword listings.
 * @returns {string} The Ad Library URL.
 */
function buildPageIntelUrl(body, country) {
  const libraryRoot = "https://www.facebook.com/ads/library/";
  const countryParam = encodeURIComponent(country);

  const libraryId = body.libraryId?.trim();
  if (libraryId) {
    // Fall back to the raw input when the extractor cannot resolve an id.
    const resolvedId = FacebookAdExtractor.resolveLibraryId(libraryId) ?? libraryId;
    return `${libraryRoot}?id=${encodeURIComponent(resolvedId)}`;
  }

  const pageId = body.pageId?.trim();
  if (pageId) {
    return `${libraryRoot}?active_status=all&ad_type=all&country=${countryParam}&is_targeted_country=false&media_type=all&search_type=page&view_all_page_id=${encodeURIComponent(pageId)}`;
  }

  // A missing query yields an empty search term instead of a TypeError.
  const searchTerm = body.query?.trim() ?? "";
  return `${libraryRoot}?active_status=all&ad_type=all&country=${countryParam}&q=${encodeURIComponent(searchTerm)}&search_type=keyword_unordered`;
}
|
|
11371
|
+
/**
 * Builds browser launch options using the proxy configured in the
 * environment.
 *
 * @returns {object} Headless launch options with the trimmed KERNEL_API_KEY
 *   and KERNEL_PROXY_ID environment values, a 1280x900 viewport and the
 *   en-US locale.
 */
function kernelLaunchOpts() {
  const kernelApiKey = process.env.KERNEL_API_KEY?.trim();
  const kernelProxyId = process.env.KERNEL_PROXY_ID?.trim();
  return {
    headless: true,
    kernelApiKey,
    kernelProxyId,
    viewport: { width: 1280, height: 900 },
    locale: "en-US"
  };
}
|
|
11374
|
+
/**
 * Builds browser launch options that prefer a location-resolved proxy.
 *
 * Asks resolveKernelProxyId for a proxy in location mode for
 * "New York, NY" / "us". When that yields a proxy id it is used; when it
 * yields none, or the call fails, the KERNEL_PROXY_ID environment value is
 * used instead.
 *
 * @returns {Promise<object>} Headless launch options with the chosen proxy
 *   id, the trimmed KERNEL_API_KEY, a 1280x900 viewport and the en-US locale.
 */
async function kernelLaunchOptsResidential() {
  const readEnv = (name) => process.env[name]?.trim();
  let chosenProxyId = readEnv("KERNEL_PROXY_ID");
  try {
    const resolved = await resolveKernelProxyId({
      kernelApiKey: readEnv("KERNEL_API_KEY"),
      proxyMode: "location",
      configuredKernelProxyId: readEnv("KERNEL_PROXY_ID"),
      location: "New York, NY",
      gl: "us"
    });
    chosenProxyId = resolved.kernelProxyId || chosenProxyId;
  } catch {
    // Best effort: any resolver failure falls back to the configured proxy.
    chosenProxyId = readEnv("KERNEL_PROXY_ID");
  }
  return {
    headless: true,
    kernelApiKey: readEnv("KERNEL_API_KEY"),
    kernelProxyId: chosenProxyId,
    viewport: { width: 1280, height: 900 },
    locale: "en-US"
  };
}
|
|
11390
|
+
var import_hono4, import_zod15, import_client3, FacebookAdBodySchema, FacebookPageIntelBodySchema, FacebookTranscribeBodySchema, FacebookSearchBodySchema, FacebookMediaBodySchema, facebookAdApp, ALLOWED_MEDIA_HOSTS;
|
|
11391
|
+
var init_facebook_ad_routes = __esm({
|
|
11392
|
+
"src/api/facebook-ad-routes.ts"() {
|
|
11393
|
+
"use strict";
|
|
11394
|
+
import_hono4 = require("hono");
|
|
11395
|
+
import_zod15 = require("zod");
|
|
11396
|
+
init_db();
|
|
11397
|
+
init_rates();
|
|
11398
|
+
init_BrowserDriver();
|
|
11399
|
+
init_FacebookAdExtractor();
|
|
11400
|
+
init_FacebookAdGraphql();
|
|
11401
|
+
init_kernel_proxy_resolver();
|
|
11402
|
+
import_client3 = require("@fal-ai/client");
|
|
11403
|
+
init_api_auth();
|
|
11404
|
+
init_url_utils();
|
|
11405
|
+
FacebookAdBodySchema = import_zod15.z.object({
|
|
11406
|
+
url: import_zod15.z.string().trim().optional(),
|
|
11407
|
+
libraryId: import_zod15.z.string().trim().optional(),
|
|
11408
|
+
openModal: import_zod15.z.boolean().optional()
|
|
11409
|
+
}).refine((d) => !!d.url || !!d.libraryId, { message: "url or libraryId is required" });
|
|
11410
|
+
FacebookPageIntelBodySchema = import_zod15.z.object({
|
|
11411
|
+
pageId: import_zod15.z.string().trim().optional(),
|
|
11412
|
+
query: import_zod15.z.string().trim().optional(),
|
|
11413
|
+
libraryId: import_zod15.z.string().trim().optional(),
|
|
11414
|
+
maxAds: import_zod15.z.number().int().min(1).max(200).optional(),
|
|
11415
|
+
country: import_zod15.z.string().trim().toUpperCase().optional()
|
|
11416
|
+
}).refine((d) => !!d.pageId || !!d.query || !!d.libraryId, {
|
|
11417
|
+
message: "pageId, libraryId, or query is required"
|
|
11418
|
+
});
|
|
11419
|
+
FacebookTranscribeBodySchema = import_zod15.z.object({
|
|
11420
|
+
videoUrl: import_zod15.z.string().trim().min(1, "videoUrl is required")
|
|
11421
|
+
});
|
|
11422
|
+
FacebookSearchBodySchema = import_zod15.z.object({
|
|
11423
|
+
query: import_zod15.z.string().trim().min(1, "query is required"),
|
|
11424
|
+
country: import_zod15.z.string().trim().toUpperCase().optional(),
|
|
11425
|
+
maxResults: import_zod15.z.number().int().min(1).max(20).optional()
|
|
11426
|
+
});
|
|
11427
|
+
FacebookMediaBodySchema = import_zod15.z.object({
|
|
11428
|
+
url: import_zod15.z.string().trim().min(1, "url is required"),
|
|
11429
|
+
filename: import_zod15.z.string().trim().optional()
|
|
11430
|
+
});
|
|
11431
|
+
facebookAdApp = new import_hono4.Hono();
|
|
11432
|
+
facebookAdApp.post("/ad", createApiKeyAuth(), async (c) => {
|
|
11433
|
+
const raw = await c.req.json().catch(() => ({}));
|
|
11434
|
+
const parsed = FacebookAdBodySchema.safeParse(raw);
|
|
11435
|
+
if (!parsed.success) {
|
|
11436
|
+
return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
|
|
11437
|
+
}
|
|
11438
|
+
const body = parsed.data;
|
|
11439
|
+
const raw2 = body.url?.trim() ?? body.libraryId?.trim() ?? "";
|
|
11440
|
+
const libraryId = FacebookAdExtractor.resolveLibraryId(raw2);
|
|
11441
|
+
if (!libraryId) return c.json({ error: "Could not resolve a valid Facebook Ad Library ID from the provided input" }, 400);
|
|
11442
|
+
const fbUser = c.get("user");
|
|
11443
|
+
const { ok: adOk, balance_mc: adBal } = await debitMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD, raw2);
|
|
11444
|
+
if (!adOk) return c.json(insufficientBalanceResponse(adBal, MC_COSTS.fb_ad), 402);
|
|
11445
|
+
const driver = new BrowserDriver();
|
|
11446
|
+
try {
|
|
11447
|
+
await driver.launch(kernelLaunchOpts());
|
|
11448
|
+
const extractor = new FacebookAdExtractor(driver);
|
|
11449
|
+
const result = await extractor.extract(libraryId, { openModal: body.openModal !== false });
|
|
11450
|
+
await logRequestEvent({
|
|
11451
|
+
userId: fbUser.id,
|
|
11452
|
+
source: "facebook_ad",
|
|
11453
|
+
status: "done",
|
|
11454
|
+
query: raw2,
|
|
11455
|
+
resultCount: Array.isArray(result.variants) ? result.variants.length : null,
|
|
11456
|
+
result
|
|
11457
|
+
});
|
|
11458
|
+
return c.json(result);
|
|
11459
|
+
} catch (err) {
|
|
11460
|
+
await creditMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD_REFUND, "failed call");
|
|
11461
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
11462
|
+
await logRequestEvent({ userId: fbUser.id, source: "facebook_ad", status: "failed", query: raw2, error: msg });
|
|
11463
|
+
if (msg.toLowerCase().includes("blocked") || msg.toLowerCase().includes("captcha")) {
|
|
11464
|
+
return c.json({ error: msg }, 503);
|
|
11465
|
+
}
|
|
11466
|
+
return c.json({ error: msg }, 500);
|
|
11467
|
+
} finally {
|
|
11468
|
+
await driver.close();
|
|
11469
|
+
}
|
|
11470
|
+
});
|
|
11471
|
+
facebookAdApp.post("/page-intel", createApiKeyAuth(), async (c) => {
|
|
11472
|
+
const raw = await c.req.json().catch(() => ({}));
|
|
11473
|
+
const parsed = FacebookPageIntelBodySchema.safeParse(raw);
|
|
11474
|
+
if (!parsed.success) {
|
|
11475
|
+
return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
|
|
11476
|
+
}
|
|
11477
|
+
const body = parsed.data;
|
|
11478
|
+
const maxAds = Math.min(200, Math.max(1, body.maxAds ?? 50));
|
|
11479
|
+
const country = body.country?.trim().toUpperCase() ?? "US";
|
|
11480
|
+
const listingUrl = buildPageIntelUrl(body, country);
|
|
11481
|
+
const fbUser = c.get("user");
|
|
11482
|
+
const { ok: fbOk, balance_mc: fbBal } = await debitMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD, body.pageId ?? body.query ?? body.libraryId ?? "");
|
|
11483
|
+
if (!fbOk) return c.json(insufficientBalanceResponse(fbBal, MC_COSTS.fb_ad), 402);
|
|
11484
|
+
const driver = new BrowserDriver();
|
|
11485
|
+
let refunded = false;
|
|
11486
|
+
try {
|
|
11487
|
+
await driver.launch(await kernelLaunchOptsResidential());
|
|
11488
|
+
await driver.navigateTo(listingUrl);
|
|
11489
|
+
const extractor = new FacebookAdExtractor(driver);
|
|
11490
|
+
const result = await extractor.extractPageIntel(listingUrl, maxAds);
|
|
11491
|
+
if (result.ads.length === 0 && await detectSoftBlock(driver)) {
|
|
11492
|
+
await creditMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD_REFUND, "soft-block empty result");
|
|
11493
|
+
refunded = true;
|
|
11494
|
+
await logRequestEvent({ userId: fbUser.id, source: "facebook_page_intel", status: "failed", query: body.pageId ?? body.query ?? body.libraryId ?? "", error: "soft-block: empty result refunded" });
|
|
11495
|
+
return c.json({ error: "soft-block: no ads returned (refunded)" }, 503);
|
|
11496
|
+
}
|
|
11497
|
+
await logRequestEvent({ userId: fbUser.id, source: "facebook_page_intel", status: "done", query: body.pageId ?? body.query ?? body.libraryId ?? "", resultCount: result.ads.length, result });
|
|
11498
|
+
return c.json(result);
|
|
11499
|
+
} catch (err) {
|
|
11500
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
11501
|
+
if (!refunded) await creditMc(fbUser.id, MC_COSTS.fb_ad, LedgerOperation.FB_AD_REFUND, "failed call");
|
|
11502
|
+
await logRequestEvent({ userId: fbUser.id, source: "facebook_page_intel", status: "failed", query: body.pageId ?? body.query ?? body.libraryId ?? "", error: msg });
|
|
11503
|
+
if (msg.toLowerCase().includes("blocked") || msg.toLowerCase().includes("captcha")) {
|
|
11504
|
+
return c.json({ error: msg }, 503);
|
|
11505
|
+
}
|
|
11506
|
+
return c.json({ error: msg }, 500);
|
|
11507
|
+
} finally {
|
|
11508
|
+
await driver.close();
|
|
11509
|
+
}
|
|
11510
|
+
});
|
|
11511
|
+
facebookAdApp.post("/transcribe", createApiKeyAuth(), async (c) => {
|
|
11512
|
+
const raw = await c.req.json().catch(() => ({}));
|
|
11513
|
+
const parsed = FacebookTranscribeBodySchema.safeParse(raw);
|
|
11514
|
+
if (!parsed.success) {
|
|
11515
|
+
return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
|
|
11516
|
+
}
|
|
11517
|
+
const body = parsed.data;
|
|
11518
|
+
const urlCheck = await validatePublicHttpUrl(body.videoUrl, { field: "videoUrl", requireHttps: false });
|
|
11519
|
+
if (urlCheck.error) {
|
|
11520
|
+
return c.json(invalidRequest(urlCheck.error), 400);
|
|
11521
|
+
}
|
|
11522
|
+
const videoUrl = urlCheck.parsed.href;
|
|
11523
|
+
const fbUser = c.get("user");
|
|
11524
|
+
const { ok, balance_mc } = await debitMc(fbUser.id, MC_COSTS.fb_transcribe, LedgerOperation.FB_TRANSCRIBE, videoUrl);
|
|
11525
|
+
if (!ok) return c.json(insufficientBalanceResponse(balance_mc, MC_COSTS.fb_transcribe), 402);
|
|
11526
|
+
import_client3.fal.config({ credentials: process.env.FAL_KEY });
|
|
11527
|
+
try {
|
|
11528
|
+
const startMs = Date.now();
|
|
11529
|
+
const result = await import_client3.fal.subscribe("fal-ai/wizper", {
|
|
11530
|
+
input: { audio_url: videoUrl, task: "transcribe", language: "en" },
|
|
11531
|
+
logs: false,
|
|
11532
|
+
pollInterval: 3e3
|
|
11533
|
+
});
|
|
11534
|
+
const data = result.data;
|
|
11535
|
+
const text = data.text ?? "";
|
|
11536
|
+
const chunks = data.chunks ?? [];
|
|
11537
|
+
const durationMs = Date.now() - startMs;
|
|
11538
|
+
const fmtTs2 = (s) => `${Math.floor(s / 60)}:${String(Math.floor(s % 60)).padStart(2, "0")}`;
|
|
11539
|
+
const lines = ["# Facebook Ad Transcript", "", `*Transcribed in ${(durationMs / 1e3).toFixed(1)}s*`, "", "## Full Text", "", text, ""];
|
|
11540
|
+
if (chunks.length) {
|
|
11541
|
+
lines.push("## Timestamped Segments", "");
|
|
11542
|
+
for (const ch of chunks) {
|
|
11543
|
+
lines.push(`**[${fmtTs2(ch.timestamp[0])} \u2192 ${fmtTs2(ch.timestamp[1])}]** ${ch.text.trim()}`, "");
|
|
11544
|
+
}
|
|
11545
|
+
}
|
|
11546
|
+
await logRequestEvent({ userId: fbUser.id, source: "facebook_transcribe", status: "done", query: videoUrl, resultCount: chunks.length, result: { text, chunks, durationMs } });
|
|
11547
|
+
return c.json({ text, chunks, durationMs, markdown: lines.join("\n") });
|
|
11548
|
+
} catch (err) {
|
|
11549
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
11550
|
+
await creditMc(fbUser.id, MC_COSTS.fb_transcribe, LedgerOperation.FB_TRANSCRIBE_REFUND, "failed call");
|
|
11551
|
+
await logRequestEvent({ userId: fbUser.id, source: "facebook_transcribe", status: "failed", query: videoUrl, error: msg });
|
|
11552
|
+
return c.json({ error: msg }, 500);
|
|
11553
|
+
}
|
|
11554
|
+
});
|
|
11555
|
+
facebookAdApp.post("/search", createApiKeyAuth(), async (c) => {
|
|
11556
|
+
const raw = await c.req.json().catch(() => ({}));
|
|
11557
|
+
const parsed = FacebookSearchBodySchema.safeParse(raw);
|
|
11558
|
+
if (!parsed.success) {
|
|
11559
|
+
return c.json(invalidRequest(parsed.error.issues[0]?.message ?? "Invalid request"), 400);
|
|
11560
|
+
}
|
|
11561
|
+
const body = parsed.data;
|
|
11562
|
+
const country = body.country?.trim().toUpperCase() ?? "US";
|
|
11563
|
+
const maxResults = Math.min(20, Math.max(1, body.maxResults ?? 10));
|
|
11564
|
+
const searchUrl = `https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=${country}&q=${encodeURIComponent(body.query.trim())}&search_type=keyword_unordered`;
|
|
11565
|
+
const fbUser = c.get("user");
|
|
11566
|
+
const { ok, balance_mc } = await debitMc(fbUser.id, MC_COSTS.fb_search, LedgerOperation.FB_SEARCH, body.query.trim());
|
|
11567
|
+
if (!ok) return c.json(insufficientBalanceResponse(balance_mc, MC_COSTS.fb_search), 402);
|
|
11568
|
+
const driver = new BrowserDriver();
|
|
11569
|
+
let searchRefunded = false;
|
|
11570
|
+
try {
|
|
11571
|
+
await driver.launch(await kernelLaunchOptsResidential());
|
|
11572
|
+
const page = driver.getPage();
|
|
11573
|
+
const collated = await collectAdLibraryResults(page, searchUrl, Math.max(maxResults * 4, 40));
|
|
11574
|
+
const gqlAdvertisers = advertisersFromResults(collated, maxResults);
|
|
11575
|
+
if (gqlAdvertisers.length > 0) {
|
|
11576
|
+
const results2 = gqlAdvertisers.map((a) => ({ name: a.pageName, pageName: a.pageName, pageId: a.pageId, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
|
|
11577
|
+
const searchResult2 = { query: body.query.trim(), searchUrl, results: results2, via: "graphql" };
|
|
11578
|
+
await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results2.length, result: searchResult2 });
|
|
11579
|
+
return c.json(searchResult2);
|
|
11580
|
+
}
|
|
11581
|
+
await page.waitForTimeout(1500);
|
|
11582
|
+
for (let scroll = 0; scroll < 3; scroll++) {
|
|
11583
|
+
await page.evaluate(() => {
|
|
11584
|
+
if (document.body) window.scrollTo(0, document.body.scrollHeight);
|
|
11585
|
+
});
|
|
11586
|
+
await page.waitForTimeout(1e3);
|
|
11587
|
+
}
|
|
11588
|
+
const rawBodyText = await page.evaluate(() => document.body?.innerText ?? "");
|
|
11589
|
+
if (rawBodyText.length < 200 || /Log in|You must log in|Create new account/.test(rawBodyText)) {
|
|
11590
|
+
await creditMc(fbUser.id, MC_COSTS.fb_search, LedgerOperation.FB_SEARCH_REFUND, "soft-block empty result");
|
|
11591
|
+
searchRefunded = true;
|
|
11592
|
+
await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "failed", query: body.query.trim(), error: "soft-block: empty result refunded" });
|
|
11593
|
+
return c.json({ error: "soft-block: no results returned (refunded)" }, 503);
|
|
11594
|
+
}
|
|
11595
|
+
const bodyText = rawBodyText.replace(//g, " ").replace(/\s+/g, " ");
|
|
11596
|
+
const adChunks = [];
|
|
11597
|
+
const splitRe = /(?=(?:Active|Inactive)\s+Library ID[:\s]+\d{10,20})/g;
|
|
11598
|
+
let last = 0;
|
|
11599
|
+
let m;
|
|
11600
|
+
while ((m = splitRe.exec(bodyText)) !== null) {
|
|
11601
|
+
if (m.index > last) adChunks.push(bodyText.slice(last, m.index));
|
|
11602
|
+
last = m.index;
|
|
10934
11603
|
if (splitRe.lastIndex === m.index) splitRe.lastIndex++;
|
|
10935
11604
|
}
|
|
10936
11605
|
if (last < bodyText.length) adChunks.push(bodyText.slice(last));
|
|
@@ -10953,7 +11622,7 @@ var init_facebook_ad_routes = __esm({
|
|
|
10953
11622
|
advertiserMap.set(pageName, { pageName, sampleLibraryId: libraryId, adCount: 1 });
|
|
10954
11623
|
}
|
|
10955
11624
|
}
|
|
10956
|
-
const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults);
|
|
11625
|
+
const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults).map((a) => ({ name: a.pageName, pageName: a.pageName, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
|
|
10957
11626
|
const searchResult = { query: body.query.trim(), searchUrl, results };
|
|
10958
11627
|
await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results.length, result: searchResult });
|
|
10959
11628
|
return c.json(searchResult);
|
|
@@ -11927,153 +12596,44 @@ async function capturePageSnapshot(target, options = {}) {
|
|
|
11927
12596
|
}
|
|
11928
12597
|
}
|
|
11929
12598
|
async function capturePageSnapshots(targets, options = {}) {
|
|
11930
|
-
const timeoutMs = normalizeTimeoutMs(options.timeoutMs);
|
|
11931
|
-
const maxConcurrency = normalizeMaxConcurrency(options.maxConcurrency);
|
|
11932
|
-
const limit = (0, import_p_limit3.default)(maxConcurrency);
|
|
11933
|
-
const pageSnapshotArtifacts = await Promise.all(
|
|
11934
|
-
targets.map((target) => limit(() => capturePageSnapshot(target, { ...options, timeoutMs })))
|
|
11935
|
-
);
|
|
11936
|
-
const attempts = pageSnapshotArtifacts.map((artifact, index) => ({
|
|
11937
|
-
attemptNumber: index + 1,
|
|
11938
|
-
outcome: artifact.status === "captured" ? "page_captured" : "page_failed",
|
|
11939
|
-
startedAt: artifact.diagnostics.requestedAt,
|
|
11940
|
-
completedAt: artifact.diagnostics.completedAt,
|
|
11941
|
-
durationMs: artifact.diagnostics.durationMs,
|
|
11942
|
-
...artifact.error ? { problemCode: artifact.error.code, message: artifact.error.message } : {}
|
|
11943
|
-
}));
|
|
11944
|
-
const capturedCount = pageSnapshotArtifacts.filter((artifact) => artifact.status === "captured").length;
|
|
11945
|
-
return {
|
|
11946
|
-
pageSnapshotArtifacts,
|
|
11947
|
-
attempts,
|
|
11948
|
-
diagnostics: {
|
|
11949
|
-
requestedCount: targets.length,
|
|
11950
|
-
capturedCount,
|
|
11951
|
-
failedCount: targets.length - capturedCount,
|
|
11952
|
-
maxConcurrency,
|
|
11953
|
-
timeoutMs
|
|
11954
|
-
}
|
|
11955
|
-
};
|
|
11956
|
-
}
|
|
11957
|
-
var import_node_crypto2, import_p_limit3, DEFAULT_TIMEOUT_MS, DEFAULT_MAX_CONCURRENCY, DEFAULT_MAX_CONTENT_CHARS;
|
|
11958
|
-
var init_page_snapshot_extractor = __esm({
|
|
11959
|
-
"src/serp-intelligence/page-snapshot-extractor.ts"() {
|
|
11960
|
-
"use strict";
|
|
11961
|
-
import_node_crypto2 = require("crypto");
|
|
11962
|
-
import_p_limit3 = __toESM(require("p-limit"), 1);
|
|
11963
|
-
init_kpo_extractor();
|
|
11964
|
-
init_url_utils();
|
|
11965
|
-
DEFAULT_TIMEOUT_MS = 15e3;
|
|
11966
|
-
DEFAULT_MAX_CONCURRENCY = 2;
|
|
11967
|
-
DEFAULT_MAX_CONTENT_CHARS = 25e4;
|
|
11968
|
-
}
|
|
11969
|
-
});
|
|
11970
|
-
|
|
11971
|
-
// src/locations.ts
|
|
11972
|
-
var LOCATIONS;
|
|
11973
|
-
var init_locations = __esm({
|
|
11974
|
-
"src/locations.ts"() {
|
|
11975
|
-
"use strict";
|
|
11976
|
-
LOCATIONS = {
|
|
11977
|
-
"austin": "Austin,Texas,United States",
|
|
11978
|
-
"new york": "New York,New York,United States",
|
|
11979
|
-
"new york city": "New York,New York,United States",
|
|
11980
|
-
"nyc": "New York,New York,United States",
|
|
11981
|
-
"los angeles": "Los Angeles,California,United States",
|
|
11982
|
-
"la": "Los Angeles,California,United States",
|
|
11983
|
-
"chicago": "Chicago,Illinois,United States",
|
|
11984
|
-
"houston": "Houston,Texas,United States",
|
|
11985
|
-
"phoenix": "Phoenix,Arizona,United States",
|
|
11986
|
-
"philadelphia": "Philadelphia,Pennsylvania,United States",
|
|
11987
|
-
"philly": "Philadelphia,Pennsylvania,United States",
|
|
11988
|
-
"san antonio": "San Antonio,Texas,United States",
|
|
11989
|
-
"dallas": "Dallas,Texas,United States",
|
|
11990
|
-
"miami": "Miami,Florida,United States",
|
|
11991
|
-
"seattle": "Seattle,Washington,United States",
|
|
11992
|
-
"denver": "Denver,Colorado,United States",
|
|
11993
|
-
"loveland": "Loveland,Colorado,United States",
|
|
11994
|
-
"loveland co": "Loveland,Colorado,United States",
|
|
11995
|
-
"fort collins": "Fort Collins,Colorado,United States",
|
|
11996
|
-
"boulder": "Boulder,Colorado,United States",
|
|
11997
|
-
"colorado springs": "Colorado Springs,Colorado,United States",
|
|
11998
|
-
"boston": "Boston,Massachusetts,United States",
|
|
11999
|
-
"atlanta": "Atlanta,Georgia,United States",
|
|
12000
|
-
"san francisco": "San Francisco,California,United States",
|
|
12001
|
-
"sf": "San Francisco,California,United States",
|
|
12002
|
-
"portland": "Portland,Oregon,United States",
|
|
12003
|
-
"las vegas": "Las Vegas,Nevada,United States",
|
|
12004
|
-
"minneapolis": "Minneapolis,Minnesota,United States",
|
|
12005
|
-
"detroit": "Detroit,Michigan,United States",
|
|
12006
|
-
"nashville": "Nashville,Tennessee,United States",
|
|
12007
|
-
"charlotte": "Charlotte,North Carolina,United States",
|
|
12008
|
-
"orlando": "Orlando,Florida,United States",
|
|
12009
|
-
"san diego": "San Diego,California,United States",
|
|
12010
|
-
"baltimore": "Baltimore,Maryland,United States",
|
|
12011
|
-
"sacramento": "Sacramento,California,United States",
|
|
12012
|
-
"columbus": "Columbus,Ohio,United States",
|
|
12013
|
-
"indianapolis": "Indianapolis,Indiana,United States",
|
|
12014
|
-
"san jose": "San Jose,California,United States",
|
|
12015
|
-
"fort worth": "Fort Worth,Texas,United States",
|
|
12016
|
-
"jacksonville": "Jacksonville,Florida,United States",
|
|
12017
|
-
"memphis": "Memphis,Tennessee,United States",
|
|
12018
|
-
"louisville": "Louisville,Kentucky,United States",
|
|
12019
|
-
"raleigh": "Raleigh,North Carolina,United States",
|
|
12020
|
-
"richmond": "Richmond,Virginia,United States",
|
|
12021
|
-
"salt lake city": "Salt Lake City,Utah,United States",
|
|
12022
|
-
"toronto": "Toronto,Ontario,Canada",
|
|
12023
|
-
"vancouver": "Vancouver,British Columbia,Canada",
|
|
12024
|
-
"montreal": "Montreal,Quebec,Canada",
|
|
12025
|
-
"calgary": "Calgary,Alberta,Canada",
|
|
12026
|
-
"ottawa": "Ottawa,Ontario,Canada",
|
|
12027
|
-
"london": "London,England,United Kingdom",
|
|
12028
|
-
"manchester": "Manchester,England,United Kingdom",
|
|
12029
|
-
"birmingham": "Birmingham,England,United Kingdom",
|
|
12030
|
-
"edinburgh": "Edinburgh,Scotland,United Kingdom",
|
|
12031
|
-
"glasgow": "Glasgow,Scotland,United Kingdom",
|
|
12032
|
-
"leeds": "Leeds,England,United Kingdom",
|
|
12033
|
-
"sydney": "Sydney,New South Wales,Australia",
|
|
12034
|
-
"melbourne": "Melbourne,Victoria,Australia",
|
|
12035
|
-
"brisbane": "Brisbane,Queensland,Australia",
|
|
12036
|
-
"perth": "Perth,Western Australia,Australia",
|
|
12037
|
-
"adelaide": "Adelaide,South Australia,Australia",
|
|
12038
|
-
"dublin": "Dublin,Leinster,Ireland"
|
|
12039
|
-
};
|
|
12040
|
-
}
|
|
12041
|
-
});
|
|
12042
|
-
|
|
12043
|
-
// src/uule.ts
|
|
12044
|
-
function encodeVarint(value) {
|
|
12045
|
-
const bytes = [];
|
|
12046
|
-
let remaining = value;
|
|
12047
|
-
do {
|
|
12048
|
-
let byte = remaining & 127;
|
|
12049
|
-
remaining >>>= 7;
|
|
12050
|
-
if (remaining > 0) byte |= 128;
|
|
12051
|
-
bytes.push(byte);
|
|
12052
|
-
} while (remaining > 0);
|
|
12053
|
-
return bytes;
|
|
12054
|
-
}
|
|
12055
|
-
function encodeUule(name) {
|
|
12056
|
-
const locationBytes = Buffer.from(name, "utf8");
|
|
12057
|
-
const payload = Buffer.concat([
|
|
12058
|
-
Buffer.from([8, 2, 16, 32, 34]),
|
|
12059
|
-
Buffer.from(encodeVarint(locationBytes.length)),
|
|
12060
|
-
locationBytes
|
|
12061
|
-
]);
|
|
12062
|
-
return `w+${payload.toString("base64")}`;
|
|
12063
|
-
}
|
|
12064
|
-
function normalizeLocation(input) {
|
|
12065
|
-
const raw = input.toLowerCase().trim();
|
|
12066
|
-
if (LOCATIONS[raw]) return LOCATIONS[raw];
|
|
12067
|
-
const beforeComma = raw.split(",")[0].trim();
|
|
12068
|
-
if (beforeComma !== raw && LOCATIONS[beforeComma]) return LOCATIONS[beforeComma];
|
|
12069
|
-
const withoutState = raw.replace(/\s+[a-z]{2}$/, "").trim();
|
|
12070
|
-
if (withoutState !== raw && LOCATIONS[withoutState]) return LOCATIONS[withoutState];
|
|
12071
|
-
return input;
|
|
12599
|
+
const timeoutMs = normalizeTimeoutMs(options.timeoutMs);
|
|
12600
|
+
const maxConcurrency = normalizeMaxConcurrency(options.maxConcurrency);
|
|
12601
|
+
const limit = (0, import_p_limit3.default)(maxConcurrency);
|
|
12602
|
+
const pageSnapshotArtifacts = await Promise.all(
|
|
12603
|
+
targets.map((target) => limit(() => capturePageSnapshot(target, { ...options, timeoutMs })))
|
|
12604
|
+
);
|
|
12605
|
+
const attempts = pageSnapshotArtifacts.map((artifact, index) => ({
|
|
12606
|
+
attemptNumber: index + 1,
|
|
12607
|
+
outcome: artifact.status === "captured" ? "page_captured" : "page_failed",
|
|
12608
|
+
startedAt: artifact.diagnostics.requestedAt,
|
|
12609
|
+
completedAt: artifact.diagnostics.completedAt,
|
|
12610
|
+
durationMs: artifact.diagnostics.durationMs,
|
|
12611
|
+
...artifact.error ? { problemCode: artifact.error.code, message: artifact.error.message } : {}
|
|
12612
|
+
}));
|
|
12613
|
+
const capturedCount = pageSnapshotArtifacts.filter((artifact) => artifact.status === "captured").length;
|
|
12614
|
+
return {
|
|
12615
|
+
pageSnapshotArtifacts,
|
|
12616
|
+
attempts,
|
|
12617
|
+
diagnostics: {
|
|
12618
|
+
requestedCount: targets.length,
|
|
12619
|
+
capturedCount,
|
|
12620
|
+
failedCount: targets.length - capturedCount,
|
|
12621
|
+
maxConcurrency,
|
|
12622
|
+
timeoutMs
|
|
12623
|
+
}
|
|
12624
|
+
};
|
|
12072
12625
|
}
|
|
12073
|
-
var
|
|
12074
|
-
|
|
12626
|
+
var import_node_crypto2, import_p_limit3, DEFAULT_TIMEOUT_MS, DEFAULT_MAX_CONCURRENCY, DEFAULT_MAX_CONTENT_CHARS;
|
|
12627
|
+
var init_page_snapshot_extractor = __esm({
|
|
12628
|
+
"src/serp-intelligence/page-snapshot-extractor.ts"() {
|
|
12075
12629
|
"use strict";
|
|
12076
|
-
|
|
12630
|
+
import_node_crypto2 = require("crypto");
|
|
12631
|
+
import_p_limit3 = __toESM(require("p-limit"), 1);
|
|
12632
|
+
init_kpo_extractor();
|
|
12633
|
+
init_url_utils();
|
|
12634
|
+
DEFAULT_TIMEOUT_MS = 15e3;
|
|
12635
|
+
DEFAULT_MAX_CONCURRENCY = 2;
|
|
12636
|
+
DEFAULT_MAX_CONTENT_CHARS = 25e4;
|
|
12077
12637
|
}
|
|
12078
12638
|
});
|
|
12079
12639
|
|
|
@@ -13364,425 +13924,59 @@ var init_OutputSerializer = __esm({
|
|
|
13364
13924
|
await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
|
|
13365
13925
|
const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
|
|
13366
13926
|
const rows = citations.map((c, i) => ({
|
|
13367
|
-
seed_query: seed,
|
|
13368
|
-
response_text: i === 0 ? text ?? "" : "",
|
|
13369
|
-
citation_text: c.text,
|
|
13370
|
-
citation_href: c.href
|
|
13371
|
-
}));
|
|
13372
|
-
const csv = import_papaparse2.default.unparse(rows, { header: true });
|
|
13373
|
-
const filename = `${slug}-ai-mode-${Date.now()}.csv`;
|
|
13374
|
-
const fullPath = import_node_path5.default.join(outputDir, filename);
|
|
13375
|
-
await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
|
|
13376
|
-
return fullPath;
|
|
13377
|
-
}
|
|
13378
|
-
async writeWhatPeopleSayingCSV(cards, seed, outputDir) {
|
|
13379
|
-
await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
|
|
13380
|
-
const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
|
|
13381
|
-
const rows = cards.map((c) => ({ seed_query: seed, ...c }));
|
|
13382
|
-
const csv = import_papaparse2.default.unparse(rows, { header: true });
|
|
13383
|
-
const filename = `${slug}-what-people-saying-${Date.now()}.csv`;
|
|
13384
|
-
const fullPath = import_node_path5.default.join(outputDir, filename);
|
|
13385
|
-
await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
|
|
13386
|
-
return fullPath;
|
|
13387
|
-
}
|
|
13388
|
-
};
|
|
13389
|
-
}
|
|
13390
|
-
});
|
|
13391
|
-
|
|
13392
|
-
// src/output/ProgressReporter.ts
|
|
13393
|
-
var ProgressReporter;
|
|
13394
|
-
var init_ProgressReporter = __esm({
|
|
13395
|
-
"src/output/ProgressReporter.ts"() {
|
|
13396
|
-
"use strict";
|
|
13397
|
-
ProgressReporter = class {
|
|
13398
|
-
onQuestion(node) {
|
|
13399
|
-
process.stdout.write(JSON.stringify({ event: "question", depth: node.depth, question: node.question }) + "\n");
|
|
13400
|
-
}
|
|
13401
|
-
onDepth(depth) {
|
|
13402
|
-
process.stdout.write(JSON.stringify({ event: "depth", depth }) + "\n");
|
|
13403
|
-
}
|
|
13404
|
-
onVideos(videos) {
|
|
13405
|
-
for (const v of videos) {
|
|
13406
|
-
process.stdout.write(JSON.stringify({ event: "video", type: v.type, platform: v.platform, duration: v.duration, title: v.title, channel: v.channel, url: v.url }) + "\n");
|
|
13407
|
-
}
|
|
13408
|
-
}
|
|
13409
|
-
onForums(forums) {
|
|
13410
|
-
for (const f of forums) {
|
|
13411
|
-
process.stdout.write(JSON.stringify({ event: "forum", title: f.title, source: f.source, url: f.url }) + "\n");
|
|
13412
|
-
}
|
|
13413
|
-
}
|
|
13414
|
-
onComplete(stats) {
|
|
13415
|
-
process.stdout.write(JSON.stringify({ event: "complete", ...stats }) + "\n");
|
|
13416
|
-
}
|
|
13417
|
-
onError(err) {
|
|
13418
|
-
process.stderr.write(JSON.stringify({ event: "error", type: err.constructor.name, message: err.message }) + "\n");
|
|
13419
|
-
}
|
|
13420
|
-
};
|
|
13421
|
-
}
|
|
13422
|
-
});
|
|
13423
|
-
|
|
13424
|
-
// src/kernel-proxy-resolver.ts
|
|
13425
|
-
function proxyIdSuffix2(proxyId) {
|
|
13426
|
-
return proxyId ? proxyId.slice(-6) : null;
|
|
13427
|
-
}
|
|
13428
|
-
function resolution(source, proxyMode, proxyId, target, error) {
|
|
13429
|
-
return {
|
|
13430
|
-
kernelProxyId: proxyId,
|
|
13431
|
-
resolution: {
|
|
13432
|
-
source,
|
|
13433
|
-
proxyMode,
|
|
13434
|
-
proxyIdPresent: Boolean(proxyId),
|
|
13435
|
-
proxyIdSuffix: proxyIdSuffix2(proxyId),
|
|
13436
|
-
target,
|
|
13437
|
-
error
|
|
13438
|
-
}
|
|
13439
|
-
};
|
|
13440
|
-
}
|
|
13441
|
-
function normalizeStateName(value) {
|
|
13442
|
-
return value.trim().toLowerCase().replace(/\s+/g, " ");
|
|
13443
|
-
}
|
|
13444
|
-
function normalizeCountryName(value) {
|
|
13445
|
-
return value.trim().toLowerCase().replace(/\./g, "").replace(/\s+/g, " ");
|
|
13446
|
-
}
|
|
13447
|
-
function isUnitedStates(country) {
|
|
13448
|
-
if (!country) return true;
|
|
13449
|
-
const normalized = normalizeCountryName(country);
|
|
13450
|
-
return normalized === "united states" || normalized === "united states of america" || normalized === "usa" || normalized === "us";
|
|
13451
|
-
}
|
|
13452
|
-
function stateCodeFor(region) {
|
|
13453
|
-
const trimmed = region.trim();
|
|
13454
|
-
if (/^[A-Za-z]{2}$/.test(trimmed)) return trimmed.toUpperCase();
|
|
13455
|
-
return US_STATE_CODES[normalizeStateName(trimmed)] ?? null;
|
|
13456
|
-
}
|
|
13457
|
-
function kernelCityIdentifierCandidates(city) {
|
|
13458
|
-
const ascii = city.normalize("NFKD").replace(/[^\x00-\x7F]/g, "").toLowerCase();
|
|
13459
|
-
const words = ascii.split(/[^a-z0-9]+/).filter(Boolean);
|
|
13460
|
-
const underscored = words.join("_");
|
|
13461
|
-
const compact = words.join("");
|
|
13462
|
-
return Array.from(new Set([underscored, compact].filter(Boolean)));
|
|
13463
|
-
}
|
|
13464
|
-
function proxyName(country, state, city) {
|
|
13465
|
-
return city ? `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}-${city}` : `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}`;
|
|
13466
|
-
}
|
|
13467
|
-
function zipProxyName(zip) {
|
|
13468
|
-
return `mcp-serp-residential-us-zip-${zip}`;
|
|
13469
|
-
}
|
|
13470
|
-
function parseKernelLocationProxyTarget(location, gl) {
|
|
13471
|
-
if (!location || gl.toLowerCase() !== "us") return null;
|
|
13472
|
-
const canonicalLocation = normalizeLocation(location);
|
|
13473
|
-
let parts = canonicalLocation.split(",").map((part) => part.trim()).filter(Boolean);
|
|
13474
|
-
if (parts.length > 1 && isUnitedStates(parts[parts.length - 1])) {
|
|
13475
|
-
parts = parts.slice(0, -1);
|
|
13476
|
-
}
|
|
13477
|
-
if (parts.length === 1) {
|
|
13478
|
-
const stateOnly = stateCodeFor(parts[0]);
|
|
13479
|
-
if (!stateOnly) return null;
|
|
13480
|
-
return {
|
|
13481
|
-
canonicalLocation,
|
|
13482
|
-
level: "state",
|
|
13483
|
-
country: "US",
|
|
13484
|
-
state: stateOnly,
|
|
13485
|
-
city: "",
|
|
13486
|
-
cityCandidates: [],
|
|
13487
|
-
proxyName: proxyName("US", stateOnly),
|
|
13488
|
-
config: {
|
|
13489
|
-
country: "US",
|
|
13490
|
-
state: stateOnly
|
|
13491
|
-
}
|
|
13492
|
-
};
|
|
13493
|
-
}
|
|
13494
|
-
const [city = "", region = ""] = parts;
|
|
13495
|
-
if (!city || !region) return null;
|
|
13496
|
-
const state = stateCodeFor(region);
|
|
13497
|
-
if (!state) return null;
|
|
13498
|
-
const cityCandidates = kernelCityIdentifierCandidates(city);
|
|
13499
|
-
const primaryCity = cityCandidates[0];
|
|
13500
|
-
if (!primaryCity) return null;
|
|
13501
|
-
return {
|
|
13502
|
-
canonicalLocation,
|
|
13503
|
-
level: "city",
|
|
13504
|
-
country: "US",
|
|
13505
|
-
state,
|
|
13506
|
-
city: primaryCity,
|
|
13507
|
-
cityCandidates,
|
|
13508
|
-
proxyName: proxyName("US", state, primaryCity),
|
|
13509
|
-
config: {
|
|
13510
|
-
country: "US",
|
|
13511
|
-
state,
|
|
13512
|
-
city: primaryCity
|
|
13513
|
-
}
|
|
13514
|
-
};
|
|
13515
|
-
}
|
|
13516
|
-
function cityZipKey(target) {
|
|
13517
|
-
return `${target.city}|${target.state}`;
|
|
13518
|
-
}
|
|
13519
|
-
function knownZipFor(target, explicitZip) {
|
|
13520
|
-
if (explicitZip && /^\d{5}$/.test(explicitZip)) return explicitZip;
|
|
13521
|
-
return US_CITY_CENTER_ZIPS[cityZipKey(target)] ?? null;
|
|
13522
|
-
}
|
|
13523
|
-
function zipTarget(target, zip) {
|
|
13524
|
-
return {
|
|
13525
|
-
...target,
|
|
13526
|
-
level: "zip",
|
|
13527
|
-
zip,
|
|
13528
|
-
proxyName: zipProxyName(zip),
|
|
13529
|
-
config: {
|
|
13530
|
-
country: target.country,
|
|
13531
|
-
state: target.state,
|
|
13532
|
-
zip
|
|
13533
|
-
}
|
|
13534
|
-
};
|
|
13535
|
-
}
|
|
13536
|
-
function configMatches(config, target, city) {
|
|
13537
|
-
if (target.level === "zip") {
|
|
13538
|
-
return config?.country?.toUpperCase() === target.country && config?.zip === target.zip;
|
|
13539
|
-
}
|
|
13540
|
-
return config?.country?.toUpperCase() === target.country && config?.state?.toUpperCase() === target.state && (city ? config?.city === city : !config?.city);
|
|
13541
|
-
}
|
|
13542
|
-
function findExistingTargetProxy(proxies, target) {
|
|
13543
|
-
return proxies.find((proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id) && (proxy.name === target.proxyName || configMatches(proxy.config, target, target.level === "city" ? target.city : void 0))) ?? null;
|
|
13544
|
-
}
|
|
13545
|
-
function findExistingProxy(proxies, target) {
|
|
13546
|
-
for (const city of target.cityCandidates) {
|
|
13547
|
-
const name = proxyName(target.country, target.state, city);
|
|
13548
|
-
const found = proxies.find((proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id) && (proxy.name === name || configMatches(proxy.config, target, city)));
|
|
13549
|
-
if (found) return found;
|
|
13550
|
-
}
|
|
13551
|
-
return null;
|
|
13552
|
-
}
|
|
13553
|
-
function stateTarget(target) {
|
|
13554
|
-
return {
|
|
13555
|
-
...target,
|
|
13556
|
-
level: "state",
|
|
13557
|
-
proxyName: proxyName(target.country, target.state),
|
|
13558
|
-
config: {
|
|
13559
|
-
country: target.country,
|
|
13560
|
-
state: target.state
|
|
13561
|
-
}
|
|
13562
|
-
};
|
|
13563
|
-
}
|
|
13564
|
-
function findExistingStateProxy(proxies, target) {
|
|
13565
|
-
const name = proxyName(target.country, target.state);
|
|
13566
|
-
return proxies.find((proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id) && (proxy.name === name || configMatches(proxy.config, target))) ?? null;
|
|
13567
|
-
}
|
|
13568
|
-
function escalatedTargetLevel(target, attemptIndex) {
|
|
13569
|
-
return stateTarget(target);
|
|
13570
|
-
}
|
|
13571
|
-
function errorText2(err) {
|
|
13572
|
-
return err instanceof Error ? err.message : String(err);
|
|
13573
|
-
}
|
|
13574
|
-
async function resolveKernelProxyId(options) {
|
|
13575
|
-
if (options.proxyMode === "none") {
|
|
13576
|
-
return resolution("disabled", options.proxyMode, void 0, null, null);
|
|
13577
|
-
}
|
|
13578
|
-
if (options.proxyMode === "configured") {
|
|
13579
|
-
return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, null, null);
|
|
13580
|
-
}
|
|
13581
|
-
const target = parseKernelLocationProxyTarget(options.location, options.gl);
|
|
13582
|
-
if (!target || !options.kernelApiKey) {
|
|
13583
|
-
return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, target ? null : "location could not be normalized to a US city/state proxy target");
|
|
13927
|
+
seed_query: seed,
|
|
13928
|
+
response_text: i === 0 ? text ?? "" : "",
|
|
13929
|
+
citation_text: c.text,
|
|
13930
|
+
citation_href: c.href
|
|
13931
|
+
}));
|
|
13932
|
+
const csv = import_papaparse2.default.unparse(rows, { header: true });
|
|
13933
|
+
const filename = `${slug}-ai-mode-${Date.now()}.csv`;
|
|
13934
|
+
const fullPath = import_node_path5.default.join(outputDir, filename);
|
|
13935
|
+
await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
|
|
13936
|
+
return fullPath;
|
|
13937
|
+
}
|
|
13938
|
+
async writeWhatPeopleSayingCSV(cards, seed, outputDir) {
|
|
13939
|
+
await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
|
|
13940
|
+
const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
|
|
13941
|
+
const rows = cards.map((c) => ({ seed_query: seed, ...c }));
|
|
13942
|
+
const csv = import_papaparse2.default.unparse(rows, { header: true });
|
|
13943
|
+
const filename = `${slug}-what-people-saying-${Date.now()}.csv`;
|
|
13944
|
+
const fullPath = import_node_path5.default.join(outputDir, filename);
|
|
13945
|
+
await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
|
|
13946
|
+
return fullPath;
|
|
13947
|
+
}
|
|
13948
|
+
};
|
|
13584
13949
|
}
|
|
13585
|
-
|
|
13586
|
-
|
|
13587
|
-
|
|
13588
|
-
|
|
13589
|
-
|
|
13590
|
-
|
|
13591
|
-
|
|
13592
|
-
|
|
13593
|
-
|
|
13594
|
-
|
|
13595
|
-
config: escalatedTarget.config
|
|
13596
|
-
});
|
|
13597
|
-
if (created.id) {
|
|
13598
|
-
return resolution("location_created", options.proxyMode, created.id, escalatedTarget, null);
|
|
13599
|
-
}
|
|
13600
|
-
createErrors2.push(`${escalatedTarget.state}: Kernel did not return a proxy id`);
|
|
13601
|
-
} catch (err) {
|
|
13602
|
-
createErrors2.push(`${escalatedTarget.state}: ${errorText2(err)}`);
|
|
13950
|
+
});
|
|
13951
|
+
|
|
13952
|
+
// src/output/ProgressReporter.ts
|
|
13953
|
+
var ProgressReporter;
|
|
13954
|
+
var init_ProgressReporter = __esm({
|
|
13955
|
+
"src/output/ProgressReporter.ts"() {
|
|
13956
|
+
"use strict";
|
|
13957
|
+
ProgressReporter = class {
|
|
13958
|
+
onQuestion(node) {
|
|
13959
|
+
process.stdout.write(JSON.stringify({ event: "question", depth: node.depth, question: node.question }) + "\n");
|
|
13603
13960
|
}
|
|
13604
|
-
|
|
13605
|
-
|
|
13606
|
-
const proxies = await kernel.proxies.list();
|
|
13607
|
-
const zip = knownZipFor(target, options.proxyZip);
|
|
13608
|
-
const createErrors = [];
|
|
13609
|
-
if (zip) {
|
|
13610
|
-
const targetZip = zipTarget(target, zip);
|
|
13611
|
-
const existingZip = findExistingTargetProxy(proxies, targetZip);
|
|
13612
|
-
if (existingZip?.id) {
|
|
13613
|
-
return resolution("location_reused", options.proxyMode, existingZip.id, targetZip, null);
|
|
13961
|
+
onDepth(depth) {
|
|
13962
|
+
process.stdout.write(JSON.stringify({ event: "depth", depth }) + "\n");
|
|
13614
13963
|
}
|
|
13615
|
-
|
|
13616
|
-
const
|
|
13617
|
-
type: "
|
|
13618
|
-
name: targetZip.proxyName,
|
|
13619
|
-
config: {
|
|
13620
|
-
country: targetZip.country,
|
|
13621
|
-
zip
|
|
13622
|
-
}
|
|
13623
|
-
});
|
|
13624
|
-
if (created.id) {
|
|
13625
|
-
return resolution("location_created", options.proxyMode, created.id, targetZip, null);
|
|
13964
|
+
onVideos(videos) {
|
|
13965
|
+
for (const v of videos) {
|
|
13966
|
+
process.stdout.write(JSON.stringify({ event: "video", type: v.type, platform: v.platform, duration: v.duration, title: v.title, channel: v.channel, url: v.url }) + "\n");
|
|
13626
13967
|
}
|
|
13627
|
-
createErrors.push(`${zip}: Kernel did not return a proxy id`);
|
|
13628
|
-
} catch (err) {
|
|
13629
|
-
createErrors.push(`${zip}: ${errorText2(err)}`);
|
|
13630
13968
|
}
|
|
13631
|
-
|
|
13632
|
-
|
|
13633
|
-
|
|
13634
|
-
return resolution("location_reused", options.proxyMode, existing.id, target, createErrors.join(" | ") || null);
|
|
13635
|
-
}
|
|
13636
|
-
for (const city of target.cityCandidates) {
|
|
13637
|
-
try {
|
|
13638
|
-
const created = await kernel.proxies.create({
|
|
13639
|
-
type: "residential",
|
|
13640
|
-
name: proxyName(target.country, target.state, city),
|
|
13641
|
-
config: {
|
|
13642
|
-
country: target.country,
|
|
13643
|
-
state: target.state,
|
|
13644
|
-
city
|
|
13645
|
-
}
|
|
13646
|
-
});
|
|
13647
|
-
if (created.id) {
|
|
13648
|
-
return resolution("location_created", options.proxyMode, created.id, {
|
|
13649
|
-
...target,
|
|
13650
|
-
level: "city",
|
|
13651
|
-
city,
|
|
13652
|
-
proxyName: proxyName(target.country, target.state, city),
|
|
13653
|
-
config: {
|
|
13654
|
-
country: target.country,
|
|
13655
|
-
state: target.state,
|
|
13656
|
-
city
|
|
13657
|
-
}
|
|
13658
|
-
}, null);
|
|
13969
|
+
onForums(forums) {
|
|
13970
|
+
for (const f of forums) {
|
|
13971
|
+
process.stdout.write(JSON.stringify({ event: "forum", title: f.title, source: f.source, url: f.url }) + "\n");
|
|
13659
13972
|
}
|
|
13660
|
-
createErrors.push(`${city}: Kernel did not return a proxy id`);
|
|
13661
|
-
} catch (err) {
|
|
13662
|
-
createErrors.push(`${city}: ${errorText2(err)}`);
|
|
13663
13973
|
}
|
|
13664
|
-
|
|
13665
|
-
|
|
13666
|
-
|
|
13667
|
-
|
|
13668
|
-
|
|
13669
|
-
}
|
|
13670
|
-
try {
|
|
13671
|
-
const created = await kernel.proxies.create({
|
|
13672
|
-
type: "residential",
|
|
13673
|
-
name: fallbackTarget.proxyName,
|
|
13674
|
-
config: fallbackTarget.config
|
|
13675
|
-
});
|
|
13676
|
-
if (created.id) {
|
|
13677
|
-
return resolution("location_created", options.proxyMode, created.id, fallbackTarget, createErrors.join(" | "));
|
|
13974
|
+
onComplete(stats) {
|
|
13975
|
+
process.stdout.write(JSON.stringify({ event: "complete", ...stats }) + "\n");
|
|
13976
|
+
}
|
|
13977
|
+
onError(err) {
|
|
13978
|
+
process.stderr.write(JSON.stringify({ event: "error", type: err.constructor.name, message: err.message }) + "\n");
|
|
13678
13979
|
}
|
|
13679
|
-
createErrors.push(`${fallbackTarget.state}: Kernel did not return a proxy id`);
|
|
13680
|
-
} catch (err) {
|
|
13681
|
-
createErrors.push(`${fallbackTarget.state}: ${errorText2(err)}`);
|
|
13682
|
-
}
|
|
13683
|
-
return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, createErrors.join(" | "));
|
|
13684
|
-
} catch (err) {
|
|
13685
|
-
return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, errorText2(err));
|
|
13686
|
-
}
|
|
13687
|
-
}
|
|
13688
|
-
var import_sdk5, US_STATE_CODES, US_CITY_CENTER_ZIPS;
|
|
13689
|
-
var init_kernel_proxy_resolver = __esm({
|
|
13690
|
-
"src/kernel-proxy-resolver.ts"() {
|
|
13691
|
-
"use strict";
|
|
13692
|
-
import_sdk5 = __toESM(require("@onkernel/sdk"), 1);
|
|
13693
|
-
init_uule();
|
|
13694
|
-
US_STATE_CODES = {
|
|
13695
|
-
alabama: "AL",
|
|
13696
|
-
alaska: "AK",
|
|
13697
|
-
arizona: "AZ",
|
|
13698
|
-
arkansas: "AR",
|
|
13699
|
-
california: "CA",
|
|
13700
|
-
colorado: "CO",
|
|
13701
|
-
connecticut: "CT",
|
|
13702
|
-
delaware: "DE",
|
|
13703
|
-
florida: "FL",
|
|
13704
|
-
georgia: "GA",
|
|
13705
|
-
hawaii: "HI",
|
|
13706
|
-
idaho: "ID",
|
|
13707
|
-
illinois: "IL",
|
|
13708
|
-
indiana: "IN",
|
|
13709
|
-
iowa: "IA",
|
|
13710
|
-
kansas: "KS",
|
|
13711
|
-
kentucky: "KY",
|
|
13712
|
-
louisiana: "LA",
|
|
13713
|
-
maine: "ME",
|
|
13714
|
-
maryland: "MD",
|
|
13715
|
-
massachusetts: "MA",
|
|
13716
|
-
michigan: "MI",
|
|
13717
|
-
minnesota: "MN",
|
|
13718
|
-
mississippi: "MS",
|
|
13719
|
-
missouri: "MO",
|
|
13720
|
-
montana: "MT",
|
|
13721
|
-
nebraska: "NE",
|
|
13722
|
-
nevada: "NV",
|
|
13723
|
-
"new hampshire": "NH",
|
|
13724
|
-
"new jersey": "NJ",
|
|
13725
|
-
"new mexico": "NM",
|
|
13726
|
-
"new york": "NY",
|
|
13727
|
-
"north carolina": "NC",
|
|
13728
|
-
"north dakota": "ND",
|
|
13729
|
-
ohio: "OH",
|
|
13730
|
-
oklahoma: "OK",
|
|
13731
|
-
oregon: "OR",
|
|
13732
|
-
pennsylvania: "PA",
|
|
13733
|
-
"rhode island": "RI",
|
|
13734
|
-
"south carolina": "SC",
|
|
13735
|
-
"south dakota": "SD",
|
|
13736
|
-
tennessee: "TN",
|
|
13737
|
-
texas: "TX",
|
|
13738
|
-
utah: "UT",
|
|
13739
|
-
vermont: "VT",
|
|
13740
|
-
virginia: "VA",
|
|
13741
|
-
washington: "WA",
|
|
13742
|
-
"west virginia": "WV",
|
|
13743
|
-
wisconsin: "WI",
|
|
13744
|
-
wyoming: "WY"
|
|
13745
|
-
};
|
|
13746
|
-
US_CITY_CENTER_ZIPS = {
|
|
13747
|
-
"atlanta|GA": "30303",
|
|
13748
|
-
"austin|TX": "78701",
|
|
13749
|
-
"baltimore|MD": "21201",
|
|
13750
|
-
"boston|MA": "02108",
|
|
13751
|
-
"boulder|CO": "80302",
|
|
13752
|
-
"charlotte|NC": "28202",
|
|
13753
|
-
"chicago|IL": "60601",
|
|
13754
|
-
"colorado_springs|CO": "80903",
|
|
13755
|
-
"columbus|OH": "43215",
|
|
13756
|
-
"dallas|TX": "75201",
|
|
13757
|
-
"denver|CO": "80202",
|
|
13758
|
-
"detroit|MI": "48226",
|
|
13759
|
-
"fort_collins|CO": "80524",
|
|
13760
|
-
"fort_worth|TX": "76102",
|
|
13761
|
-
"houston|TX": "77002",
|
|
13762
|
-
"indianapolis|IN": "46204",
|
|
13763
|
-
"jacksonville|FL": "32202",
|
|
13764
|
-
"las_vegas|NV": "89101",
|
|
13765
|
-
"los_angeles|CA": "90012",
|
|
13766
|
-
"louisville|KY": "40202",
|
|
13767
|
-
"loveland|CO": "80537",
|
|
13768
|
-
"memphis|TN": "38103",
|
|
13769
|
-
"miami|FL": "33131",
|
|
13770
|
-
"minneapolis|MN": "55401",
|
|
13771
|
-
"nashville|TN": "37203",
|
|
13772
|
-
"new_york|NY": "10001",
|
|
13773
|
-
"orlando|FL": "32801",
|
|
13774
|
-
"philadelphia|PA": "19103",
|
|
13775
|
-
"phoenix|AZ": "85004",
|
|
13776
|
-
"portland|OR": "97205",
|
|
13777
|
-
"raleigh|NC": "27601",
|
|
13778
|
-
"richmond|VA": "23219",
|
|
13779
|
-
"sacramento|CA": "95814",
|
|
13780
|
-
"salt_lake_city|UT": "84101",
|
|
13781
|
-
"san_antonio|TX": "78205",
|
|
13782
|
-
"san_diego|CA": "92101",
|
|
13783
|
-
"san_francisco|CA": "94103",
|
|
13784
|
-
"san_jose|CA": "95113",
|
|
13785
|
-
"seattle|WA": "98101"
|
|
13786
13980
|
};
|
|
13787
13981
|
}
|
|
13788
13982
|
});
|
|
@@ -14879,9 +15073,12 @@ function reportTitle(full) {
|
|
|
14879
15073
|
const title = full.split("\n").find((line) => line.startsWith("# "));
|
|
14880
15074
|
return title?.replace(/^#\s+/, "").trim() || "MCP Scraper Report";
|
|
14881
15075
|
}
|
|
15076
|
+
function outputBaseDir() {
|
|
15077
|
+
return process.env.MCP_SCRAPER_OUTPUT_DIR?.trim() || (0, import_node_path6.join)((0, import_node_os3.homedir)(), "Downloads", "mcp-scraper");
|
|
15078
|
+
}
|
|
14882
15079
|
function saveFullReport(full) {
|
|
14883
15080
|
if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
14884
|
-
const outDir =
|
|
15081
|
+
const outDir = outputBaseDir();
|
|
14885
15082
|
try {
|
|
14886
15083
|
(0, import_node_fs4.mkdirSync)(outDir, { recursive: true });
|
|
14887
15084
|
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
@@ -14892,6 +15089,20 @@ function saveFullReport(full) {
|
|
|
14892
15089
|
return null;
|
|
14893
15090
|
}
|
|
14894
15091
|
}
|
|
15092
|
+
function persistScreenshotLocally(base64, url) {
|
|
15093
|
+
if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
15094
|
+
try {
|
|
15095
|
+
const dir = (0, import_node_path6.join)(outputBaseDir(), "screenshots");
|
|
15096
|
+
(0, import_node_fs4.mkdirSync)(dir, { recursive: true });
|
|
15097
|
+
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
15098
|
+
const slug = url.replace(/^https?:\/\//, "").replace(/[^a-z0-9]+/gi, "-").replace(/^-+|-+$/g, "").slice(0, 60);
|
|
15099
|
+
const filePath = (0, import_node_path6.join)(dir, `${stamp}-${slug}.png`);
|
|
15100
|
+
(0, import_node_fs4.writeFileSync)(filePath, Buffer.from(base64, "base64"));
|
|
15101
|
+
return filePath;
|
|
15102
|
+
} catch {
|
|
15103
|
+
return null;
|
|
15104
|
+
}
|
|
15105
|
+
}
|
|
14895
15106
|
function oneBlock(content) {
|
|
14896
15107
|
const filePath = saveFullReport(content);
|
|
14897
15108
|
const text = filePath ? `${content}
|
|
@@ -15112,6 +15323,7 @@ function formatExtractUrl(raw, input) {
|
|
|
15112
15323
|
const bodyMd = d.bodyMarkdown ?? "";
|
|
15113
15324
|
const schema = d.schema;
|
|
15114
15325
|
const screenshotMeta = d.screenshot;
|
|
15326
|
+
const screenshotPath = screenshotMeta?.base64 ? persistScreenshotLocally(screenshotMeta.base64, url) : null;
|
|
15115
15327
|
const branding = d.branding;
|
|
15116
15328
|
const media = d.media;
|
|
15117
15329
|
const h1Lines = headings.filter((h) => h.level === 1).map((h) => `- ${h.text}`).join("\n");
|
|
@@ -15138,7 +15350,7 @@ ${[h1Lines, h2Lines].filter(Boolean).join("\n")}` : "";
|
|
|
15138
15350
|
${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
|
|
15139
15351
|
const screenshotSection = screenshotMeta ? `
|
|
15140
15352
|
## Screenshot
|
|
15141
|
-
- **File:** ${
|
|
15353
|
+
- **File:** ${screenshotPath ?? "(returned inline only \u2014 disk write unavailable in this environment)"}
|
|
15142
15354
|
- **Size:** ${(screenshotMeta.sizeBytes / 1024).toFixed(1)} KB
|
|
15143
15355
|
- **Device:** ${screenshotMeta.device}` : "";
|
|
15144
15356
|
const brandingSection = branding ? [
|
|
@@ -15167,17 +15379,13 @@ ${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
|
|
|
15167
15379
|
**${title}**
|
|
15168
15380
|
${headingSection}${kpoSection}${brandingSection}${bodySection}${screenshotSection}${mediaSection}${tips}`;
|
|
15169
15381
|
const textResult = oneBlock(full);
|
|
15170
|
-
if (screenshotMeta?.
|
|
15171
|
-
|
|
15172
|
-
|
|
15173
|
-
|
|
15174
|
-
|
|
15175
|
-
|
|
15176
|
-
|
|
15177
|
-
]
|
|
15178
|
-
};
|
|
15179
|
-
} catch {
|
|
15180
|
-
}
|
|
15382
|
+
if (screenshotMeta?.base64) {
|
|
15383
|
+
return {
|
|
15384
|
+
content: [
|
|
15385
|
+
...textResult.content,
|
|
15386
|
+
{ type: "image", data: screenshotMeta.base64, mimeType: "image/png" }
|
|
15387
|
+
]
|
|
15388
|
+
};
|
|
15181
15389
|
}
|
|
15182
15390
|
return textResult;
|
|
15183
15391
|
}
|
|
@@ -15377,7 +15585,7 @@ function formatFacebookAdSearch(raw, input) {
|
|
|
15377
15585
|
const d = parsed.data;
|
|
15378
15586
|
const advertisers = d.results ?? d.advertisers ?? [];
|
|
15379
15587
|
const rows = advertisers.map(
|
|
15380
|
-
(a, i) => `| ${i + 1} | ${cell(a.name)} | ${a.adCount ?? "\u2014"} | \`${a.libraryId ?? "\u2014"}\` |`
|
|
15588
|
+
(a, i) => `| ${i + 1} | ${cell(a.pageName ?? a.name)} | ${a.adCount ?? "\u2014"} | \`${a.sampleLibraryId ?? a.libraryId ?? "\u2014"}\` |`
|
|
15381
15589
|
).join("\n");
|
|
15382
15590
|
const full = [
|
|
15383
15591
|
`# Facebook Ad Library Search: "${input.query}"`,
|
|
@@ -15679,16 +15887,20 @@ var HttpMcpToolExecutor;
|
|
|
15679
15887
|
var init_http_mcp_tool_executor = __esm({
|
|
15680
15888
|
"src/mcp/http-mcp-tool-executor.ts"() {
|
|
15681
15889
|
"use strict";
|
|
15890
|
+
init_harvest_timeout();
|
|
15682
15891
|
HttpMcpToolExecutor = class {
|
|
15683
15892
|
baseUrl;
|
|
15684
15893
|
apiKey;
|
|
15685
15894
|
timeoutMs;
|
|
15895
|
+
httpTimeoutOverrideMs;
|
|
15686
15896
|
serpIntelligenceTimeoutMs;
|
|
15687
15897
|
constructor(baseUrl, apiKey) {
|
|
15688
15898
|
this.baseUrl = baseUrl.replace(/\/$/, "");
|
|
15689
15899
|
this.apiKey = apiKey;
|
|
15690
|
-
const
|
|
15691
|
-
|
|
15900
|
+
const rawOverride = process.env.MCP_SCRAPER_HTTP_TIMEOUT_MS;
|
|
15901
|
+
const parsedOverride = rawOverride === void 0 ? NaN : Number(rawOverride);
|
|
15902
|
+
this.httpTimeoutOverrideMs = Number.isFinite(parsedOverride) && parsedOverride > 0 ? parsedOverride : null;
|
|
15903
|
+
this.timeoutMs = this.httpTimeoutOverrideMs ?? 11e4;
|
|
15692
15904
|
const configuredSerpIntelligenceTimeoutMs = Number(process.env.MCP_SCRAPER_SERP_INTELLIGENCE_HTTP_TIMEOUT_MS ?? this.timeoutMs);
|
|
15693
15905
|
this.serpIntelligenceTimeoutMs = Number.isFinite(configuredSerpIntelligenceTimeoutMs) && configuredSerpIntelligenceTimeoutMs > 0 ? configuredSerpIntelligenceTimeoutMs : this.timeoutMs;
|
|
15694
15906
|
}
|
|
@@ -15730,10 +15942,12 @@ var init_http_mcp_tool_executor = __esm({
|
|
|
15730
15942
|
}
|
|
15731
15943
|
}
|
|
15732
15944
|
harvestPaa(input) {
|
|
15733
|
-
|
|
15945
|
+
const timeoutMs = this.httpTimeoutOverrideMs ?? harvestTimeoutBudget(input.maxQuestions ?? 30).clientMs;
|
|
15946
|
+
return this.call("/harvest/sync", input, timeoutMs);
|
|
15734
15947
|
}
|
|
15735
15948
|
searchSerp(input) {
|
|
15736
|
-
|
|
15949
|
+
const timeoutMs = this.httpTimeoutOverrideMs ?? harvestTimeoutBudget(0, true).clientMs;
|
|
15950
|
+
return this.call("/harvest/sync", { ...input, serpOnly: true }, timeoutMs);
|
|
15737
15951
|
}
|
|
15738
15952
|
extractUrl(input) {
|
|
15739
15953
|
return this.call("/extract-url", input);
|
|
@@ -16380,18 +16594,16 @@ async function checkHarvestLimits(userId, email, extraSlots = 0) {
|
|
|
16380
16594
|
if (active >= limit) return { error: `You have ${active} job${active !== 1 ? "s" : ""} running. Your account allows ${limit} concurrent job${limit !== 1 ? "s" : ""}. Wait for one to finish or add a concurrency slot at mcpscraper.dev/billing.` };
|
|
16381
16595
|
return null;
|
|
16382
16596
|
}
|
|
16383
|
-
var import_resend,
|
|
16597
|
+
var import_resend, import_hono9, import_hono10, import_factory6, import_cookie, import_stripe2, secureCookies, isProduction2, sessionCookieOptions, requireAllowedOrigin, auth, adminAuth, sessionAuth, app, STRIPE_API_VERSION, BYPASS_EMAILS, SYNC_HARVEST_TIMEOUT_OVERRIDE_MS;
|
|
16384
16598
|
var init_server = __esm({
|
|
16385
16599
|
"src/api/server.ts"() {
|
|
16386
16600
|
"use strict";
|
|
16601
|
+
init_harvest_timeout();
|
|
16387
16602
|
init_registry();
|
|
16388
16603
|
init_template();
|
|
16389
16604
|
init_og();
|
|
16390
16605
|
import_resend = require("resend");
|
|
16391
16606
|
init_url_utils();
|
|
16392
|
-
import_node_fs5 = require("fs");
|
|
16393
|
-
import_node_os4 = require("os");
|
|
16394
|
-
import_node_path7 = require("path");
|
|
16395
16607
|
init_kpo_extractor();
|
|
16396
16608
|
init_screenshot();
|
|
16397
16609
|
init_media_extractor();
|
|
@@ -16621,7 +16833,11 @@ var init_server = __esm({
|
|
|
16621
16833
|
BYPASS_EMAILS = new Set(
|
|
16622
16834
|
(process.env.HARVEST_LIMIT_BYPASS_EMAILS ?? "").split(",").map((e) => e.trim()).filter(Boolean)
|
|
16623
16835
|
);
|
|
16624
|
-
|
|
16836
|
+
SYNC_HARVEST_TIMEOUT_OVERRIDE_MS = (() => {
|
|
16837
|
+
const raw = process.env.SYNC_HARVEST_TIMEOUT_MS;
|
|
16838
|
+
const parsed = raw === void 0 ? NaN : Number(raw);
|
|
16839
|
+
return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
|
|
16840
|
+
})();
|
|
16625
16841
|
app.post("/harvest", auth, async (c) => {
|
|
16626
16842
|
const user = c.get("user");
|
|
16627
16843
|
const raw = await c.req.json().catch(() => ({}));
|
|
@@ -16691,9 +16907,10 @@ var init_server = __esm({
|
|
|
16691
16907
|
if (!syncOk) return c.json(insufficientBalanceResponse(syncBal, syncCost), 402);
|
|
16692
16908
|
const jobId = await createRunningJob(user.id, options.query, options);
|
|
16693
16909
|
const recordAttempt = createHarvestAttemptRecorder(jobId, user.id);
|
|
16910
|
+
const syncTimeoutMs = SYNC_HARVEST_TIMEOUT_OVERRIDE_MS ?? harvestTimeoutBudget(options.maxQuestions, options.serpOnly).serverMs;
|
|
16694
16911
|
const syncSignal = combineAbortSignals([
|
|
16695
16912
|
c.req.raw.signal,
|
|
16696
|
-
AbortSignal.timeout(
|
|
16913
|
+
AbortSignal.timeout(syncTimeoutMs)
|
|
16697
16914
|
]);
|
|
16698
16915
|
try {
|
|
16699
16916
|
const result = await harvest({
|
|
@@ -16841,13 +17058,7 @@ var init_server = __esm({
|
|
|
16841
17058
|
const brandingData = pageData?.branding ?? null;
|
|
16842
17059
|
let screenshotMeta = null;
|
|
16843
17060
|
if (screenshotBuf) {
|
|
16844
|
-
|
|
16845
|
-
(0, import_node_fs5.mkdirSync)(outDir, { recursive: true });
|
|
16846
|
-
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
16847
|
-
const slug = canonicalUrl.replace(/^https?:\/\//, "").replace(/[^a-z0-9]+/gi, "-").replace(/^-+|-+$/g, "").slice(0, 60);
|
|
16848
|
-
const filePath = (0, import_node_path7.join)(outDir, `${stamp}-${slug}.png`);
|
|
16849
|
-
(0, import_node_fs5.writeFileSync)(filePath, screenshotBuf);
|
|
16850
|
-
screenshotMeta = { savedPath: filePath, sizeBytes: screenshotBuf.length, device };
|
|
17061
|
+
screenshotMeta = { base64: screenshotBuf.toString("base64"), sizeBytes: screenshotBuf.length, device };
|
|
16851
17062
|
}
|
|
16852
17063
|
const mediaMeta = downloadMedia ? await harvestPageMedia(result.bodyHtml, canonicalUrl, { types: mediaTypes ?? ["image", "video", "audio"] }) : null;
|
|
16853
17064
|
await logRequestEvent({ userId: user.id, source: "extract_url", status: "done", query: canonicalUrl, resultCount: result.headings.length, result });
|
|
@@ -17185,10 +17396,10 @@ var init_server = __esm({
|
|
|
17185
17396
|
});
|
|
17186
17397
|
|
|
17187
17398
|
// bin/api-server.ts
|
|
17188
|
-
var
|
|
17399
|
+
var import_node_fs5 = require("fs");
|
|
17189
17400
|
function loadDotEnv() {
|
|
17190
17401
|
try {
|
|
17191
|
-
for (const line of (0,
|
|
17402
|
+
for (const line of (0, import_node_fs5.readFileSync)(".env", "utf8").split("\n")) {
|
|
17192
17403
|
const eq = line.indexOf("=");
|
|
17193
17404
|
if (eq < 1 || line.trimStart().startsWith("#")) continue;
|
|
17194
17405
|
const k = line.slice(0, eq).trim();
|