mcp-scraper 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10700,6 +10700,587 @@ var init_FacebookAdExtractor = __esm({
10700
10700
  }
10701
10701
  });
10702
10702
 
10703
+ // src/extractor/FacebookAdGraphql.ts
10704
/**
 * Parse the text of a GraphQL HTTP response into a list of JSON payloads.
 *
 * The body may be a single JSON document or several documents separated by
 * newlines, and may be prefixed with the `for (;;);` guard. The guard is
 * stripped both from the whole body and from each individual line, and
 * surrounding whitespace is ignored before looking for it.
 *
 * @param {string} text - Raw response body.
 * @returns {unknown[]} Parsed payloads in document order; lines that are not
 *   valid JSON are skipped. Returns an empty list for empty or non-string input.
 */
function parseFbGraphqlJson(text) {
  if (typeof text !== "string") return [];
  const guard = /^for\s*\(;;\);/;
  // Trim first so leading whitespace (or a BOM) cannot hide the guard prefix.
  const body = text.trim().replace(guard, "").trim();
  if (!body) return [];
  try {
    return [JSON.parse(body)];
  } catch {
    // Not a single JSON document: fall through to line-by-line parsing.
  }
  const out = [];
  for (const line of body.split("\n")) {
    const candidate = line.trim().replace(guard, "").trim();
    if (!candidate) continue;
    try {
      out.push(JSON.parse(candidate));
    } catch {
      // Best effort: a malformed line must not discard the other documents.
    }
  }
  return out;
}
10723
/**
 * Flatten the collated ad results out of one parsed GraphQL payload.
 *
 * Reads `data.ad_library_main.search_results_connection.edges[*].node.collated_results[*]`
 * and normalises each entry that carries an `ad_archive_id`.
 *
 * @param {unknown} payload - One parsed payload.
 * @returns {Array<{ad_archive_id: string, page_id: string, page_name: string,
 *   is_active: boolean, collation_count: number|null, snapshot: object|null}>}
 *   Normalised results; empty when the payload does not have the expected shape.
 */
function extractCollatedResults(payload) {
  const edges = payload?.data?.ad_library_main?.search_results_connection?.edges;
  if (!Array.isArray(edges)) return [];
  const results = [];
  for (const edge of edges) {
    const collated = edge?.node?.collated_results;
    if (!Array.isArray(collated)) continue;
    for (const r of collated) {
      // A null/primitive entry must not abort extraction of the other ads.
      const id = r?.ad_archive_id;
      if (id === undefined || id === null) continue;
      const snapshot = r.snapshot ?? null;
      results.push({
        ad_archive_id: String(id),
        page_id: r.page_id != null ? String(r.page_id) : "",
        page_name: r.page_name ?? snapshot?.page_name ?? "",
        is_active: Boolean(r.is_active),
        collation_count: typeof r.collation_count === "number" ? r.collation_count : null,
        snapshot,
      });
    }
  }
  return results;
}
10746
/**
 * Navigate `page` to `url` and collect ad results from the GraphQL responses
 * whose `fb_api_req_friendly_name` equals `AD_LIBRARY_QUERY`, scrolling to the
 * bottom of the document every two seconds to trigger more requests.
 *
 * Collection stops when `maxResults` unique ads were seen, when `captureMs`
 * elapsed, or when the count stayed unchanged for two consecutive rounds while
 * at least one result is present. Response bodies that are still being read
 * when the loop ends are awaited for at most `drainMs` so late results are
 * not dropped.
 *
 * @param {object} page - Browser page exposing `on`, `off`, `goto`, `evaluate`
 *   and `waitForTimeout` (presumably a Playwright Page — confirm against caller).
 * @param {string} url - Listing URL to open.
 * @param {number} maxResults - Upper bound on the number of returned results.
 * @param {{captureMs?: number, drainMs?: number}} [opts] - `captureMs` bounds
 *   the scroll loop (default 30000); `drainMs` bounds the wait for in-flight
 *   response bodies (default 1000).
 * @returns {Promise<object[]>} Unique results in arrival order, at most `maxResults`.
 * @throws Whatever `page.goto` rejects with; the listener is removed either way.
 */
async function collectAdLibraryResults(page, url, maxResults, opts = {}) {
  const captureMs = opts.captureMs ?? 3e4;
  const drainMs = opts.drainMs ?? 1e3;
  const collected = [];
  const seen = /* @__PURE__ */ new Set();
  const pending = /* @__PURE__ */ new Set();
  const handler = (resp) => {
    if (!resp.url().includes("/api/graphql")) return;
    const friendlyName = (resp.request().postData() ?? "").match(/fb_api_req_friendly_name=([^&]+)/)?.[1];
    if (friendlyName !== AD_LIBRARY_QUERY) return;
    // Best effort: an unreadable or malformed body is skipped, not fatal.
    const read = resp.text().then((text) => {
      for (const payload of parseFbGraphqlJson(text)) {
        for (const result of extractCollatedResults(payload)) {
          if (seen.has(result.ad_archive_id)) continue;
          seen.add(result.ad_archive_id);
          collected.push(result);
        }
      }
    }).catch(() => void 0).finally(() => pending.delete(read));
    pending.add(read);
  };
  page.on("response", handler);
  try {
    await page.goto(url, { waitUntil: "domcontentloaded", timeout: 45e3 });
    const deadline = Date.now() + captureMs;
    let lastCount = -1;
    let stableRounds = 0;
    while (Date.now() < deadline && collected.length < maxResults) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)).catch(() => void 0);
      await page.waitForTimeout(2e3);
      if (collected.length === lastCount) {
        stableRounds++;
        if (stableRounds >= 2 && collected.length > 0) break;
      } else {
        stableRounds = 0;
      }
      lastCount = collected.length;
    }
    if (pending.size > 0 && collected.length < maxResults) {
      // Give bodies that are already downloading a bounded chance to finish.
      let timer;
      const timeout = new Promise((resolve) => {
        timer = setTimeout(resolve, drainMs);
      });
      await Promise.race([Promise.allSettled([...pending]), timeout]);
      clearTimeout(timer);
    }
  } finally {
    page.off("response", handler);
  }
  return collected.slice(0, maxResults);
}
10786
/**
 * Group collated ad results by advertiser page and rank the advertisers.
 *
 * Results without a `page_id` or `page_name` are ignored. For every page the
 * ad count is the larger of the highest positive `collation_count` seen and
 * the number of results for that page; the first result's `ad_archive_id` is
 * kept as the sample library id.
 *
 * @param {object[]} results - Normalised results.
 * @param {number} maxResults - Maximum number of advertisers to return.
 * @returns {Array<{pageName: string, pageId: string, sampleLibraryId: string, adCount: number}>}
 *   Advertisers sorted by descending `adCount`.
 */
function advertisersFromResults(results, maxResults) {
  const grouped = /* @__PURE__ */ new Map();
  for (const row of results) {
    if (!row.page_id || !row.page_name) continue;
    const count = row.collation_count;
    const collation = typeof count === "number" && count > 0 ? count : 0;
    const entry = grouped.get(row.page_id);
    if (!entry) {
      grouped.set(row.page_id, {
        pageName: row.page_name,
        pageId: row.page_id,
        sampleLibraryId: row.ad_archive_id,
        maxCollation: collation,
        resultCount: 1,
      });
      continue;
    }
    entry.resultCount += 1;
    entry.maxCollation = Math.max(entry.maxCollation, collation);
  }
  const advertisers = [];
  for (const entry of grouped.values()) {
    advertisers.push({
      pageName: entry.pageName,
      pageId: entry.pageId,
      sampleLibraryId: entry.sampleLibraryId,
      adCount: Math.max(entry.maxCollation, entry.resultCount),
    });
  }
  advertisers.sort((left, right) => right.adCount - left.adCount);
  return advertisers.slice(0, maxResults);
}
10801
// Friendly name of the GraphQL request whose responses are captured; it is
// assigned when init_FacebookAdGraphql() runs.
var AD_LIBRARY_QUERY;

// Bundler-generated initializer for src/extractor/FacebookAdGraphql.ts.
var init_FacebookAdGraphql = __esm({
  "src/extractor/FacebookAdGraphql.ts"() {
    "use strict";
    AD_LIBRARY_QUERY = "AdLibrarySearchPaginationQuery";
  },
});
10808
+
10809
+ // src/locations.ts
10810
// Lower-cased location alias -> canonical "City,Region,Country" string.
// Populated when init_locations() runs.
var LOCATIONS;

// Bundler-generated initializer for src/locations.ts.
var init_locations = __esm({
  "src/locations.ts"() {
    "use strict";
    LOCATIONS = {
      // United States
      austin: "Austin,Texas,United States",
      "new york": "New York,New York,United States",
      "new york city": "New York,New York,United States",
      nyc: "New York,New York,United States",
      "los angeles": "Los Angeles,California,United States",
      la: "Los Angeles,California,United States",
      chicago: "Chicago,Illinois,United States",
      houston: "Houston,Texas,United States",
      phoenix: "Phoenix,Arizona,United States",
      philadelphia: "Philadelphia,Pennsylvania,United States",
      philly: "Philadelphia,Pennsylvania,United States",
      "san antonio": "San Antonio,Texas,United States",
      dallas: "Dallas,Texas,United States",
      miami: "Miami,Florida,United States",
      seattle: "Seattle,Washington,United States",
      denver: "Denver,Colorado,United States",
      loveland: "Loveland,Colorado,United States",
      "loveland co": "Loveland,Colorado,United States",
      "fort collins": "Fort Collins,Colorado,United States",
      boulder: "Boulder,Colorado,United States",
      "colorado springs": "Colorado Springs,Colorado,United States",
      boston: "Boston,Massachusetts,United States",
      atlanta: "Atlanta,Georgia,United States",
      "san francisco": "San Francisco,California,United States",
      sf: "San Francisco,California,United States",
      portland: "Portland,Oregon,United States",
      "las vegas": "Las Vegas,Nevada,United States",
      minneapolis: "Minneapolis,Minnesota,United States",
      detroit: "Detroit,Michigan,United States",
      nashville: "Nashville,Tennessee,United States",
      charlotte: "Charlotte,North Carolina,United States",
      orlando: "Orlando,Florida,United States",
      "san diego": "San Diego,California,United States",
      baltimore: "Baltimore,Maryland,United States",
      sacramento: "Sacramento,California,United States",
      columbus: "Columbus,Ohio,United States",
      indianapolis: "Indianapolis,Indiana,United States",
      "san jose": "San Jose,California,United States",
      "fort worth": "Fort Worth,Texas,United States",
      jacksonville: "Jacksonville,Florida,United States",
      memphis: "Memphis,Tennessee,United States",
      louisville: "Louisville,Kentucky,United States",
      raleigh: "Raleigh,North Carolina,United States",
      richmond: "Richmond,Virginia,United States",
      "salt lake city": "Salt Lake City,Utah,United States",
      // Canada
      toronto: "Toronto,Ontario,Canada",
      vancouver: "Vancouver,British Columbia,Canada",
      montreal: "Montreal,Quebec,Canada",
      calgary: "Calgary,Alberta,Canada",
      ottawa: "Ottawa,Ontario,Canada",
      // United Kingdom
      london: "London,England,United Kingdom",
      manchester: "Manchester,England,United Kingdom",
      birmingham: "Birmingham,England,United Kingdom",
      edinburgh: "Edinburgh,Scotland,United Kingdom",
      glasgow: "Glasgow,Scotland,United Kingdom",
      leeds: "Leeds,England,United Kingdom",
      // Australia
      sydney: "Sydney,New South Wales,Australia",
      melbourne: "Melbourne,Victoria,Australia",
      brisbane: "Brisbane,Queensland,Australia",
      perth: "Perth,Western Australia,Australia",
      adelaide: "Adelaide,South Australia,Australia",
      // Ireland
      dublin: "Dublin,Leinster,Ireland",
    };
  },
});
10880
+
10881
+ // src/uule.ts
10882
/**
 * Encode a number as a little-endian base-128 varint: seven payload bits per
 * byte, with the high bit set on every byte except the last.
 *
 * The arithmetic uses 32-bit bitwise operators, so only the low 32 bits of
 * `value` are encoded.
 *
 * @param {number} value - Number to encode.
 * @returns {number[]} Encoded bytes, least-significant group first.
 */
function encodeVarint(value) {
  const encoded = [];
  let rest = value;
  for (;;) {
    const low = rest & 127;
    rest >>>= 7;
    if (rest === 0) {
      encoded.push(low);
      return encoded;
    }
    // More groups follow: flag this byte as a continuation byte.
    encoded.push(low | 128);
  }
}
10893
/**
 * Build the `w+<base64>` token for a location name.
 *
 * The encoded payload is the fixed header bytes `08 02 10 20 22`, the varint
 * length of the UTF-8 name, then the name bytes.
 * NOTE(review): presumably the value of a search-engine `uule` parameter —
 * confirm against the caller.
 *
 * @param {string} name - Canonical location name.
 * @returns {string} Token starting with `w+`.
 */
function encodeUule(name) {
  const nameBytes = Buffer.from(name, "utf8");
  const header = Buffer.from([8, 2, 16, 32, 34]);
  const lengthPrefix = Buffer.from(encodeVarint(nameBytes.length));
  const encoded = Buffer.concat([header, lengthPrefix, nameBytes]).toString("base64");
  return `w+${encoded}`;
}
10902
/**
 * Map a free-form location to its canonical `City,Region,Country` string.
 *
 * Tries, in order: the whole lower-cased input, the part before the first
 * comma, and the input with a trailing two-letter token removed
 * (e.g. "austin tx"). Only own keys of `LOCATIONS` are considered, so inputs
 * such as "constructor" or "__proto__" cannot resolve to inherited members.
 *
 * @param {string} input - Location as typed by the user.
 * @returns {string} Canonical location, or `input` unchanged when unknown.
 */
function normalizeLocation(input) {
  const lookup = (key) => (Object.prototype.hasOwnProperty.call(LOCATIONS, key) ? LOCATIONS[key] : void 0);
  const raw = input.toLowerCase().trim();
  const candidates = [
    raw,
    raw.split(",")[0].trim(),
    raw.replace(/\s+[a-z]{2}$/, "").trim(),
  ];
  for (const candidate of candidates) {
    const hit = lookup(candidate);
    if (hit) return hit;
  }
  return input;
}
10911
// Bundler-generated initializer for src/uule.ts; it only needs the location
// table to be initialized.
var init_uule = __esm({
  "src/uule.ts"() {
    "use strict";
    init_locations();
  },
});
10917
+
10918
+ // src/kernel-proxy-resolver.ts
10919
/**
 * Return the last six characters of a proxy id, so the id can be reported
 * without being shown in full.
 *
 * @param {string|undefined|null} proxyId - Proxy id, if any.
 * @returns {string|null} The suffix, or `null` when no id was given.
 */
function proxyIdSuffix2(proxyId) {
  if (!proxyId) return null;
  return proxyId.slice(-6);
}
10922
/**
 * Build the result object returned by the proxy resolver.
 *
 * @param {string} source - How the proxy id was obtained.
 * @param {string} proxyMode - Requested proxy mode.
 * @param {string|undefined} proxyId - Resolved proxy id, if any.
 * @param {object|null} target - Location target that was resolved, if any.
 * @param {string|null} error - Error summary, if any.
 * @returns {{kernelProxyId: string|undefined, resolution: object}} The proxy id
 *   plus a diagnostics record that carries only the id's suffix.
 */
function resolution(source, proxyMode, proxyId, target, error) {
  const details = {
    source,
    proxyMode,
    proxyIdPresent: Boolean(proxyId),
    proxyIdSuffix: proxyIdSuffix2(proxyId),
    target,
    error,
  };
  return { kernelProxyId: proxyId, resolution: details };
}
10935
/**
 * Normalise a state name for table lookup: trim, lower-case and collapse
 * every run of whitespace to a single space.
 *
 * @param {string} value - Raw state name.
 * @returns {string} Normalised name.
 */
function normalizeStateName(value) {
  const lowered = value.trim().toLowerCase();
  return lowered.replace(/\s+/g, " ");
}
10938
/**
 * Normalise a country name for comparison: trim, lower-case, drop every dot
 * and collapse whitespace runs to a single space.
 *
 * @param {string} value - Raw country name.
 * @returns {string} Normalised name.
 */
function normalizeCountryName(value) {
  const lowered = value.trim().toLowerCase();
  const withoutDots = lowered.split(".").join("");
  return withoutDots.replace(/\s+/g, " ");
}
10941
/**
 * Tell whether a country label refers to the United States.
 *
 * @param {string|undefined|null} country - Country label; a missing or empty
 *   label is treated as the United States.
 * @returns {boolean} `true` for an empty label or one of the accepted spellings.
 */
function isUnitedStates(country) {
  if (!country) return true;
  const accepted = ["united states", "united states of america", "usa", "us"];
  return accepted.includes(normalizeCountryName(country));
}
10946
/**
 * Resolve a region label to a two-letter state code.
 *
 * Any two-letter label is accepted as-is (upper-cased) without being checked
 * against the table. Longer labels are looked up by normalised name among the
 * own keys of `US_STATE_CODES`, so labels like "constructor" cannot resolve to
 * inherited object members.
 *
 * @param {string} region - Region label, e.g. "TX" or "New York".
 * @returns {string|null} Upper-case code, or `null` when the name is unknown.
 */
function stateCodeFor(region) {
  const trimmed = region.trim();
  if (/^[A-Za-z]{2}$/.test(trimmed)) return trimmed.toUpperCase();
  const key = normalizeStateName(trimmed);
  if (!Object.prototype.hasOwnProperty.call(US_STATE_CODES, key)) return null;
  return US_STATE_CODES[key] ?? null;
}
10951
/**
 * Derive the candidate city identifiers tried for a city name.
 *
 * The name is decomposed (NFKD), stripped of non-ASCII characters,
 * lower-cased and split on anything that is not `a-z0-9`. Two spellings are
 * produced: words joined by `_` and words joined without a separator.
 *
 * @param {string} city - City name.
 * @returns {string[]} Distinct, non-empty candidates, underscored form first.
 */
function kernelCityIdentifierCandidates(city) {
  const folded = city.normalize("NFKD").replace(/[^\x00-\x7F]/g, "").toLowerCase();
  const tokens = folded.split(/[^a-z0-9]+/).filter((token) => token.length > 0);
  const candidates = [];
  for (const candidate of [tokens.join("_"), tokens.join("")]) {
    if (candidate && !candidates.includes(candidate)) candidates.push(candidate);
  }
  return candidates;
}
10958
/**
 * Build the name used for a location-specific residential proxy.
 *
 * @param {string} country - Country code (lower-cased in the name).
 * @param {string} state - State code (lower-cased in the name).
 * @param {string} [city] - City identifier, appended verbatim when truthy.
 * @returns {string} `mcp-serp-residential-<country>-<state>[-<city>]`.
 */
function proxyName(country, state, city) {
  const stateLevelName = `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}`;
  if (!city) return stateLevelName;
  return `${stateLevelName}-${city}`;
}
10961
/**
 * Build the name used for a ZIP-code-specific residential proxy.
 *
 * @param {string} zip - ZIP code.
 * @returns {string} `mcp-serp-residential-us-zip-<zip>`.
 */
function zipProxyName(zip) {
  const prefix = "mcp-serp-residential-us-zip-";
  return `${prefix}${zip}`;
}
10964
/**
 * Turn a free-form location into a US proxy target.
 *
 * The location is canonicalised with `normalizeLocation`, split on commas,
 * and a trailing United States component is dropped. A single remaining
 * part is treated as a state; otherwise the first two parts are read as
 * city and region.
 *
 * @param {string|undefined|null} location - Location as supplied by the caller.
 * @param {string|undefined|null} gl - Country code of the request; only "us"
 *   (case-insensitive) yields a target.
 * @returns {object|null} A `state`- or `city`-level target carrying
 *   `canonicalLocation`, `country`, `state`, `city`, `cityCandidates`,
 *   `proxyName` and `config`; `null` when the inputs are missing, not strings,
 *   not US, or cannot be resolved to a state.
 */
function parseKernelLocationProxyTarget(location, gl) {
  // Missing or non-string inputs are "no target" rather than a TypeError.
  if (typeof location !== "string" || !location) return null;
  if (typeof gl !== "string" || gl.toLowerCase() !== "us") return null;
  const canonicalLocation = normalizeLocation(location);
  if (typeof canonicalLocation !== "string") return null;
  let parts = canonicalLocation.split(",").map((part) => part.trim()).filter(Boolean);
  if (parts.length > 1 && isUnitedStates(parts[parts.length - 1])) {
    parts = parts.slice(0, -1);
  }
  if (parts.length === 1) {
    const stateOnly = stateCodeFor(parts[0]);
    if (typeof stateOnly !== "string" || !stateOnly) return null;
    return {
      canonicalLocation,
      level: "state",
      country: "US",
      state: stateOnly,
      city: "",
      cityCandidates: [],
      proxyName: proxyName("US", stateOnly),
      config: {
        country: "US",
        state: stateOnly,
      },
    };
  }
  const [city = "", region = ""] = parts;
  if (!city || !region) return null;
  const state = stateCodeFor(region);
  if (typeof state !== "string" || !state) return null;
  const cityCandidates = kernelCityIdentifierCandidates(city);
  const primaryCity = cityCandidates[0];
  if (!primaryCity) return null;
  return {
    canonicalLocation,
    level: "city",
    country: "US",
    state,
    city: primaryCity,
    cityCandidates,
    proxyName: proxyName("US", state, primaryCity),
    config: {
      country: "US",
      state,
      city: primaryCity,
    },
  };
}
11010
/**
 * Build the lookup key for the city-centre ZIP table.
 *
 * @param {{city: string, state: string}} target - Proxy target.
 * @returns {string} `<city>|<state>`.
 */
function cityZipKey(target) {
  const { city, state } = target;
  return `${city}|${state}`;
}
11013
/**
 * Pick the ZIP code to use for a target.
 *
 * @param {{city: string, state: string}} target - Proxy target.
 * @param {string|undefined} explicitZip - Caller-supplied ZIP; it wins when it
 *   consists of exactly five digits.
 * @returns {string|null} The explicit ZIP, the ZIP registered for the target's
 *   city in `US_CITY_CENTER_ZIPS`, or `null`.
 */
function knownZipFor(target, explicitZip) {
  const hasValidExplicitZip = Boolean(explicitZip) && /^\d{5}$/.test(explicitZip);
  if (hasValidExplicitZip) return explicitZip;
  const registeredZip = US_CITY_CENTER_ZIPS[cityZipKey(target)];
  return registeredZip ?? null;
}
11017
/**
 * Derive a ZIP-level target from an existing target.
 *
 * @param {object} target - Source target; all of its fields are copied.
 * @param {string} zip - ZIP code to target.
 * @returns {object} Copy of `target` with `level: "zip"`, the `zip`, the ZIP
 *   proxy name and a `{country, state, zip}` config.
 */
function zipTarget(target, zip) {
  const { country, state } = target;
  const overrides = {
    level: "zip",
    zip,
    proxyName: zipProxyName(zip),
    config: { country, state, zip },
  };
  return Object.assign({}, target, overrides);
}
11030
/**
 * Tell whether an existing proxy configuration serves the given target.
 *
 * ZIP-level targets match on country and ZIP. Other targets match on country
 * and state, plus the city: when `city` is given the configured city must be
 * equal to it, otherwise the configuration must not name a city at all.
 *
 * @param {object|undefined|null} config - Configuration of an existing proxy.
 * @param {object} target - Target to match against.
 * @param {string} [city] - City identifier to require.
 * @returns {boolean} `true` when the configuration matches.
 */
function configMatches(config, target, city) {
  const sameCountry = config?.country?.toUpperCase() === target.country;
  if (target.level === "zip") {
    return sameCountry && config?.zip === target.zip;
  }
  if (!sameCountry) return false;
  if (config?.state?.toUpperCase() !== target.state) return false;
  if (city) return config?.city === city;
  return !config?.city;
}
11036
/**
 * Find a usable residential proxy that serves exactly the given target.
 *
 * A proxy is usable when its type is "residential", its status is not
 * "unavailable" and it has an id. It serves the target when its name equals
 * the target's proxy name or its configuration matches the target.
 *
 * @param {object[]} proxies - Existing proxies.
 * @param {object} target - Target to look for.
 * @returns {object|null} The first matching proxy, or `null`.
 */
function findExistingTargetProxy(proxies, target) {
  const requiredCity = target.level === "city" ? target.city : void 0;
  const servesTarget = (proxy) => {
    if (proxy.type !== "residential") return false;
    if (proxy.status === "unavailable") return false;
    if (!proxy.id) return false;
    return proxy.name === target.proxyName || configMatches(proxy.config, target, requiredCity);
  };
  return proxies.find(servesTarget) ?? null;
}
11039
/**
 * Find a usable residential proxy for any of the target's city candidates.
 *
 * Candidates are tried in order; for each one a proxy matches when its name
 * equals the candidate's proxy name or its configuration matches the target
 * with that city. Only proxies of type "residential" that are not
 * "unavailable" and have an id are considered.
 *
 * @param {object[]} proxies - Existing proxies.
 * @param {object} target - City-level target with `cityCandidates`.
 * @returns {object|null} The first match, or `null` (always `null` when the
 *   target has no city candidates).
 */
function findExistingProxy(proxies, target) {
  const isUsable = (proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id);
  for (const candidateCity of target.cityCandidates) {
    const candidateName = proxyName(target.country, target.state, candidateCity);
    const match = proxies.find(
      (proxy) => isUsable(proxy) && (proxy.name === candidateName || configMatches(proxy.config, target, candidateCity))
    );
    if (match) return match;
  }
  return null;
}
11047
/**
 * Derive a state-level target from an existing target.
 *
 * All fields of `target` are copied (including `city` and `cityCandidates`);
 * only `level`, `proxyName` and `config` are replaced.
 *
 * @param {object} target - Source target.
 * @returns {object} Copy with `level: "state"`, the state proxy name and a
 *   `{country, state}` config.
 */
function stateTarget(target) {
  const { country, state } = target;
  const overrides = {
    level: "state",
    proxyName: proxyName(country, state),
    config: { country, state },
  };
  return Object.assign({}, target, overrides);
}
11058
/**
 * Find a usable residential proxy that serves the target's state.
 *
 * A proxy matches when its name equals the state proxy name or its
 * configuration matches the target without naming a city. Only proxies of
 * type "residential" that are not "unavailable" and have an id are considered.
 *
 * @param {object[]} proxies - Existing proxies.
 * @param {object} target - State-level target.
 * @returns {object|null} The first match, or `null`.
 */
function findExistingStateProxy(proxies, target) {
  const stateName = proxyName(target.country, target.state);
  const servesState = (proxy) => {
    if (proxy.type !== "residential") return false;
    if (proxy.status === "unavailable") return false;
    if (!proxy.id) return false;
    return proxy.name === stateName || configMatches(proxy.config, target);
  };
  return proxies.find(servesState) ?? null;
}
11062
/**
 * Pick the target to use on a retry attempt.
 *
 * Currently every retry escalates to the state-level target; `attemptIndex`
 * is accepted but not consulted.
 *
 * @param {object} target - Original target.
 * @param {number} attemptIndex - Retry attempt index (unused).
 * @returns {object} State-level copy of `target`.
 */
function escalatedTargetLevel(target, attemptIndex) {
  const escalated = stateTarget(target);
  return escalated;
}
11065
/**
 * Render a caught value as text.
 *
 * @param {unknown} err - Caught value.
 * @returns {string} `err.message` for `Error` instances, `String(err)` otherwise.
 */
function errorText2(err) {
  if (err instanceof Error) {
    return err.message;
  }
  return String(err);
}
11068
/**
 * Resolve the proxy id to launch a browser with.
 *
 * - `proxyMode === "none"`: no proxy (`source: "disabled"`).
 * - `proxyMode === "configured"`: the configured proxy id.
 * - otherwise: a location-specific residential proxy. On the first attempt the
 *   order is ZIP (reuse, then create), city (reuse, then create per candidate)
 *   and state (reuse, then create). On retries (`attemptIndex >= 1`) a
 *   state-level proxy is created directly. Whenever nothing can be resolved the
 *   configured proxy id is returned with `source: "configured_fallback"`.
 *
 * Failed creation attempts are accumulated and reported, joined with " | ",
 * in the `error` field of later results; `error` is `null` when nothing failed.
 *
 * @param {object} options
 * @param {string} options.proxyMode - "none", "configured", or location mode.
 * @param {string} [options.kernelApiKey] - API key for the proxy service.
 * @param {string} [options.configuredKernelProxyId] - Fallback proxy id.
 * @param {string} [options.location] - Free-form location.
 * @param {string} [options.gl] - Country code of the request.
 * @param {string} [options.proxyZip] - Explicit five-digit ZIP code.
 * @param {number} [options.attemptIndex] - Retry index, 0 for the first attempt.
 * @returns {Promise<{kernelProxyId: string|undefined, resolution: object}>}
 *   Service errors are reported in `resolution.error` rather than thrown.
 */
async function resolveKernelProxyId(options) {
  const { proxyMode, configuredKernelProxyId } = options;
  if (proxyMode === "none") {
    return resolution("disabled", proxyMode, void 0, null, null);
  }
  if (proxyMode === "configured") {
    return resolution("configured_fallback", proxyMode, configuredKernelProxyId, null, null);
  }
  const target = parseKernelLocationProxyTarget(options.location, options.gl);
  if (!target || !options.kernelApiKey) {
    return resolution("configured_fallback", proxyMode, configuredKernelProxyId, target, target ? null : "location could not be normalized to a US city/state proxy target");
  }
  const kernel = new import_sdk5.default({ apiKey: options.kernelApiKey });
  const createErrors = [];
  // An empty error list is reported as null, never as "".
  const errorSummary = () => createErrors.join(" | ") || null;
  // Create one residential proxy; record the failure under `label` and return
  // null instead of throwing so the next fallback level can be tried.
  const tryCreate = async (name, config, label) => {
    try {
      const created = await kernel.proxies.create({ type: "residential", name, config });
      if (created.id) return created.id;
      createErrors.push(`${label}: Kernel did not return a proxy id`);
    } catch (err) {
      createErrors.push(`${label}: ${errorText2(err)}`);
    }
    return null;
  };
  const configuredFallback = (reportedTarget) => resolution("configured_fallback", proxyMode, configuredKernelProxyId, reportedTarget, errorSummary());
  try {
    const attemptIndex = options.attemptIndex ?? 0;
    if (attemptIndex >= 1) {
      const escalatedTarget = escalatedTargetLevel(target, attemptIndex);
      const escalatedId = await tryCreate(escalatedTarget.proxyName, escalatedTarget.config, escalatedTarget.state);
      if (escalatedId) {
        return resolution("location_created", proxyMode, escalatedId, escalatedTarget, null);
      }
      return configuredFallback(escalatedTarget);
    }
    const proxies = await kernel.proxies.list();
    const zip = knownZipFor(target, options.proxyZip);
    if (zip) {
      const targetZip = zipTarget(target, zip);
      const existingZip = findExistingTargetProxy(proxies, targetZip);
      if (existingZip?.id) {
        return resolution("location_reused", proxyMode, existingZip.id, targetZip, null);
      }
      const zipId = await tryCreate(targetZip.proxyName, { country: targetZip.country, zip }, zip);
      if (zipId) {
        return resolution("location_created", proxyMode, zipId, targetZip, null);
      }
    }
    const existing = findExistingProxy(proxies, target);
    if (existing?.id) {
      return resolution("location_reused", proxyMode, existing.id, target, errorSummary());
    }
    for (const city of target.cityCandidates) {
      const name = proxyName(target.country, target.state, city);
      const cityId = await tryCreate(name, { country: target.country, state: target.state, city }, city);
      if (cityId) {
        return resolution("location_created", proxyMode, cityId, {
          ...target,
          level: "city",
          city,
          proxyName: name,
          config: { country: target.country, state: target.state, city },
        }, null);
      }
    }
    const fallbackTarget = stateTarget(target);
    const existingState = findExistingStateProxy(proxies, fallbackTarget);
    if (existingState?.id) {
      return resolution("location_reused", proxyMode, existingState.id, fallbackTarget, errorSummary());
    }
    const stateId = await tryCreate(fallbackTarget.proxyName, fallbackTarget.config, fallbackTarget.state);
    if (stateId) {
      return resolution("location_created", proxyMode, stateId, fallbackTarget, errorSummary());
    }
    return configuredFallback(target);
  } catch (err) {
    // Anything unexpected (e.g. listing proxies failed) degrades to the
    // configured proxy instead of failing the request.
    return resolution("configured_fallback", proxyMode, configuredKernelProxyId, target, errorText2(err));
  }
}
11182
+ var import_sdk5, US_STATE_CODES, US_CITY_CENTER_ZIPS;
11183
+ var init_kernel_proxy_resolver = __esm({
11184
+ "src/kernel-proxy-resolver.ts"() {
11185
+ "use strict";
11186
+ import_sdk5 = __toESM(require("@onkernel/sdk"), 1);
11187
+ init_uule();
11188
+ US_STATE_CODES = {
11189
+ alabama: "AL",
11190
+ alaska: "AK",
11191
+ arizona: "AZ",
11192
+ arkansas: "AR",
11193
+ california: "CA",
11194
+ colorado: "CO",
11195
+ connecticut: "CT",
11196
+ delaware: "DE",
11197
+ florida: "FL",
11198
+ georgia: "GA",
11199
+ hawaii: "HI",
11200
+ idaho: "ID",
11201
+ illinois: "IL",
11202
+ indiana: "IN",
11203
+ iowa: "IA",
11204
+ kansas: "KS",
11205
+ kentucky: "KY",
11206
+ louisiana: "LA",
11207
+ maine: "ME",
11208
+ maryland: "MD",
11209
+ massachusetts: "MA",
11210
+ michigan: "MI",
11211
+ minnesota: "MN",
11212
+ mississippi: "MS",
11213
+ missouri: "MO",
11214
+ montana: "MT",
11215
+ nebraska: "NE",
11216
+ nevada: "NV",
11217
+ "new hampshire": "NH",
11218
+ "new jersey": "NJ",
11219
+ "new mexico": "NM",
11220
+ "new york": "NY",
11221
+ "north carolina": "NC",
11222
+ "north dakota": "ND",
11223
+ ohio: "OH",
11224
+ oklahoma: "OK",
11225
+ oregon: "OR",
11226
+ pennsylvania: "PA",
11227
+ "rhode island": "RI",
11228
+ "south carolina": "SC",
11229
+ "south dakota": "SD",
11230
+ tennessee: "TN",
11231
+ texas: "TX",
11232
+ utah: "UT",
11233
+ vermont: "VT",
11234
+ virginia: "VA",
11235
+ washington: "WA",
11236
+ "west virginia": "WV",
11237
+ wisconsin: "WI",
11238
+ wyoming: "WY"
11239
+ };
11240
+ US_CITY_CENTER_ZIPS = {
11241
+ "atlanta|GA": "30303",
11242
+ "austin|TX": "78701",
11243
+ "baltimore|MD": "21201",
11244
+ "boston|MA": "02108",
11245
+ "boulder|CO": "80302",
11246
+ "charlotte|NC": "28202",
11247
+ "chicago|IL": "60601",
11248
+ "colorado_springs|CO": "80903",
11249
+ "columbus|OH": "43215",
11250
+ "dallas|TX": "75201",
11251
+ "denver|CO": "80202",
11252
+ "detroit|MI": "48226",
11253
+ "fort_collins|CO": "80524",
11254
+ "fort_worth|TX": "76102",
11255
+ "houston|TX": "77002",
11256
+ "indianapolis|IN": "46204",
11257
+ "jacksonville|FL": "32202",
11258
+ "las_vegas|NV": "89101",
11259
+ "los_angeles|CA": "90012",
11260
+ "louisville|KY": "40202",
11261
+ "loveland|CO": "80537",
11262
+ "memphis|TN": "38103",
11263
+ "miami|FL": "33131",
11264
+ "minneapolis|MN": "55401",
11265
+ "nashville|TN": "37203",
11266
+ "new_york|NY": "10001",
11267
+ "orlando|FL": "32801",
11268
+ "philadelphia|PA": "19103",
11269
+ "phoenix|AZ": "85004",
11270
+ "portland|OR": "97205",
11271
+ "raleigh|NC": "27601",
11272
+ "richmond|VA": "23219",
11273
+ "sacramento|CA": "95814",
11274
+ "salt_lake_city|UT": "84101",
11275
+ "san_antonio|TX": "78205",
11276
+ "san_diego|CA": "92101",
11277
+ "san_francisco|CA": "94103",
11278
+ "san_jose|CA": "95113",
11279
+ "seattle|WA": "98101"
11280
+ };
11281
+ }
11282
+ });
11283
+
10703
11284
  // src/api/facebook-ad-routes.ts
10704
11285
  function invalidRequest(message) {
10705
11286
  return { error_code: "invalid_request", message };
@@ -10717,6 +11298,22 @@ function buildPageIntelUrl(body, country) {
10717
11298
  function kernelLaunchOpts() {
10718
11299
  return { headless: true, kernelApiKey: process.env.KERNEL_API_KEY?.trim(), kernelProxyId: process.env.KERNEL_PROXY_ID?.trim(), viewport: { width: 1280, height: 900 }, locale: "en-US" };
10719
11300
  }
11301
/**
 * Build browser launch options that prefer a location-resolved residential
 * proxy over the configured one.
 *
 * The proxy is resolved for the fixed location "New York, NY" (gl "us"). When
 * resolution yields no proxy id, or throws, the proxy id from the
 * `KERNEL_PROXY_ID` environment variable is kept. All other options come from
 * `kernelLaunchOpts()` so the two builders cannot drift apart.
 *
 * @returns {Promise<object>} Launch options; same shape as `kernelLaunchOpts()`.
 */
async function kernelLaunchOptsResidential() {
  const configuredProxyId = process.env.KERNEL_PROXY_ID?.trim();
  let proxyId = configuredProxyId;
  try {
    const resolved = await resolveKernelProxyId({
      kernelApiKey: process.env.KERNEL_API_KEY?.trim(),
      proxyMode: "location",
      configuredKernelProxyId: configuredProxyId,
      location: "New York, NY",
      gl: "us",
    });
    if (resolved.kernelProxyId) proxyId = resolved.kernelProxyId;
  } catch {
    // Best effort: proxy resolution must never block the launch, so the
    // configured proxy id is kept.
  }
  return { ...kernelLaunchOpts(), kernelProxyId: proxyId };
}
10720
11317
  var import_hono4, import_zod15, import_client3, FacebookAdBodySchema, FacebookPageIntelBodySchema, FacebookTranscribeBodySchema, FacebookSearchBodySchema, FacebookMediaBodySchema, facebookAdApp, ALLOWED_MEDIA_HOSTS;
10721
11318
  var init_facebook_ad_routes = __esm({
10722
11319
  "src/api/facebook-ad-routes.ts"() {
@@ -10727,6 +11324,8 @@ var init_facebook_ad_routes = __esm({
10727
11324
  init_rates();
10728
11325
  init_BrowserDriver();
10729
11326
  init_FacebookAdExtractor();
11327
+ init_FacebookAdGraphql();
11328
+ init_kernel_proxy_resolver();
10730
11329
  import_client3 = require("@fal-ai/client");
10731
11330
  init_api_auth();
10732
11331
  init_url_utils();
@@ -10812,7 +11411,7 @@ var init_facebook_ad_routes = __esm({
10812
11411
  const driver = new BrowserDriver();
10813
11412
  let refunded = false;
10814
11413
  try {
10815
- await driver.launch(kernelLaunchOpts());
11414
+ await driver.launch(await kernelLaunchOptsResidential());
10816
11415
  await driver.navigateTo(listingUrl);
10817
11416
  const extractor = new FacebookAdExtractor(driver);
10818
11417
  const result = await extractor.extractPageIntel(listingUrl, maxAds);
@@ -10896,18 +11495,15 @@ var init_facebook_ad_routes = __esm({
10896
11495
  const driver = new BrowserDriver();
10897
11496
  let searchRefunded = false;
10898
11497
  try {
10899
- await driver.launch(kernelLaunchOpts());
11498
+ await driver.launch(await kernelLaunchOptsResidential());
10900
11499
  const page = driver.getPage();
10901
- await driver.navigateTo(searchUrl);
10902
- try {
10903
- await page.waitForFunction(
10904
- () => {
10905
- const bt = document.body ? document.body.innerText ?? "" : "";
10906
- return bt.includes("Library ID") || bt.includes("No results");
10907
- },
10908
- { timeout: 2e4, polling: 500 }
10909
- );
10910
- } catch {
11500
+ const collated = await collectAdLibraryResults(page, searchUrl, Math.max(maxResults * 4, 40));
11501
+ const gqlAdvertisers = advertisersFromResults(collated, maxResults);
11502
+ if (gqlAdvertisers.length > 0) {
11503
+ const results2 = gqlAdvertisers.map((a) => ({ name: a.pageName, pageName: a.pageName, pageId: a.pageId, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
11504
+ const searchResult2 = { query: body.query.trim(), searchUrl, results: results2, via: "graphql" };
11505
+ await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results2.length, result: searchResult2 });
11506
+ return c.json(searchResult2);
10911
11507
  }
10912
11508
  await page.waitForTimeout(1500);
10913
11509
  for (let scroll = 0; scroll < 3; scroll++) {
@@ -10953,7 +11549,7 @@ var init_facebook_ad_routes = __esm({
10953
11549
  advertiserMap.set(pageName, { pageName, sampleLibraryId: libraryId, adCount: 1 });
10954
11550
  }
10955
11551
  }
10956
- const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults);
11552
+ const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults).map((a) => ({ name: a.pageName, pageName: a.pageName, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
10957
11553
  const searchResult = { query: body.query.trim(), searchUrl, results };
10958
11554
  await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results.length, result: searchResult });
10959
11555
  return c.json(searchResult);
@@ -11937,143 +12533,34 @@ async function capturePageSnapshots(targets, options = {}) {
11937
12533
  attemptNumber: index + 1,
11938
12534
  outcome: artifact.status === "captured" ? "page_captured" : "page_failed",
11939
12535
  startedAt: artifact.diagnostics.requestedAt,
11940
- completedAt: artifact.diagnostics.completedAt,
11941
- durationMs: artifact.diagnostics.durationMs,
11942
- ...artifact.error ? { problemCode: artifact.error.code, message: artifact.error.message } : {}
11943
- }));
11944
- const capturedCount = pageSnapshotArtifacts.filter((artifact) => artifact.status === "captured").length;
11945
- return {
11946
- pageSnapshotArtifacts,
11947
- attempts,
11948
- diagnostics: {
11949
- requestedCount: targets.length,
11950
- capturedCount,
11951
- failedCount: targets.length - capturedCount,
11952
- maxConcurrency,
11953
- timeoutMs
11954
- }
11955
- };
11956
- }
11957
- var import_node_crypto2, import_p_limit3, DEFAULT_TIMEOUT_MS, DEFAULT_MAX_CONCURRENCY, DEFAULT_MAX_CONTENT_CHARS;
11958
- var init_page_snapshot_extractor = __esm({
11959
- "src/serp-intelligence/page-snapshot-extractor.ts"() {
11960
- "use strict";
11961
- import_node_crypto2 = require("crypto");
11962
- import_p_limit3 = __toESM(require("p-limit"), 1);
11963
- init_kpo_extractor();
11964
- init_url_utils();
11965
- DEFAULT_TIMEOUT_MS = 15e3;
11966
- DEFAULT_MAX_CONCURRENCY = 2;
11967
- DEFAULT_MAX_CONTENT_CHARS = 25e4;
11968
- }
11969
- });
11970
-
11971
- // src/locations.ts
11972
- var LOCATIONS;
11973
- var init_locations = __esm({
11974
- "src/locations.ts"() {
11975
- "use strict";
11976
- LOCATIONS = {
11977
- "austin": "Austin,Texas,United States",
11978
- "new york": "New York,New York,United States",
11979
- "new york city": "New York,New York,United States",
11980
- "nyc": "New York,New York,United States",
11981
- "los angeles": "Los Angeles,California,United States",
11982
- "la": "Los Angeles,California,United States",
11983
- "chicago": "Chicago,Illinois,United States",
11984
- "houston": "Houston,Texas,United States",
11985
- "phoenix": "Phoenix,Arizona,United States",
11986
- "philadelphia": "Philadelphia,Pennsylvania,United States",
11987
- "philly": "Philadelphia,Pennsylvania,United States",
11988
- "san antonio": "San Antonio,Texas,United States",
11989
- "dallas": "Dallas,Texas,United States",
11990
- "miami": "Miami,Florida,United States",
11991
- "seattle": "Seattle,Washington,United States",
11992
- "denver": "Denver,Colorado,United States",
11993
- "loveland": "Loveland,Colorado,United States",
11994
- "loveland co": "Loveland,Colorado,United States",
11995
- "fort collins": "Fort Collins,Colorado,United States",
11996
- "boulder": "Boulder,Colorado,United States",
11997
- "colorado springs": "Colorado Springs,Colorado,United States",
11998
- "boston": "Boston,Massachusetts,United States",
11999
- "atlanta": "Atlanta,Georgia,United States",
12000
- "san francisco": "San Francisco,California,United States",
12001
- "sf": "San Francisco,California,United States",
12002
- "portland": "Portland,Oregon,United States",
12003
- "las vegas": "Las Vegas,Nevada,United States",
12004
- "minneapolis": "Minneapolis,Minnesota,United States",
12005
- "detroit": "Detroit,Michigan,United States",
12006
- "nashville": "Nashville,Tennessee,United States",
12007
- "charlotte": "Charlotte,North Carolina,United States",
12008
- "orlando": "Orlando,Florida,United States",
12009
- "san diego": "San Diego,California,United States",
12010
- "baltimore": "Baltimore,Maryland,United States",
12011
- "sacramento": "Sacramento,California,United States",
12012
- "columbus": "Columbus,Ohio,United States",
12013
- "indianapolis": "Indianapolis,Indiana,United States",
12014
- "san jose": "San Jose,California,United States",
12015
- "fort worth": "Fort Worth,Texas,United States",
12016
- "jacksonville": "Jacksonville,Florida,United States",
12017
- "memphis": "Memphis,Tennessee,United States",
12018
- "louisville": "Louisville,Kentucky,United States",
12019
- "raleigh": "Raleigh,North Carolina,United States",
12020
- "richmond": "Richmond,Virginia,United States",
12021
- "salt lake city": "Salt Lake City,Utah,United States",
12022
- "toronto": "Toronto,Ontario,Canada",
12023
- "vancouver": "Vancouver,British Columbia,Canada",
12024
- "montreal": "Montreal,Quebec,Canada",
12025
- "calgary": "Calgary,Alberta,Canada",
12026
- "ottawa": "Ottawa,Ontario,Canada",
12027
- "london": "London,England,United Kingdom",
12028
- "manchester": "Manchester,England,United Kingdom",
12029
- "birmingham": "Birmingham,England,United Kingdom",
12030
- "edinburgh": "Edinburgh,Scotland,United Kingdom",
12031
- "glasgow": "Glasgow,Scotland,United Kingdom",
12032
- "leeds": "Leeds,England,United Kingdom",
12033
- "sydney": "Sydney,New South Wales,Australia",
12034
- "melbourne": "Melbourne,Victoria,Australia",
12035
- "brisbane": "Brisbane,Queensland,Australia",
12036
- "perth": "Perth,Western Australia,Australia",
12037
- "adelaide": "Adelaide,South Australia,Australia",
12038
- "dublin": "Dublin,Leinster,Ireland"
12039
- };
12040
- }
12041
- });
12042
-
12043
- // src/uule.ts
12044
- function encodeVarint(value) {
12045
- const bytes = [];
12046
- let remaining = value;
12047
- do {
12048
- let byte = remaining & 127;
12049
- remaining >>>= 7;
12050
- if (remaining > 0) byte |= 128;
12051
- bytes.push(byte);
12052
- } while (remaining > 0);
12053
- return bytes;
12054
- }
12055
- function encodeUule(name) {
12056
- const locationBytes = Buffer.from(name, "utf8");
12057
- const payload = Buffer.concat([
12058
- Buffer.from([8, 2, 16, 32, 34]),
12059
- Buffer.from(encodeVarint(locationBytes.length)),
12060
- locationBytes
12061
- ]);
12062
- return `w+${payload.toString("base64")}`;
12063
- }
12064
- function normalizeLocation(input) {
12065
- const raw = input.toLowerCase().trim();
12066
- if (LOCATIONS[raw]) return LOCATIONS[raw];
12067
- const beforeComma = raw.split(",")[0].trim();
12068
- if (beforeComma !== raw && LOCATIONS[beforeComma]) return LOCATIONS[beforeComma];
12069
- const withoutState = raw.replace(/\s+[a-z]{2}$/, "").trim();
12070
- if (withoutState !== raw && LOCATIONS[withoutState]) return LOCATIONS[withoutState];
12071
- return input;
12536
+ completedAt: artifact.diagnostics.completedAt,
12537
+ durationMs: artifact.diagnostics.durationMs,
12538
+ ...artifact.error ? { problemCode: artifact.error.code, message: artifact.error.message } : {}
12539
+ }));
12540
+ const capturedCount = pageSnapshotArtifacts.filter((artifact) => artifact.status === "captured").length;
12541
+ return {
12542
+ pageSnapshotArtifacts,
12543
+ attempts,
12544
+ diagnostics: {
12545
+ requestedCount: targets.length,
12546
+ capturedCount,
12547
+ failedCount: targets.length - capturedCount,
12548
+ maxConcurrency,
12549
+ timeoutMs
12550
+ }
12551
+ };
12072
12552
  }
12073
- var init_uule = __esm({
12074
- "src/uule.ts"() {
12553
+ var import_node_crypto2, import_p_limit3, DEFAULT_TIMEOUT_MS, DEFAULT_MAX_CONCURRENCY, DEFAULT_MAX_CONTENT_CHARS;
12554
+ var init_page_snapshot_extractor = __esm({
12555
+ "src/serp-intelligence/page-snapshot-extractor.ts"() {
12075
12556
  "use strict";
12076
- init_locations();
12557
+ import_node_crypto2 = require("crypto");
12558
+ import_p_limit3 = __toESM(require("p-limit"), 1);
12559
+ init_kpo_extractor();
12560
+ init_url_utils();
12561
+ DEFAULT_TIMEOUT_MS = 15e3;
12562
+ DEFAULT_MAX_CONCURRENCY = 2;
12563
+ DEFAULT_MAX_CONTENT_CHARS = 25e4;
12077
12564
  }
12078
12565
  });
12079
12566
 
@@ -13354,435 +13841,69 @@ var init_OutputSerializer = __esm({
13354
13841
  citation_text: c.text,
13355
13842
  citation_href: c.href
13356
13843
  }));
13357
- const csv = import_papaparse2.default.unparse(rows, { header: true });
13358
- const filename = `${slug}-ai-overview-${Date.now()}.csv`;
13359
- const fullPath = import_node_path5.default.join(outputDir, filename);
13360
- await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13361
- return fullPath;
13362
- }
13363
- async writeAIModeCSV(citations, text, seed, outputDir) {
13364
- await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
13365
- const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13366
- const rows = citations.map((c, i) => ({
13367
- seed_query: seed,
13368
- response_text: i === 0 ? text ?? "" : "",
13369
- citation_text: c.text,
13370
- citation_href: c.href
13371
- }));
13372
- const csv = import_papaparse2.default.unparse(rows, { header: true });
13373
- const filename = `${slug}-ai-mode-${Date.now()}.csv`;
13374
- const fullPath = import_node_path5.default.join(outputDir, filename);
13375
- await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13376
- return fullPath;
13377
- }
13378
- async writeWhatPeopleSayingCSV(cards, seed, outputDir) {
13379
- await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
13380
- const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13381
- const rows = cards.map((c) => ({ seed_query: seed, ...c }));
13382
- const csv = import_papaparse2.default.unparse(rows, { header: true });
13383
- const filename = `${slug}-what-people-saying-${Date.now()}.csv`;
13384
- const fullPath = import_node_path5.default.join(outputDir, filename);
13385
- await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13386
- return fullPath;
13387
- }
13388
- };
13389
- }
13390
- });
13391
-
13392
- // src/output/ProgressReporter.ts
13393
- var ProgressReporter;
13394
- var init_ProgressReporter = __esm({
13395
- "src/output/ProgressReporter.ts"() {
13396
- "use strict";
13397
- ProgressReporter = class {
13398
- onQuestion(node) {
13399
- process.stdout.write(JSON.stringify({ event: "question", depth: node.depth, question: node.question }) + "\n");
13400
- }
13401
- onDepth(depth) {
13402
- process.stdout.write(JSON.stringify({ event: "depth", depth }) + "\n");
13403
- }
13404
- onVideos(videos) {
13405
- for (const v of videos) {
13406
- process.stdout.write(JSON.stringify({ event: "video", type: v.type, platform: v.platform, duration: v.duration, title: v.title, channel: v.channel, url: v.url }) + "\n");
13407
- }
13408
- }
13409
- onForums(forums) {
13410
- for (const f of forums) {
13411
- process.stdout.write(JSON.stringify({ event: "forum", title: f.title, source: f.source, url: f.url }) + "\n");
13412
- }
13413
- }
13414
- onComplete(stats) {
13415
- process.stdout.write(JSON.stringify({ event: "complete", ...stats }) + "\n");
13416
- }
13417
- onError(err) {
13418
- process.stderr.write(JSON.stringify({ event: "error", type: err.constructor.name, message: err.message }) + "\n");
13419
- }
13420
- };
13421
- }
13422
- });
13423
-
13424
- // src/kernel-proxy-resolver.ts
13425
- function proxyIdSuffix2(proxyId) {
13426
- return proxyId ? proxyId.slice(-6) : null;
13427
- }
13428
- function resolution(source, proxyMode, proxyId, target, error) {
13429
- return {
13430
- kernelProxyId: proxyId,
13431
- resolution: {
13432
- source,
13433
- proxyMode,
13434
- proxyIdPresent: Boolean(proxyId),
13435
- proxyIdSuffix: proxyIdSuffix2(proxyId),
13436
- target,
13437
- error
13438
- }
13439
- };
13440
- }
13441
- function normalizeStateName(value) {
13442
- return value.trim().toLowerCase().replace(/\s+/g, " ");
13443
- }
13444
- function normalizeCountryName(value) {
13445
- return value.trim().toLowerCase().replace(/\./g, "").replace(/\s+/g, " ");
13446
- }
13447
- function isUnitedStates(country) {
13448
- if (!country) return true;
13449
- const normalized = normalizeCountryName(country);
13450
- return normalized === "united states" || normalized === "united states of america" || normalized === "usa" || normalized === "us";
13451
- }
13452
- function stateCodeFor(region) {
13453
- const trimmed = region.trim();
13454
- if (/^[A-Za-z]{2}$/.test(trimmed)) return trimmed.toUpperCase();
13455
- return US_STATE_CODES[normalizeStateName(trimmed)] ?? null;
13456
- }
13457
- function kernelCityIdentifierCandidates(city) {
13458
- const ascii = city.normalize("NFKD").replace(/[^\x00-\x7F]/g, "").toLowerCase();
13459
- const words = ascii.split(/[^a-z0-9]+/).filter(Boolean);
13460
- const underscored = words.join("_");
13461
- const compact = words.join("");
13462
- return Array.from(new Set([underscored, compact].filter(Boolean)));
13463
- }
13464
- function proxyName(country, state, city) {
13465
- return city ? `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}-${city}` : `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}`;
13466
- }
13467
- function zipProxyName(zip) {
13468
- return `mcp-serp-residential-us-zip-${zip}`;
13469
- }
13470
- function parseKernelLocationProxyTarget(location, gl) {
13471
- if (!location || gl.toLowerCase() !== "us") return null;
13472
- const canonicalLocation = normalizeLocation(location);
13473
- let parts = canonicalLocation.split(",").map((part) => part.trim()).filter(Boolean);
13474
- if (parts.length > 1 && isUnitedStates(parts[parts.length - 1])) {
13475
- parts = parts.slice(0, -1);
13476
- }
13477
- if (parts.length === 1) {
13478
- const stateOnly = stateCodeFor(parts[0]);
13479
- if (!stateOnly) return null;
13480
- return {
13481
- canonicalLocation,
13482
- level: "state",
13483
- country: "US",
13484
- state: stateOnly,
13485
- city: "",
13486
- cityCandidates: [],
13487
- proxyName: proxyName("US", stateOnly),
13488
- config: {
13489
- country: "US",
13490
- state: stateOnly
13491
- }
13492
- };
13493
- }
13494
- const [city = "", region = ""] = parts;
13495
- if (!city || !region) return null;
13496
- const state = stateCodeFor(region);
13497
- if (!state) return null;
13498
- const cityCandidates = kernelCityIdentifierCandidates(city);
13499
- const primaryCity = cityCandidates[0];
13500
- if (!primaryCity) return null;
13501
- return {
13502
- canonicalLocation,
13503
- level: "city",
13504
- country: "US",
13505
- state,
13506
- city: primaryCity,
13507
- cityCandidates,
13508
- proxyName: proxyName("US", state, primaryCity),
13509
- config: {
13510
- country: "US",
13511
- state,
13512
- city: primaryCity
13513
- }
13514
- };
13515
- }
13516
- function cityZipKey(target) {
13517
- return `${target.city}|${target.state}`;
13518
- }
13519
- function knownZipFor(target, explicitZip) {
13520
- if (explicitZip && /^\d{5}$/.test(explicitZip)) return explicitZip;
13521
- return US_CITY_CENTER_ZIPS[cityZipKey(target)] ?? null;
13522
- }
13523
- function zipTarget(target, zip) {
13524
- return {
13525
- ...target,
13526
- level: "zip",
13527
- zip,
13528
- proxyName: zipProxyName(zip),
13529
- config: {
13530
- country: target.country,
13531
- state: target.state,
13532
- zip
13533
- }
13534
- };
13535
- }
13536
- function configMatches(config, target, city) {
13537
- if (target.level === "zip") {
13538
- return config?.country?.toUpperCase() === target.country && config?.zip === target.zip;
13539
- }
13540
- return config?.country?.toUpperCase() === target.country && config?.state?.toUpperCase() === target.state && (city ? config?.city === city : !config?.city);
13541
- }
13542
- function findExistingTargetProxy(proxies, target) {
13543
- return proxies.find((proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id) && (proxy.name === target.proxyName || configMatches(proxy.config, target, target.level === "city" ? target.city : void 0))) ?? null;
13544
- }
13545
- function findExistingProxy(proxies, target) {
13546
- for (const city of target.cityCandidates) {
13547
- const name = proxyName(target.country, target.state, city);
13548
- const found = proxies.find((proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id) && (proxy.name === name || configMatches(proxy.config, target, city)));
13549
- if (found) return found;
13550
- }
13551
- return null;
13552
- }
13553
- function stateTarget(target) {
13554
- return {
13555
- ...target,
13556
- level: "state",
13557
- proxyName: proxyName(target.country, target.state),
13558
- config: {
13559
- country: target.country,
13560
- state: target.state
13561
- }
13562
- };
13563
- }
13564
- function findExistingStateProxy(proxies, target) {
13565
- const name = proxyName(target.country, target.state);
13566
- return proxies.find((proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id) && (proxy.name === name || configMatches(proxy.config, target))) ?? null;
13567
- }
13568
- function escalatedTargetLevel(target, attemptIndex) {
13569
- return stateTarget(target);
13570
- }
13571
- function errorText2(err) {
13572
- return err instanceof Error ? err.message : String(err);
13573
- }
13574
- async function resolveKernelProxyId(options) {
13575
- if (options.proxyMode === "none") {
13576
- return resolution("disabled", options.proxyMode, void 0, null, null);
13577
- }
13578
- if (options.proxyMode === "configured") {
13579
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, null, null);
13580
- }
13581
- const target = parseKernelLocationProxyTarget(options.location, options.gl);
13582
- if (!target || !options.kernelApiKey) {
13583
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, target ? null : "location could not be normalized to a US city/state proxy target");
13844
+ const csv = import_papaparse2.default.unparse(rows, { header: true });
13845
+ const filename = `${slug}-ai-overview-${Date.now()}.csv`;
13846
+ const fullPath = import_node_path5.default.join(outputDir, filename);
13847
+ await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13848
+ return fullPath;
13849
+ }
13850
+ async writeAIModeCSV(citations, text, seed, outputDir) {
13851
+ await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
13852
+ const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13853
+ const rows = citations.map((c, i) => ({
13854
+ seed_query: seed,
13855
+ response_text: i === 0 ? text ?? "" : "",
13856
+ citation_text: c.text,
13857
+ citation_href: c.href
13858
+ }));
13859
+ const csv = import_papaparse2.default.unparse(rows, { header: true });
13860
+ const filename = `${slug}-ai-mode-${Date.now()}.csv`;
13861
+ const fullPath = import_node_path5.default.join(outputDir, filename);
13862
+ await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13863
+ return fullPath;
13864
+ }
13865
+ async writeWhatPeopleSayingCSV(cards, seed, outputDir) {
13866
+ await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
13867
+ const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13868
+ const rows = cards.map((c) => ({ seed_query: seed, ...c }));
13869
+ const csv = import_papaparse2.default.unparse(rows, { header: true });
13870
+ const filename = `${slug}-what-people-saying-${Date.now()}.csv`;
13871
+ const fullPath = import_node_path5.default.join(outputDir, filename);
13872
+ await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13873
+ return fullPath;
13874
+ }
13875
+ };
13584
13876
  }
13585
- const kernel = new import_sdk5.default({ apiKey: options.kernelApiKey });
13586
- try {
13587
- const attemptIndex = options.attemptIndex ?? 0;
13588
- if (attemptIndex >= 1) {
13589
- const escalatedTarget = escalatedTargetLevel(target, attemptIndex);
13590
- const createErrors2 = [];
13591
- try {
13592
- const created = await kernel.proxies.create({
13593
- type: "residential",
13594
- name: escalatedTarget.proxyName,
13595
- config: escalatedTarget.config
13596
- });
13597
- if (created.id) {
13598
- return resolution("location_created", options.proxyMode, created.id, escalatedTarget, null);
13599
- }
13600
- createErrors2.push(`${escalatedTarget.state}: Kernel did not return a proxy id`);
13601
- } catch (err) {
13602
- createErrors2.push(`${escalatedTarget.state}: ${errorText2(err)}`);
13877
+ });
13878
+
13879
+ // src/output/ProgressReporter.ts
13880
+ var ProgressReporter;
13881
+ var init_ProgressReporter = __esm({
13882
+ "src/output/ProgressReporter.ts"() {
13883
+ "use strict";
13884
+ ProgressReporter = class {
13885
+ onQuestion(node) {
13886
+ process.stdout.write(JSON.stringify({ event: "question", depth: node.depth, question: node.question }) + "\n");
13603
13887
  }
13604
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, escalatedTarget, createErrors2.join(" | "));
13605
- }
13606
- const proxies = await kernel.proxies.list();
13607
- const zip = knownZipFor(target, options.proxyZip);
13608
- const createErrors = [];
13609
- if (zip) {
13610
- const targetZip = zipTarget(target, zip);
13611
- const existingZip = findExistingTargetProxy(proxies, targetZip);
13612
- if (existingZip?.id) {
13613
- return resolution("location_reused", options.proxyMode, existingZip.id, targetZip, null);
13888
+ onDepth(depth) {
13889
+ process.stdout.write(JSON.stringify({ event: "depth", depth }) + "\n");
13614
13890
  }
13615
- try {
13616
- const created = await kernel.proxies.create({
13617
- type: "residential",
13618
- name: targetZip.proxyName,
13619
- config: {
13620
- country: targetZip.country,
13621
- zip
13622
- }
13623
- });
13624
- if (created.id) {
13625
- return resolution("location_created", options.proxyMode, created.id, targetZip, null);
13891
+ onVideos(videos) {
13892
+ for (const v of videos) {
13893
+ process.stdout.write(JSON.stringify({ event: "video", type: v.type, platform: v.platform, duration: v.duration, title: v.title, channel: v.channel, url: v.url }) + "\n");
13626
13894
  }
13627
- createErrors.push(`${zip}: Kernel did not return a proxy id`);
13628
- } catch (err) {
13629
- createErrors.push(`${zip}: ${errorText2(err)}`);
13630
13895
  }
13631
- }
13632
- const existing = findExistingProxy(proxies, target);
13633
- if (existing?.id) {
13634
- return resolution("location_reused", options.proxyMode, existing.id, target, createErrors.join(" | ") || null);
13635
- }
13636
- for (const city of target.cityCandidates) {
13637
- try {
13638
- const created = await kernel.proxies.create({
13639
- type: "residential",
13640
- name: proxyName(target.country, target.state, city),
13641
- config: {
13642
- country: target.country,
13643
- state: target.state,
13644
- city
13645
- }
13646
- });
13647
- if (created.id) {
13648
- return resolution("location_created", options.proxyMode, created.id, {
13649
- ...target,
13650
- level: "city",
13651
- city,
13652
- proxyName: proxyName(target.country, target.state, city),
13653
- config: {
13654
- country: target.country,
13655
- state: target.state,
13656
- city
13657
- }
13658
- }, null);
13896
+ onForums(forums) {
13897
+ for (const f of forums) {
13898
+ process.stdout.write(JSON.stringify({ event: "forum", title: f.title, source: f.source, url: f.url }) + "\n");
13659
13899
  }
13660
- createErrors.push(`${city}: Kernel did not return a proxy id`);
13661
- } catch (err) {
13662
- createErrors.push(`${city}: ${errorText2(err)}`);
13663
13900
  }
13664
- }
13665
- const fallbackTarget = stateTarget(target);
13666
- const existingState = findExistingStateProxy(proxies, fallbackTarget);
13667
- if (existingState?.id) {
13668
- return resolution("location_reused", options.proxyMode, existingState.id, fallbackTarget, createErrors.join(" | "));
13669
- }
13670
- try {
13671
- const created = await kernel.proxies.create({
13672
- type: "residential",
13673
- name: fallbackTarget.proxyName,
13674
- config: fallbackTarget.config
13675
- });
13676
- if (created.id) {
13677
- return resolution("location_created", options.proxyMode, created.id, fallbackTarget, createErrors.join(" | "));
13901
+ onComplete(stats) {
13902
+ process.stdout.write(JSON.stringify({ event: "complete", ...stats }) + "\n");
13903
+ }
13904
+ onError(err) {
13905
+ process.stderr.write(JSON.stringify({ event: "error", type: err.constructor.name, message: err.message }) + "\n");
13678
13906
  }
13679
- createErrors.push(`${fallbackTarget.state}: Kernel did not return a proxy id`);
13680
- } catch (err) {
13681
- createErrors.push(`${fallbackTarget.state}: ${errorText2(err)}`);
13682
- }
13683
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, createErrors.join(" | "));
13684
- } catch (err) {
13685
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, errorText2(err));
13686
- }
13687
- }
13688
- var import_sdk5, US_STATE_CODES, US_CITY_CENTER_ZIPS;
13689
- var init_kernel_proxy_resolver = __esm({
13690
- "src/kernel-proxy-resolver.ts"() {
13691
- "use strict";
13692
- import_sdk5 = __toESM(require("@onkernel/sdk"), 1);
13693
- init_uule();
13694
- US_STATE_CODES = {
13695
- alabama: "AL",
13696
- alaska: "AK",
13697
- arizona: "AZ",
13698
- arkansas: "AR",
13699
- california: "CA",
13700
- colorado: "CO",
13701
- connecticut: "CT",
13702
- delaware: "DE",
13703
- florida: "FL",
13704
- georgia: "GA",
13705
- hawaii: "HI",
13706
- idaho: "ID",
13707
- illinois: "IL",
13708
- indiana: "IN",
13709
- iowa: "IA",
13710
- kansas: "KS",
13711
- kentucky: "KY",
13712
- louisiana: "LA",
13713
- maine: "ME",
13714
- maryland: "MD",
13715
- massachusetts: "MA",
13716
- michigan: "MI",
13717
- minnesota: "MN",
13718
- mississippi: "MS",
13719
- missouri: "MO",
13720
- montana: "MT",
13721
- nebraska: "NE",
13722
- nevada: "NV",
13723
- "new hampshire": "NH",
13724
- "new jersey": "NJ",
13725
- "new mexico": "NM",
13726
- "new york": "NY",
13727
- "north carolina": "NC",
13728
- "north dakota": "ND",
13729
- ohio: "OH",
13730
- oklahoma: "OK",
13731
- oregon: "OR",
13732
- pennsylvania: "PA",
13733
- "rhode island": "RI",
13734
- "south carolina": "SC",
13735
- "south dakota": "SD",
13736
- tennessee: "TN",
13737
- texas: "TX",
13738
- utah: "UT",
13739
- vermont: "VT",
13740
- virginia: "VA",
13741
- washington: "WA",
13742
- "west virginia": "WV",
13743
- wisconsin: "WI",
13744
- wyoming: "WY"
13745
- };
13746
- US_CITY_CENTER_ZIPS = {
13747
- "atlanta|GA": "30303",
13748
- "austin|TX": "78701",
13749
- "baltimore|MD": "21201",
13750
- "boston|MA": "02108",
13751
- "boulder|CO": "80302",
13752
- "charlotte|NC": "28202",
13753
- "chicago|IL": "60601",
13754
- "colorado_springs|CO": "80903",
13755
- "columbus|OH": "43215",
13756
- "dallas|TX": "75201",
13757
- "denver|CO": "80202",
13758
- "detroit|MI": "48226",
13759
- "fort_collins|CO": "80524",
13760
- "fort_worth|TX": "76102",
13761
- "houston|TX": "77002",
13762
- "indianapolis|IN": "46204",
13763
- "jacksonville|FL": "32202",
13764
- "las_vegas|NV": "89101",
13765
- "los_angeles|CA": "90012",
13766
- "louisville|KY": "40202",
13767
- "loveland|CO": "80537",
13768
- "memphis|TN": "38103",
13769
- "miami|FL": "33131",
13770
- "minneapolis|MN": "55401",
13771
- "nashville|TN": "37203",
13772
- "new_york|NY": "10001",
13773
- "orlando|FL": "32801",
13774
- "philadelphia|PA": "19103",
13775
- "phoenix|AZ": "85004",
13776
- "portland|OR": "97205",
13777
- "raleigh|NC": "27601",
13778
- "richmond|VA": "23219",
13779
- "sacramento|CA": "95814",
13780
- "salt_lake_city|UT": "84101",
13781
- "san_antonio|TX": "78205",
13782
- "san_diego|CA": "92101",
13783
- "san_francisco|CA": "94103",
13784
- "san_jose|CA": "95113",
13785
- "seattle|WA": "98101"
13786
13907
  };
13787
13908
  }
13788
13909
  });
@@ -15377,7 +15498,7 @@ function formatFacebookAdSearch(raw, input) {
15377
15498
  const d = parsed.data;
15378
15499
  const advertisers = d.results ?? d.advertisers ?? [];
15379
15500
  const rows = advertisers.map(
15380
- (a, i) => `| ${i + 1} | ${cell(a.name)} | ${a.adCount ?? "\u2014"} | \`${a.libraryId ?? "\u2014"}\` |`
15501
+ (a, i) => `| ${i + 1} | ${cell(a.pageName ?? a.name)} | ${a.adCount ?? "\u2014"} | \`${a.sampleLibraryId ?? a.libraryId ?? "\u2014"}\` |`
15381
15502
  ).join("\n");
15382
15503
  const full = [
15383
15504
  `# Facebook Ad Library Search: "${input.query}"`,
@@ -16479,19 +16600,15 @@ var init_server = __esm({
16479
16600
  const normalizedEmail = email?.trim().toLowerCase();
16480
16601
  if (!normalizedEmail || !password) return c.json({ error: "Email and password required" }, 400);
16481
16602
  if (password.length < 8) return c.json({ error: "Password must be at least 8 characters" }, 400);
16482
- const limited = await enforceRateLimit(c, "auth_register", rateLimitKey(c), 5, 60 * 60);
16483
- if (limited) return limited;
16484
16603
  try {
16485
16604
  const existing = await getUserByEmail(normalizedEmail);
16486
16605
  if (existing) return c.json({ error: "Email already registered" }, 409);
16487
- let stripeCustomerId;
16606
+ let stripeCustomerId = null;
16488
16607
  try {
16489
16608
  stripeCustomerId = await createSignupStripeCustomer(normalizedEmail);
16490
- } catch {
16491
- return c.json({ error: "Stripe customer setup failed" }, 503);
16492
- }
16493
- if (!stripeCustomerId && (process.env.NODE_ENV === "production" || process.env.VERCEL === "1")) {
16494
- return c.json({ error: "Stripe customer setup failed" }, 503);
16609
+ } catch (err) {
16610
+ console.warn("[auth/register] Stripe customer creation failed; continuing without it (created lazily at checkout):", err instanceof Error ? err.message : String(err));
16611
+ stripeCustomerId = null;
16495
16612
  }
16496
16613
  const user = await createUser(normalizedEmail, void 0, password, stripeCustomerId ?? void 0);
16497
16614
  if (stripeCustomerId) {
@@ -16552,14 +16669,18 @@ var init_server = __esm({
16552
16669
  if (process.env.RESEND_API_KEY) {
16553
16670
  try {
16554
16671
  const resend = new import_resend.Resend(process.env.RESEND_API_KEY);
16555
- await resend.emails.send({
16672
+ const sent = await resend.emails.send({
16556
16673
  from: "MCP Scraper <noreply@updates.mcpscraper.dev>",
16557
16674
  to: normalizedEmail,
16558
16675
  subject: "Reset your MCP Scraper password",
16559
16676
  html: `<p>Hi,</p><p>Click the link below to reset your password. This link expires in 1 hour.</p><p><a href="${resetUrl}">${resetUrl}</a></p><p>If you didn't request this, you can ignore this email.</p>`
16560
16677
  });
16561
- } catch {
16678
+ if (sent.error) console.error("[auth/forgot-password] Resend rejected the email:", JSON.stringify(sent.error));
16679
+ } catch (err) {
16680
+ console.error("[auth/forgot-password] Resend send threw:", err instanceof Error ? err.message : String(err));
16562
16681
  }
16682
+ } else {
16683
+ console.warn("[auth/forgot-password] RESEND_API_KEY not set \u2014 no reset email sent for", normalizedEmail);
16563
16684
  }
16564
16685
  return c.json({ ok: true });
16565
16686
  });