mcp-scraper 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10700,6 +10700,587 @@ var init_FacebookAdExtractor = __esm({
10700
10700
  }
10701
10701
  });
10702
10702
 
10703
+ // src/extractor/FacebookAdGraphql.ts
10704
/**
 * Parse a Facebook GraphQL response body into a list of JSON payloads.
 *
 * A leading `for (;;);` guard is stripped first. The remainder is parsed as a
 * single JSON document; if that fails, each non-empty line is parsed on its
 * own and lines that are not valid JSON are skipped.
 *
 * @param {string} text Raw response body.
 * @returns {Array<*>} Parsed payloads in document order; empty when nothing parses.
 */
function parseFbGraphqlJson(text) {
  // A missing or non-string body used to throw on `.replace`; treat it as "no payloads".
  if (typeof text !== "string") return [];
  const body = text.replace(/^for\s*\(;;\);/, "").trim();
  if (body === "") return [];
  try {
    return [JSON.parse(body)];
  } catch {
    // Not a single document: fall through to line-by-line parsing.
  }
  const payloads = [];
  for (const line of body.split("\n")) {
    const candidate = line.trim();
    if (candidate === "") continue;
    try {
      payloads.push(JSON.parse(candidate));
    } catch {
      // Skip lines that are not valid JSON; the remaining lines may still parse.
    }
  }
  return payloads;
}
10723
/**
 * Flatten the collated ad results out of one Ad Library GraphQL payload.
 *
 * Reads `data.ad_library_main.search_results_connection.edges[*].node.collated_results`
 * and keeps every entry that carries an `ad_archive_id`.
 *
 * @param {*} payload One parsed GraphQL payload (any shape; unknown shapes yield []).
 * @returns {Array<{ad_archive_id: string, page_id: string, page_name: *, is_active: boolean, collation_count: (number|null), snapshot: *}>}
 */
function extractCollatedResults(payload) {
  const edges = payload?.data?.ad_library_main?.search_results_connection?.edges;
  // Anything other than a list means this payload carries no search results.
  if (!Array.isArray(edges)) return [];
  const results = [];
  for (const edge of edges) {
    const collated = edge?.node?.collated_results;
    if (!Array.isArray(collated)) continue;
    for (const entry of collated) {
      // `entry` may be null; a single bad entry must not abort the whole payload.
      const id = entry?.ad_archive_id;
      if (id === undefined || id === null) continue;
      const snapshot = entry.snapshot ?? null;
      results.push({
        ad_archive_id: String(id),
        page_id: entry.page_id != null ? String(entry.page_id) : "",
        page_name: entry.page_name ?? snapshot?.page_name ?? "",
        is_active: Boolean(entry.is_active),
        collation_count: typeof entry.collation_count === "number" ? entry.collation_count : null,
        snapshot
      });
    }
  }
  return results;
}
10746
/**
 * Load `url` in `page` and harvest Ad Library results from the GraphQL
 * responses the page triggers while it is scrolled.
 *
 * Only responses whose URL contains `/api/graphql` and whose request body names
 * the `AD_LIBRARY_QUERY` friendly name are read. Results are de-duplicated by
 * `ad_archive_id`. Scrolling continues in 2 s rounds until the capture window
 * elapses, `maxResults` is reached, or two consecutive rounds add nothing
 * while at least one result has been collected.
 *
 * @param {*} page Browser page exposing on/off/goto/evaluate/waitForTimeout.
 * @param {string} url Page to navigate to.
 * @param {number} maxResults Upper bound on returned results.
 * @param {{captureMs?: number}} [opts] Capture window in ms (default 30000).
 * @returns {Promise<Array<object>>} At most `maxResults` collected results.
 */
async function collectAdLibraryResults(page, url, maxResults, opts = {}) {
  const captureMs = opts.captureMs ?? 3e4;
  const collected = [];
  const knownIds = /* @__PURE__ */ new Set();

  // Fold one response body into `collected`, skipping ids already seen.
  const absorb = (text) => {
    for (const payload of parseFbGraphqlJson(text)) {
      for (const result of extractCollatedResults(payload)) {
        if (knownIds.has(result.ad_archive_id)) continue;
        knownIds.add(result.ad_archive_id);
        collected.push(result);
      }
    }
  };

  const onResponse = (resp) => {
    if (!resp.url().includes("/api/graphql")) return;
    const postData = resp.request().postData() ?? "";
    const friendlyName = postData.match(/fb_api_req_friendly_name=([^&]+)/)?.[1];
    if (friendlyName !== AD_LIBRARY_QUERY) return;
    // Fire-and-forget: unreadable bodies are ignored on purpose.
    void resp.text().then((text) => {
      absorb(text);
    }).catch(() => void 0);
  };

  page.on("response", onResponse);
  try {
    await page.goto(url, { waitUntil: "domcontentloaded", timeout: 45e3 });
    const deadline = Date.now() + captureMs;
    let previousCount = -1;
    let unchangedRounds = 0;
    while (Date.now() < deadline && collected.length < maxResults) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)).catch(() => void 0);
      await page.waitForTimeout(2e3);
      const count = collected.length;
      unchangedRounds = count === previousCount ? unchangedRounds + 1 : 0;
      if (unchangedRounds >= 2 && count > 0) break;
      previousCount = count;
    }
  } finally {
    page.off("response", onResponse);
  }
  return collected.slice(0, maxResults);
}
10786
/**
 * Group collated ad results by page and rank the pages by ad count.
 *
 * Results lacking a page id or page name are ignored. A page's `adCount` is
 * the larger of its highest positive `collation_count` and the number of
 * results seen for it; name and sample library id come from its first result.
 *
 * @param {Array<object>} results Collated results.
 * @param {number} maxResults Maximum number of advertisers to return.
 * @returns {Array<{pageName: string, pageId: string, sampleLibraryId: string, adCount: number}>}
 */
function advertisersFromResults(results, maxResults) {
  const pages = /* @__PURE__ */ new Map();
  for (const { page_id: pageId, page_name: pageName, ad_archive_id: sampleLibraryId, collation_count: rawCount } of results) {
    if (!pageId || !pageName) continue;
    const collation = typeof rawCount === "number" && rawCount > 0 ? rawCount : 0;
    const entry = pages.get(pageId);
    if (!entry) {
      pages.set(pageId, { pageName, pageId, sampleLibraryId, maxCollation: collation, resultCount: 1 });
      continue;
    }
    entry.resultCount += 1;
    entry.maxCollation = Math.max(entry.maxCollation, collation);
  }
  const advertisers = [];
  for (const entry of pages.values()) {
    advertisers.push({
      pageName: entry.pageName,
      pageId: entry.pageId,
      sampleLibraryId: entry.sampleLibraryId,
      adCount: Math.max(entry.maxCollation, entry.resultCount)
    });
  }
  advertisers.sort((left, right) => right.adCount - left.adCount);
  return advertisers.slice(0, maxResults);
}
10801
+ var AD_LIBRARY_QUERY; // friendly-name value collectAdLibraryResults compares request bodies against; assigned in the initializer below
10802
+ var init_FacebookAdGraphql = __esm({ // bundler module initializer (__esm is defined outside this view; presumably runs the body once, confirm)
10803
+ "src/extractor/FacebookAdGraphql.ts"() {
10804
+ "use strict";
10805
+ AD_LIBRARY_QUERY = "AdLibrarySearchPaginationQuery"; // until this runs, AD_LIBRARY_QUERY is undefined and no response matches
10806
+ }
10807
+ });
10808
+
10809
+ // src/locations.ts
10810
+ var LOCATIONS;
10811
+ var init_locations = __esm({
10812
+ "src/locations.ts"() {
10813
+ "use strict";
10814
+ LOCATIONS = {
10815
+ "austin": "Austin,Texas,United States",
10816
+ "new york": "New York,New York,United States",
10817
+ "new york city": "New York,New York,United States",
10818
+ "nyc": "New York,New York,United States",
10819
+ "los angeles": "Los Angeles,California,United States",
10820
+ "la": "Los Angeles,California,United States",
10821
+ "chicago": "Chicago,Illinois,United States",
10822
+ "houston": "Houston,Texas,United States",
10823
+ "phoenix": "Phoenix,Arizona,United States",
10824
+ "philadelphia": "Philadelphia,Pennsylvania,United States",
10825
+ "philly": "Philadelphia,Pennsylvania,United States",
10826
+ "san antonio": "San Antonio,Texas,United States",
10827
+ "dallas": "Dallas,Texas,United States",
10828
+ "miami": "Miami,Florida,United States",
10829
+ "seattle": "Seattle,Washington,United States",
10830
+ "denver": "Denver,Colorado,United States",
10831
+ "loveland": "Loveland,Colorado,United States",
10832
+ "loveland co": "Loveland,Colorado,United States",
10833
+ "fort collins": "Fort Collins,Colorado,United States",
10834
+ "boulder": "Boulder,Colorado,United States",
10835
+ "colorado springs": "Colorado Springs,Colorado,United States",
10836
+ "boston": "Boston,Massachusetts,United States",
10837
+ "atlanta": "Atlanta,Georgia,United States",
10838
+ "san francisco": "San Francisco,California,United States",
10839
+ "sf": "San Francisco,California,United States",
10840
+ "portland": "Portland,Oregon,United States",
10841
+ "las vegas": "Las Vegas,Nevada,United States",
10842
+ "minneapolis": "Minneapolis,Minnesota,United States",
10843
+ "detroit": "Detroit,Michigan,United States",
10844
+ "nashville": "Nashville,Tennessee,United States",
10845
+ "charlotte": "Charlotte,North Carolina,United States",
10846
+ "orlando": "Orlando,Florida,United States",
10847
+ "san diego": "San Diego,California,United States",
10848
+ "baltimore": "Baltimore,Maryland,United States",
10849
+ "sacramento": "Sacramento,California,United States",
10850
+ "columbus": "Columbus,Ohio,United States",
10851
+ "indianapolis": "Indianapolis,Indiana,United States",
10852
+ "san jose": "San Jose,California,United States",
10853
+ "fort worth": "Fort Worth,Texas,United States",
10854
+ "jacksonville": "Jacksonville,Florida,United States",
10855
+ "memphis": "Memphis,Tennessee,United States",
10856
+ "louisville": "Louisville,Kentucky,United States",
10857
+ "raleigh": "Raleigh,North Carolina,United States",
10858
+ "richmond": "Richmond,Virginia,United States",
10859
+ "salt lake city": "Salt Lake City,Utah,United States",
10860
+ "toronto": "Toronto,Ontario,Canada",
10861
+ "vancouver": "Vancouver,British Columbia,Canada",
10862
+ "montreal": "Montreal,Quebec,Canada",
10863
+ "calgary": "Calgary,Alberta,Canada",
10864
+ "ottawa": "Ottawa,Ontario,Canada",
10865
+ "london": "London,England,United Kingdom",
10866
+ "manchester": "Manchester,England,United Kingdom",
10867
+ "birmingham": "Birmingham,England,United Kingdom",
10868
+ "edinburgh": "Edinburgh,Scotland,United Kingdom",
10869
+ "glasgow": "Glasgow,Scotland,United Kingdom",
10870
+ "leeds": "Leeds,England,United Kingdom",
10871
+ "sydney": "Sydney,New South Wales,Australia",
10872
+ "melbourne": "Melbourne,Victoria,Australia",
10873
+ "brisbane": "Brisbane,Queensland,Australia",
10874
+ "perth": "Perth,Western Australia,Australia",
10875
+ "adelaide": "Adelaide,South Australia,Australia",
10876
+ "dublin": "Dublin,Leinster,Ireland"
10877
+ };
10878
+ }
10879
+ });
10880
+
10881
+ // src/uule.ts
10882
/**
 * Encode a non-negative integer as a base-128 varint, least significant
 * group first, with the high bit set on every byte except the last.
 *
 * The value is handled with 32-bit bitwise operators.
 *
 * @param {number} value Integer to encode.
 * @returns {number[]} Encoded bytes.
 */
function encodeVarint(value) {
  const encoded = [];
  let rest = value;
  for (;;) {
    const lowBits = rest & 127;
    rest >>>= 7;
    if (rest > 0) {
      // More groups follow: flag this byte as a continuation.
      encoded.push(lowBits | 128);
      continue;
    }
    encoded.push(lowBits);
    return encoded;
  }
}
10893
/**
 * Build a `w+`-prefixed, base64-encoded location token from a location name.
 *
 * The encoded payload is the fixed header bytes 8, 2, 16, 32, 34, followed by
 * the varint-encoded UTF-8 byte length of `name`, followed by the UTF-8 bytes.
 * NOTE(review): presumably the value of Google's `uule` parameter; confirm at the call site.
 *
 * @param {string} name Canonical location name.
 * @returns {string} Token of the form `w+<base64>`.
 */
function encodeUule(name) {
  const nameBytes = Buffer.from(name, "utf8");
  const header = Buffer.from([8, 2, 16, 32, 34]);
  const lengthPrefix = Buffer.from(encodeVarint(nameBytes.length));
  const encoded = Buffer.concat([header, lengthPrefix, nameBytes]).toString("base64");
  return `w+${encoded}`;
}
10902
/**
 * Map free-form location input to a canonical "City,Region,Country" string.
 *
 * Lookup order against LOCATIONS (case-insensitive, trimmed):
 *   1. the whole input,
 *   2. the text before the first comma,
 *   3. the input with a trailing two-letter token removed ("austin tx").
 * When nothing matches, the original input is returned unchanged.
 *
 * @param {string} input Location as typed by the caller.
 * @returns {string} Canonical location, or `input` when unknown.
 */
function normalizeLocation(input) {
  // Own-property lookup only: plain indexing would resolve inherited keys such
  // as "constructor" or "__proto__" and hand back a non-string.
  const lookup = (key) => Object.prototype.hasOwnProperty.call(LOCATIONS, key) ? LOCATIONS[key] : void 0;
  const raw = input.toLowerCase().trim();
  const candidates = [raw, raw.split(",")[0].trim(), raw.replace(/\s+[a-z]{2}$/, "").trim()];
  for (let i = 0; i < candidates.length; i++) {
    // Derived candidates only count when they actually differ from the raw input.
    if (i > 0 && candidates[i] === raw) continue;
    const hit = lookup(candidates[i]);
    if (hit) return hit;
  }
  return input;
}
10911
+ var init_uule = __esm({ // bundler module initializer for src/uule.ts (__esm is defined outside this view)
10912
+ "src/uule.ts"() {
10913
+ "use strict";
10914
+ init_locations(); // normalizeLocation reads LOCATIONS, which init_locations assigns
10915
+ }
10916
+ });
10917
+
10918
+ // src/kernel-proxy-resolver.ts
10919
/**
 * Return the last six characters of a proxy id, or null when there is no id.
 *
 * @param {string|undefined|null} proxyId Proxy id.
 * @returns {string|null} Id suffix, or null for a missing or empty id.
 */
function proxyIdSuffix2(proxyId) {
  if (!proxyId) return null;
  return proxyId.slice(-6);
}
10922
/**
 * Assemble the result object returned by the proxy resolver.
 *
 * @param {string} source How the proxy id was obtained.
 * @param {string} proxyMode Proxy mode that was requested.
 * @param {string|undefined} proxyId Selected proxy id, if any.
 * @param {object|null} target Location target that was resolved, if any.
 * @param {string|null} error Accumulated error text, if any.
 * @returns {{kernelProxyId: (string|undefined), resolution: object}} The id plus
 *   a diagnostics record that carries only the id's presence and suffix.
 */
function resolution(source, proxyMode, proxyId, target, error) {
  const details = {
    source,
    proxyMode,
    proxyIdPresent: Boolean(proxyId),
    proxyIdSuffix: proxyIdSuffix2(proxyId),
    target,
    error
  };
  return { kernelProxyId: proxyId, resolution: details };
}
10935
/**
 * Canonicalize a state name for lookup: trimmed, lower-cased, and with every
 * whitespace run collapsed to a single space.
 *
 * @param {string} value Raw state name.
 * @returns {string} Normalized name.
 */
function normalizeStateName(value) {
  const lowered = value.trim().toLowerCase();
  return lowered.replace(/\s+/g, " ");
}
10938
/**
 * Canonicalize a country name for comparison: trimmed, lower-cased, periods
 * removed, and whitespace runs collapsed to a single space.
 *
 * @param {string} value Raw country name.
 * @returns {string} Normalized name.
 */
function normalizeCountryName(value) {
  const lowered = value.trim().toLowerCase();
  const withoutDots = lowered.replace(/\./g, "");
  return withoutDots.replace(/\s+/g, " ");
}
10941
/**
 * Decide whether a country string refers to the United States.
 * A missing or empty country counts as the United States.
 *
 * @param {string|undefined|null} country Country text.
 * @returns {boolean} True for an absent country or a recognized US spelling.
 */
function isUnitedStates(country) {
  if (!country) return true;
  switch (normalizeCountryName(country)) {
    case "united states":
    case "united states of america":
    case "usa":
    case "us":
      return true;
    default:
      return false;
  }
}
10946
/**
 * Resolve a region string to a two-letter state code.
 *
 * Any two-letter alphabetic input is accepted as-is (upper-cased) without
 * checking it against the table; longer input is looked up by normalized
 * state name in US_STATE_CODES.
 *
 * @param {string} region State code or state name.
 * @returns {string|null} Upper-case state code, or null when unknown.
 */
function stateCodeFor(region) {
  const trimmed = region.trim();
  if (/^[A-Za-z]{2}$/.test(trimmed)) return trimmed.toUpperCase();
  const key = normalizeStateName(trimmed);
  // Own-property lookup only: plain indexing resolves inherited keys such as
  // "constructor" and would return a function instead of null.
  return Object.prototype.hasOwnProperty.call(US_STATE_CODES, key) ? US_STATE_CODES[key] ?? null : null;
}
10951
/**
 * Derive candidate city identifiers from a city name.
 *
 * The name is NFKD-normalized, stripped of non-ASCII characters, lower-cased,
 * and split into alphanumeric words. The candidates are the words joined with
 * underscores and, when that differs, the words joined with nothing.
 *
 * @param {string} city City name.
 * @returns {string[]} Zero, one, or two identifiers, underscored form first.
 */
function kernelCityIdentifierCandidates(city) {
  const tokens = city
    .normalize("NFKD")
    .replace(/[^\x00-\x7F]/g, "")
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((token) => token.length > 0);
  if (tokens.length === 0) return [];
  const candidates = [tokens.join("_")];
  // With a single word both spellings coincide, so only one candidate is kept.
  if (tokens.length > 1) candidates.push(tokens.join(""));
  return candidates;
}
10958
/**
 * Build the proxy name for a state-level or city-level residential proxy.
 *
 * @param {string} country Country code (lower-cased in the name).
 * @param {string} state State code (lower-cased in the name).
 * @param {string} [city] City identifier, appended verbatim when non-empty.
 * @returns {string} Proxy name.
 */
function proxyName(country, state, city) {
  const stateLevelName = `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}`;
  if (!city) return stateLevelName;
  return `${stateLevelName}-${city}`;
}
10961
/**
 * Build the proxy name for a ZIP-level residential proxy.
 *
 * @param {string} zip ZIP code.
 * @returns {string} Proxy name.
 */
function zipProxyName(zip) {
  const prefix = "mcp-serp-residential-us-zip";
  return `${prefix}-${zip}`;
}
10964
/**
 * Turn a free-form location into a US proxy target (state- or city-level).
 *
 * The location is canonicalized with normalizeLocation, split on commas, and a
 * trailing United States component is dropped. A single remaining part is
 * treated as a state; otherwise the first two parts are read as city and region.
 *
 * @param {string|undefined|null} location Location text.
 * @param {string|undefined|null} gl Country code of the search; only "us" (any case) is supported.
 * @returns {object|null} Target with canonicalLocation, level, country, state,
 *   city, cityCandidates, proxyName and config; null when the location is
 *   missing, `gl` is not "us", or no state/city can be derived.
 */
function parseKernelLocationProxyTarget(location, gl) {
  // `gl` may be absent; treat that as "not US" instead of throwing on `.toLowerCase()`.
  if (!location || typeof gl !== "string" || gl.toLowerCase() !== "us") return null;
  const canonicalLocation = normalizeLocation(location);
  let parts = canonicalLocation.split(",").map((part) => part.trim()).filter(Boolean);
  if (parts.length > 1 && isUnitedStates(parts[parts.length - 1])) {
    parts = parts.slice(0, -1);
  }
  if (parts.length === 1) {
    const stateOnly = stateCodeFor(parts[0]);
    if (!stateOnly) return null;
    return {
      canonicalLocation,
      level: "state",
      country: "US",
      state: stateOnly,
      city: "",
      cityCandidates: [],
      proxyName: proxyName("US", stateOnly),
      config: {
        country: "US",
        state: stateOnly
      }
    };
  }
  const [city = "", region = ""] = parts;
  if (!city || !region) return null;
  const state = stateCodeFor(region);
  if (!state) return null;
  const cityCandidates = kernelCityIdentifierCandidates(city);
  // The first candidate is the identifier used for naming and config.
  const primaryCity = cityCandidates[0];
  if (!primaryCity) return null;
  return {
    canonicalLocation,
    level: "city",
    country: "US",
    state,
    city: primaryCity,
    cityCandidates,
    proxyName: proxyName("US", state, primaryCity),
    config: {
      country: "US",
      state,
      city: primaryCity
    }
  };
}
11010
/**
 * Build the `city|state` key used to index the city-center ZIP table.
 *
 * @param {{city: string, state: string}} target Proxy target.
 * @returns {string} Lookup key.
 */
function cityZipKey(target) {
  const { city, state } = target;
  return `${city}|${state}`;
}
11013
/**
 * Pick the ZIP code to use for a target.
 *
 * An explicit five-digit ZIP wins; otherwise the city-center table is
 * consulted for the target's city and state.
 *
 * @param {{city: string, state: string}} target Proxy target.
 * @param {string|undefined|null} explicitZip Caller-supplied ZIP, if any.
 * @returns {string|null} ZIP code, or null when none is known.
 */
function knownZipFor(target, explicitZip) {
  const explicitIsUsable = Boolean(explicitZip) && /^\d{5}$/.test(explicitZip);
  if (explicitIsUsable) return explicitZip;
  const tableZip = US_CITY_CENTER_ZIPS[cityZipKey(target)];
  return tableZip ?? null;
}
11017
/**
 * Derive a ZIP-level target from an existing target.
 *
 * @param {object} target Base target (its other fields are carried over).
 * @param {string} zip ZIP code.
 * @returns {object} Copy of `target` with level "zip", the ZIP, a ZIP proxy
 *   name, and a config of country, state and zip.
 */
function zipTarget(target, zip) {
  const overrides = {
    level: "zip",
    zip,
    proxyName: zipProxyName(zip),
    config: {
      country: target.country,
      state: target.state,
      zip
    }
  };
  return { ...target, ...overrides };
}
11030
/**
 * Check whether an existing proxy's config describes the given target.
 *
 * The country must always match (case-insensitively on the config side). For
 * ZIP targets only the ZIP is compared in addition. For other targets the
 * state must match and the city must equal `city`, or be absent when no
 * `city` is given.
 *
 * @param {object|undefined|null} config Proxy config.
 * @param {object} target Target to compare against.
 * @param {string} [city] City identifier to require.
 * @returns {boolean} True when the config matches.
 */
function configMatches(config, target, city) {
  if (config?.country?.toUpperCase() !== target.country) return false;
  if (target.level === "zip") return config?.zip === target.zip;
  if (config?.state?.toUpperCase() !== target.state) return false;
  if (city) return config?.city === city;
  return !config?.city;
}
11036
/**
 * Find a usable residential proxy that matches `target` by name or by config.
 *
 * A proxy is usable when its type is "residential", its status is not
 * "unavailable", and it has an id. The city is only compared for city-level targets.
 *
 * @param {Array<object>} proxies Existing proxies.
 * @param {object} target Target to match.
 * @returns {object|null} First matching proxy, or null.
 */
function findExistingTargetProxy(proxies, target) {
  const requiredCity = target.level === "city" ? target.city : void 0;
  const matches = (proxy) => {
    if (proxy.type !== "residential" || proxy.status === "unavailable" || !proxy.id) return false;
    return proxy.name === target.proxyName || configMatches(proxy.config, target, requiredCity);
  };
  return proxies.find((proxy) => matches(proxy)) ?? null;
}
11039
/**
 * Find a usable residential proxy for any of the target's city candidates.
 *
 * Candidates are tried in order; for each one a proxy matches by its
 * city-level name or by its config.
 *
 * @param {Array<object>} proxies Existing proxies.
 * @param {object} target Target whose `cityCandidates` are searched.
 * @returns {object|null} First matching proxy, or null (also when there are no candidates).
 */
function findExistingProxy(proxies, target) {
  const usable = (proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id);
  for (const candidateCity of target.cityCandidates) {
    const expectedName = proxyName(target.country, target.state, candidateCity);
    const hit = proxies.find(
      (proxy) => usable(proxy) && (proxy.name === expectedName || configMatches(proxy.config, target, candidateCity))
    );
    if (hit) return hit;
  }
  return null;
}
11047
/**
 * Derive a state-level target from an existing target.
 *
 * @param {object} target Base target (its other fields are carried over).
 * @returns {object} Copy of `target` with level "state", the state-level proxy
 *   name, and a config of country and state only.
 */
function stateTarget(target) {
  const overrides = {
    level: "state",
    proxyName: proxyName(target.country, target.state),
    config: {
      country: target.country,
      state: target.state
    }
  };
  return { ...target, ...overrides };
}
11058
/**
 * Find a usable residential proxy for the target's state.
 *
 * A proxy matches by its state-level name, or by a config with the same
 * country and state and no city.
 *
 * @param {Array<object>} proxies Existing proxies.
 * @param {object} target State-level target.
 * @returns {object|null} First matching proxy, or null.
 */
function findExistingStateProxy(proxies, target) {
  const stateProxyName = proxyName(target.country, target.state);
  const matches = (proxy) => {
    if (proxy.type !== "residential" || proxy.status === "unavailable" || !proxy.id) return false;
    return proxy.name === stateProxyName || configMatches(proxy.config, target);
  };
  return proxies.find((proxy) => matches(proxy)) ?? null;
}
11062
/**
 * Choose the target to use on a retry attempt.
 *
 * Every retry currently widens to the state level; `attemptIndex` is accepted
 * but not consulted.
 *
 * @param {object} target Original target.
 * @param {number} attemptIndex Retry attempt number (unused).
 * @returns {object} State-level target.
 */
function escalatedTargetLevel(target, attemptIndex) {
  const widened = stateTarget(target);
  return widened;
}
11065
/**
 * Convert a thrown value to display text.
 *
 * @param {*} err Thrown value.
 * @returns {string} `err.message` for Error instances, `String(err)` otherwise.
 */
function errorText2(err) {
  if (err instanceof Error) {
    return err.message;
  }
  return String(err);
}
11068
/**
 * Resolve which Kernel proxy id a browser session should use.
 *
 * - proxyMode "none": no proxy (source "disabled").
 * - proxyMode "configured": the configured proxy id.
 * - otherwise: derive a US location target and reuse or create a residential
 *   proxy, trying ZIP level (when a ZIP is known), then city level, then state
 *   level. When `attemptIndex` is 1 or more, only an escalated (state-level)
 *   proxy is created. If nothing works, the configured proxy id is returned
 *   with source "configured_fallback".
 *
 * Failures from the Kernel API are collected into the resolution's `error`
 * text rather than thrown; `error` is null whenever nothing failed.
 *
 * @param {object} options
 * @param {string} options.proxyMode "none", "configured", or a location mode.
 * @param {string} [options.configuredKernelProxyId] Fallback proxy id.
 * @param {string} [options.kernelApiKey] Kernel API key; without it the fallback is used.
 * @param {string} [options.location] Free-form location.
 * @param {string} [options.gl] Country code of the search.
 * @param {number} [options.attemptIndex] Retry attempt number (default 0).
 * @param {string} [options.proxyZip] Explicit five-digit ZIP.
 * @returns {Promise<object>} Result built by `resolution(...)`.
 */
async function resolveKernelProxyId(options) {
  const { proxyMode, configuredKernelProxyId } = options;
  if (proxyMode === "none") {
    return resolution("disabled", proxyMode, void 0, null, null);
  }
  if (proxyMode === "configured") {
    return resolution("configured_fallback", proxyMode, configuredKernelProxyId, null, null);
  }
  const target = parseKernelLocationProxyTarget(options.location, options.gl);
  if (!target || !options.kernelApiKey) {
    return resolution("configured_fallback", proxyMode, configuredKernelProxyId, target, target ? null : "location could not be normalized to a US city/state proxy target");
  }
  const kernel = new import_sdk5.default({ apiKey: options.kernelApiKey });
  const createErrors = [];
  // Errors joined for reporting; null (never "") when nothing has failed.
  const errorSummary = () => createErrors.join(" | ") || null;
  // Create one residential proxy; on failure record the reason under `label` and return null.
  const tryCreate = async (label, name, config) => {
    try {
      const created = await kernel.proxies.create({ type: "residential", name, config });
      if (created.id) return created.id;
      createErrors.push(`${label}: Kernel did not return a proxy id`);
    } catch (err) {
      createErrors.push(`${label}: ${errorText2(err)}`);
    }
    return null;
  };
  try {
    const attemptIndex = options.attemptIndex ?? 0;
    if (attemptIndex >= 1) {
      // Retry: skip reuse and the finer levels, go straight to the escalated target.
      const escalatedTarget = escalatedTargetLevel(target, attemptIndex);
      const escalatedId = await tryCreate(escalatedTarget.state, escalatedTarget.proxyName, escalatedTarget.config);
      if (escalatedId) {
        return resolution("location_created", proxyMode, escalatedId, escalatedTarget, null);
      }
      return resolution("configured_fallback", proxyMode, configuredKernelProxyId, escalatedTarget, errorSummary());
    }
    const proxies = await kernel.proxies.list();

    // 1. ZIP level, when a ZIP is known for this target.
    const zip = knownZipFor(target, options.proxyZip);
    if (zip) {
      const targetZip = zipTarget(target, zip);
      const existingZip = findExistingTargetProxy(proxies, targetZip);
      if (existingZip?.id) {
        return resolution("location_reused", proxyMode, existingZip.id, targetZip, null);
      }
      // The create request carries only country and zip (no state).
      const zipId = await tryCreate(zip, targetZip.proxyName, { country: targetZip.country, zip });
      if (zipId) {
        return resolution("location_created", proxyMode, zipId, targetZip, null);
      }
    }

    // 2. City level: reuse first, then try to create each candidate in order.
    const existing = findExistingProxy(proxies, target);
    if (existing?.id) {
      return resolution("location_reused", proxyMode, existing.id, target, errorSummary());
    }
    for (const city of target.cityCandidates) {
      const name = proxyName(target.country, target.state, city);
      const cityId = await tryCreate(city, name, { country: target.country, state: target.state, city });
      if (cityId) {
        return resolution("location_created", proxyMode, cityId, {
          ...target,
          level: "city",
          city,
          proxyName: name,
          config: {
            country: target.country,
            state: target.state,
            city
          }
        }, null);
      }
    }

    // 3. State level.
    const fallbackTarget = stateTarget(target);
    const existingState = findExistingStateProxy(proxies, fallbackTarget);
    if (existingState?.id) {
      return resolution("location_reused", proxyMode, existingState.id, fallbackTarget, errorSummary());
    }
    const stateId = await tryCreate(fallbackTarget.state, fallbackTarget.proxyName, fallbackTarget.config);
    if (stateId) {
      return resolution("location_created", proxyMode, stateId, fallbackTarget, errorSummary());
    }
    return resolution("configured_fallback", proxyMode, configuredKernelProxyId, target, errorSummary());
  } catch (err) {
    // Anything unexpected (e.g. listing proxies failed): use the configured proxy.
    return resolution("configured_fallback", proxyMode, configuredKernelProxyId, target, errorText2(err));
  }
}
11182
+ var import_sdk5, US_STATE_CODES, US_CITY_CENTER_ZIPS;
11183
+ var init_kernel_proxy_resolver = __esm({
11184
+ "src/kernel-proxy-resolver.ts"() {
11185
+ "use strict";
11186
+ import_sdk5 = __toESM(require("@onkernel/sdk"), 1);
11187
+ init_uule();
11188
+ US_STATE_CODES = {
11189
+ alabama: "AL",
11190
+ alaska: "AK",
11191
+ arizona: "AZ",
11192
+ arkansas: "AR",
11193
+ california: "CA",
11194
+ colorado: "CO",
11195
+ connecticut: "CT",
11196
+ delaware: "DE",
11197
+ florida: "FL",
11198
+ georgia: "GA",
11199
+ hawaii: "HI",
11200
+ idaho: "ID",
11201
+ illinois: "IL",
11202
+ indiana: "IN",
11203
+ iowa: "IA",
11204
+ kansas: "KS",
11205
+ kentucky: "KY",
11206
+ louisiana: "LA",
11207
+ maine: "ME",
11208
+ maryland: "MD",
11209
+ massachusetts: "MA",
11210
+ michigan: "MI",
11211
+ minnesota: "MN",
11212
+ mississippi: "MS",
11213
+ missouri: "MO",
11214
+ montana: "MT",
11215
+ nebraska: "NE",
11216
+ nevada: "NV",
11217
+ "new hampshire": "NH",
11218
+ "new jersey": "NJ",
11219
+ "new mexico": "NM",
11220
+ "new york": "NY",
11221
+ "north carolina": "NC",
11222
+ "north dakota": "ND",
11223
+ ohio: "OH",
11224
+ oklahoma: "OK",
11225
+ oregon: "OR",
11226
+ pennsylvania: "PA",
11227
+ "rhode island": "RI",
11228
+ "south carolina": "SC",
11229
+ "south dakota": "SD",
11230
+ tennessee: "TN",
11231
+ texas: "TX",
11232
+ utah: "UT",
11233
+ vermont: "VT",
11234
+ virginia: "VA",
11235
+ washington: "WA",
11236
+ "west virginia": "WV",
11237
+ wisconsin: "WI",
11238
+ wyoming: "WY"
11239
+ };
11240
+ US_CITY_CENTER_ZIPS = {
11241
+ "atlanta|GA": "30303",
11242
+ "austin|TX": "78701",
11243
+ "baltimore|MD": "21201",
11244
+ "boston|MA": "02108",
11245
+ "boulder|CO": "80302",
11246
+ "charlotte|NC": "28202",
11247
+ "chicago|IL": "60601",
11248
+ "colorado_springs|CO": "80903",
11249
+ "columbus|OH": "43215",
11250
+ "dallas|TX": "75201",
11251
+ "denver|CO": "80202",
11252
+ "detroit|MI": "48226",
11253
+ "fort_collins|CO": "80524",
11254
+ "fort_worth|TX": "76102",
11255
+ "houston|TX": "77002",
11256
+ "indianapolis|IN": "46204",
11257
+ "jacksonville|FL": "32202",
11258
+ "las_vegas|NV": "89101",
11259
+ "los_angeles|CA": "90012",
11260
+ "louisville|KY": "40202",
11261
+ "loveland|CO": "80537",
11262
+ "memphis|TN": "38103",
11263
+ "miami|FL": "33131",
11264
+ "minneapolis|MN": "55401",
11265
+ "nashville|TN": "37203",
11266
+ "new_york|NY": "10001",
11267
+ "orlando|FL": "32801",
11268
+ "philadelphia|PA": "19103",
11269
+ "phoenix|AZ": "85004",
11270
+ "portland|OR": "97205",
11271
+ "raleigh|NC": "27601",
11272
+ "richmond|VA": "23219",
11273
+ "sacramento|CA": "95814",
11274
+ "salt_lake_city|UT": "84101",
11275
+ "san_antonio|TX": "78205",
11276
+ "san_diego|CA": "92101",
11277
+ "san_francisco|CA": "94103",
11278
+ "san_jose|CA": "95113",
11279
+ "seattle|WA": "98101"
11280
+ };
11281
+ }
11282
+ });
11283
+
10703
11284
  // src/api/facebook-ad-routes.ts
10704
11285
  function invalidRequest(message) {
10705
11286
  return { error_code: "invalid_request", message };
@@ -10717,6 +11298,22 @@ function buildPageIntelUrl(body, country) {
10717
11298
  function kernelLaunchOpts() {
10718
11299
  return { headless: true, kernelApiKey: process.env.KERNEL_API_KEY?.trim(), kernelProxyId: process.env.KERNEL_PROXY_ID?.trim(), viewport: { width: 1280, height: 900 }, locale: "en-US" };
10719
11300
  }
11301
/**
 * Build browser launch options that prefer a location-resolved residential proxy.
 *
 * The proxy id comes from resolveKernelProxyId in "location" mode; if that
 * yields no id or throws, the KERNEL_PROXY_ID environment value is used.
 *
 * @param {object} [overrides]
 * @param {string} [overrides.location="New York, NY"] Location to resolve a proxy for.
 * @param {string} [overrides.gl="us"] Country code passed to the resolver.
 * @returns {Promise<{headless: boolean, kernelApiKey: (string|undefined), kernelProxyId: (string|undefined), viewport: {width: number, height: number}, locale: string}>}
 */
async function kernelLaunchOptsResidential({ location = "New York, NY", gl = "us" } = {}) {
  const kernelApiKey = process.env.KERNEL_API_KEY?.trim();
  const configuredKernelProxyId = process.env.KERNEL_PROXY_ID?.trim();
  let kernelProxyId = configuredKernelProxyId;
  try {
    const resolved = await resolveKernelProxyId({
      kernelApiKey,
      proxyMode: "location",
      configuredKernelProxyId,
      location,
      gl
    });
    if (resolved.kernelProxyId) kernelProxyId = resolved.kernelProxyId;
  } catch {
    // Best effort: proxy resolution must never block the launch; keep the configured id.
  }
  return { headless: true, kernelApiKey, kernelProxyId, viewport: { width: 1280, height: 900 }, locale: "en-US" };
}
10720
11317
  var import_hono4, import_zod15, import_client3, FacebookAdBodySchema, FacebookPageIntelBodySchema, FacebookTranscribeBodySchema, FacebookSearchBodySchema, FacebookMediaBodySchema, facebookAdApp, ALLOWED_MEDIA_HOSTS;
10721
11318
  var init_facebook_ad_routes = __esm({
10722
11319
  "src/api/facebook-ad-routes.ts"() {
@@ -10727,6 +11324,8 @@ var init_facebook_ad_routes = __esm({
10727
11324
  init_rates();
10728
11325
  init_BrowserDriver();
10729
11326
  init_FacebookAdExtractor();
11327
+ init_FacebookAdGraphql();
11328
+ init_kernel_proxy_resolver();
10730
11329
  import_client3 = require("@fal-ai/client");
10731
11330
  init_api_auth();
10732
11331
  init_url_utils();
@@ -10812,7 +11411,7 @@ var init_facebook_ad_routes = __esm({
10812
11411
  const driver = new BrowserDriver();
10813
11412
  let refunded = false;
10814
11413
  try {
10815
- await driver.launch(kernelLaunchOpts());
11414
+ await driver.launch(await kernelLaunchOptsResidential());
10816
11415
  await driver.navigateTo(listingUrl);
10817
11416
  const extractor = new FacebookAdExtractor(driver);
10818
11417
  const result = await extractor.extractPageIntel(listingUrl, maxAds);
@@ -10896,18 +11495,15 @@ var init_facebook_ad_routes = __esm({
10896
11495
  const driver = new BrowserDriver();
10897
11496
  let searchRefunded = false;
10898
11497
  try {
10899
- await driver.launch(kernelLaunchOpts());
11498
+ await driver.launch(await kernelLaunchOptsResidential());
10900
11499
  const page = driver.getPage();
10901
- await driver.navigateTo(searchUrl);
10902
- try {
10903
- await page.waitForFunction(
10904
- () => {
10905
- const bt = document.body ? document.body.innerText ?? "" : "";
10906
- return bt.includes("Library ID") || bt.includes("No results");
10907
- },
10908
- { timeout: 2e4, polling: 500 }
10909
- );
10910
- } catch {
11500
+ const collated = await collectAdLibraryResults(page, searchUrl, Math.max(maxResults * 4, 40));
11501
+ const gqlAdvertisers = advertisersFromResults(collated, maxResults);
11502
+ if (gqlAdvertisers.length > 0) {
11503
+ const results2 = gqlAdvertisers.map((a) => ({ name: a.pageName, pageName: a.pageName, pageId: a.pageId, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
11504
+ const searchResult2 = { query: body.query.trim(), searchUrl, results: results2, via: "graphql" };
11505
+ await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results2.length, result: searchResult2 });
11506
+ return c.json(searchResult2);
10911
11507
  }
10912
11508
  await page.waitForTimeout(1500);
10913
11509
  for (let scroll = 0; scroll < 3; scroll++) {
@@ -10953,7 +11549,7 @@ var init_facebook_ad_routes = __esm({
10953
11549
  advertiserMap.set(pageName, { pageName, sampleLibraryId: libraryId, adCount: 1 });
10954
11550
  }
10955
11551
  }
10956
- const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults);
11552
+ const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults).map((a) => ({ name: a.pageName, pageName: a.pageName, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
10957
11553
  const searchResult = { query: body.query.trim(), searchUrl, results };
10958
11554
  await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results.length, result: searchResult });
10959
11555
  return c.json(searchResult);
@@ -11944,136 +12540,27 @@ async function capturePageSnapshots(targets, options = {}) {
11944
12540
  const capturedCount = pageSnapshotArtifacts.filter((artifact) => artifact.status === "captured").length;
11945
12541
  return {
11946
12542
  pageSnapshotArtifacts,
11947
- attempts,
11948
- diagnostics: {
11949
- requestedCount: targets.length,
11950
- capturedCount,
11951
- failedCount: targets.length - capturedCount,
11952
- maxConcurrency,
11953
- timeoutMs
11954
- }
11955
- };
11956
- }
11957
- var import_node_crypto2, import_p_limit3, DEFAULT_TIMEOUT_MS, DEFAULT_MAX_CONCURRENCY, DEFAULT_MAX_CONTENT_CHARS;
11958
- var init_page_snapshot_extractor = __esm({
11959
- "src/serp-intelligence/page-snapshot-extractor.ts"() {
11960
- "use strict";
11961
- import_node_crypto2 = require("crypto");
11962
- import_p_limit3 = __toESM(require("p-limit"), 1);
11963
- init_kpo_extractor();
11964
- init_url_utils();
11965
- DEFAULT_TIMEOUT_MS = 15e3;
11966
- DEFAULT_MAX_CONCURRENCY = 2;
11967
- DEFAULT_MAX_CONTENT_CHARS = 25e4;
11968
- }
11969
- });
11970
-
11971
- // src/locations.ts
11972
- var LOCATIONS;
11973
- var init_locations = __esm({
11974
- "src/locations.ts"() {
11975
- "use strict";
11976
- LOCATIONS = {
11977
- "austin": "Austin,Texas,United States",
11978
- "new york": "New York,New York,United States",
11979
- "new york city": "New York,New York,United States",
11980
- "nyc": "New York,New York,United States",
11981
- "los angeles": "Los Angeles,California,United States",
11982
- "la": "Los Angeles,California,United States",
11983
- "chicago": "Chicago,Illinois,United States",
11984
- "houston": "Houston,Texas,United States",
11985
- "phoenix": "Phoenix,Arizona,United States",
11986
- "philadelphia": "Philadelphia,Pennsylvania,United States",
11987
- "philly": "Philadelphia,Pennsylvania,United States",
11988
- "san antonio": "San Antonio,Texas,United States",
11989
- "dallas": "Dallas,Texas,United States",
11990
- "miami": "Miami,Florida,United States",
11991
- "seattle": "Seattle,Washington,United States",
11992
- "denver": "Denver,Colorado,United States",
11993
- "loveland": "Loveland,Colorado,United States",
11994
- "loveland co": "Loveland,Colorado,United States",
11995
- "fort collins": "Fort Collins,Colorado,United States",
11996
- "boulder": "Boulder,Colorado,United States",
11997
- "colorado springs": "Colorado Springs,Colorado,United States",
11998
- "boston": "Boston,Massachusetts,United States",
11999
- "atlanta": "Atlanta,Georgia,United States",
12000
- "san francisco": "San Francisco,California,United States",
12001
- "sf": "San Francisco,California,United States",
12002
- "portland": "Portland,Oregon,United States",
12003
- "las vegas": "Las Vegas,Nevada,United States",
12004
- "minneapolis": "Minneapolis,Minnesota,United States",
12005
- "detroit": "Detroit,Michigan,United States",
12006
- "nashville": "Nashville,Tennessee,United States",
12007
- "charlotte": "Charlotte,North Carolina,United States",
12008
- "orlando": "Orlando,Florida,United States",
12009
- "san diego": "San Diego,California,United States",
12010
- "baltimore": "Baltimore,Maryland,United States",
12011
- "sacramento": "Sacramento,California,United States",
12012
- "columbus": "Columbus,Ohio,United States",
12013
- "indianapolis": "Indianapolis,Indiana,United States",
12014
- "san jose": "San Jose,California,United States",
12015
- "fort worth": "Fort Worth,Texas,United States",
12016
- "jacksonville": "Jacksonville,Florida,United States",
12017
- "memphis": "Memphis,Tennessee,United States",
12018
- "louisville": "Louisville,Kentucky,United States",
12019
- "raleigh": "Raleigh,North Carolina,United States",
12020
- "richmond": "Richmond,Virginia,United States",
12021
- "salt lake city": "Salt Lake City,Utah,United States",
12022
- "toronto": "Toronto,Ontario,Canada",
12023
- "vancouver": "Vancouver,British Columbia,Canada",
12024
- "montreal": "Montreal,Quebec,Canada",
12025
- "calgary": "Calgary,Alberta,Canada",
12026
- "ottawa": "Ottawa,Ontario,Canada",
12027
- "london": "London,England,United Kingdom",
12028
- "manchester": "Manchester,England,United Kingdom",
12029
- "birmingham": "Birmingham,England,United Kingdom",
12030
- "edinburgh": "Edinburgh,Scotland,United Kingdom",
12031
- "glasgow": "Glasgow,Scotland,United Kingdom",
12032
- "leeds": "Leeds,England,United Kingdom",
12033
- "sydney": "Sydney,New South Wales,Australia",
12034
- "melbourne": "Melbourne,Victoria,Australia",
12035
- "brisbane": "Brisbane,Queensland,Australia",
12036
- "perth": "Perth,Western Australia,Australia",
12037
- "adelaide": "Adelaide,South Australia,Australia",
12038
- "dublin": "Dublin,Leinster,Ireland"
12039
- };
12040
- }
12041
- });
12042
-
12043
- // src/uule.ts
12044
- function encodeVarint(value) {
12045
- const bytes = [];
12046
- let remaining = value;
12047
- do {
12048
- let byte = remaining & 127;
12049
- remaining >>>= 7;
12050
- if (remaining > 0) byte |= 128;
12051
- bytes.push(byte);
12052
- } while (remaining > 0);
12053
- return bytes;
12054
- }
12055
- function encodeUule(name) {
12056
- const locationBytes = Buffer.from(name, "utf8");
12057
- const payload = Buffer.concat([
12058
- Buffer.from([8, 2, 16, 32, 34]),
12059
- Buffer.from(encodeVarint(locationBytes.length)),
12060
- locationBytes
12061
- ]);
12062
- return `w+${payload.toString("base64")}`;
12063
- }
12064
- function normalizeLocation(input) {
12065
- const raw = input.toLowerCase().trim();
12066
- if (LOCATIONS[raw]) return LOCATIONS[raw];
12067
- const beforeComma = raw.split(",")[0].trim();
12068
- if (beforeComma !== raw && LOCATIONS[beforeComma]) return LOCATIONS[beforeComma];
12069
- const withoutState = raw.replace(/\s+[a-z]{2}$/, "").trim();
12070
- if (withoutState !== raw && LOCATIONS[withoutState]) return LOCATIONS[withoutState];
12071
- return input;
12543
+ attempts,
12544
+ diagnostics: {
12545
+ requestedCount: targets.length,
12546
+ capturedCount,
12547
+ failedCount: targets.length - capturedCount,
12548
+ maxConcurrency,
12549
+ timeoutMs
12550
+ }
12551
+ };
12072
12552
  }
12073
- var init_uule = __esm({
12074
- "src/uule.ts"() {
12553
+ var import_node_crypto2, import_p_limit3, DEFAULT_TIMEOUT_MS, DEFAULT_MAX_CONCURRENCY, DEFAULT_MAX_CONTENT_CHARS;
12554
+ var init_page_snapshot_extractor = __esm({
12555
+ "src/serp-intelligence/page-snapshot-extractor.ts"() {
12075
12556
  "use strict";
12076
- init_locations();
12557
+ import_node_crypto2 = require("crypto");
12558
+ import_p_limit3 = __toESM(require("p-limit"), 1);
12559
+ init_kpo_extractor();
12560
+ init_url_utils();
12561
+ DEFAULT_TIMEOUT_MS = 15e3;
12562
+ DEFAULT_MAX_CONCURRENCY = 2;
12563
+ DEFAULT_MAX_CONTENT_CHARS = 25e4;
12077
12564
  }
12078
12565
  });
12079
12566
 
@@ -13347,442 +13834,76 @@ var init_OutputSerializer = __esm({
13347
13834
  }
13348
13835
  async writeAIOverviewCSV(citations, text, seed, outputDir) {
13349
13836
  await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
13350
- const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13351
- const rows = citations.map((c, i) => ({
13352
- seed_query: seed,
13353
- response_text: i === 0 ? text ?? "" : "",
13354
- citation_text: c.text,
13355
- citation_href: c.href
13356
- }));
13357
- const csv = import_papaparse2.default.unparse(rows, { header: true });
13358
- const filename = `${slug}-ai-overview-${Date.now()}.csv`;
13359
- const fullPath = import_node_path5.default.join(outputDir, filename);
13360
- await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13361
- return fullPath;
13362
- }
13363
- async writeAIModeCSV(citations, text, seed, outputDir) {
13364
- await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
13365
- const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13366
- const rows = citations.map((c, i) => ({
13367
- seed_query: seed,
13368
- response_text: i === 0 ? text ?? "" : "",
13369
- citation_text: c.text,
13370
- citation_href: c.href
13371
- }));
13372
- const csv = import_papaparse2.default.unparse(rows, { header: true });
13373
- const filename = `${slug}-ai-mode-${Date.now()}.csv`;
13374
- const fullPath = import_node_path5.default.join(outputDir, filename);
13375
- await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13376
- return fullPath;
13377
- }
13378
- async writeWhatPeopleSayingCSV(cards, seed, outputDir) {
13379
- await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
13380
- const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13381
- const rows = cards.map((c) => ({ seed_query: seed, ...c }));
13382
- const csv = import_papaparse2.default.unparse(rows, { header: true });
13383
- const filename = `${slug}-what-people-saying-${Date.now()}.csv`;
13384
- const fullPath = import_node_path5.default.join(outputDir, filename);
13385
- await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13386
- return fullPath;
13387
- }
13388
- };
13389
- }
13390
- });
13391
-
13392
- // src/output/ProgressReporter.ts
13393
- var ProgressReporter;
13394
- var init_ProgressReporter = __esm({
13395
- "src/output/ProgressReporter.ts"() {
13396
- "use strict";
13397
- ProgressReporter = class {
13398
- onQuestion(node) {
13399
- process.stdout.write(JSON.stringify({ event: "question", depth: node.depth, question: node.question }) + "\n");
13400
- }
13401
- onDepth(depth) {
13402
- process.stdout.write(JSON.stringify({ event: "depth", depth }) + "\n");
13403
- }
13404
- onVideos(videos) {
13405
- for (const v of videos) {
13406
- process.stdout.write(JSON.stringify({ event: "video", type: v.type, platform: v.platform, duration: v.duration, title: v.title, channel: v.channel, url: v.url }) + "\n");
13407
- }
13408
- }
13409
- onForums(forums) {
13410
- for (const f of forums) {
13411
- process.stdout.write(JSON.stringify({ event: "forum", title: f.title, source: f.source, url: f.url }) + "\n");
13412
- }
13413
- }
13414
- onComplete(stats) {
13415
- process.stdout.write(JSON.stringify({ event: "complete", ...stats }) + "\n");
13416
- }
13417
- onError(err) {
13418
- process.stderr.write(JSON.stringify({ event: "error", type: err.constructor.name, message: err.message }) + "\n");
13419
- }
13420
- };
13421
- }
13422
- });
13423
-
13424
- // src/kernel-proxy-resolver.ts
13425
- function proxyIdSuffix2(proxyId) {
13426
- return proxyId ? proxyId.slice(-6) : null;
13427
- }
13428
- function resolution(source, proxyMode, proxyId, target, error) {
13429
- return {
13430
- kernelProxyId: proxyId,
13431
- resolution: {
13432
- source,
13433
- proxyMode,
13434
- proxyIdPresent: Boolean(proxyId),
13435
- proxyIdSuffix: proxyIdSuffix2(proxyId),
13436
- target,
13437
- error
13438
- }
13439
- };
13440
- }
13441
- function normalizeStateName(value) {
13442
- return value.trim().toLowerCase().replace(/\s+/g, " ");
13443
- }
13444
- function normalizeCountryName(value) {
13445
- return value.trim().toLowerCase().replace(/\./g, "").replace(/\s+/g, " ");
13446
- }
13447
- function isUnitedStates(country) {
13448
- if (!country) return true;
13449
- const normalized = normalizeCountryName(country);
13450
- return normalized === "united states" || normalized === "united states of america" || normalized === "usa" || normalized === "us";
13451
- }
13452
- function stateCodeFor(region) {
13453
- const trimmed = region.trim();
13454
- if (/^[A-Za-z]{2}$/.test(trimmed)) return trimmed.toUpperCase();
13455
- return US_STATE_CODES[normalizeStateName(trimmed)] ?? null;
13456
- }
13457
- function kernelCityIdentifierCandidates(city) {
13458
- const ascii = city.normalize("NFKD").replace(/[^\x00-\x7F]/g, "").toLowerCase();
13459
- const words = ascii.split(/[^a-z0-9]+/).filter(Boolean);
13460
- const underscored = words.join("_");
13461
- const compact = words.join("");
13462
- return Array.from(new Set([underscored, compact].filter(Boolean)));
13463
- }
13464
- function proxyName(country, state, city) {
13465
- return city ? `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}-${city}` : `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}`;
13466
- }
13467
- function zipProxyName(zip) {
13468
- return `mcp-serp-residential-us-zip-${zip}`;
13469
- }
13470
- function parseKernelLocationProxyTarget(location, gl) {
13471
- if (!location || gl.toLowerCase() !== "us") return null;
13472
- const canonicalLocation = normalizeLocation(location);
13473
- let parts = canonicalLocation.split(",").map((part) => part.trim()).filter(Boolean);
13474
- if (parts.length > 1 && isUnitedStates(parts[parts.length - 1])) {
13475
- parts = parts.slice(0, -1);
13476
- }
13477
- if (parts.length === 1) {
13478
- const stateOnly = stateCodeFor(parts[0]);
13479
- if (!stateOnly) return null;
13480
- return {
13481
- canonicalLocation,
13482
- level: "state",
13483
- country: "US",
13484
- state: stateOnly,
13485
- city: "",
13486
- cityCandidates: [],
13487
- proxyName: proxyName("US", stateOnly),
13488
- config: {
13489
- country: "US",
13490
- state: stateOnly
13491
- }
13492
- };
13493
- }
13494
- const [city = "", region = ""] = parts;
13495
- if (!city || !region) return null;
13496
- const state = stateCodeFor(region);
13497
- if (!state) return null;
13498
- const cityCandidates = kernelCityIdentifierCandidates(city);
13499
- const primaryCity = cityCandidates[0];
13500
- if (!primaryCity) return null;
13501
- return {
13502
- canonicalLocation,
13503
- level: "city",
13504
- country: "US",
13505
- state,
13506
- city: primaryCity,
13507
- cityCandidates,
13508
- proxyName: proxyName("US", state, primaryCity),
13509
- config: {
13510
- country: "US",
13511
- state,
13512
- city: primaryCity
13513
- }
13514
- };
13515
- }
13516
- function cityZipKey(target) {
13517
- return `${target.city}|${target.state}`;
13518
- }
13519
- function knownZipFor(target, explicitZip) {
13520
- if (explicitZip && /^\d{5}$/.test(explicitZip)) return explicitZip;
13521
- return US_CITY_CENTER_ZIPS[cityZipKey(target)] ?? null;
13522
- }
13523
- function zipTarget(target, zip) {
13524
- return {
13525
- ...target,
13526
- level: "zip",
13527
- zip,
13528
- proxyName: zipProxyName(zip),
13529
- config: {
13530
- country: target.country,
13531
- state: target.state,
13532
- zip
13533
- }
13534
- };
13535
- }
13536
- function configMatches(config, target, city) {
13537
- if (target.level === "zip") {
13538
- return config?.country?.toUpperCase() === target.country && config?.zip === target.zip;
13539
- }
13540
- return config?.country?.toUpperCase() === target.country && config?.state?.toUpperCase() === target.state && (city ? config?.city === city : !config?.city);
13541
- }
13542
- function findExistingTargetProxy(proxies, target) {
13543
- return proxies.find((proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id) && (proxy.name === target.proxyName || configMatches(proxy.config, target, target.level === "city" ? target.city : void 0))) ?? null;
13544
- }
13545
- function findExistingProxy(proxies, target) {
13546
- for (const city of target.cityCandidates) {
13547
- const name = proxyName(target.country, target.state, city);
13548
- const found = proxies.find((proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id) && (proxy.name === name || configMatches(proxy.config, target, city)));
13549
- if (found) return found;
13550
- }
13551
- return null;
13552
- }
13553
- function stateTarget(target) {
13554
- return {
13555
- ...target,
13556
- level: "state",
13557
- proxyName: proxyName(target.country, target.state),
13558
- config: {
13559
- country: target.country,
13560
- state: target.state
13561
- }
13562
- };
13563
- }
13564
- function findExistingStateProxy(proxies, target) {
13565
- const name = proxyName(target.country, target.state);
13566
- return proxies.find((proxy) => proxy.type === "residential" && proxy.status !== "unavailable" && Boolean(proxy.id) && (proxy.name === name || configMatches(proxy.config, target))) ?? null;
13567
- }
13568
- function escalatedTargetLevel(target, attemptIndex) {
13569
- return stateTarget(target);
13570
- }
13571
- function errorText2(err) {
13572
- return err instanceof Error ? err.message : String(err);
13573
- }
13574
- async function resolveKernelProxyId(options) {
13575
- if (options.proxyMode === "none") {
13576
- return resolution("disabled", options.proxyMode, void 0, null, null);
13577
- }
13578
- if (options.proxyMode === "configured") {
13579
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, null, null);
13580
- }
13581
- const target = parseKernelLocationProxyTarget(options.location, options.gl);
13582
- if (!target || !options.kernelApiKey) {
13583
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, target ? null : "location could not be normalized to a US city/state proxy target");
13837
+ const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13838
+ const rows = citations.map((c, i) => ({
13839
+ seed_query: seed,
13840
+ response_text: i === 0 ? text ?? "" : "",
13841
+ citation_text: c.text,
13842
+ citation_href: c.href
13843
+ }));
13844
+ const csv = import_papaparse2.default.unparse(rows, { header: true });
13845
+ const filename = `${slug}-ai-overview-${Date.now()}.csv`;
13846
+ const fullPath = import_node_path5.default.join(outputDir, filename);
13847
+ await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13848
+ return fullPath;
13849
+ }
13850
+ async writeAIModeCSV(citations, text, seed, outputDir) {
13851
+ await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
13852
+ const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13853
+ const rows = citations.map((c, i) => ({
13854
+ seed_query: seed,
13855
+ response_text: i === 0 ? text ?? "" : "",
13856
+ citation_text: c.text,
13857
+ citation_href: c.href
13858
+ }));
13859
+ const csv = import_papaparse2.default.unparse(rows, { header: true });
13860
+ const filename = `${slug}-ai-mode-${Date.now()}.csv`;
13861
+ const fullPath = import_node_path5.default.join(outputDir, filename);
13862
+ await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13863
+ return fullPath;
13864
+ }
13865
+ async writeWhatPeopleSayingCSV(cards, seed, outputDir) {
13866
+ await import_node_fs3.promises.mkdir(outputDir, { recursive: true });
13867
+ const slug = seed.toLowerCase().replace(/\W+/g, "-").slice(0, 40);
13868
+ const rows = cards.map((c) => ({ seed_query: seed, ...c }));
13869
+ const csv = import_papaparse2.default.unparse(rows, { header: true });
13870
+ const filename = `${slug}-what-people-saying-${Date.now()}.csv`;
13871
+ const fullPath = import_node_path5.default.join(outputDir, filename);
13872
+ await import_node_fs3.promises.writeFile(fullPath, csv, "utf8");
13873
+ return fullPath;
13874
+ }
13875
+ };
13584
13876
  }
13585
- const kernel = new import_sdk5.default({ apiKey: options.kernelApiKey });
13586
- try {
13587
- const attemptIndex = options.attemptIndex ?? 0;
13588
- if (attemptIndex >= 1) {
13589
- const escalatedTarget = escalatedTargetLevel(target, attemptIndex);
13590
- const createErrors2 = [];
13591
- try {
13592
- const created = await kernel.proxies.create({
13593
- type: "residential",
13594
- name: escalatedTarget.proxyName,
13595
- config: escalatedTarget.config
13596
- });
13597
- if (created.id) {
13598
- return resolution("location_created", options.proxyMode, created.id, escalatedTarget, null);
13599
- }
13600
- createErrors2.push(`${escalatedTarget.state}: Kernel did not return a proxy id`);
13601
- } catch (err) {
13602
- createErrors2.push(`${escalatedTarget.state}: ${errorText2(err)}`);
13877
+ });
13878
+
13879
+ // src/output/ProgressReporter.ts
13880
+ var ProgressReporter;
13881
+ var init_ProgressReporter = __esm({
13882
+ "src/output/ProgressReporter.ts"() {
13883
+ "use strict";
13884
+ ProgressReporter = class {
13885
+ onQuestion(node) {
13886
+ process.stdout.write(JSON.stringify({ event: "question", depth: node.depth, question: node.question }) + "\n");
13603
13887
  }
13604
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, escalatedTarget, createErrors2.join(" | "));
13605
- }
13606
- const proxies = await kernel.proxies.list();
13607
- const zip = knownZipFor(target, options.proxyZip);
13608
- const createErrors = [];
13609
- if (zip) {
13610
- const targetZip = zipTarget(target, zip);
13611
- const existingZip = findExistingTargetProxy(proxies, targetZip);
13612
- if (existingZip?.id) {
13613
- return resolution("location_reused", options.proxyMode, existingZip.id, targetZip, null);
13888
+ onDepth(depth) {
13889
+ process.stdout.write(JSON.stringify({ event: "depth", depth }) + "\n");
13614
13890
  }
13615
- try {
13616
- const created = await kernel.proxies.create({
13617
- type: "residential",
13618
- name: targetZip.proxyName,
13619
- config: {
13620
- country: targetZip.country,
13621
- zip
13622
- }
13623
- });
13624
- if (created.id) {
13625
- return resolution("location_created", options.proxyMode, created.id, targetZip, null);
13891
+ onVideos(videos) {
13892
+ for (const v of videos) {
13893
+ process.stdout.write(JSON.stringify({ event: "video", type: v.type, platform: v.platform, duration: v.duration, title: v.title, channel: v.channel, url: v.url }) + "\n");
13626
13894
  }
13627
- createErrors.push(`${zip}: Kernel did not return a proxy id`);
13628
- } catch (err) {
13629
- createErrors.push(`${zip}: ${errorText2(err)}`);
13630
13895
  }
13631
- }
13632
- const existing = findExistingProxy(proxies, target);
13633
- if (existing?.id) {
13634
- return resolution("location_reused", options.proxyMode, existing.id, target, createErrors.join(" | ") || null);
13635
- }
13636
- for (const city of target.cityCandidates) {
13637
- try {
13638
- const created = await kernel.proxies.create({
13639
- type: "residential",
13640
- name: proxyName(target.country, target.state, city),
13641
- config: {
13642
- country: target.country,
13643
- state: target.state,
13644
- city
13645
- }
13646
- });
13647
- if (created.id) {
13648
- return resolution("location_created", options.proxyMode, created.id, {
13649
- ...target,
13650
- level: "city",
13651
- city,
13652
- proxyName: proxyName(target.country, target.state, city),
13653
- config: {
13654
- country: target.country,
13655
- state: target.state,
13656
- city
13657
- }
13658
- }, null);
13896
+ onForums(forums) {
13897
+ for (const f of forums) {
13898
+ process.stdout.write(JSON.stringify({ event: "forum", title: f.title, source: f.source, url: f.url }) + "\n");
13659
13899
  }
13660
- createErrors.push(`${city}: Kernel did not return a proxy id`);
13661
- } catch (err) {
13662
- createErrors.push(`${city}: ${errorText2(err)}`);
13663
13900
  }
13664
- }
13665
- const fallbackTarget = stateTarget(target);
13666
- const existingState = findExistingStateProxy(proxies, fallbackTarget);
13667
- if (existingState?.id) {
13668
- return resolution("location_reused", options.proxyMode, existingState.id, fallbackTarget, createErrors.join(" | "));
13669
- }
13670
- try {
13671
- const created = await kernel.proxies.create({
13672
- type: "residential",
13673
- name: fallbackTarget.proxyName,
13674
- config: fallbackTarget.config
13675
- });
13676
- if (created.id) {
13677
- return resolution("location_created", options.proxyMode, created.id, fallbackTarget, createErrors.join(" | "));
13901
+ onComplete(stats) {
13902
+ process.stdout.write(JSON.stringify({ event: "complete", ...stats }) + "\n");
13903
+ }
13904
+ onError(err) {
13905
+ process.stderr.write(JSON.stringify({ event: "error", type: err.constructor.name, message: err.message }) + "\n");
13678
13906
  }
13679
- createErrors.push(`${fallbackTarget.state}: Kernel did not return a proxy id`);
13680
- } catch (err) {
13681
- createErrors.push(`${fallbackTarget.state}: ${errorText2(err)}`);
13682
- }
13683
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, createErrors.join(" | "));
13684
- } catch (err) {
13685
- return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, errorText2(err));
13686
- }
13687
- }
13688
- var import_sdk5, US_STATE_CODES, US_CITY_CENTER_ZIPS;
13689
- var init_kernel_proxy_resolver = __esm({
13690
- "src/kernel-proxy-resolver.ts"() {
13691
- "use strict";
13692
- import_sdk5 = __toESM(require("@onkernel/sdk"), 1);
13693
- init_uule();
13694
- US_STATE_CODES = {
13695
- alabama: "AL",
13696
- alaska: "AK",
13697
- arizona: "AZ",
13698
- arkansas: "AR",
13699
- california: "CA",
13700
- colorado: "CO",
13701
- connecticut: "CT",
13702
- delaware: "DE",
13703
- florida: "FL",
13704
- georgia: "GA",
13705
- hawaii: "HI",
13706
- idaho: "ID",
13707
- illinois: "IL",
13708
- indiana: "IN",
13709
- iowa: "IA",
13710
- kansas: "KS",
13711
- kentucky: "KY",
13712
- louisiana: "LA",
13713
- maine: "ME",
13714
- maryland: "MD",
13715
- massachusetts: "MA",
13716
- michigan: "MI",
13717
- minnesota: "MN",
13718
- mississippi: "MS",
13719
- missouri: "MO",
13720
- montana: "MT",
13721
- nebraska: "NE",
13722
- nevada: "NV",
13723
- "new hampshire": "NH",
13724
- "new jersey": "NJ",
13725
- "new mexico": "NM",
13726
- "new york": "NY",
13727
- "north carolina": "NC",
13728
- "north dakota": "ND",
13729
- ohio: "OH",
13730
- oklahoma: "OK",
13731
- oregon: "OR",
13732
- pennsylvania: "PA",
13733
- "rhode island": "RI",
13734
- "south carolina": "SC",
13735
- "south dakota": "SD",
13736
- tennessee: "TN",
13737
- texas: "TX",
13738
- utah: "UT",
13739
- vermont: "VT",
13740
- virginia: "VA",
13741
- washington: "WA",
13742
- "west virginia": "WV",
13743
- wisconsin: "WI",
13744
- wyoming: "WY"
13745
- };
13746
- US_CITY_CENTER_ZIPS = {
13747
- "atlanta|GA": "30303",
13748
- "austin|TX": "78701",
13749
- "baltimore|MD": "21201",
13750
- "boston|MA": "02108",
13751
- "boulder|CO": "80302",
13752
- "charlotte|NC": "28202",
13753
- "chicago|IL": "60601",
13754
- "colorado_springs|CO": "80903",
13755
- "columbus|OH": "43215",
13756
- "dallas|TX": "75201",
13757
- "denver|CO": "80202",
13758
- "detroit|MI": "48226",
13759
- "fort_collins|CO": "80524",
13760
- "fort_worth|TX": "76102",
13761
- "houston|TX": "77002",
13762
- "indianapolis|IN": "46204",
13763
- "jacksonville|FL": "32202",
13764
- "las_vegas|NV": "89101",
13765
- "los_angeles|CA": "90012",
13766
- "louisville|KY": "40202",
13767
- "loveland|CO": "80537",
13768
- "memphis|TN": "38103",
13769
- "miami|FL": "33131",
13770
- "minneapolis|MN": "55401",
13771
- "nashville|TN": "37203",
13772
- "new_york|NY": "10001",
13773
- "orlando|FL": "32801",
13774
- "philadelphia|PA": "19103",
13775
- "phoenix|AZ": "85004",
13776
- "portland|OR": "97205",
13777
- "raleigh|NC": "27601",
13778
- "richmond|VA": "23219",
13779
- "sacramento|CA": "95814",
13780
- "salt_lake_city|UT": "84101",
13781
- "san_antonio|TX": "78205",
13782
- "san_diego|CA": "92101",
13783
- "san_francisco|CA": "94103",
13784
- "san_jose|CA": "95113",
13785
- "seattle|WA": "98101"
13786
13907
  };
13787
13908
  }
13788
13909
  });
@@ -15377,7 +15498,7 @@ function formatFacebookAdSearch(raw, input) {
15377
15498
  const d = parsed.data;
15378
15499
  const advertisers = d.results ?? d.advertisers ?? [];
15379
15500
  const rows = advertisers.map(
15380
- (a, i) => `| ${i + 1} | ${cell(a.name)} | ${a.adCount ?? "\u2014"} | \`${a.libraryId ?? "\u2014"}\` |`
15501
+ (a, i) => `| ${i + 1} | ${cell(a.pageName ?? a.name)} | ${a.adCount ?? "\u2014"} | \`${a.sampleLibraryId ?? a.libraryId ?? "\u2014"}\` |`
15381
15502
  ).join("\n");
15382
15503
  const full = [
15383
15504
  `# Facebook Ad Library Search: "${input.query}"`,