mcp-scraper 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@ import {
3
3
  CaptureSerpSnapshotInputSchema,
4
4
  HttpMcpToolExecutor,
5
5
  buildPaaExtractorMcpServer
6
- } from "./chunk-JQKZWEON.js";
6
+ } from "./chunk-DZY3XO3M.js";
7
7
  import {
8
8
  BALANCE_PACK_LABELS,
9
9
  BALANCE_PRICE_IDS,
@@ -19,7 +19,7 @@ import {
19
19
  harvestProblemResponse,
20
20
  insufficientBalanceResponse,
21
21
  serializeHarvestProblem
22
- } from "./chunk-Y74EXABN.js";
22
+ } from "./chunk-7HB7NDOY.js";
23
23
  import {
24
24
  BrowserDriver,
25
25
  CaptchaError,
@@ -30,8 +30,9 @@ import {
30
30
  RawMapsOverviewSchema,
31
31
  RawMapsReviewStatsSchema,
32
32
  buildYouTubeChannelVideosUrl,
33
- harvest
34
- } from "./chunk-HERFK7W6.js";
33
+ harvest,
34
+ resolveKernelProxyId
35
+ } from "./chunk-W4P2U5VF.js";
35
36
  import {
36
37
  SiteAuditJobRowSchema,
37
38
  cancelJob,
@@ -8767,6 +8768,106 @@ var FacebookAdExtractor = class {
8767
8768
  }
8768
8769
  };
8769
8770
 
8771
+ // src/extractor/FacebookAdGraphql.ts
8772
// Value of the `fb_api_req_friendly_name` form field used to recognise Ad Library search responses.
var AD_LIBRARY_QUERY = "AdLibrarySearchPaginationQuery";

/**
 * Parses the text body of a Facebook GraphQL response into JSON payloads.
 *
 * The body may start with a `for (;;);` guard, and may be either a single JSON
 * document or several JSON documents separated by newlines.
 *
 * @param {string} text - Raw response body. `null`/`undefined` are treated as empty.
 * @returns {unknown[]} Every payload that parsed successfully; `[]` when nothing parsed.
 */
function parseFbGraphqlJson(text) {
  // Trim BEFORE stripping the guard: the pattern is anchored with `^`, so leading
  // whitespace would otherwise leave the guard in place and make every parse fail.
  const body = String(text ?? "").trim().replace(/^for\s*\(;;\);/, "").trim();
  if (!body) return [];

  try {
    return [JSON.parse(body)];
  } catch {
    // Not a single JSON document; fall through to line-delimited parsing.
  }

  const payloads = [];
  for (const line of body.split("\n")) {
    const candidate = line.trim();
    if (!candidate) continue;
    try {
      payloads.push(JSON.parse(candidate));
    } catch {
      // Best effort: lines that are not valid JSON are skipped.
    }
  }
  return payloads;
}
8792
/**
 * Flattens one parsed GraphQL payload into normalised ad records.
 *
 * Reads `data.ad_library_main.search_results_connection.edges[].node.collated_results[]`
 * and keeps every entry that carries an `ad_archive_id`.
 *
 * @param {unknown} payload - One parsed GraphQL payload.
 * @returns {Array<{ad_archive_id: string, page_id: string, page_name: string,
 *   is_active: boolean, collation_count: number|null, snapshot: unknown}>}
 *   Normalised records in payload order; `[]` when the expected path is absent.
 */
function extractCollatedResults(payload) {
  const edges = payload?.data?.ad_library_main?.search_results_connection?.edges;
  // A missing or non-array connection yields no results instead of throwing in for…of.
  if (!Array.isArray(edges)) return [];

  const results = [];
  for (const edge of edges) {
    const collated = edge?.node?.collated_results;
    if (!Array.isArray(collated)) continue;

    for (const entry of collated) {
      // `entry` may be null or a primitive; optional access skips it rather than
      // throwing and discarding the rest of the payload.
      const id = entry?.ad_archive_id;
      if (id === undefined || id === null) continue;

      const snapshot = entry.snapshot ?? null;
      results.push({
        ad_archive_id: String(id),
        page_id: entry.page_id != null ? String(entry.page_id) : "",
        // Fall back to the snapshot's page name when the entry has none.
        page_name: entry.page_name ?? snapshot?.page_name ?? "",
        is_active: Boolean(entry.is_active),
        collation_count: typeof entry.collation_count === "number" ? entry.collation_count : null,
        snapshot
      });
    }
  }
  return results;
}
8815
/**
 * Navigates `page` to `url`, scrolls it, and collects Ad Library results from
 * the GraphQL responses the page receives while doing so.
 *
 * Only responses whose URL contains `/api/graphql` and whose request body names
 * the `AD_LIBRARY_QUERY` operation are read. Results are de-duplicated by
 * `ad_archive_id`. Scrolling stops when `maxResults` are collected, when the
 * capture window elapses, or when the count has stayed unchanged for
 * `stableRoundsToStop` consecutive rounds with at least one result collected.
 *
 * @param {object} page - Browser page exposing `on`, `off`, `goto`, `evaluate`
 *   and `waitForTimeout` (presumably a Playwright-style Page; confirm against caller).
 * @param {string} url - Ad Library URL to open.
 * @param {number} maxResults - Maximum number of results to return.
 * @param {object} [opts]
 * @param {number} [opts.captureMs=30000] - Length of the scroll/capture window.
 * @param {number} [opts.navigationTimeoutMs=45000] - Timeout passed to `page.goto`.
 * @param {number} [opts.scrollIntervalMs=2000] - Wait after each scroll.
 * @param {number} [opts.stableRoundsToStop=2] - Unchanged rounds needed to stop early.
 * @returns {Promise<object[]>} At most `maxResults` results, in arrival order.
 * @throws Whatever `page.goto` or `page.waitForTimeout` rejects with.
 */
async function collectAdLibraryResults(page, url, maxResults, opts = {}) {
  const {
    captureMs = 3e4,
    navigationTimeoutMs = 45e3,
    scrollIntervalMs = 2e3,
    stableRoundsToStop = 2
  } = opts ?? {};

  const collected = [];
  const seenIds = new Set();

  const onResponse = (resp) => {
    if (!resp.url().includes("/api/graphql")) return;
    const requestBody = resp.request().postData() ?? "";
    const friendlyName = requestBody.match(/fb_api_req_friendly_name=([^&]+)/)?.[1];
    if (friendlyName !== AD_LIBRARY_QUERY) return;

    // Fire-and-forget: bodies are read in the background while scrolling continues.
    // A body that cannot be read or parsed is deliberately ignored (best effort).
    void resp.text().then((text) => {
      for (const payload of parseFbGraphqlJson(text)) {
        for (const result of extractCollatedResults(payload)) {
          if (seenIds.has(result.ad_archive_id)) continue;
          seenIds.add(result.ad_archive_id);
          collected.push(result);
        }
      }
    }).catch(() => void 0);
  };

  // Subscribe before navigating so the first search response is not missed.
  page.on("response", onResponse);
  try {
    await page.goto(url, { waitUntil: "domcontentloaded", timeout: navigationTimeoutMs });

    const deadline = Date.now() + captureMs;
    let lastCount = -1;
    let stableRounds = 0;
    while (Date.now() < deadline && collected.length < maxResults) {
      // Scroll to the bottom; scroll failures are ignored and the loop keeps polling.
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)).catch(() => void 0);
      await page.waitForTimeout(scrollIntervalMs);

      if (collected.length === lastCount) {
        stableRounds++;
        // Never stop early with zero results: keep trying until the deadline.
        if (stableRounds >= stableRoundsToStop && collected.length > 0) break;
      } else {
        stableRounds = 0;
      }
      lastCount = collected.length;
    }
  } finally {
    page.off("response", onResponse);
  }
  return collected.slice(0, maxResults);
}
8855
/**
 * Groups ad results by advertiser page and ranks the advertisers by ad count.
 *
 * Results without a `page_id` or `page_name` are ignored. For each page the ad
 * count is the larger of (a) the highest positive `collation_count` seen and
 * (b) the number of results seen for that page. The first result seen for a
 * page supplies `pageName` and `sampleLibraryId`.
 *
 * @param {Array<object>|null|undefined} results - Normalised ad records.
 * @param {number} [maxResults] - Maximum advertisers to return; omitted means all,
 *   negative values are treated as 0.
 * @returns {Array<{pageName: string, pageId: string, sampleLibraryId: string, adCount: number}>}
 *   Advertisers sorted by `adCount`, highest first.
 */
function advertisersFromResults(results, maxResults) {
  const byPage = new Map();

  for (const record of results ?? []) {
    if (!record?.page_id || !record.page_name) continue;

    const count = record.collation_count;
    const collation = typeof count === "number" && count > 0 ? count : 0;

    const entry = byPage.get(record.page_id);
    if (entry) {
      entry.resultCount++;
      entry.maxCollation = Math.max(entry.maxCollation, collation);
      continue;
    }
    byPage.set(record.page_id, {
      pageName: record.page_name,
      pageId: record.page_id,
      sampleLibraryId: record.ad_archive_id,
      maxCollation: collation,
      resultCount: 1
    });
  }

  // Clamp so a negative limit returns nothing rather than slice()'s
  // "everything except the last n" behaviour.
  const limit = maxResults === undefined ? undefined : Math.max(0, maxResults);

  return [...byPage.values()]
    .map(({ pageName, pageId, sampleLibraryId, maxCollation, resultCount }) => ({
      pageName,
      pageId,
      sampleLibraryId,
      adCount: Math.max(maxCollation, resultCount)
    }))
    .sort((a, b) => b.adCount - a.adCount)
    .slice(0, limit);
}
8870
+
8770
8871
  // src/api/facebook-ad-routes.ts
8771
8872
  import { fal as fal2 } from "@fal-ai/client";
8772
8873
  var FacebookAdBodySchema = z13.object({
@@ -8811,6 +8912,22 @@ function buildPageIntelUrl(body, country) {
8811
8912
  function kernelLaunchOpts() {
8812
8913
  return { headless: true, kernelApiKey: process.env.KERNEL_API_KEY?.trim(), kernelProxyId: process.env.KERNEL_PROXY_ID?.trim(), viewport: { width: 1280, height: 900 }, locale: "en-US" };
8813
8914
  }
8915
/**
 * Builds browser launch options whose proxy is resolved by location.
 *
 * Starts from `kernelLaunchOpts()` and asks `resolveKernelProxyId` (in
 * "location" proxy mode) for a proxy id. If the resolver returns one it
 * replaces the configured KERNEL_PROXY_ID; if the resolver returns none or
 * throws, the configured value is kept, so this function does not reject on
 * resolver failure.
 *
 * @param {object} [target]
 * @param {string} [target.location="New York, NY"] - Location passed to the resolver.
 * @param {string} [target.gl="us"] - Country code passed to the resolver.
 * @returns {Promise<object>} Launch options in the same shape as `kernelLaunchOpts()`.
 */
async function kernelLaunchOptsResidential({ location = "New York, NY", gl = "us" } = {}) {
  // Reuse the shared builder so both launch profiles cannot drift apart.
  const baseOpts = kernelLaunchOpts();
  let proxyId = baseOpts.kernelProxyId;

  try {
    const resolution = await resolveKernelProxyId({
      kernelApiKey: baseOpts.kernelApiKey,
      proxyMode: "location",
      configuredKernelProxyId: baseOpts.kernelProxyId,
      location,
      gl
    });
    if (resolution?.kernelProxyId) proxyId = resolution.kernelProxyId;
  } catch (err) {
    // Best effort: keep the configured proxy, but leave a trace of the failure.
    console.warn(
      "[facebook-ad] Kernel proxy resolution failed; falling back to KERNEL_PROXY_ID:",
      err instanceof Error ? err.message : String(err)
    );
  }

  return { ...baseOpts, kernelProxyId: proxyId };
}
8814
8931
  var facebookAdApp = new Hono4();
8815
8932
  facebookAdApp.post("/ad", createApiKeyAuth(), async (c) => {
8816
8933
  const raw = await c.req.json().catch(() => ({}));
@@ -8867,7 +8984,7 @@ facebookAdApp.post("/page-intel", createApiKeyAuth(), async (c) => {
8867
8984
  const driver = new BrowserDriver();
8868
8985
  let refunded = false;
8869
8986
  try {
8870
- await driver.launch(kernelLaunchOpts());
8987
+ await driver.launch(await kernelLaunchOptsResidential());
8871
8988
  await driver.navigateTo(listingUrl);
8872
8989
  const extractor = new FacebookAdExtractor(driver);
8873
8990
  const result = await extractor.extractPageIntel(listingUrl, maxAds);
@@ -8951,18 +9068,15 @@ facebookAdApp.post("/search", createApiKeyAuth(), async (c) => {
8951
9068
  const driver = new BrowserDriver();
8952
9069
  let searchRefunded = false;
8953
9070
  try {
8954
- await driver.launch(kernelLaunchOpts());
9071
+ await driver.launch(await kernelLaunchOptsResidential());
8955
9072
  const page = driver.getPage();
8956
- await driver.navigateTo(searchUrl);
8957
- try {
8958
- await page.waitForFunction(
8959
- () => {
8960
- const bt = document.body ? document.body.innerText ?? "" : "";
8961
- return bt.includes("Library ID") || bt.includes("No results");
8962
- },
8963
- { timeout: 2e4, polling: 500 }
8964
- );
8965
- } catch {
9073
+ const collated = await collectAdLibraryResults(page, searchUrl, Math.max(maxResults * 4, 40));
9074
+ const gqlAdvertisers = advertisersFromResults(collated, maxResults);
9075
+ if (gqlAdvertisers.length > 0) {
9076
+ const results2 = gqlAdvertisers.map((a) => ({ name: a.pageName, pageName: a.pageName, pageId: a.pageId, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
9077
+ const searchResult2 = { query: body.query.trim(), searchUrl, results: results2, via: "graphql" };
9078
+ await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results2.length, result: searchResult2 });
9079
+ return c.json(searchResult2);
8966
9080
  }
8967
9081
  await page.waitForTimeout(1500);
8968
9082
  for (let scroll = 0; scroll < 3; scroll++) {
@@ -9008,7 +9122,7 @@ facebookAdApp.post("/search", createApiKeyAuth(), async (c) => {
9008
9122
  advertiserMap.set(pageName, { pageName, sampleLibraryId: libraryId, adCount: 1 });
9009
9123
  }
9010
9124
  }
9011
- const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults);
9125
+ const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults).map((a) => ({ name: a.pageName, pageName: a.pageName, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
9012
9126
  const searchResult = { query: body.query.trim(), searchUrl, results };
9013
9127
  await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results.length, result: searchResult });
9014
9128
  return c.json(searchResult);
@@ -10887,19 +11001,15 @@ app.post("/auth/register", requireAllowedOrigin, async (c) => {
10887
11001
  const normalizedEmail = email?.trim().toLowerCase();
10888
11002
  if (!normalizedEmail || !password) return c.json({ error: "Email and password required" }, 400);
10889
11003
  if (password.length < 8) return c.json({ error: "Password must be at least 8 characters" }, 400);
10890
- const limited = await enforceRateLimit(c, "auth_register", rateLimitKey(c), 5, 60 * 60);
10891
- if (limited) return limited;
10892
11004
  try {
10893
11005
  const existing = await getUserByEmail(normalizedEmail);
10894
11006
  if (existing) return c.json({ error: "Email already registered" }, 409);
10895
- let stripeCustomerId;
11007
+ let stripeCustomerId = null;
10896
11008
  try {
10897
11009
  stripeCustomerId = await createSignupStripeCustomer(normalizedEmail);
10898
- } catch {
10899
- return c.json({ error: "Stripe customer setup failed" }, 503);
10900
- }
10901
- if (!stripeCustomerId && (process.env.NODE_ENV === "production" || process.env.VERCEL === "1")) {
10902
- return c.json({ error: "Stripe customer setup failed" }, 503);
11010
+ } catch (err) {
11011
+ console.warn("[auth/register] Stripe customer creation failed; continuing without it (created lazily at checkout):", err instanceof Error ? err.message : String(err));
11012
+ stripeCustomerId = null;
10903
11013
  }
10904
11014
  const user = await createUser(normalizedEmail, void 0, password, stripeCustomerId ?? void 0);
10905
11015
  if (stripeCustomerId) {
@@ -10960,14 +11070,18 @@ app.post("/auth/forgot-password", requireAllowedOrigin, async (c) => {
10960
11070
  if (process.env.RESEND_API_KEY) {
10961
11071
  try {
10962
11072
  const resend = new Resend(process.env.RESEND_API_KEY);
10963
- await resend.emails.send({
11073
+ const sent = await resend.emails.send({
10964
11074
  from: "MCP Scraper <noreply@updates.mcpscraper.dev>",
10965
11075
  to: normalizedEmail,
10966
11076
  subject: "Reset your MCP Scraper password",
10967
11077
  html: `<p>Hi,</p><p>Click the link below to reset your password. This link expires in 1 hour.</p><p><a href="${resetUrl}">${resetUrl}</a></p><p>If you didn't request this, you can ignore this email.</p>`
10968
11078
  });
10969
- } catch {
11079
+ if (sent.error) console.error("[auth/forgot-password] Resend rejected the email:", JSON.stringify(sent.error));
11080
+ } catch (err) {
11081
+ console.error("[auth/forgot-password] Resend send threw:", err instanceof Error ? err.message : String(err));
10970
11082
  }
11083
+ } else {
11084
+ console.warn("[auth/forgot-password] RESEND_API_KEY not set \u2014 no reset email sent for", normalizedEmail);
10971
11085
  }
10972
11086
  return c.json({ ok: true });
10973
11087
  });
@@ -11500,7 +11614,7 @@ app.get("/cron/tick", async (c) => {
11500
11614
  if (!process.env.CRON_SECRET || secret2 !== `Bearer ${process.env.CRON_SECRET}`) {
11501
11615
  return c.json({ error: "Unauthorized" }, 401);
11502
11616
  }
11503
- const { drainQueue } = await import("./worker-D4D2YQTA.js");
11617
+ const { drainQueue } = await import("./worker-UT4ZQU2T.js");
11504
11618
  const budget = { maxJobs: 10, deadlineMs: Date.now() + 28e4 };
11505
11619
  const [results, sweepResult] = await Promise.all([
11506
11620
  drainQueue(budget),
@@ -11622,4 +11736,4 @@ app.get("/blog/:slug/", (c) => {
11622
11736
  export {
11623
11737
  app
11624
11738
  };
11625
- //# sourceMappingURL=server-W5NWH5KF.js.map
11739
+ //# sourceMappingURL=server-KUF3QJC7.js.map