mcp-scraper 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/api-server.cjs +701 -580
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/mcp-stdio-server.cjs +1 -1
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +1 -1
- package/dist/bin/paa-harvest.js +1 -1
- package/dist/{chunk-Y74EXABN.js → chunk-7HB7NDOY.js} +2 -2
- package/dist/{chunk-JQKZWEON.js → chunk-DZY3XO3M.js} +2 -2
- package/dist/{chunk-JQKZWEON.js.map → chunk-DZY3XO3M.js.map} +1 -1
- package/dist/{chunk-HERFK7W6.js → chunk-W4P2U5VF.js} +2 -1
- package/dist/index.js +1 -1
- package/dist/{server-W5NWH5KF.js → server-KUF3QJC7.js} +143 -29
- package/dist/server-KUF3QJC7.js.map +1 -0
- package/dist/{worker-D4D2YQTA.js → worker-UT4ZQU2T.js} +3 -3
- package/package.json +16 -16
- package/dist/server-W5NWH5KF.js.map +0 -1
- /package/dist/{chunk-Y74EXABN.js.map → chunk-7HB7NDOY.js.map} +0 -0
- /package/dist/{chunk-HERFK7W6.js.map → chunk-W4P2U5VF.js.map} +0 -0
- /package/dist/{worker-D4D2YQTA.js.map → worker-UT4ZQU2T.js.map} +0 -0
|
@@ -3,7 +3,7 @@ import {
|
|
|
3
3
|
CaptureSerpSnapshotInputSchema,
|
|
4
4
|
HttpMcpToolExecutor,
|
|
5
5
|
buildPaaExtractorMcpServer
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-DZY3XO3M.js";
|
|
7
7
|
import {
|
|
8
8
|
BALANCE_PACK_LABELS,
|
|
9
9
|
BALANCE_PRICE_IDS,
|
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
harvestProblemResponse,
|
|
20
20
|
insufficientBalanceResponse,
|
|
21
21
|
serializeHarvestProblem
|
|
22
|
-
} from "./chunk-
|
|
22
|
+
} from "./chunk-7HB7NDOY.js";
|
|
23
23
|
import {
|
|
24
24
|
BrowserDriver,
|
|
25
25
|
CaptchaError,
|
|
@@ -30,8 +30,9 @@ import {
|
|
|
30
30
|
RawMapsOverviewSchema,
|
|
31
31
|
RawMapsReviewStatsSchema,
|
|
32
32
|
buildYouTubeChannelVideosUrl,
|
|
33
|
-
harvest
|
|
34
|
-
|
|
33
|
+
harvest,
|
|
34
|
+
resolveKernelProxyId
|
|
35
|
+
} from "./chunk-W4P2U5VF.js";
|
|
35
36
|
import {
|
|
36
37
|
SiteAuditJobRowSchema,
|
|
37
38
|
cancelJob,
|
|
@@ -8767,6 +8768,106 @@ var FacebookAdExtractor = class {
|
|
|
8767
8768
|
}
|
|
8768
8769
|
};
|
|
8769
8770
|
|
|
8771
|
+
// src/extractor/FacebookAdGraphql.ts
|
|
8772
|
+
// Value that collectAdLibraryResults compares against the `fb_api_req_friendly_name`
// field of a GraphQL request's POST body to decide whether to capture the response.
var AD_LIBRARY_QUERY = "AdLibrarySearchPaginationQuery";
|
|
8773
|
+
/**
 * Parse the text of a GraphQL response body into a list of JSON payloads.
 *
 * An optional leading `for (;;);` guard is removed first. The remainder is
 * parsed as one JSON document; when that fails it is treated as
 * newline-delimited JSON and every line that parses on its own is kept, while
 * blank or unparseable lines are skipped.
 *
 * @param {string} text - Raw response body.
 * @returns {Array<unknown>} Parsed payloads in document order; empty when
 *   nothing could be parsed.
 */
function parseFbGraphqlJson(text) {
  // Trim BEFORE stripping the guard: the regex is anchored at the start, so
  // leading whitespace would otherwise leave `for (;;);` in place and make
  // every subsequent JSON.parse attempt fail.
  const body = text.trimStart().replace(/^for\s*\(;;\);/, "").trim();
  try {
    return [JSON.parse(body)];
  } catch {
    // Not a single JSON document; fall back to line-by-line parsing below.
  }
  const out = [];
  for (const line of body.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    try {
      out.push(JSON.parse(trimmed));
    } catch {
      // Best effort: a line that is not standalone JSON is skipped.
    }
  }
  return out;
}
|
|
8792
|
+
/**
 * Flatten one parsed GraphQL payload into a list of normalized ad records.
 *
 * Reads `data.ad_library_main.search_results_connection.edges[*].node.collated_results[*]`
 * and emits one record per entry that carries an `ad_archive_id`.
 *
 * @param {unknown} payload - A parsed JSON payload; any shape is tolerated.
 * @returns {Array<{ad_archive_id: string, page_id: string, page_name: *, is_active: boolean, collation_count: (number|null), snapshot: *}>}
 *   Normalized records in payload order; empty when the expected path is
 *   missing or malformed.
 */
function extractCollatedResults(payload) {
  const edges = payload?.data?.ad_library_main?.search_results_connection?.edges;
  // A non-array here would make `for...of` throw; the caller swallows that
  // error, which would silently discard the whole payload.
  if (!Array.isArray(edges)) return [];
  const results = [];
  for (const edge of edges) {
    const collated = edge?.node?.collated_results;
    if (!Array.isArray(collated)) continue;
    for (const r of collated) {
      // `r?.` guards against null entries so one bad item cannot abort the
      // extraction of every other ad in the payload.
      const id = r?.ad_archive_id;
      if (id == null) continue;
      const snapshot = r.snapshot ?? null;
      results.push({
        ad_archive_id: String(id),
        page_id: r.page_id != null ? String(r.page_id) : "",
        // Prefer the top-level page name, then the snapshot's.
        page_name: r.page_name ?? snapshot?.page_name ?? "",
        is_active: Boolean(r.is_active),
        collation_count: typeof r.collation_count === "number" ? r.collation_count : null,
        snapshot
      });
    }
  }
  return results;
}
|
|
8815
|
+
/**
 * Navigate `page` to `url`, keep scrolling to the bottom, and gather ad
 * records from every captured GraphQL response whose request body names the
 * AD_LIBRARY_QUERY operation.
 *
 * Records are de-duplicated by `ad_archive_id` in first-seen order. Scrolling
 * stops when the capture budget elapses, when `maxResults` records have been
 * gathered, or when at least one record exists and the count did not grow
 * over two consecutive rounds.
 *
 * @param {object} page - Browser page exposing `on`, `off`, `goto`,
 *   `evaluate` and `waitForTimeout` (presumably a Playwright Page; confirm
 *   against the caller).
 * @param {string} url - Address to load.
 * @param {number} maxResults - Upper bound on returned records.
 * @param {{captureMs?: number}} [opts] - `captureMs` is the scroll/capture
 *   budget in milliseconds, counted from the end of navigation (default 30000).
 * @returns {Promise<Array<object>>} At most `maxResults` unique records.
 */
async function collectAdLibraryResults(page, url, maxResults, opts = {}) {
  const budgetMs = opts.captureMs ?? 3e4;
  // Insertion-ordered map doubles as the de-dup index and the result list.
  const adsById = /* @__PURE__ */ new Map();

  const ingest = (text) => {
    for (const payload of parseFbGraphqlJson(text)) {
      for (const ad of extractCollatedResults(payload)) {
        if (!adsById.has(ad.ad_archive_id)) {
          adsById.set(ad.ad_archive_id, ad);
        }
      }
    }
  };

  const onResponse = (resp) => {
    if (!resp.url().includes("/api/graphql")) return;
    const postBody = resp.request().postData() ?? "";
    const nameMatch = postBody.match(/fb_api_req_friendly_name=([^&]+)/);
    if (nameMatch?.[1] !== AD_LIBRARY_QUERY) return;
    // Fire-and-forget: a body that cannot be read or parsed is simply ignored.
    void resp.text().then(ingest).catch(() => void 0);
  };

  page.on("response", onResponse);
  try {
    await page.goto(url, { waitUntil: "domcontentloaded", timeout: 45e3 });
    const stopAt = Date.now() + budgetMs;
    let previousSize = -1;
    let idleRounds = 0;
    while (Date.now() < stopAt && adsById.size < maxResults) {
      // Scroll to the bottom; a failed scroll is ignored.
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)).catch(() => void 0);
      await page.waitForTimeout(2e3);
      const currentSize = adsById.size;
      const unchanged = currentSize === previousSize;
      previousSize = currentSize;
      idleRounds = unchanged ? idleRounds + 1 : 0;
      if (unchanged && idleRounds >= 2 && currentSize > 0) break;
    }
  } finally {
    // Always detach the listener, even when navigation throws.
    page.off("response", onResponse);
  }
  return [...adsById.values()].slice(0, maxResults);
}
|
|
8855
|
+
/**
 * Group ad records by advertiser page and rank the pages by ad volume.
 *
 * Records without a `page_id` or `page_name` are ignored. For each page the
 * reported `adCount` is the larger of (a) the highest positive numeric
 * `collation_count` seen and (b) how many records belonged to the page. The
 * first record seen for a page supplies `pageName` and `sampleLibraryId`.
 *
 * @param {Array<object>} results - Records with `page_id`, `page_name`,
 *   `ad_archive_id` and `collation_count` fields.
 * @param {number} maxResults - Maximum number of advertisers to return.
 * @returns {Array<{pageName: *, pageId: *, sampleLibraryId: *, adCount: number}>}
 *   Advertisers sorted by `adCount`, highest first.
 */
function advertisersFromResults(results, maxResults) {
  const tally = /* @__PURE__ */ new Map();
  for (const record of results) {
    const pageId = record.page_id;
    const pageName = record.page_name;
    if (!pageId || !pageName) continue;

    const rawCount = record.collation_count;
    const collation = typeof rawCount === "number" && rawCount > 0 ? rawCount : 0;

    const entry = tally.get(pageId);
    if (!entry) {
      tally.set(pageId, {
        pageName,
        pageId,
        sampleLibraryId: record.ad_archive_id,
        maxCollation: collation,
        resultCount: 1
      });
      continue;
    }
    entry.resultCount += 1;
    if (collation > entry.maxCollation) entry.maxCollation = collation;
  }

  const advertisers = [];
  for (const entry of tally.values()) {
    advertisers.push({
      pageName: entry.pageName,
      pageId: entry.pageId,
      sampleLibraryId: entry.sampleLibraryId,
      adCount: Math.max(entry.maxCollation, entry.resultCount)
    });
  }
  advertisers.sort((left, right) => right.adCount - left.adCount);
  return advertisers.slice(0, maxResults);
}
|
|
8870
|
+
|
|
8770
8871
|
// src/api/facebook-ad-routes.ts
|
|
8771
8872
|
import { fal as fal2 } from "@fal-ai/client";
|
|
8772
8873
|
var FacebookAdBodySchema = z13.object({
|
|
@@ -8811,6 +8912,22 @@ function buildPageIntelUrl(body, country) {
|
|
|
8811
8912
|
/**
 * Build the browser launch options from the environment.
 *
 * `KERNEL_API_KEY` and `KERNEL_PROXY_ID` are read from `process.env` and
 * trimmed; each is `undefined` when the variable is unset.
 *
 * @returns {{headless: boolean, kernelApiKey: (string|undefined), kernelProxyId: (string|undefined), viewport: {width: number, height: number}, locale: string}}
 */
function kernelLaunchOpts() {
  const apiKey = process.env.KERNEL_API_KEY;
  const proxyId = process.env.KERNEL_PROXY_ID;
  return {
    headless: true,
    kernelApiKey: apiKey?.trim(),
    kernelProxyId: proxyId?.trim(),
    viewport: { width: 1280, height: 900 },
    locale: "en-US"
  };
}
|
|
8915
|
+
/**
 * Build browser launch options, preferring a proxy id obtained from
 * `resolveKernelProxyId` over the statically configured `KERNEL_PROXY_ID`.
 *
 * The resolver is called with `proxyMode: "location"`, the fixed location
 * "New York, NY" and `gl: "us"`. Resolution is best effort: if it throws or
 * yields no `kernelProxyId`, the trimmed `KERNEL_PROXY_ID` environment value
 * (possibly `undefined`) is used instead. A failure is logged as a warning so
 * the fallback is visible in logs; this function itself never rejects because
 * of the resolver.
 *
 * @returns {Promise<{headless: boolean, kernelApiKey: (string|undefined), kernelProxyId: (string|undefined), viewport: {width: number, height: number}, locale: string}>}
 */
async function kernelLaunchOptsResidential() {
  const kernelApiKey = process.env.KERNEL_API_KEY?.trim();
  const configuredKernelProxyId = process.env.KERNEL_PROXY_ID?.trim();
  let proxyId = configuredKernelProxyId;
  try {
    const resolution = await resolveKernelProxyId({
      kernelApiKey,
      proxyMode: "location",
      configuredKernelProxyId,
      location: "New York, NY",
      gl: "us"
    });
    if (resolution.kernelProxyId) proxyId = resolution.kernelProxyId;
  } catch (err) {
    // Keep the configured proxy, but do not hide the failure.
    console.warn("[facebook-ad] Kernel proxy resolution failed; falling back to KERNEL_PROXY_ID:", err instanceof Error ? err.message : String(err));
  }
  return { headless: true, kernelApiKey, kernelProxyId: proxyId, viewport: { width: 1280, height: 900 }, locale: "en-US" };
}
|
|
8814
8931
|
var facebookAdApp = new Hono4();
|
|
8815
8932
|
facebookAdApp.post("/ad", createApiKeyAuth(), async (c) => {
|
|
8816
8933
|
const raw = await c.req.json().catch(() => ({}));
|
|
@@ -8867,7 +8984,7 @@ facebookAdApp.post("/page-intel", createApiKeyAuth(), async (c) => {
|
|
|
8867
8984
|
const driver = new BrowserDriver();
|
|
8868
8985
|
let refunded = false;
|
|
8869
8986
|
try {
|
|
8870
|
-
await driver.launch(
|
|
8987
|
+
await driver.launch(await kernelLaunchOptsResidential());
|
|
8871
8988
|
await driver.navigateTo(listingUrl);
|
|
8872
8989
|
const extractor = new FacebookAdExtractor(driver);
|
|
8873
8990
|
const result = await extractor.extractPageIntel(listingUrl, maxAds);
|
|
@@ -8951,18 +9068,15 @@ facebookAdApp.post("/search", createApiKeyAuth(), async (c) => {
|
|
|
8951
9068
|
const driver = new BrowserDriver();
|
|
8952
9069
|
let searchRefunded = false;
|
|
8953
9070
|
try {
|
|
8954
|
-
await driver.launch(
|
|
9071
|
+
await driver.launch(await kernelLaunchOptsResidential());
|
|
8955
9072
|
const page = driver.getPage();
|
|
8956
|
-
await
|
|
8957
|
-
|
|
8958
|
-
|
|
8959
|
-
|
|
8960
|
-
|
|
8961
|
-
|
|
8962
|
-
|
|
8963
|
-
{ timeout: 2e4, polling: 500 }
|
|
8964
|
-
);
|
|
8965
|
-
} catch {
|
|
9073
|
+
const collated = await collectAdLibraryResults(page, searchUrl, Math.max(maxResults * 4, 40));
|
|
9074
|
+
const gqlAdvertisers = advertisersFromResults(collated, maxResults);
|
|
9075
|
+
if (gqlAdvertisers.length > 0) {
|
|
9076
|
+
const results2 = gqlAdvertisers.map((a) => ({ name: a.pageName, pageName: a.pageName, pageId: a.pageId, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
|
|
9077
|
+
const searchResult2 = { query: body.query.trim(), searchUrl, results: results2, via: "graphql" };
|
|
9078
|
+
await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results2.length, result: searchResult2 });
|
|
9079
|
+
return c.json(searchResult2);
|
|
8966
9080
|
}
|
|
8967
9081
|
await page.waitForTimeout(1500);
|
|
8968
9082
|
for (let scroll = 0; scroll < 3; scroll++) {
|
|
@@ -9008,7 +9122,7 @@ facebookAdApp.post("/search", createApiKeyAuth(), async (c) => {
|
|
|
9008
9122
|
advertiserMap.set(pageName, { pageName, sampleLibraryId: libraryId, adCount: 1 });
|
|
9009
9123
|
}
|
|
9010
9124
|
}
|
|
9011
|
-
const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults);
|
|
9125
|
+
const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults).map((a) => ({ name: a.pageName, pageName: a.pageName, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
|
|
9012
9126
|
const searchResult = { query: body.query.trim(), searchUrl, results };
|
|
9013
9127
|
await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results.length, result: searchResult });
|
|
9014
9128
|
return c.json(searchResult);
|
|
@@ -10887,19 +11001,15 @@ app.post("/auth/register", requireAllowedOrigin, async (c) => {
|
|
|
10887
11001
|
const normalizedEmail = email?.trim().toLowerCase();
|
|
10888
11002
|
if (!normalizedEmail || !password) return c.json({ error: "Email and password required" }, 400);
|
|
10889
11003
|
if (password.length < 8) return c.json({ error: "Password must be at least 8 characters" }, 400);
|
|
10890
|
-
const limited = await enforceRateLimit(c, "auth_register", rateLimitKey(c), 5, 60 * 60);
|
|
10891
|
-
if (limited) return limited;
|
|
10892
11004
|
try {
|
|
10893
11005
|
const existing = await getUserByEmail(normalizedEmail);
|
|
10894
11006
|
if (existing) return c.json({ error: "Email already registered" }, 409);
|
|
10895
|
-
let stripeCustomerId;
|
|
11007
|
+
let stripeCustomerId = null;
|
|
10896
11008
|
try {
|
|
10897
11009
|
stripeCustomerId = await createSignupStripeCustomer(normalizedEmail);
|
|
10898
|
-
} catch {
|
|
10899
|
-
|
|
10900
|
-
|
|
10901
|
-
if (!stripeCustomerId && (process.env.NODE_ENV === "production" || process.env.VERCEL === "1")) {
|
|
10902
|
-
return c.json({ error: "Stripe customer setup failed" }, 503);
|
|
11010
|
+
} catch (err) {
|
|
11011
|
+
console.warn("[auth/register] Stripe customer creation failed; continuing without it (created lazily at checkout):", err instanceof Error ? err.message : String(err));
|
|
11012
|
+
stripeCustomerId = null;
|
|
10903
11013
|
}
|
|
10904
11014
|
const user = await createUser(normalizedEmail, void 0, password, stripeCustomerId ?? void 0);
|
|
10905
11015
|
if (stripeCustomerId) {
|
|
@@ -10960,14 +11070,18 @@ app.post("/auth/forgot-password", requireAllowedOrigin, async (c) => {
|
|
|
10960
11070
|
if (process.env.RESEND_API_KEY) {
|
|
10961
11071
|
try {
|
|
10962
11072
|
const resend = new Resend(process.env.RESEND_API_KEY);
|
|
10963
|
-
await resend.emails.send({
|
|
11073
|
+
const sent = await resend.emails.send({
|
|
10964
11074
|
from: "MCP Scraper <noreply@updates.mcpscraper.dev>",
|
|
10965
11075
|
to: normalizedEmail,
|
|
10966
11076
|
subject: "Reset your MCP Scraper password",
|
|
10967
11077
|
html: `<p>Hi,</p><p>Click the link below to reset your password. This link expires in 1 hour.</p><p><a href="${resetUrl}">${resetUrl}</a></p><p>If you didn't request this, you can ignore this email.</p>`
|
|
10968
11078
|
});
|
|
10969
|
-
|
|
11079
|
+
if (sent.error) console.error("[auth/forgot-password] Resend rejected the email:", JSON.stringify(sent.error));
|
|
11080
|
+
} catch (err) {
|
|
11081
|
+
console.error("[auth/forgot-password] Resend send threw:", err instanceof Error ? err.message : String(err));
|
|
10970
11082
|
}
|
|
11083
|
+
} else {
|
|
11084
|
+
console.warn("[auth/forgot-password] RESEND_API_KEY not set \u2014 no reset email sent for", normalizedEmail);
|
|
10971
11085
|
}
|
|
10972
11086
|
return c.json({ ok: true });
|
|
10973
11087
|
});
|
|
@@ -11500,7 +11614,7 @@ app.get("/cron/tick", async (c) => {
|
|
|
11500
11614
|
if (!process.env.CRON_SECRET || secret2 !== `Bearer ${process.env.CRON_SECRET}`) {
|
|
11501
11615
|
return c.json({ error: "Unauthorized" }, 401);
|
|
11502
11616
|
}
|
|
11503
|
-
const { drainQueue } = await import("./worker-
|
|
11617
|
+
const { drainQueue } = await import("./worker-UT4ZQU2T.js");
|
|
11504
11618
|
const budget = { maxJobs: 10, deadlineMs: Date.now() + 28e4 };
|
|
11505
11619
|
const [results, sweepResult] = await Promise.all([
|
|
11506
11620
|
drainQueue(budget),
|
|
@@ -11622,4 +11736,4 @@ app.get("/blog/:slug/", (c) => {
|
|
|
11622
11736
|
export {
|
|
11623
11737
|
app
|
|
11624
11738
|
};
|
|
11625
|
-
//# sourceMappingURL=server-
|
|
11739
|
+
//# sourceMappingURL=server-KUF3QJC7.js.map
|