mcp-scraper 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/api-server.cjs +1040 -829
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/mcp-stdio-server.cjs +51 -18
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +1 -1
- package/dist/bin/paa-harvest.js +1 -1
- package/dist/{chunk-JQKZWEON.js → chunk-4OHPDEZM.js} +54 -20
- package/dist/chunk-4OHPDEZM.js.map +1 -0
- package/dist/{chunk-Y74EXABN.js → chunk-7HB7NDOY.js} +2 -2
- package/dist/{chunk-HERFK7W6.js → chunk-W4P2U5VF.js} +2 -1
- package/dist/index.js +1 -1
- package/dist/{server-6CHHLOII.js → server-V5XMVRYE.js} +209 -47
- package/dist/server-V5XMVRYE.js.map +1 -0
- package/dist/{worker-D4D2YQTA.js → worker-UT4ZQU2T.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-JQKZWEON.js.map +0 -1
- package/dist/server-6CHHLOII.js.map +0 -1
- /package/dist/{chunk-Y74EXABN.js.map → chunk-7HB7NDOY.js.map} +0 -0
- /package/dist/{chunk-HERFK7W6.js.map → chunk-W4P2U5VF.js.map} +0 -0
- /package/dist/{worker-D4D2YQTA.js.map → worker-UT4ZQU2T.js.map} +0 -0
|
@@ -2,8 +2,9 @@ import {
|
|
|
2
2
|
CaptureSerpPageSnapshotsInputSchema,
|
|
3
3
|
CaptureSerpSnapshotInputSchema,
|
|
4
4
|
HttpMcpToolExecutor,
|
|
5
|
-
buildPaaExtractorMcpServer
|
|
6
|
-
|
|
5
|
+
buildPaaExtractorMcpServer,
|
|
6
|
+
harvestTimeoutBudget
|
|
7
|
+
} from "./chunk-4OHPDEZM.js";
|
|
7
8
|
import {
|
|
8
9
|
BALANCE_PACK_LABELS,
|
|
9
10
|
BALANCE_PRICE_IDS,
|
|
@@ -19,7 +20,7 @@ import {
|
|
|
19
20
|
harvestProblemResponse,
|
|
20
21
|
insufficientBalanceResponse,
|
|
21
22
|
serializeHarvestProblem
|
|
22
|
-
} from "./chunk-Y74EXABN.js";
|
|
23
|
+
} from "./chunk-7HB7NDOY.js";
|
|
23
24
|
import {
|
|
24
25
|
BrowserDriver,
|
|
25
26
|
CaptchaError,
|
|
@@ -30,8 +31,9 @@ import {
|
|
|
30
31
|
RawMapsOverviewSchema,
|
|
31
32
|
RawMapsReviewStatsSchema,
|
|
32
33
|
buildYouTubeChannelVideosUrl,
|
|
33
|
-
harvest
|
|
34
|
-
|
|
34
|
+
harvest,
|
|
35
|
+
resolveKernelProxyId
|
|
36
|
+
} from "./chunk-W4P2U5VF.js";
|
|
35
37
|
import {
|
|
36
38
|
SiteAuditJobRowSchema,
|
|
37
39
|
cancelJob,
|
|
@@ -3430,11 +3432,6 @@ async function validatePublicHttpUrl(raw, opts) {
|
|
|
3430
3432
|
return { parsed };
|
|
3431
3433
|
}
|
|
3432
3434
|
|
|
3433
|
-
// src/api/server.ts
|
|
3434
|
-
import { mkdirSync as mkdirSync2, writeFileSync } from "fs";
|
|
3435
|
-
import { homedir as homedir2 } from "os";
|
|
3436
|
-
import { join as join4 } from "path";
|
|
3437
|
-
|
|
3438
3435
|
// src/api/kpo-extractor.ts
|
|
3439
3436
|
import TurndownService from "turndown";
|
|
3440
3437
|
|
|
@@ -3849,25 +3846,73 @@ function firstFont(fontFamily) {
|
|
|
3849
3846
|
const first = fontFamily.split(",")[0].trim().replace(/['"]/g, "");
|
|
3850
3847
|
return first || null;
|
|
3851
3848
|
}
|
|
3849
|
+
/**
 * Picks the most frequent usable colour from a frequency table.
 *
 * @param {Record<string, number>} freq - Map of hex colour string to occurrence count.
 * @returns {string|null} The hex key with the highest count once transparent/white
 *   and the two near-black values are excluded, or null when nothing remains.
 */
function dominantColor(freq) {
  const candidates = Object.entries(freq).filter(([hex]) => {
    if (isTransparentOrWhite(hex)) return false;
    return hex !== "#000000" && hex !== "#020101";
  });
  // Highest count first; the sort is stable, so ties keep their original order.
  candidates.sort(([, countA], [, countB]) => countB - countA);
  const [top] = candidates;
  return top?.[0] ?? null;
}
|
|
3852
3852
|
async function extractBrandingFromPage(page) {
|
|
3853
3853
|
const evalScript = `
|
|
3854
3854
|
(function() {
|
|
3855
3855
|
function cs(el) { return el ? window.getComputedStyle(el) : null; }
|
|
3856
|
+
function toHex(rgb) {
|
|
3857
|
+
var m = rgb && rgb.match(/rgba?\\((\\d+),\\s*(\\d+),\\s*(\\d+)/);
|
|
3858
|
+
if (!m) return null;
|
|
3859
|
+
return '#' + [m[1],m[2],m[3]].map(function(v){ return ('0'+parseInt(v).toString(16)).slice(-2); }).join('');
|
|
3860
|
+
}
|
|
3861
|
+
function isUsable(hex) {
|
|
3862
|
+
if (!hex) return false;
|
|
3863
|
+
if (hex === '#000000' || hex === '#020101' || hex === '#ffffff' || hex === '#fffffe') return false;
|
|
3864
|
+
var r=parseInt(hex.slice(1,3),16), g=parseInt(hex.slice(3,5),16), b=parseInt(hex.slice(5,7),16);
|
|
3865
|
+
return (0.2126*r + 0.7152*g + 0.0722*b) <= 230;
|
|
3866
|
+
}
|
|
3867
|
+
|
|
3856
3868
|
var navEl = document.querySelector('nav, header, [role="banner"]');
|
|
3857
3869
|
var bodyEl = document.body;
|
|
3858
3870
|
var h1El = document.querySelector('h1');
|
|
3859
3871
|
var btnEl = document.querySelector(
|
|
3860
3872
|
'a.btn-primary, button.btn-primary, .btn-primary, .cta-btn,' +
|
|
3861
3873
|
'a.button--primary, button.button--primary, [class*="btn-cta"],' +
|
|
3862
|
-
'[class*="cta-button"], .wp-block-button__link, [class*="hero"] a'
|
|
3874
|
+
'[class*="cta-button"], .wp-block-button__link, [class*="hero"] a,' +
|
|
3875
|
+
'.elementor-button, .elementor-button-link,' +
|
|
3876
|
+
'.et_pb_button,' +
|
|
3877
|
+
'.fl-button,' +
|
|
3878
|
+
'.vc_btn,' +
|
|
3879
|
+
'[class*="cta"][href], [class*="get-started"], [class*="contact-btn"]'
|
|
3863
3880
|
);
|
|
3864
3881
|
var navStyle = cs(navEl);
|
|
3865
3882
|
var bodyStyle = cs(bodyEl);
|
|
3866
3883
|
var h1Style = cs(h1El);
|
|
3867
3884
|
var btnStyle = cs(btnEl);
|
|
3868
|
-
|
|
3885
|
+
|
|
3886
|
+
var svgFreq = {};
|
|
3887
|
+
var svgScope = navEl || document.querySelector('header, [role="banner"]');
|
|
3888
|
+
if (svgScope) {
|
|
3889
|
+
var svgEls = svgScope.querySelectorAll('svg *, [fill], path, circle, rect, polygon, polyline');
|
|
3890
|
+
for (var si = 0; si < svgEls.length; si++) {
|
|
3891
|
+
var svgEl = svgEls[si];
|
|
3892
|
+
var fillComp = cs(svgEl) ? cs(svgEl).fill : null;
|
|
3893
|
+
var fillAttr = svgEl.getAttribute('fill');
|
|
3894
|
+
var fillHex = null;
|
|
3895
|
+
if (fillComp && fillComp !== 'none') { fillHex = toHex(fillComp); }
|
|
3896
|
+
else if (fillAttr && fillAttr !== 'none' && fillAttr.startsWith('#')) { fillHex = fillAttr; }
|
|
3897
|
+
if (fillHex && isUsable(fillHex)) { svgFreq[fillHex] = (svgFreq[fillHex] || 0) + 1; }
|
|
3898
|
+
}
|
|
3899
|
+
}
|
|
3900
|
+
|
|
3901
|
+
var navChildBgFreq = {};
|
|
3902
|
+
if (navEl) {
|
|
3903
|
+
var navChildren = navEl.querySelectorAll('li, a, button, [class*="menu-item"]');
|
|
3904
|
+
for (var ni = 0; ni < navChildren.length; ni++) {
|
|
3905
|
+
var nbg = cs(navChildren[ni]);
|
|
3906
|
+
if (nbg) {
|
|
3907
|
+
var bghex = toHex(nbg.backgroundColor);
|
|
3908
|
+
if (bghex && isUsable(bghex)) { navChildBgFreq[bghex] = (navChildBgFreq[bghex] || 0) + 1; }
|
|
3909
|
+
}
|
|
3910
|
+
}
|
|
3911
|
+
}
|
|
3912
|
+
|
|
3913
|
+
var pageHost = window.location.hostname.replace(/^www\\./, '');
|
|
3869
3914
|
function isSameDomain(src) {
|
|
3870
|
-
try { return new URL(src).hostname.replace(/^www
|
|
3915
|
+
try { return new URL(src).hostname.replace(/^www\\./, '').endsWith(pageHost); } catch { return false; }
|
|
3871
3916
|
}
|
|
3872
3917
|
var logoSelectors = [
|
|
3873
3918
|
'header img[class*="logo"]', 'nav img[class*="logo"]',
|
|
@@ -3890,22 +3935,27 @@ async function extractBrandingFromPage(page) {
|
|
|
3890
3935
|
'link[rel~="icon"], link[rel="shortcut icon"], link[rel="apple-touch-icon"]'
|
|
3891
3936
|
);
|
|
3892
3937
|
return {
|
|
3893
|
-
navBg:
|
|
3894
|
-
bodyBg:
|
|
3895
|
-
bodyColor:
|
|
3896
|
-
h1Color:
|
|
3897
|
-
btnBg:
|
|
3898
|
-
bodyFont:
|
|
3899
|
-
h1Font:
|
|
3900
|
-
logoSrc:
|
|
3901
|
-
faviconHref:
|
|
3938
|
+
navBg: navStyle ? navStyle.backgroundColor : null,
|
|
3939
|
+
bodyBg: bodyStyle ? bodyStyle.backgroundColor : null,
|
|
3940
|
+
bodyColor: bodyStyle ? bodyStyle.color : null,
|
|
3941
|
+
h1Color: h1Style ? h1Style.color : null,
|
|
3942
|
+
btnBg: btnStyle ? btnStyle.backgroundColor : null,
|
|
3943
|
+
bodyFont: bodyStyle ? bodyStyle.fontFamily : null,
|
|
3944
|
+
h1Font: h1Style ? h1Style.fontFamily : null,
|
|
3945
|
+
logoSrc: logoSrc,
|
|
3946
|
+
faviconHref: faviconEl ? faviconEl.href : null,
|
|
3947
|
+
svgFreq: svgFreq,
|
|
3948
|
+
navChildBgFreq: navChildBgFreq,
|
|
3902
3949
|
};
|
|
3903
3950
|
})()
|
|
3904
3951
|
`;
|
|
3905
3952
|
const raw = await page.evaluate(evalScript);
|
|
3906
3953
|
const navBgHex = rgbToHex(raw.navBg ?? "");
|
|
3907
3954
|
const bodyBgHex = rgbToHex(raw.bodyBg ?? "");
|
|
3908
|
-
const
|
|
3955
|
+
const navBgUsable = navBgHex && !isTransparentOrWhite(navBgHex) && navBgHex !== "#000000" && navBgHex !== "#020101" ? navBgHex : null;
|
|
3956
|
+
const svgPrimary = dominantColor(raw.svgFreq ?? {});
|
|
3957
|
+
const navChildBg = dominantColor(raw.navChildBgFreq ?? {});
|
|
3958
|
+
const primary = navBgUsable ?? svgPrimary ?? navChildBg ?? bodyBgHex;
|
|
3909
3959
|
const accent = rgbToHex(raw.btnBg ?? "");
|
|
3910
3960
|
const text = rgbToHex(raw.bodyColor ?? "");
|
|
3911
3961
|
const heading = rgbToHex(raw.h1Color ?? "");
|
|
@@ -8767,6 +8817,106 @@ var FacebookAdExtractor = class {
|
|
|
8767
8817
|
}
|
|
8768
8818
|
};
|
|
8769
8819
|
|
|
8820
|
+
// src/extractor/FacebookAdGraphql.ts
|
|
8821
|
+
// GraphQL operation name; compared against the fb_api_req_friendly_name field
// of a request's POST body to select Ad Library search responses.
var AD_LIBRARY_QUERY = "AdLibrarySearchPaginationQuery";
|
|
8822
|
+
/**
 * Parses a Facebook GraphQL response body into a list of JSON payloads.
 *
 * The body is first tried as a single JSON document. If that fails it is
 * treated as newline-delimited JSON, and every line that parses is kept.
 * A leading `for (;;);` guard is removed from the whole body and from each
 * individual line, with any whitespace before it tolerated.
 *
 * @param {string} text - Raw response body.
 * @returns {unknown[]} Parsed payloads in document order; empty when nothing
 *   parses or when `text` is not a string.
 */
function parseFbGraphqlJson(text) {
  if (typeof text !== "string") return [];

  const stripGuard = (chunk) => chunk.replace(/^\s*for\s*\(;;\);/, "").trim();

  const body = stripGuard(text);
  if (!body) return [];

  try {
    return [JSON.parse(body)];
  } catch {
    // Not a single JSON document; fall through to line-by-line parsing.
  }

  const payloads = [];
  for (const line of body.split("\n")) {
    const candidate = stripGuard(line);
    if (!candidate) continue;
    try {
      payloads.push(JSON.parse(candidate));
    } catch {
      // Best effort: an unparseable line must not discard the other lines.
    }
  }
  return payloads;
}
|
|
8841
|
+
/**
 * Flattens the ads found under
 * `data.ad_library_main.search_results_connection.edges[].node.collated_results[]`
 * of a parsed GraphQL payload into normalized records.
 *
 * Malformed parts of the payload (missing or non-array `edges` /
 * `collated_results`, null or non-object entries, entries without an
 * `ad_archive_id`) are skipped rather than thrown on, so one bad entry cannot
 * discard the rest of the response.
 *
 * @param {unknown} payload - One parsed GraphQL payload.
 * @returns {Array<{ad_archive_id: string, page_id: string, page_name: string,
 *   is_active: boolean, collation_count: number|null, snapshot: object|null}>}
 */
function extractCollatedResults(payload) {
  const edges = payload?.data?.ad_library_main?.search_results_connection?.edges;
  if (!Array.isArray(edges)) return [];

  const results = [];
  for (const edge of edges) {
    const collated = edge?.node?.collated_results;
    if (!Array.isArray(collated)) continue;

    for (const entry of collated) {
      if (entry === null || typeof entry !== "object") continue;

      const id = entry.ad_archive_id;
      if (id === undefined || id === null) continue;

      const snapshot = entry.snapshot ?? null;
      results.push({
        ad_archive_id: String(id),
        page_id: entry.page_id != null ? String(entry.page_id) : "",
        // Fall back to the snapshot's page name when the entry has none.
        page_name: entry.page_name ?? snapshot?.page_name ?? "",
        is_active: Boolean(entry.is_active),
        collation_count: typeof entry.collation_count === "number" ? entry.collation_count : null,
        snapshot,
      });
    }
  }
  return results;
}
|
|
8864
|
+
/**
 * Navigates `page` to `url` and gathers Ad Library results from the GraphQL
 * responses the page receives while it is scrolled.
 *
 * A "response" listener is attached before navigation and always detached
 * afterwards. Only responses whose URL contains "/api/graphql" and whose
 * request body names the AD_LIBRARY_QUERY operation are parsed. Results are
 * de-duplicated by `ad_archive_id`.
 *
 * Scrolling stops when the capture window elapses, when `maxResults` items
 * have been collected, or when at least one result exists and the count has
 * stayed unchanged for two consecutive rounds.
 *
 * @param {object} page - Browser page exposing on/off/goto/evaluate/waitForTimeout.
 * @param {string} url - Ad Library URL to open.
 * @param {number} maxResults - Upper bound on the number of results returned.
 * @param {{captureMs?: number}} [opts] - `captureMs` is the scroll/capture window (default 30000).
 * @returns {Promise<object[]>} At most `maxResults` unique results, in arrival order.
 */
async function collectAdLibraryResults(page, url, maxResults, opts = {}) {
  const captureWindowMs = opts.captureMs ?? 3e4;
  const ads = [];
  const knownIds = /* @__PURE__ */ new Set();

  const ingest = (text) => {
    for (const payload of parseFbGraphqlJson(text)) {
      for (const ad of extractCollatedResults(payload)) {
        if (knownIds.has(ad.ad_archive_id)) continue;
        knownIds.add(ad.ad_archive_id);
        ads.push(ad);
      }
    }
  };

  const onResponse = (response) => {
    if (!response.url().includes("/api/graphql")) return;
    const postBody = response.request().postData() ?? "";
    const nameMatch = postBody.match(/fb_api_req_friendly_name=([^&]+)/);
    if (nameMatch?.[1] !== AD_LIBRARY_QUERY) return;
    // Fire-and-forget: a body that cannot be read or parsed is ignored.
    void response.text().then(ingest).catch(() => void 0);
  };

  page.on("response", onResponse);
  try {
    await page.goto(url, { waitUntil: "domcontentloaded", timeout: 45e3 });
    const stopAt = Date.now() + captureWindowMs;
    let previousCount = -1;
    let idleRounds = 0;
    while (Date.now() < stopAt && ads.length < maxResults) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)).catch(() => void 0);
      await page.waitForTimeout(2e3);
      const unchanged = ads.length === previousCount;
      idleRounds = unchanged ? idleRounds + 1 : 0;
      if (unchanged && idleRounds >= 2 && ads.length > 0) break;
      previousCount = ads.length;
    }
  } finally {
    page.off("response", onResponse);
  }
  return ads.slice(0, maxResults);
}
|
|
8904
|
+
/**
 * Groups Ad Library results by page and ranks the pages by ad volume.
 *
 * Results lacking a `page_id` or `page_name` are ignored. For each page the
 * first result seen supplies the name and sample library id. `adCount` is the
 * larger of the number of results seen and the highest positive
 * `collation_count` among them.
 *
 * @param {Array<object>} results - Records with page_id, page_name, ad_archive_id, collation_count.
 * @param {number} maxResults - Maximum number of advertisers to return.
 * @returns {Array<{pageName: string, pageId: string, sampleLibraryId: string, adCount: number}>}
 *   Advertisers sorted by `adCount`, highest first.
 */
function advertisersFromResults(results, maxResults) {
  const tallies = /* @__PURE__ */ new Map();

  for (const {
    page_id: pageId,
    page_name: pageName,
    ad_archive_id: sampleLibraryId,
    collation_count: rawCollation,
  } of results) {
    if (!pageId || !pageName) continue;

    const collation = typeof rawCollation === "number" && rawCollation > 0 ? rawCollation : 0;
    const tally = tallies.get(pageId);
    if (!tally) {
      tallies.set(pageId, { pageName, pageId, sampleLibraryId, maxCollation: collation, resultCount: 1 });
      continue;
    }
    tally.resultCount += 1;
    tally.maxCollation = Math.max(tally.maxCollation, collation);
  }

  const advertisers = Array.from(
    tallies.values(),
    ({ pageName, pageId, sampleLibraryId, maxCollation, resultCount }) => ({
      pageName,
      pageId,
      sampleLibraryId,
      adCount: Math.max(maxCollation, resultCount),
    })
  );
  advertisers.sort((left, right) => right.adCount - left.adCount);
  return advertisers.slice(0, maxResults);
}
|
|
8919
|
+
|
|
8770
8920
|
// src/api/facebook-ad-routes.ts
|
|
8771
8921
|
import { fal as fal2 } from "@fal-ai/client";
|
|
8772
8922
|
var FacebookAdBodySchema = z13.object({
|
|
@@ -8811,6 +8961,22 @@ function buildPageIntelUrl(body, country) {
|
|
|
8811
8961
|
/**
 * Builds the browser launch options from the environment.
 *
 * @returns {{headless: boolean, kernelApiKey: (string|undefined), kernelProxyId: (string|undefined),
 *   viewport: {width: number, height: number}, locale: string}} Options using the trimmed
 *   KERNEL_API_KEY and KERNEL_PROXY_ID values (undefined when unset).
 */
function kernelLaunchOpts() {
  const { KERNEL_API_KEY: apiKey, KERNEL_PROXY_ID: proxyId } = process.env;
  return {
    headless: true,
    kernelApiKey: apiKey?.trim(),
    kernelProxyId: proxyId?.trim(),
    viewport: { width: 1280, height: 900 },
    locale: "en-US",
  };
}
|
|
8964
|
+
/**
 * Builds browser launch options, using a proxy id obtained from
 * resolveKernelProxyId (proxyMode "location", "New York, NY", gl "us") when
 * one is returned.
 *
 * Falls back to the trimmed KERNEL_PROXY_ID environment value when the lookup
 * returns no proxy id or throws; lookup errors are deliberately not propagated.
 *
 * @returns {Promise<{headless: boolean, kernelApiKey: (string|undefined),
 *   kernelProxyId: (string|undefined), viewport: {width: number, height: number}, locale: string}>}
 */
async function kernelLaunchOptsResidential() {
  const envValue = (name) => process.env[name]?.trim();

  let proxyId = envValue("KERNEL_PROXY_ID");
  try {
    const { kernelProxyId: resolvedProxyId } = await resolveKernelProxyId({
      kernelApiKey: envValue("KERNEL_API_KEY"),
      proxyMode: "location",
      configuredKernelProxyId: envValue("KERNEL_PROXY_ID"),
      location: "New York, NY",
      gl: "us",
    });
    proxyId = resolvedProxyId || proxyId;
  } catch {
    // Best effort: any lookup failure reverts to the configured proxy id.
    proxyId = envValue("KERNEL_PROXY_ID");
  }

  return {
    headless: true,
    kernelApiKey: envValue("KERNEL_API_KEY"),
    kernelProxyId: proxyId,
    viewport: { width: 1280, height: 900 },
    locale: "en-US",
  };
}
|
|
8814
8980
|
var facebookAdApp = new Hono4();
|
|
8815
8981
|
facebookAdApp.post("/ad", createApiKeyAuth(), async (c) => {
|
|
8816
8982
|
const raw = await c.req.json().catch(() => ({}));
|
|
@@ -8867,7 +9033,7 @@ facebookAdApp.post("/page-intel", createApiKeyAuth(), async (c) => {
|
|
|
8867
9033
|
const driver = new BrowserDriver();
|
|
8868
9034
|
let refunded = false;
|
|
8869
9035
|
try {
|
|
8870
|
-
await driver.launch(
|
|
9036
|
+
await driver.launch(await kernelLaunchOptsResidential());
|
|
8871
9037
|
await driver.navigateTo(listingUrl);
|
|
8872
9038
|
const extractor = new FacebookAdExtractor(driver);
|
|
8873
9039
|
const result = await extractor.extractPageIntel(listingUrl, maxAds);
|
|
@@ -8951,18 +9117,15 @@ facebookAdApp.post("/search", createApiKeyAuth(), async (c) => {
|
|
|
8951
9117
|
const driver = new BrowserDriver();
|
|
8952
9118
|
let searchRefunded = false;
|
|
8953
9119
|
try {
|
|
8954
|
-
await driver.launch(
|
|
9120
|
+
await driver.launch(await kernelLaunchOptsResidential());
|
|
8955
9121
|
const page = driver.getPage();
|
|
8956
|
-
await
|
|
8957
|
-
|
|
8958
|
-
|
|
8959
|
-
|
|
8960
|
-
|
|
8961
|
-
|
|
8962
|
-
|
|
8963
|
-
{ timeout: 2e4, polling: 500 }
|
|
8964
|
-
);
|
|
8965
|
-
} catch {
|
|
9122
|
+
const collated = await collectAdLibraryResults(page, searchUrl, Math.max(maxResults * 4, 40));
|
|
9123
|
+
const gqlAdvertisers = advertisersFromResults(collated, maxResults);
|
|
9124
|
+
if (gqlAdvertisers.length > 0) {
|
|
9125
|
+
const results2 = gqlAdvertisers.map((a) => ({ name: a.pageName, pageName: a.pageName, pageId: a.pageId, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
|
|
9126
|
+
const searchResult2 = { query: body.query.trim(), searchUrl, results: results2, via: "graphql" };
|
|
9127
|
+
await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results2.length, result: searchResult2 });
|
|
9128
|
+
return c.json(searchResult2);
|
|
8966
9129
|
}
|
|
8967
9130
|
await page.waitForTimeout(1500);
|
|
8968
9131
|
for (let scroll = 0; scroll < 3; scroll++) {
|
|
@@ -9008,7 +9171,7 @@ facebookAdApp.post("/search", createApiKeyAuth(), async (c) => {
|
|
|
9008
9171
|
advertiserMap.set(pageName, { pageName, sampleLibraryId: libraryId, adCount: 1 });
|
|
9009
9172
|
}
|
|
9010
9173
|
}
|
|
9011
|
-
const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults);
|
|
9174
|
+
const results = [...advertiserMap.values()].sort((a, b) => b.adCount - a.adCount).slice(0, maxResults).map((a) => ({ name: a.pageName, pageName: a.pageName, libraryId: a.sampleLibraryId, sampleLibraryId: a.sampleLibraryId, adCount: a.adCount }));
|
|
9012
9175
|
const searchResult = { query: body.query.trim(), searchUrl, results };
|
|
9013
9176
|
await logRequestEvent({ userId: fbUser.id, source: "facebook_search", status: "done", query: body.query.trim(), resultCount: results.length, result: searchResult });
|
|
9014
9177
|
return c.json(searchResult);
|
|
@@ -11029,7 +11192,11 @@ app.delete("/api-key", requireAllowedOrigin, sessionAuth, async (c) => {
|
|
|
11029
11192
|
// Set of e-mail addresses read once at module load from the comma-separated
// HARVEST_LIMIT_BYPASS_EMAILS environment variable; entries are trimmed and
// empty entries are dropped.
var BYPASS_EMAILS = (() => {
  const emails = /* @__PURE__ */ new Set();
  for (const entry of (process.env.HARVEST_LIMIT_BYPASS_EMAILS ?? "").split(",")) {
    const email = entry.trim();
    if (email) emails.add(email);
  }
  return emails;
})();
|
|
11032
|
-
var
|
|
11195
|
+
/**
 * Converts the raw SYNC_HARVEST_TIMEOUT_MS setting into a timeout that is safe
 * to hand to AbortSignal.timeout().
 *
 * The value is truncated to a whole number of milliseconds and clamped to the
 * largest delay a timer accepts (2^31 - 1 ms). Without this, a fractional or
 * oversized override passes validation here but only misbehaves later, when
 * the sync harvest route builds its timeout signal.
 *
 * @param {string|undefined} raw - Raw environment value.
 * @returns {number|null} Positive integer milliseconds, or null when the value
 *   is unset, non-numeric, non-finite, or truncates to zero or less.
 */
function parseSyncHarvestTimeoutMs(raw) {
  const MAX_TIMER_DELAY_MS = 2147483647;
  if (raw === void 0) return null;
  const parsed = Math.trunc(Number(raw));
  if (!Number.isFinite(parsed) || parsed <= 0) return null;
  return Math.min(parsed, MAX_TIMER_DELAY_MS);
}
// Optional operator override for the sync harvest timeout; null means "use the
// computed per-request budget instead".
var SYNC_HARVEST_TIMEOUT_OVERRIDE_MS = parseSyncHarvestTimeoutMs(process.env.SYNC_HARVEST_TIMEOUT_MS);
|
|
11033
11200
|
function combineAbortSignals(signals) {
|
|
11034
11201
|
const controller = new AbortController();
|
|
11035
11202
|
const abortFrom = (signal) => {
|
|
@@ -11129,9 +11296,10 @@ app.post("/harvest/sync", auth, async (c) => {
|
|
|
11129
11296
|
if (!syncOk) return c.json(insufficientBalanceResponse(syncBal, syncCost), 402);
|
|
11130
11297
|
const jobId = await createRunningJob(user.id, options.query, options);
|
|
11131
11298
|
const recordAttempt = createHarvestAttemptRecorder(jobId, user.id);
|
|
11299
|
+
const syncTimeoutMs = SYNC_HARVEST_TIMEOUT_OVERRIDE_MS ?? harvestTimeoutBudget(options.maxQuestions, options.serpOnly).serverMs;
|
|
11132
11300
|
const syncSignal = combineAbortSignals([
|
|
11133
11301
|
c.req.raw.signal,
|
|
11134
|
-
AbortSignal.timeout(
|
|
11302
|
+
AbortSignal.timeout(syncTimeoutMs)
|
|
11135
11303
|
]);
|
|
11136
11304
|
try {
|
|
11137
11305
|
const result = await harvest({
|
|
@@ -11279,13 +11447,7 @@ app.post("/extract-url", auth, async (c) => {
|
|
|
11279
11447
|
const brandingData = pageData?.branding ?? null;
|
|
11280
11448
|
let screenshotMeta = null;
|
|
11281
11449
|
if (screenshotBuf) {
|
|
11282
|
-
|
|
11283
|
-
mkdirSync2(outDir, { recursive: true });
|
|
11284
|
-
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
11285
|
-
const slug = canonicalUrl.replace(/^https?:\/\//, "").replace(/[^a-z0-9]+/gi, "-").replace(/^-+|-+$/g, "").slice(0, 60);
|
|
11286
|
-
const filePath = join4(outDir, `${stamp}-${slug}.png`);
|
|
11287
|
-
writeFileSync(filePath, screenshotBuf);
|
|
11288
|
-
screenshotMeta = { savedPath: filePath, sizeBytes: screenshotBuf.length, device };
|
|
11450
|
+
screenshotMeta = { base64: screenshotBuf.toString("base64"), sizeBytes: screenshotBuf.length, device };
|
|
11289
11451
|
}
|
|
11290
11452
|
const mediaMeta = downloadMedia ? await harvestPageMedia(result.bodyHtml, canonicalUrl, { types: mediaTypes ?? ["image", "video", "audio"] }) : null;
|
|
11291
11453
|
await logRequestEvent({ userId: user.id, source: "extract_url", status: "done", query: canonicalUrl, resultCount: result.headings.length, result });
|
|
@@ -11500,7 +11662,7 @@ app.get("/cron/tick", async (c) => {
|
|
|
11500
11662
|
if (!process.env.CRON_SECRET || secret2 !== `Bearer ${process.env.CRON_SECRET}`) {
|
|
11501
11663
|
return c.json({ error: "Unauthorized" }, 401);
|
|
11502
11664
|
}
|
|
11503
|
-
const { drainQueue } = await import("./worker-D4D2YQTA.js");
|
|
11665
|
+
const { drainQueue } = await import("./worker-UT4ZQU2T.js");
|
|
11504
11666
|
const budget = { maxJobs: 10, deadlineMs: Date.now() + 28e4 };
|
|
11505
11667
|
const [results, sweepResult] = await Promise.all([
|
|
11506
11668
|
drainQueue(budget),
|
|
@@ -11622,4 +11784,4 @@ app.get("/blog/:slug/", (c) => {
|
|
|
11622
11784
|
export {
|
|
11623
11785
|
app
|
|
11624
11786
|
};
|
|
11625
|
-
//# sourceMappingURL=server-6CHHLOII.js.map
|
|
11787
|
+
//# sourceMappingURL=server-V5XMVRYE.js.map
|