mcp-scraper 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/api-server.cjs +136 -46
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +1 -1
- package/dist/bin/mcp-stdio-server.cjs +51 -18
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +1 -1
- package/dist/{chunk-DZY3XO3M.js → chunk-6TWZS2FQ.js} +54 -20
- package/dist/chunk-6TWZS2FQ.js.map +1 -0
- package/dist/{server-KUF3QJC7.js → server-2Y27U4TO.js} +78 -30
- package/dist/server-2Y27U4TO.js.map +1 -0
- package/package.json +1 -1
- package/dist/chunk-DZY3XO3M.js.map +0 -1
- package/dist/server-KUF3QJC7.js.map +0 -1
package/dist/bin/api-server.cjs
CHANGED
|
@@ -30,6 +30,26 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
30
30
|
mod
|
|
31
31
|
));
|
|
32
32
|
|
|
33
|
+
// src/harvest-timeout.ts
|
|
34
|
+
function harvestTimeoutBudget(maxQuestions, serpOnly = false) {
|
|
35
|
+
const requested = Number.isFinite(maxQuestions) && maxQuestions > 0 ? Math.trunc(maxQuestions) : 30;
|
|
36
|
+
let serverMs;
|
|
37
|
+
if (serpOnly || requested <= 50) serverMs = 11e4;
|
|
38
|
+
else if (requested <= 100) serverMs = 18e4;
|
|
39
|
+
else if (requested <= 150) serverMs = 24e4;
|
|
40
|
+
else serverMs = 28e4;
|
|
41
|
+
const clientMs = Math.min(serverMs + CLIENT_OVER_SERVER_MARGIN_MS, VERCEL_FUNCTION_MAX_MS - 5e3);
|
|
42
|
+
return { serverMs, clientMs };
|
|
43
|
+
}
|
|
44
|
+
var VERCEL_FUNCTION_MAX_MS, CLIENT_OVER_SERVER_MARGIN_MS;
|
|
45
|
+
var init_harvest_timeout = __esm({
|
|
46
|
+
"src/harvest-timeout.ts"() {
|
|
47
|
+
"use strict";
|
|
48
|
+
VERCEL_FUNCTION_MAX_MS = 3e5;
|
|
49
|
+
CLIENT_OVER_SERVER_MARGIN_MS = 15e3;
|
|
50
|
+
}
|
|
51
|
+
});
|
|
52
|
+
|
|
33
53
|
// src/blog/registry.ts
|
|
34
54
|
var posts;
|
|
35
55
|
var init_registry = __esm({
|
|
@@ -3825,25 +3845,73 @@ function firstFont(fontFamily) {
|
|
|
3825
3845
|
const first = fontFamily.split(",")[0].trim().replace(/['"]/g, "");
|
|
3826
3846
|
return first || null;
|
|
3827
3847
|
}
|
|
3848
|
+
function dominantColor(freq) {
|
|
3849
|
+
return Object.entries(freq).filter(([hex]) => !isTransparentOrWhite(hex) && hex !== "#000000" && hex !== "#020101").sort((a, b) => b[1] - a[1])[0]?.[0] ?? null;
|
|
3850
|
+
}
|
|
3828
3851
|
async function extractBrandingFromPage(page) {
|
|
3829
3852
|
const evalScript = `
|
|
3830
3853
|
(function() {
|
|
3831
3854
|
function cs(el) { return el ? window.getComputedStyle(el) : null; }
|
|
3855
|
+
function toHex(rgb) {
|
|
3856
|
+
var m = rgb && rgb.match(/rgba?\\((\\d+),\\s*(\\d+),\\s*(\\d+)/);
|
|
3857
|
+
if (!m) return null;
|
|
3858
|
+
return '#' + [m[1],m[2],m[3]].map(function(v){ return ('0'+parseInt(v).toString(16)).slice(-2); }).join('');
|
|
3859
|
+
}
|
|
3860
|
+
function isUsable(hex) {
|
|
3861
|
+
if (!hex) return false;
|
|
3862
|
+
if (hex === '#000000' || hex === '#020101' || hex === '#ffffff' || hex === '#fffffe') return false;
|
|
3863
|
+
var r=parseInt(hex.slice(1,3),16), g=parseInt(hex.slice(3,5),16), b=parseInt(hex.slice(5,7),16);
|
|
3864
|
+
return (0.2126*r + 0.7152*g + 0.0722*b) <= 230;
|
|
3865
|
+
}
|
|
3866
|
+
|
|
3832
3867
|
var navEl = document.querySelector('nav, header, [role="banner"]');
|
|
3833
3868
|
var bodyEl = document.body;
|
|
3834
3869
|
var h1El = document.querySelector('h1');
|
|
3835
3870
|
var btnEl = document.querySelector(
|
|
3836
3871
|
'a.btn-primary, button.btn-primary, .btn-primary, .cta-btn,' +
|
|
3837
3872
|
'a.button--primary, button.button--primary, [class*="btn-cta"],' +
|
|
3838
|
-
'[class*="cta-button"], .wp-block-button__link, [class*="hero"] a'
|
|
3873
|
+
'[class*="cta-button"], .wp-block-button__link, [class*="hero"] a,' +
|
|
3874
|
+
'.elementor-button, .elementor-button-link,' +
|
|
3875
|
+
'.et_pb_button,' +
|
|
3876
|
+
'.fl-button,' +
|
|
3877
|
+
'.vc_btn,' +
|
|
3878
|
+
'[class*="cta"][href], [class*="get-started"], [class*="contact-btn"]'
|
|
3839
3879
|
);
|
|
3840
3880
|
var navStyle = cs(navEl);
|
|
3841
3881
|
var bodyStyle = cs(bodyEl);
|
|
3842
3882
|
var h1Style = cs(h1El);
|
|
3843
3883
|
var btnStyle = cs(btnEl);
|
|
3844
|
-
|
|
3884
|
+
|
|
3885
|
+
var svgFreq = {};
|
|
3886
|
+
var svgScope = navEl || document.querySelector('header, [role="banner"]');
|
|
3887
|
+
if (svgScope) {
|
|
3888
|
+
var svgEls = svgScope.querySelectorAll('svg *, [fill], path, circle, rect, polygon, polyline');
|
|
3889
|
+
for (var si = 0; si < svgEls.length; si++) {
|
|
3890
|
+
var svgEl = svgEls[si];
|
|
3891
|
+
var fillComp = cs(svgEl) ? cs(svgEl).fill : null;
|
|
3892
|
+
var fillAttr = svgEl.getAttribute('fill');
|
|
3893
|
+
var fillHex = null;
|
|
3894
|
+
if (fillComp && fillComp !== 'none') { fillHex = toHex(fillComp); }
|
|
3895
|
+
else if (fillAttr && fillAttr !== 'none' && fillAttr.startsWith('#')) { fillHex = fillAttr; }
|
|
3896
|
+
if (fillHex && isUsable(fillHex)) { svgFreq[fillHex] = (svgFreq[fillHex] || 0) + 1; }
|
|
3897
|
+
}
|
|
3898
|
+
}
|
|
3899
|
+
|
|
3900
|
+
var navChildBgFreq = {};
|
|
3901
|
+
if (navEl) {
|
|
3902
|
+
var navChildren = navEl.querySelectorAll('li, a, button, [class*="menu-item"]');
|
|
3903
|
+
for (var ni = 0; ni < navChildren.length; ni++) {
|
|
3904
|
+
var nbg = cs(navChildren[ni]);
|
|
3905
|
+
if (nbg) {
|
|
3906
|
+
var bghex = toHex(nbg.backgroundColor);
|
|
3907
|
+
if (bghex && isUsable(bghex)) { navChildBgFreq[bghex] = (navChildBgFreq[bghex] || 0) + 1; }
|
|
3908
|
+
}
|
|
3909
|
+
}
|
|
3910
|
+
}
|
|
3911
|
+
|
|
3912
|
+
var pageHost = window.location.hostname.replace(/^www\\./, '');
|
|
3845
3913
|
function isSameDomain(src) {
|
|
3846
|
-
try { return new URL(src).hostname.replace(/^www
|
|
3914
|
+
try { return new URL(src).hostname.replace(/^www\\./, '').endsWith(pageHost); } catch { return false; }
|
|
3847
3915
|
}
|
|
3848
3916
|
var logoSelectors = [
|
|
3849
3917
|
'header img[class*="logo"]', 'nav img[class*="logo"]',
|
|
@@ -3866,22 +3934,27 @@ async function extractBrandingFromPage(page) {
|
|
|
3866
3934
|
'link[rel~="icon"], link[rel="shortcut icon"], link[rel="apple-touch-icon"]'
|
|
3867
3935
|
);
|
|
3868
3936
|
return {
|
|
3869
|
-
navBg:
|
|
3870
|
-
bodyBg:
|
|
3871
|
-
bodyColor:
|
|
3872
|
-
h1Color:
|
|
3873
|
-
btnBg:
|
|
3874
|
-
bodyFont:
|
|
3875
|
-
h1Font:
|
|
3876
|
-
logoSrc:
|
|
3877
|
-
faviconHref:
|
|
3937
|
+
navBg: navStyle ? navStyle.backgroundColor : null,
|
|
3938
|
+
bodyBg: bodyStyle ? bodyStyle.backgroundColor : null,
|
|
3939
|
+
bodyColor: bodyStyle ? bodyStyle.color : null,
|
|
3940
|
+
h1Color: h1Style ? h1Style.color : null,
|
|
3941
|
+
btnBg: btnStyle ? btnStyle.backgroundColor : null,
|
|
3942
|
+
bodyFont: bodyStyle ? bodyStyle.fontFamily : null,
|
|
3943
|
+
h1Font: h1Style ? h1Style.fontFamily : null,
|
|
3944
|
+
logoSrc: logoSrc,
|
|
3945
|
+
faviconHref: faviconEl ? faviconEl.href : null,
|
|
3946
|
+
svgFreq: svgFreq,
|
|
3947
|
+
navChildBgFreq: navChildBgFreq,
|
|
3878
3948
|
};
|
|
3879
3949
|
})()
|
|
3880
3950
|
`;
|
|
3881
3951
|
const raw = await page.evaluate(evalScript);
|
|
3882
3952
|
const navBgHex = rgbToHex(raw.navBg ?? "");
|
|
3883
3953
|
const bodyBgHex = rgbToHex(raw.bodyBg ?? "");
|
|
3884
|
-
const
|
|
3954
|
+
const navBgUsable = navBgHex && !isTransparentOrWhite(navBgHex) && navBgHex !== "#000000" && navBgHex !== "#020101" ? navBgHex : null;
|
|
3955
|
+
const svgPrimary = dominantColor(raw.svgFreq ?? {});
|
|
3956
|
+
const navChildBg = dominantColor(raw.navChildBgFreq ?? {});
|
|
3957
|
+
const primary = navBgUsable ?? svgPrimary ?? navChildBg ?? bodyBgHex;
|
|
3885
3958
|
const accent = rgbToHex(raw.btnBg ?? "");
|
|
3886
3959
|
const text = rgbToHex(raw.bodyColor ?? "");
|
|
3887
3960
|
const heading = rgbToHex(raw.h1Color ?? "");
|
|
@@ -14888,7 +14961,7 @@ var init_mcp_tool_schemas = __esm({
|
|
|
14888
14961
|
HarvestPaaInputSchema = {
|
|
14889
14962
|
query: import_zod19.z.string().min(1).describe('Core search topic only. If the user says "best hvac company in Denver CO", use query="best hvac company" and location="Denver, CO". Do not include the location in query when it can be separated.'),
|
|
14890
14963
|
location: import_zod19.z.string().optional().describe('City, region, or country for geo-targeted results, inferred from the user request when present, e.g. "Denver, CO", "Tokyo, Japan", "London, UK".'),
|
|
14891
|
-
maxQuestions: import_zod19.z.number().int().min(1).max(
|
|
14964
|
+
maxQuestions: import_zod19.z.number().int().min(1).max(200).default(30).describe("Number of PAA questions to extract. Default 30. Maximum 200. Use 10 for quick probes, 30 for normal research, 100-200 when the user asks for everything/full/deep research. Larger harvests get a longer server time budget (151-200 questions \u2192 up to 280s). Credits are charged by extracted question; unused request hold is refunded."),
|
|
14892
14965
|
gl: import_zod19.z.string().length(2).default("us").describe("Google country code inferred from location or user language. Examples: United States us, United Kingdom gb, Japan jp, Canada ca, Australia au."),
|
|
14893
14966
|
hl: import_zod19.z.string().default("en").describe("Google interface/content language inferred from the user request. Use en unless the user asks for another language or locale."),
|
|
14894
14967
|
device: import_zod19.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
|
|
@@ -15000,9 +15073,12 @@ function reportTitle(full) {
|
|
|
15000
15073
|
const title = full.split("\n").find((line) => line.startsWith("# "));
|
|
15001
15074
|
return title?.replace(/^#\s+/, "").trim() || "MCP Scraper Report";
|
|
15002
15075
|
}
|
|
15076
|
+
function outputBaseDir() {
|
|
15077
|
+
return process.env.MCP_SCRAPER_OUTPUT_DIR?.trim() || (0, import_node_path6.join)((0, import_node_os3.homedir)(), "Downloads", "mcp-scraper");
|
|
15078
|
+
}
|
|
15003
15079
|
function saveFullReport(full) {
|
|
15004
15080
|
if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
15005
|
-
const outDir =
|
|
15081
|
+
const outDir = outputBaseDir();
|
|
15006
15082
|
try {
|
|
15007
15083
|
(0, import_node_fs4.mkdirSync)(outDir, { recursive: true });
|
|
15008
15084
|
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
@@ -15013,6 +15089,20 @@ function saveFullReport(full) {
|
|
|
15013
15089
|
return null;
|
|
15014
15090
|
}
|
|
15015
15091
|
}
|
|
15092
|
+
function persistScreenshotLocally(base64, url) {
|
|
15093
|
+
if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
15094
|
+
try {
|
|
15095
|
+
const dir = (0, import_node_path6.join)(outputBaseDir(), "screenshots");
|
|
15096
|
+
(0, import_node_fs4.mkdirSync)(dir, { recursive: true });
|
|
15097
|
+
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
15098
|
+
const slug = url.replace(/^https?:\/\//, "").replace(/[^a-z0-9]+/gi, "-").replace(/^-+|-+$/g, "").slice(0, 60);
|
|
15099
|
+
const filePath = (0, import_node_path6.join)(dir, `${stamp}-${slug}.png`);
|
|
15100
|
+
(0, import_node_fs4.writeFileSync)(filePath, Buffer.from(base64, "base64"));
|
|
15101
|
+
return filePath;
|
|
15102
|
+
} catch {
|
|
15103
|
+
return null;
|
|
15104
|
+
}
|
|
15105
|
+
}
|
|
15016
15106
|
function oneBlock(content) {
|
|
15017
15107
|
const filePath = saveFullReport(content);
|
|
15018
15108
|
const text = filePath ? `${content}
|
|
@@ -15233,6 +15323,7 @@ function formatExtractUrl(raw, input) {
|
|
|
15233
15323
|
const bodyMd = d.bodyMarkdown ?? "";
|
|
15234
15324
|
const schema = d.schema;
|
|
15235
15325
|
const screenshotMeta = d.screenshot;
|
|
15326
|
+
const screenshotPath = screenshotMeta?.base64 ? persistScreenshotLocally(screenshotMeta.base64, url) : null;
|
|
15236
15327
|
const branding = d.branding;
|
|
15237
15328
|
const media = d.media;
|
|
15238
15329
|
const h1Lines = headings.filter((h) => h.level === 1).map((h) => `- ${h.text}`).join("\n");
|
|
@@ -15259,7 +15350,7 @@ ${[h1Lines, h2Lines].filter(Boolean).join("\n")}` : "";
|
|
|
15259
15350
|
${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
|
|
15260
15351
|
const screenshotSection = screenshotMeta ? `
|
|
15261
15352
|
## Screenshot
|
|
15262
|
-
- **File:** ${
|
|
15353
|
+
- **File:** ${screenshotPath ?? "(returned inline only \u2014 disk write unavailable in this environment)"}
|
|
15263
15354
|
- **Size:** ${(screenshotMeta.sizeBytes / 1024).toFixed(1)} KB
|
|
15264
15355
|
- **Device:** ${screenshotMeta.device}` : "";
|
|
15265
15356
|
const brandingSection = branding ? [
|
|
@@ -15288,17 +15379,13 @@ ${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
|
|
|
15288
15379
|
**${title}**
|
|
15289
15380
|
${headingSection}${kpoSection}${brandingSection}${bodySection}${screenshotSection}${mediaSection}${tips}`;
|
|
15290
15381
|
const textResult = oneBlock(full);
|
|
15291
|
-
if (screenshotMeta?.
|
|
15292
|
-
|
|
15293
|
-
|
|
15294
|
-
|
|
15295
|
-
|
|
15296
|
-
|
|
15297
|
-
|
|
15298
|
-
]
|
|
15299
|
-
};
|
|
15300
|
-
} catch {
|
|
15301
|
-
}
|
|
15382
|
+
if (screenshotMeta?.base64) {
|
|
15383
|
+
return {
|
|
15384
|
+
content: [
|
|
15385
|
+
...textResult.content,
|
|
15386
|
+
{ type: "image", data: screenshotMeta.base64, mimeType: "image/png" }
|
|
15387
|
+
]
|
|
15388
|
+
};
|
|
15302
15389
|
}
|
|
15303
15390
|
return textResult;
|
|
15304
15391
|
}
|
|
@@ -15800,16 +15887,20 @@ var HttpMcpToolExecutor;
|
|
|
15800
15887
|
var init_http_mcp_tool_executor = __esm({
|
|
15801
15888
|
"src/mcp/http-mcp-tool-executor.ts"() {
|
|
15802
15889
|
"use strict";
|
|
15890
|
+
init_harvest_timeout();
|
|
15803
15891
|
HttpMcpToolExecutor = class {
|
|
15804
15892
|
baseUrl;
|
|
15805
15893
|
apiKey;
|
|
15806
15894
|
timeoutMs;
|
|
15895
|
+
httpTimeoutOverrideMs;
|
|
15807
15896
|
serpIntelligenceTimeoutMs;
|
|
15808
15897
|
constructor(baseUrl, apiKey) {
|
|
15809
15898
|
this.baseUrl = baseUrl.replace(/\/$/, "");
|
|
15810
15899
|
this.apiKey = apiKey;
|
|
15811
|
-
const
|
|
15812
|
-
|
|
15900
|
+
const rawOverride = process.env.MCP_SCRAPER_HTTP_TIMEOUT_MS;
|
|
15901
|
+
const parsedOverride = rawOverride === void 0 ? NaN : Number(rawOverride);
|
|
15902
|
+
this.httpTimeoutOverrideMs = Number.isFinite(parsedOverride) && parsedOverride > 0 ? parsedOverride : null;
|
|
15903
|
+
this.timeoutMs = this.httpTimeoutOverrideMs ?? 11e4;
|
|
15813
15904
|
const configuredSerpIntelligenceTimeoutMs = Number(process.env.MCP_SCRAPER_SERP_INTELLIGENCE_HTTP_TIMEOUT_MS ?? this.timeoutMs);
|
|
15814
15905
|
this.serpIntelligenceTimeoutMs = Number.isFinite(configuredSerpIntelligenceTimeoutMs) && configuredSerpIntelligenceTimeoutMs > 0 ? configuredSerpIntelligenceTimeoutMs : this.timeoutMs;
|
|
15815
15906
|
}
|
|
@@ -15851,10 +15942,12 @@ var init_http_mcp_tool_executor = __esm({
|
|
|
15851
15942
|
}
|
|
15852
15943
|
}
|
|
15853
15944
|
harvestPaa(input) {
|
|
15854
|
-
|
|
15945
|
+
const timeoutMs = this.httpTimeoutOverrideMs ?? harvestTimeoutBudget(input.maxQuestions ?? 30).clientMs;
|
|
15946
|
+
return this.call("/harvest/sync", input, timeoutMs);
|
|
15855
15947
|
}
|
|
15856
15948
|
searchSerp(input) {
|
|
15857
|
-
|
|
15949
|
+
const timeoutMs = this.httpTimeoutOverrideMs ?? harvestTimeoutBudget(0, true).clientMs;
|
|
15950
|
+
return this.call("/harvest/sync", { ...input, serpOnly: true }, timeoutMs);
|
|
15858
15951
|
}
|
|
15859
15952
|
extractUrl(input) {
|
|
15860
15953
|
return this.call("/extract-url", input);
|
|
@@ -16501,18 +16594,16 @@ async function checkHarvestLimits(userId, email, extraSlots = 0) {
|
|
|
16501
16594
|
if (active >= limit) return { error: `You have ${active} job${active !== 1 ? "s" : ""} running. Your account allows ${limit} concurrent job${limit !== 1 ? "s" : ""}. Wait for one to finish or add a concurrency slot at mcpscraper.dev/billing.` };
|
|
16502
16595
|
return null;
|
|
16503
16596
|
}
|
|
16504
|
-
var import_resend,
|
|
16597
|
+
var import_resend, import_hono9, import_hono10, import_factory6, import_cookie, import_stripe2, secureCookies, isProduction2, sessionCookieOptions, requireAllowedOrigin, auth, adminAuth, sessionAuth, app, STRIPE_API_VERSION, BYPASS_EMAILS, SYNC_HARVEST_TIMEOUT_OVERRIDE_MS;
|
|
16505
16598
|
var init_server = __esm({
|
|
16506
16599
|
"src/api/server.ts"() {
|
|
16507
16600
|
"use strict";
|
|
16601
|
+
init_harvest_timeout();
|
|
16508
16602
|
init_registry();
|
|
16509
16603
|
init_template();
|
|
16510
16604
|
init_og();
|
|
16511
16605
|
import_resend = require("resend");
|
|
16512
16606
|
init_url_utils();
|
|
16513
|
-
import_node_fs5 = require("fs");
|
|
16514
|
-
import_node_os4 = require("os");
|
|
16515
|
-
import_node_path7 = require("path");
|
|
16516
16607
|
init_kpo_extractor();
|
|
16517
16608
|
init_screenshot();
|
|
16518
16609
|
init_media_extractor();
|
|
@@ -16742,7 +16833,11 @@ var init_server = __esm({
|
|
|
16742
16833
|
BYPASS_EMAILS = new Set(
|
|
16743
16834
|
(process.env.HARVEST_LIMIT_BYPASS_EMAILS ?? "").split(",").map((e) => e.trim()).filter(Boolean)
|
|
16744
16835
|
);
|
|
16745
|
-
|
|
16836
|
+
SYNC_HARVEST_TIMEOUT_OVERRIDE_MS = (() => {
|
|
16837
|
+
const raw = process.env.SYNC_HARVEST_TIMEOUT_MS;
|
|
16838
|
+
const parsed = raw === void 0 ? NaN : Number(raw);
|
|
16839
|
+
return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
|
|
16840
|
+
})();
|
|
16746
16841
|
app.post("/harvest", auth, async (c) => {
|
|
16747
16842
|
const user = c.get("user");
|
|
16748
16843
|
const raw = await c.req.json().catch(() => ({}));
|
|
@@ -16812,9 +16907,10 @@ var init_server = __esm({
|
|
|
16812
16907
|
if (!syncOk) return c.json(insufficientBalanceResponse(syncBal, syncCost), 402);
|
|
16813
16908
|
const jobId = await createRunningJob(user.id, options.query, options);
|
|
16814
16909
|
const recordAttempt = createHarvestAttemptRecorder(jobId, user.id);
|
|
16910
|
+
const syncTimeoutMs = SYNC_HARVEST_TIMEOUT_OVERRIDE_MS ?? harvestTimeoutBudget(options.maxQuestions, options.serpOnly).serverMs;
|
|
16815
16911
|
const syncSignal = combineAbortSignals([
|
|
16816
16912
|
c.req.raw.signal,
|
|
16817
|
-
AbortSignal.timeout(
|
|
16913
|
+
AbortSignal.timeout(syncTimeoutMs)
|
|
16818
16914
|
]);
|
|
16819
16915
|
try {
|
|
16820
16916
|
const result = await harvest({
|
|
@@ -16962,13 +17058,7 @@ var init_server = __esm({
|
|
|
16962
17058
|
const brandingData = pageData?.branding ?? null;
|
|
16963
17059
|
let screenshotMeta = null;
|
|
16964
17060
|
if (screenshotBuf) {
|
|
16965
|
-
|
|
16966
|
-
(0, import_node_fs5.mkdirSync)(outDir, { recursive: true });
|
|
16967
|
-
const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
16968
|
-
const slug = canonicalUrl.replace(/^https?:\/\//, "").replace(/[^a-z0-9]+/gi, "-").replace(/^-+|-+$/g, "").slice(0, 60);
|
|
16969
|
-
const filePath = (0, import_node_path7.join)(outDir, `${stamp}-${slug}.png`);
|
|
16970
|
-
(0, import_node_fs5.writeFileSync)(filePath, screenshotBuf);
|
|
16971
|
-
screenshotMeta = { savedPath: filePath, sizeBytes: screenshotBuf.length, device };
|
|
17061
|
+
screenshotMeta = { base64: screenshotBuf.toString("base64"), sizeBytes: screenshotBuf.length, device };
|
|
16972
17062
|
}
|
|
16973
17063
|
const mediaMeta = downloadMedia ? await harvestPageMedia(result.bodyHtml, canonicalUrl, { types: mediaTypes ?? ["image", "video", "audio"] }) : null;
|
|
16974
17064
|
await logRequestEvent({ userId: user.id, source: "extract_url", status: "done", query: canonicalUrl, resultCount: result.headings.length, result });
|
|
@@ -17306,10 +17396,10 @@ var init_server = __esm({
|
|
|
17306
17396
|
});
|
|
17307
17397
|
|
|
17308
17398
|
// bin/api-server.ts
|
|
17309
|
-
var
|
|
17399
|
+
var import_node_fs5 = require("fs");
|
|
17310
17400
|
function loadDotEnv() {
|
|
17311
17401
|
try {
|
|
17312
|
-
for (const line of (0,
|
|
17402
|
+
for (const line of (0, import_node_fs5.readFileSync)(".env", "utf8").split("\n")) {
|
|
17313
17403
|
const eq = line.indexOf("=");
|
|
17314
17404
|
if (eq < 1 || line.trimStart().startsWith("#")) continue;
|
|
17315
17405
|
const k = line.slice(0, eq).trim();
|