mcp-scraper 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -2
- package/dist/bin/api-server.cjs +957 -243
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/mcp-stdio-server.cjs +540 -158
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +2 -1
- package/dist/bin/mcp-stdio-server.js.map +1 -1
- package/dist/bin/paa-harvest.cjs +36 -5
- package/dist/bin/paa-harvest.cjs.map +1 -1
- package/dist/bin/paa-harvest.js +5 -3
- package/dist/bin/paa-harvest.js.map +1 -1
- package/dist/{chunk-6TWZS2FQ.js → chunk-RE6HCRYC.js} +543 -159
- package/dist/chunk-RE6HCRYC.js.map +1 -0
- package/dist/{chunk-W4P2U5VF.js → chunk-TM22BLWP.js} +46 -34
- package/dist/chunk-TM22BLWP.js.map +1 -0
- package/dist/{chunk-7HB7NDOY.js → chunk-ZK456YXN.js} +12 -2
- package/dist/chunk-ZK456YXN.js.map +1 -0
- package/dist/chunk-ZMOWIBMK.js +36 -0
- package/dist/chunk-ZMOWIBMK.js.map +1 -0
- package/dist/index.cjs +34 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/{server-2Y27U4TO.js → server-QXVVTKJP.js} +311 -48
- package/dist/server-QXVVTKJP.js.map +1 -0
- package/dist/{worker-UT4ZQU2T.js → worker-AUCXFHEL.js} +6 -4
- package/dist/worker-AUCXFHEL.js.map +1 -0
- package/docs/adr/0001-in-page-graphql-interception-for-anti-bot-scraping.md +58 -0
- package/docs/adr/README.md +11 -0
- package/docs/mcp-tool-quality-spec.md +238 -0
- package/package.json +5 -4
- package/dist/chunk-6TWZS2FQ.js.map +0 -1
- package/dist/chunk-7HB7NDOY.js.map +0 -1
- package/dist/chunk-W4P2U5VF.js.map +0 -1
- package/dist/server-2Y27U4TO.js.map +0 -1
- package/dist/worker-UT4ZQU2T.js.map +0 -1
package/dist/bin/api-server.cjs
CHANGED
|
@@ -50,6 +50,109 @@ var init_harvest_timeout = __esm({
|
|
|
50
50
|
}
|
|
51
51
|
});
|
|
52
52
|
|
|
53
|
+
// src/lib/browser-service-env.ts
|
|
54
|
+
/**
 * Reads the browser-service API key from the environment.
 *
 * `BROWSER_SERVICE_API_KEY` takes precedence; `KERNEL_API_KEY` is the fallback.
 * Each variable is trimmed on its own, so a blank preferred variable (for
 * example an empty `BROWSER_SERVICE_API_KEY=` line in an env file) no longer
 * hides a usable fallback value.
 *
 * @returns {string | undefined} The trimmed key, or `undefined` when neither variable holds a non-blank value.
 */
function browserServiceApiKey() {
  const preferred = process.env.BROWSER_SERVICE_API_KEY?.trim();
  const fallback = process.env.KERNEL_API_KEY?.trim();
  return preferred || fallback || void 0;
}
|
|
58
|
+
/**
 * Reads the configured browser-service proxy id from the environment.
 *
 * `BROWSER_SERVICE_PROXY_ID` takes precedence; `KERNEL_PROXY_ID` is the
 * fallback. Each variable is trimmed on its own, so a blank preferred variable
 * no longer hides a usable fallback value.
 *
 * @returns {string | undefined} The trimmed proxy id, or `undefined` when neither variable holds a non-blank value.
 */
function browserServiceProxyId() {
  const preferred = process.env.BROWSER_SERVICE_PROXY_ID?.trim();
  const fallback = process.env.KERNEL_PROXY_ID?.trim();
  return preferred || fallback || void 0;
}
|
|
62
|
+
// Init hook for src/lib/browser-service-env.ts, registered through `__esm`
// (defined outside this excerpt — presumably the bundler's lazy module
// initializer; confirm against the bundle prelude).
// The wrapped module body contains only the "use strict" directive.
var init_browser_service_env = __esm({
|
|
63
|
+
"src/lib/browser-service-env.ts"() {
|
|
64
|
+
"use strict";
|
|
65
|
+
}
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
// src/errors.ts
|
|
69
|
+
/**
 * Rewrites vendor-specific wording ("kernel", "kernel.sh") in a message into
 * neutral terms before the message leaves the service.
 *
 * Replacement order matters. Plural phrases are handled first and singular
 * phrases second, each closed with a word boundary. Previously the plural
 * pattern used `sessions?`, which also consumed the singular form, so
 * "kernel session" became "sessions" and the "this session" rules never ran.
 *
 * @param {string} message Message that may mention the vendor.
 * @returns {string} The message with vendor wording replaced, runs of spaces collapsed, and ends trimmed.
 */
function sanitizeVendorName(message) {
  return message
    .replace(/kernel\.sh\s+sessions\b/gi, "sessions")
    .replace(/kernel\.sh\s+session\b/gi, "this session")
    .replace(/kernel\.sh/gi, "the service")
    .replace(/kernel\s+sessions\b/gi, "sessions")
    .replace(/kernel\s+session\b/gi, "this session")
    .replace(/\bkernel\b/gi, "the service")
    // Replacements can leave doubled spaces behind; normalise them.
    .replace(/ +/g, " ")
    .trim();
}
|
|
72
|
+
// Error types shared across the bundle (src/errors.ts). The bindings are
// declared here and assigned when init_errors() runs.
var RECAPTCHA_INSTRUCTIONS, CaptchaError, ExtractionError, RequestAbortedError;
var init_errors = __esm({
  "src/errors.ts"() {
    "use strict";
    // Remediation text attached to CAPTCHA failures.
    RECAPTCHA_INSTRUCTIONS = "Google returned a CAPTCHA. Run with --headless=false to re-warm the browser profile, then retry.";
    // Thrown when a CAPTCHA is detected; carries the remediation text.
    CaptchaError = class extends Error {
      constructor(instructions) {
        super(`CAPTCHA detected. ${instructions}`);
        this.instructions = instructions;
        this.name = "CaptchaError";
      }
    };
    // Extraction failure that keeps a reference to the underlying cause.
    ExtractionError = class extends Error {
      constructor(message, cause) {
        super(message);
        this.cause = cause;
        this.name = "ExtractionError";
      }
    };
    // Signals that a request was aborted; has a default message.
    RequestAbortedError = class extends Error {
      constructor(message = "Request aborted before harvest completed") {
        super(message);
        this.name = "RequestAbortedError";
      }
    };
  }
});
|
|
101
|
+
|
|
102
|
+
// src/api/outbound-sanitize.ts
|
|
103
|
+
/**
 * Recursively copies a diagnostics value, renaming vendor-specific keys and
 * scrubbing vendor wording from string values.
 *
 * - Strings are passed through `sanitizeVendorName` only when the key they sit
 *   under matches `SANITIZED_VALUE_KEYS` and the text mentions "kernel".
 * - Arrays are mapped element by element, keeping the parent key.
 * - Objects are rebuilt; keys listed in `KEY_RENAMES` are renamed.
 * - Any other value is returned unchanged.
 *
 * Only own entries of `KEY_RENAMES` count as renames. A bare
 * `KEY_RENAMES[key]` lookup also finds inherited members, so a key such as
 * "constructor" or "toString" used to be replaced by a stringified function.
 * The result is built with `Object.fromEntries` so that a "__proto__" key
 * stays an ordinary own property instead of changing the copy's prototype.
 *
 * @param {*} value Value to sanitize.
 * @param {string} [parentKey=""] Original (pre-rename) key under which `value` was found.
 * @returns {*} Sanitized copy for objects and arrays; the scrubbed or original value otherwise.
 */
function sanitizeOutboundDiagnostics(value, parentKey = "") {
  if (typeof value === "string") {
    const shouldScrub = SANITIZED_VALUE_KEYS.test(parentKey) && /kernel/i.test(value);
    return shouldScrub ? sanitizeVendorName(value) : value;
  }
  if (Array.isArray(value)) {
    return value.map((item) => sanitizeOutboundDiagnostics(item, parentKey));
  }
  if (value !== null && typeof value === "object") {
    const entries = Object.entries(value).map(([key, val]) => {
      const isRenamed = Object.prototype.hasOwnProperty.call(KEY_RENAMES, key);
      const renamed = isRenamed ? KEY_RENAMES[key] ?? key : key;
      // The value check uses the original key, not the renamed one.
      return [renamed, sanitizeOutboundDiagnostics(val, key)];
    });
    return Object.fromEntries(entries);
  }
  return value;
}
|
|
121
|
+
/**
 * Sanitizes every attempt record in a list.
 *
 * @param {Array<*>} attempts Attempt records to sanitize.
 * @returns {Array<*>} New array holding the sanitized records.
 */
function sanitizeAttempts(attempts) {
  // Forward the record only: `map` also passes the index and the array,
  // and the index must not be taken for a parent key.
  const scrubAttempt = (attempt) => sanitizeOutboundDiagnostics(attempt);
  return attempts.map(scrubAttempt);
}
|
|
124
|
+
/**
 * Returns a harvest result whose `diagnostics.debug` payload has been
 * sanitized.
 *
 * @param {*} result Harvest result; may be nullish or lack diagnostics.
 * @returns {*} The same `result` reference when there is no truthy
 *   `diagnostics.debug`; otherwise a shallow copy with a sanitized `debug`.
 */
function sanitizeHarvestResult(result) {
  const debugPayload = result?.diagnostics?.debug;
  if (!debugPayload) return result;

  const cleanedDiagnostics = {
    ...result.diagnostics,
    debug: sanitizeOutboundDiagnostics(debugPayload)
  };
  return { ...result, diagnostics: cleanedDiagnostics };
}
|
|
135
|
+
// Lookup tables for outbound diagnostics sanitizing
// (src/api/outbound-sanitize.ts); assigned when init_outbound_sanitize() runs.
var KEY_RENAMES, SANITIZED_VALUE_KEYS;
var init_outbound_sanitize = __esm({
  "src/api/outbound-sanitize.ts"() {
    "use strict";
    init_errors();
    // snake_case diagnostic keys and their neutral names.
    const snakeCaseRenames = {
      kernel_session_id: "browser_session_id",
      kernel_delete_started: "session_cleanup_started",
      kernel_delete_succeeded: "session_cleanup_succeeded",
      kernel_delete_error: "session_cleanup_error"
    };
    // camelCase diagnostic keys and their neutral names.
    const camelCaseRenames = {
      kernelSessionId: "browserSessionId",
      kernelDeleteStarted: "sessionCleanupStarted",
      kernelDeleteSucceeded: "sessionCleanupSucceeded",
      kernelDeleteError: "sessionCleanupError",
      kernelProxyId: "proxyId"
    };
    KEY_RENAMES = {
      kernel: "browserRuntime",
      ...snakeCaseRenames,
      ...camelCaseRenames
    };
    // Keys whose string values are eligible for vendor-name scrubbing.
    SANITIZED_VALUE_KEYS = new RegExp("error|message", "i");
  }
});
|
|
155
|
+
|
|
53
156
|
// src/blog/registry.ts
|
|
54
157
|
var posts;
|
|
55
158
|
var init_registry = __esm({
|
|
@@ -3425,7 +3528,7 @@ var init_url_utils = __esm({
|
|
|
3425
3528
|
|
|
3426
3529
|
// src/api/kernel-fetch.ts
|
|
3427
3530
|
async function fetchWithKernel(url) {
|
|
3428
|
-
const apiKey =
|
|
3531
|
+
const apiKey = browserServiceApiKey();
|
|
3429
3532
|
if (!apiKey) throw new Error("Browser backend API key not set");
|
|
3430
3533
|
const client = new import_sdk.default({ apiKey });
|
|
3431
3534
|
const kb = await client.browsers.create({ stealth: true, timeout_seconds: 60 });
|
|
@@ -3450,6 +3553,7 @@ var init_kernel_fetch = __esm({
|
|
|
3450
3553
|
"src/api/kernel-fetch.ts"() {
|
|
3451
3554
|
"use strict";
|
|
3452
3555
|
import_sdk = __toESM(require("@onkernel/sdk"), 1);
|
|
3556
|
+
init_browser_service_env();
|
|
3453
3557
|
import_playwright = require("playwright");
|
|
3454
3558
|
}
|
|
3455
3559
|
});
|
|
@@ -3468,9 +3572,9 @@ async function extractKpo(opts) {
|
|
|
3468
3572
|
redirect: "manual"
|
|
3469
3573
|
});
|
|
3470
3574
|
if (res.status >= 300 && res.status < 400) {
|
|
3471
|
-
const
|
|
3472
|
-
if (!
|
|
3473
|
-
const next = new URL(
|
|
3575
|
+
const location2 = res.headers.get("location");
|
|
3576
|
+
if (!location2) return null;
|
|
3577
|
+
const next = new URL(location2, target).href;
|
|
3474
3578
|
const checkedRedirect = await validatePublicHttpUrl(next, { field: "redirect URL" });
|
|
3475
3579
|
if (checkedRedirect.error || !checkedRedirect.parsed) return null;
|
|
3476
3580
|
target = checkedRedirect.parsed.href;
|
|
@@ -8216,6 +8320,7 @@ var init_rates = __esm({
|
|
|
8216
8320
|
yt_channel: 50,
|
|
8217
8321
|
yt_transcription: 200,
|
|
8218
8322
|
fb_ad: 50,
|
|
8323
|
+
maps_search: 2e3,
|
|
8219
8324
|
maps_place: 2e3,
|
|
8220
8325
|
maps_review: 50,
|
|
8221
8326
|
fb_search: 50,
|
|
@@ -8277,6 +8382,14 @@ var init_rates = __esm({
|
|
|
8277
8382
|
credits: mcToCredits(MC_COSTS.fb_ad),
|
|
8278
8383
|
unit: "per call"
|
|
8279
8384
|
},
|
|
8385
|
+
{
|
|
8386
|
+
key: "maps_search",
|
|
8387
|
+
label: "Maps business search",
|
|
8388
|
+
aliases: ["maps_search", "google maps search", "gmb search", "gbp search", "business profiles"],
|
|
8389
|
+
credits: mcToCredits(MC_COSTS.maps_search),
|
|
8390
|
+
unit: "per search",
|
|
8391
|
+
notes: "Returns up to 50 Google Maps business/profile candidates. Use maps_place_intel to hydrate selected businesses."
|
|
8392
|
+
},
|
|
8280
8393
|
{
|
|
8281
8394
|
key: "maps_place",
|
|
8282
8395
|
label: "Maps business lookup",
|
|
@@ -8338,6 +8451,7 @@ var init_rates = __esm({
|
|
|
8338
8451
|
TRANSCRIPTION_REFUND: "transcription_refund",
|
|
8339
8452
|
YT_CHANNEL: "yt_channel",
|
|
8340
8453
|
FB_AD: "fb_ad",
|
|
8454
|
+
MAPS_SEARCH: "maps_search",
|
|
8341
8455
|
MAPS_PLACE: "maps_place",
|
|
8342
8456
|
MAPS_REVIEW: "maps_review",
|
|
8343
8457
|
MAPS_REVIEW_REFUND: "maps_review_refund",
|
|
@@ -8484,40 +8598,6 @@ var init_selectors = __esm({
|
|
|
8484
8598
|
}
|
|
8485
8599
|
});
|
|
8486
8600
|
|
|
8487
|
-
// src/errors.ts
|
|
8488
|
-
function sanitizeVendorName(message) {
|
|
8489
|
-
return message.replace(/kernel\.sh\s+sessions?/gi, "sessions").replace(/kernel\.sh\s+session/gi, "this session").replace(/kernel\.sh/gi, "the service").replace(/kernel\s+sessions?/gi, "sessions").replace(/kernel\s+session/gi, "this session").replace(/\bkernel\b/gi, "the service").replace(/ +/g, " ").trim();
|
|
8490
|
-
}
|
|
8491
|
-
var RECAPTCHA_INSTRUCTIONS, CaptchaError, ExtractionError, RequestAbortedError;
|
|
8492
|
-
var init_errors = __esm({
|
|
8493
|
-
"src/errors.ts"() {
|
|
8494
|
-
"use strict";
|
|
8495
|
-
RECAPTCHA_INSTRUCTIONS = "Google returned a CAPTCHA. Run with --headless=false to re-warm the browser profile, then retry.";
|
|
8496
|
-
CaptchaError = class extends Error {
|
|
8497
|
-
constructor(instructions) {
|
|
8498
|
-
super(`CAPTCHA detected. ${instructions}`);
|
|
8499
|
-
this.instructions = instructions;
|
|
8500
|
-
}
|
|
8501
|
-
instructions;
|
|
8502
|
-
name = "CaptchaError";
|
|
8503
|
-
};
|
|
8504
|
-
ExtractionError = class extends Error {
|
|
8505
|
-
constructor(message, cause) {
|
|
8506
|
-
super(message);
|
|
8507
|
-
this.cause = cause;
|
|
8508
|
-
}
|
|
8509
|
-
cause;
|
|
8510
|
-
name = "ExtractionError";
|
|
8511
|
-
};
|
|
8512
|
-
RequestAbortedError = class extends Error {
|
|
8513
|
-
name = "RequestAbortedError";
|
|
8514
|
-
constructor(message = "Request aborted before harvest completed") {
|
|
8515
|
-
super(message);
|
|
8516
|
-
}
|
|
8517
|
-
};
|
|
8518
|
-
}
|
|
8519
|
-
});
|
|
8520
|
-
|
|
8521
8601
|
// src/driver/BrowserDriver.ts
|
|
8522
8602
|
function positiveIntFromEnv(name, fallback) {
|
|
8523
8603
|
const raw = process.env[name];
|
|
@@ -9499,7 +9579,7 @@ async function writeOutputs(result, outputDir) {
|
|
|
9499
9579
|
}
|
|
9500
9580
|
}
|
|
9501
9581
|
async function ytHarvest(rawOptions) {
|
|
9502
|
-
const kernelApiKey =
|
|
9582
|
+
const kernelApiKey = browserServiceApiKey();
|
|
9503
9583
|
if (!kernelApiKey) {
|
|
9504
9584
|
throw new Error("A browser backend API key is required \u2014 YouTube harvesting requires a stealth session.");
|
|
9505
9585
|
}
|
|
@@ -9533,6 +9613,7 @@ var init_youtube_harvest = __esm({
|
|
|
9533
9613
|
"src/youtube/youtube-harvest.ts"() {
|
|
9534
9614
|
"use strict";
|
|
9535
9615
|
import_node_fs2 = require("fs");
|
|
9616
|
+
init_browser_service_env();
|
|
9536
9617
|
import_node_path4 = __toESM(require("path"), 1);
|
|
9537
9618
|
import_papaparse = __toESM(require("papaparse"), 1);
|
|
9538
9619
|
init_schemas2();
|
|
@@ -9610,7 +9691,7 @@ function parseTimedtextXml(xml) {
|
|
|
9610
9691
|
return results;
|
|
9611
9692
|
}
|
|
9612
9693
|
async function fetchViaKernelInnertube(videoId) {
|
|
9613
|
-
const kernelApiKey =
|
|
9694
|
+
const kernelApiKey = browserServiceApiKey();
|
|
9614
9695
|
if (!kernelApiKey) return null;
|
|
9615
9696
|
const driver = new BrowserDriver();
|
|
9616
9697
|
const start = Date.now();
|
|
@@ -9753,7 +9834,7 @@ async function attemptKernelWhisper(videoId, kernelApiKey, falKey, start) {
|
|
|
9753
9834
|
}
|
|
9754
9835
|
}
|
|
9755
9836
|
async function fetchViaKernelWhisper(videoId) {
|
|
9756
|
-
const kernelApiKey =
|
|
9837
|
+
const kernelApiKey = browserServiceApiKey();
|
|
9757
9838
|
const falKey = process.env.FAL_KEY;
|
|
9758
9839
|
if (!kernelApiKey || !falKey) return null;
|
|
9759
9840
|
const start = Date.now();
|
|
@@ -9793,6 +9874,7 @@ var init_CaptionFetcher = __esm({
|
|
|
9793
9874
|
"src/youtube/CaptionFetcher.ts"() {
|
|
9794
9875
|
"use strict";
|
|
9795
9876
|
init_BrowserDriver();
|
|
9877
|
+
init_browser_service_env();
|
|
9796
9878
|
import_client2 = require("@fal-ai/client");
|
|
9797
9879
|
WHISPER_RECORD_SECONDS = 90;
|
|
9798
9880
|
}
|
|
@@ -10034,6 +10116,7 @@ var init_screenshot_routes = __esm({
|
|
|
10034
10116
|
"src/api/screenshot-routes.ts"() {
|
|
10035
10117
|
"use strict";
|
|
10036
10118
|
import_hono3 = require("hono");
|
|
10119
|
+
init_browser_service_env();
|
|
10037
10120
|
import_zod14 = require("zod");
|
|
10038
10121
|
init_screenshot();
|
|
10039
10122
|
init_api_auth();
|
|
@@ -10068,7 +10151,7 @@ var init_screenshot_routes = __esm({
|
|
|
10068
10151
|
}
|
|
10069
10152
|
const device2 = body.device === "mobile" ? "mobile" : "desktop";
|
|
10070
10153
|
try {
|
|
10071
|
-
const buf = await captureScreenshot(parsedFallback.href,
|
|
10154
|
+
const buf = await captureScreenshot(parsedFallback.href, browserServiceApiKey(), device2);
|
|
10072
10155
|
return new Response(new Uint8Array(buf), {
|
|
10073
10156
|
status: 200,
|
|
10074
10157
|
headers: {
|
|
@@ -10084,7 +10167,7 @@ var init_screenshot_routes = __esm({
|
|
|
10084
10167
|
}
|
|
10085
10168
|
const device = body.device === "mobile" ? "mobile" : "desktop";
|
|
10086
10169
|
try {
|
|
10087
|
-
const buf = await captureScreenshot(urlCheck.parsed.href,
|
|
10170
|
+
const buf = await captureScreenshot(urlCheck.parsed.href, browserServiceApiKey(), device);
|
|
10088
10171
|
return new Response(new Uint8Array(buf), {
|
|
10089
10172
|
status: 200,
|
|
10090
10173
|
headers: {
|
|
@@ -11034,9 +11117,9 @@ function proxyName(country, state, city) {
|
|
|
11034
11117
|
/**
 * Builds the proxy name used for a ZIP-code-targeted proxy.
 *
 * @param {string} zip ZIP code appended to the fixed prefix.
 * @returns {string} Name of the form `mcp-serp-residential-us-zip-<zip>`.
 */
function zipProxyName(zip) {
  const prefix = "mcp-serp-residential-us-zip-";
  return `${prefix}${zip}`;
}
|
|
11037
|
-
function parseKernelLocationProxyTarget(
|
|
11038
|
-
if (!
|
|
11039
|
-
const canonicalLocation = normalizeLocation(
|
|
11120
|
+
function parseKernelLocationProxyTarget(location2, gl) {
|
|
11121
|
+
if (!location2 || gl.toLowerCase() !== "us") return null;
|
|
11122
|
+
const canonicalLocation = normalizeLocation(location2);
|
|
11040
11123
|
let parts = canonicalLocation.split(",").map((part) => part.trim()).filter(Boolean);
|
|
11041
11124
|
if (parts.length > 1 && isUnitedStates(parts[parts.length - 1])) {
|
|
11042
11125
|
parts = parts.slice(0, -1);
|
|
@@ -11369,29 +11452,30 @@ function buildPageIntelUrl(body, country) {
|
|
|
11369
11452
|
return `https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=${country}&q=${encodeURIComponent(body.query.trim())}&search_type=keyword_unordered`;
|
|
11370
11453
|
}
|
|
11371
11454
|
/**
 * Builds browser launch options that use the API key and proxy id read from
 * the environment helpers.
 *
 * @returns {{headless: boolean, kernelApiKey: (string|undefined), kernelProxyId: (string|undefined), viewport: {width: number, height: number}, locale: string}}
 */
function kernelLaunchOpts() {
  const launchOptions = {
    headless: true,
    kernelApiKey: browserServiceApiKey(),
    kernelProxyId: browserServiceProxyId(),
    viewport: { width: 1280, height: 900 },
    locale: "en-US"
  };
  return launchOptions;
}
|
|
11374
11457
|
/**
 * Builds browser launch options, trying to resolve a location-based proxy for
 * "New York, NY" (gl "us") first.
 *
 * Proxy resolution is best-effort: if `resolveKernelProxyId` rejects or
 * returns no proxy id, the proxy id configured in the environment is used.
 *
 * @returns {Promise<{headless: boolean, kernelApiKey: (string|undefined), kernelProxyId: (string|undefined), viewport: {width: number, height: number}, locale: string}>}
 */
async function kernelLaunchOptsResidential() {
  let residentialProxyId = browserServiceProxyId();
  try {
    const { kernelProxyId: resolvedProxyId } = await resolveKernelProxyId({
      kernelApiKey: browserServiceApiKey(),
      proxyMode: "location",
      configuredKernelProxyId: browserServiceProxyId(),
      location: "New York, NY",
      gl: "us"
    });
    if (resolvedProxyId) {
      residentialProxyId = resolvedProxyId;
    }
  } catch {
    // Resolution failed: fall back to the configured proxy id.
    residentialProxyId = browserServiceProxyId();
  }
  const launchOptions = {
    headless: true,
    kernelApiKey: browserServiceApiKey(),
    kernelProxyId: residentialProxyId,
    viewport: { width: 1280, height: 900 },
    locale: "en-US"
  };
  return launchOptions;
}
|
|
11390
11473
|
var import_hono4, import_zod15, import_client3, FacebookAdBodySchema, FacebookPageIntelBodySchema, FacebookTranscribeBodySchema, FacebookSearchBodySchema, FacebookMediaBodySchema, facebookAdApp, ALLOWED_MEDIA_HOSTS;
|
|
11391
11474
|
var init_facebook_ad_routes = __esm({
|
|
11392
11475
|
"src/api/facebook-ad-routes.ts"() {
|
|
11393
11476
|
"use strict";
|
|
11394
11477
|
import_hono4 = require("hono");
|
|
11478
|
+
init_browser_service_env();
|
|
11395
11479
|
import_zod15 = require("zod");
|
|
11396
11480
|
init_db();
|
|
11397
11481
|
init_rates();
|
|
@@ -11687,7 +11771,7 @@ var init_facebook_ad_routes = __esm({
|
|
|
11687
11771
|
});
|
|
11688
11772
|
|
|
11689
11773
|
// src/schemas.ts
|
|
11690
|
-
var import_zod16, HarvestOptionsSchema, MapsPlaceOptionsSchema, RawPAAItemSchema, RawMapsOverviewSchema, RawMapsHoursRowSchema, RawMapsReviewStatsSchema, RawMapsReviewCardSchema, RawMapsAboutAttributeSchema;
|
|
11774
|
+
var import_zod16, HarvestOptionsSchema, MapsPlaceOptionsSchema, MapsSearchOptionsSchema, RawPAAItemSchema, RawMapsOverviewSchema, RawMapsHoursRowSchema, RawMapsReviewStatsSchema, RawMapsReviewCardSchema, RawMapsAboutAttributeSchema;
|
|
11691
11775
|
var init_schemas3 = __esm({
|
|
11692
11776
|
"src/schemas.ts"() {
|
|
11693
11777
|
"use strict";
|
|
@@ -11725,6 +11809,16 @@ var init_schemas3 = __esm({
|
|
|
11725
11809
|
kernelProxyId: import_zod16.z.string().optional(),
|
|
11726
11810
|
headless: import_zod16.z.boolean().default(true)
|
|
11727
11811
|
});
|
|
11812
|
+
MapsSearchOptionsSchema = import_zod16.z.object({
|
|
11813
|
+
query: import_zod16.z.string().min(1),
|
|
11814
|
+
location: import_zod16.z.string().optional(),
|
|
11815
|
+
gl: import_zod16.z.string().length(2).default("us"),
|
|
11816
|
+
hl: import_zod16.z.string().length(2).default("en"),
|
|
11817
|
+
maxResults: import_zod16.z.number().int().min(1).max(50).default(10),
|
|
11818
|
+
kernelApiKey: import_zod16.z.string().optional(),
|
|
11819
|
+
kernelProxyId: import_zod16.z.string().optional(),
|
|
11820
|
+
headless: import_zod16.z.boolean().default(true)
|
|
11821
|
+
});
|
|
11728
11822
|
RawPAAItemSchema = import_zod16.z.object({
|
|
11729
11823
|
question: import_zod16.z.string().min(1),
|
|
11730
11824
|
answer: import_zod16.z.string().optional(),
|
|
@@ -11785,8 +11879,8 @@ var init_MapsNavigator = __esm({
|
|
|
11785
11879
|
this.page = page;
|
|
11786
11880
|
}
|
|
11787
11881
|
page;
|
|
11788
|
-
async navigateToPlacePage(businessName,
|
|
11789
|
-
const query = `${businessName} ${
|
|
11882
|
+
async navigateToPlacePage(businessName, location2) {
|
|
11883
|
+
const query = `${businessName} ${location2}`;
|
|
11790
11884
|
const searchUrl = `https://www.google.com/maps/search/${encodeURIComponent(query)}`;
|
|
11791
11885
|
await this.page.goto(searchUrl, { waitUntil: "domcontentloaded", timeout: 45e3 });
|
|
11792
11886
|
const onPlacePage = await this.page.evaluate(() => /\/maps\/place\//.test(window.location.href));
|
|
@@ -12230,7 +12324,172 @@ var init_MapsExtractor = __esm({
|
|
|
12230
12324
|
}
|
|
12231
12325
|
});
|
|
12232
12326
|
|
|
12327
|
+
// src/extractor/MapsSearchExtractor.ts
|
|
12328
|
+
// Google Maps search-results extractor (src/extractor/MapsSearchExtractor.ts).
// The bindings are declared here and assigned when init_MapsSearchExtractor() runs.
var MAPS_SEARCH_SCROLL_BUDGET_MS, MAPS_SEARCH_SCROLL_STEP_MS, MAPS_SEARCH_MAX_NO_GROWTH_ROUNDS, MapsSearchExtractor;
var init_MapsSearchExtractor = __esm({
  "src/extractor/MapsSearchExtractor.ts"() {
    "use strict";
    init_errors();
    // Upper bound, in milliseconds, on the time spent in the scroll loop.
    MAPS_SEARCH_SCROLL_BUDGET_MS = 6e4;
    // Pause, in milliseconds, after each scroll before reading results again.
    MAPS_SEARCH_SCROLL_STEP_MS = 1200;
    // Consecutive rounds without a new result after which scrolling stops.
    MAPS_SEARCH_MAX_NO_GROWTH_ROUNDS = 4;
    /**
     * Opens a Google Maps search page through the supplied driver and collects
     * the place results listed on it.
     */
    MapsSearchExtractor = class {
      /**
       * @param {object} driver Browser driver; `launch`, `getPage` and `close` are called on it.
       */
      constructor(driver) {
        this.driver = driver;
      }
      driver;
      /**
       * Runs one search: launches the driver, loads the search URL, checks for
       * a block page, then scrolls and collects results.
       *
       * @param {object} options Reads `query`, `location`, `hl`, `gl`,
       *   `maxResults`, `headless`, `kernelApiKey` and `kernelProxyId`.
       * @returns {Promise<object>} Search summary containing the `results` array.
       * @throws {CaptchaError} When the loaded page looks like a block/CAPTCHA page.
       */
      async extract(options) {
        const startMs = Date.now();
        const searchQuery = [options.query, options.location].filter(Boolean).join(" ");
        const searchUrl = `https://www.google.com/maps/search/${encodeURIComponent(searchQuery)}?hl=${encodeURIComponent(options.hl)}`;
        const config = {
          headless: options.headless,
          kernelApiKey: options.kernelApiKey,
          kernelProxyId: options.kernelProxyId,
          viewport: { width: 1280, height: 900 },
          locale: `${options.hl}-${options.gl.toUpperCase()}`
        };
        try {
          await this.driver.launch(config);
          const page = this.driver.getPage();
          await page.goto(searchUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
          // Fixed settle delay before inspecting the page.
          await page.waitForTimeout(3e3);
          const blocked = await this.detectBlock(page);
          if (blocked) throw new CaptchaError(RECAPTCHA_INSTRUCTIONS);
          const results = await this.collectResults(page, options.maxResults);
          return {
            query: options.query,
            location: options.location ?? null,
            searchQuery,
            searchUrl,
            extractedAt: (/* @__PURE__ */ new Date()).toISOString(),
            requestedMaxResults: options.maxResults,
            resultCount: results.length,
            results,
            durationMs: Date.now() - startMs
          };
        } finally {
          // The driver is closed on success and on failure.
          await this.driver.close();
        }
      }
      /**
       * Checks the first 2000 characters of page text and the page URL for
       * block/CAPTCHA markers.
       *
       * @param {object} page Page exposing `evaluate`.
       * @returns {Promise<boolean>} True when a marker is found.
       */
      async detectBlock(page) {
        return page.evaluate(() => {
          // Guard against a document without a body instead of throwing.
          const text = (document.body?.innerText ?? "").slice(0, 2e3);
          return /unusual traffic|captcha|recaptcha|about this page/i.test(text) || /\/sorry\//.test(location.href);
        });
      }
      /**
       * Repeatedly reads the visible results and scrolls, until `maxResults`
       * unique results are gathered, the time budget runs out, or several
       * rounds in a row add nothing new.
       *
       * @param {object} page Page exposing `evaluate` and `waitForTimeout`.
       * @param {number} maxResults Maximum number of results to return.
       * @returns {Promise<object[]>} Unique results with `position` renumbered in discovery order.
       */
      async collectResults(page, maxResults) {
        const seen = /* @__PURE__ */ new Map();
        const started = Date.now();
        let noGrowthRounds = 0;
        while (Date.now() - started < MAPS_SEARCH_SCROLL_BUDGET_MS) {
          const before = seen.size;
          const batch = await this.extractVisibleResults(page);
          for (const result of batch) {
            const key = this.resultKey(result);
            if (!seen.has(key)) seen.set(key, { ...result, position: seen.size + 1 });
            if (seen.size >= maxResults) break;
          }
          if (seen.size >= maxResults) break;
          if (seen.size === before) noGrowthRounds += 1;
          else noGrowthRounds = 0;
          if (noGrowthRounds >= MAPS_SEARCH_MAX_NO_GROWTH_ROUNDS) break;
          await page.evaluate(() => {
            const feed = document.querySelector('[role="feed"]');
            if (feed) {
              feed.scrollTop = feed.scrollHeight;
            } else {
              window.scrollTo(0, document.body.scrollHeight);
            }
          });
          await page.waitForTimeout(MAPS_SEARCH_SCROLL_STEP_MS);
        }
        return [...seen.values()].slice(0, maxResults);
      }
      /**
       * Picks the de-duplication key for a result: `cidDecimal` when present,
       * otherwise the place URL cut at the first `?` or `&`, otherwise the name.
       *
       * `String.prototype.replace` never returns a nullish value, so the old
       * `?? result.name` fallback could not run, and a missing `placeUrl`
       * threw. The URL is now guarded, and an empty URL falls through to the name.
       *
       * @param {{cidDecimal?: (string|null), placeUrl?: string, name?: string}} result
       * @returns {string} Key used to detect duplicates across scroll rounds.
       */
      resultKey(result) {
        if (result.cidDecimal != null) return result.cidDecimal;
        const stableUrl = (result.placeUrl ?? "").replace(/[?&].*$/, "");
        return stableUrl || result.name;
      }
      /**
       * Reads every place link currently in the document and derives one
       * result record per distinct link. The callback runs inside the page,
       * so it defines its own helpers.
       *
       * @param {object} page Page exposing `evaluate`.
       * @returns {Promise<object[]>} Result records in document order.
       */
      async extractVisibleResults(page) {
        return page.evaluate(() => {
          // Collapses whitespace; returns null for empty or missing text.
          function normalizeText(value) {
            const text = value?.replace(/\s+/g, " ").trim() ?? "";
            return text || null;
          }
          // Extracts the `0x…:0x…` pair following `!1s` in a place URL; the
          // second half is also returned in decimal form.
          function cidFromUrl(url) {
            const fid = url.match(/!1s(0x[0-9a-f]+):(0x[0-9a-f]+)/i);
            if (!fid) return { cid: null, cidDecimal: null };
            let cidDecimal = null;
            try {
              cidDecimal = BigInt(fid[2]).toString();
            } catch {
              // Best-effort: leave cidDecimal null if conversion is unavailable.
            }
            return { cid: `${fid[1]}:${fid[2]}`, cidDecimal };
          }
          // Gathers the direct text-node content of each div/span in the card,
          // keeping fragments of 2-139 characters and skipping duplicates.
          function textParts(card) {
            if (!card) return [];
            const parts = [];
            card.querySelectorAll("div, span").forEach((el2) => {
              const text = Array.from(el2.childNodes).filter((node) => node.nodeType === 3).map((node) => node.textContent?.trim() ?? "").filter((text2) => text2.length > 1 && text2.length < 140).join(" ");
              if (text && !parts.includes(text)) parts.push(text);
            });
            return parts;
          }
          // First part matching the pattern, or null.
          function firstMatching(parts, pattern) {
            const value = parts.find((part) => pattern.test(part));
            return value ?? null;
          }
          const out = [];
          const seen = /* @__PURE__ */ new Set();
          const anchors = Array.from(document.querySelectorAll('a[href*="/maps/place/"]'));
          for (const anchor of anchors) {
            const placeUrl = anchor.href;
            const stableUrl = placeUrl.replace(/[?&].*$/, "");
            if (seen.has(stableUrl)) continue;
            seen.add(stableUrl);
            const card = anchor.closest('.Nv2PK, [role="article"], .bfdHYd') ?? anchor.parentElement;
            const parts = textParts(card);
            const aria = normalizeText(anchor.getAttribute("aria-label"));
            const heading = normalizeText(card?.querySelector('.qBF1Pd, .fontHeadlineSmall, [role="heading"]')?.textContent);
            const name = aria ?? heading ?? parts[0] ?? stableUrl;
            const links = Array.from(card?.querySelectorAll("a[href]") ?? []);
            const websiteUrl = links.find((link) => link.href.startsWith("http") && !link.href.includes("google."))?.href ?? null;
            const directionsUrl = links.find((link) => /google\.[^/]+\/maps\/dir|\/dir\//i.test(link.href))?.href ?? null;
            const rating = firstMatching(parts, /^\d(?:\.\d)?$/);
            const reviewCountRaw = firstMatching(parts, /^\(?[\d,]+\)?$/);
            // The heading text is itself one of the card's text parts, so the
            // category/address heuristics would otherwise be free to pick the
            // business name. Exclude it from the candidates.
            const detailParts = parts.filter((part) => part !== name && part !== heading);
            const category = detailParts.find((part) => !/^\d(?:\.\d)?$|^\(?[\d,]+\)?$|open|closed|directions|website/i.test(part)) ?? null;
            const address = detailParts.find((part) => /\b[A-Z]{2}\s+\d{5}\b|\b(?:St|Street|Ave|Avenue|Rd|Road|Blvd|Drive|Dr)\b/i.test(part)) ?? null;
            const { cid, cidDecimal } = cidFromUrl(placeUrl);
            out.push({
              position: out.length + 1,
              name,
              placeUrl,
              cid,
              cidDecimal,
              rating,
              reviewCount: reviewCountRaw ? reviewCountRaw.replace(/[()]/g, "") : null,
              category,
              address,
              websiteUrl,
              directionsUrl,
              metadata: parts.slice(0, 20)
            });
          }
          return out;
        });
      }
    };
  }
});
|
|
12483
|
+
|
|
12233
12484
|
// src/api/maps-routes.ts
|
|
12485
|
+
/**
 * Builds the JSON error response shared by the Maps routes.
 *
 * A message containing "CAPTCHA" or "blocked" is reported as retryable with
 * HTTP 503 and the code "captcha_or_blocked"; anything else gets HTTP 500 and
 * the caller-supplied code. The message is passed through
 * `sanitizeVendorName` before it is returned.
 *
 * @param {object} c Request context; its `json(body, status)` method is called.
 * @param {string} msg Raw error message.
 * @param {string} errorCode Code to report when the failure is not a block.
 * @returns {*} Whatever `c.json` returns.
 */
function mapsErrorResponse(c, msg, errorCode) {
  const isBlocked = ["CAPTCHA", "blocked"].some((marker) => msg.includes(marker));
  const status = isBlocked ? 503 : 500;
  const payload = {
    error: sanitizeVendorName(msg),
    error_code: isBlocked ? "captcha_or_blocked" : errorCode,
    retryable: isBlocked
  };
  return c.json(payload, status);
}
|
|
12234
12493
|
var import_hono5, mapsApp;
|
|
12235
12494
|
var init_maps_routes = __esm({
|
|
12236
12495
|
"src/api/maps-routes.ts"() {
|
|
@@ -12239,10 +12498,59 @@ var init_maps_routes = __esm({
|
|
|
12239
12498
|
init_db();
|
|
12240
12499
|
init_rates();
|
|
12241
12500
|
init_MapsExtractor();
|
|
12501
|
+
init_MapsSearchExtractor();
|
|
12242
12502
|
init_BrowserDriver();
|
|
12243
12503
|
init_schemas3();
|
|
12244
12504
|
init_api_auth();
|
|
12505
|
+
init_errors();
|
|
12245
12506
|
mapsApp = new import_hono5.Hono();
|
|
12507
|
+
// POST /search — runs a Maps business search for the authenticated user.
// Flow: validate the body, debit MC_COSTS.maps_search, run the extractor, log
// the outcome; on failure the debit is credited back and an error response is
// built with mapsErrorResponse.
mapsApp.post("/search", createApiKeyAuth(), async (c) => {
  const user = c.get("user");
  // An unreadable JSON body is treated as an empty object and then rejected
  // by schema validation.
  const body = await c.req.json().catch(() => ({}));
  const parsed = MapsSearchOptionsSchema.safeParse({
    // Default key comes from browserServiceApiKey(), like the other call
    // sites, instead of reading process.env.KERNEL_API_KEY directly (which
    // ignored BROWSER_SERVICE_API_KEY). Fields in the body still override it.
    kernelApiKey: browserServiceApiKey(),
    ...body
  });
  if (!parsed.success) {
    return c.json({ error: parsed.error.issues[0]?.message ?? "Invalid request" }, 400);
  }
  // Query plus optional location; used as the ledger note and in the failure log.
  const searchLabel = [parsed.data.query, parsed.data.location].filter(Boolean).join(" ");
  const { ok, balance_mc } = await debitMc(
    user.id,
    MC_COSTS.maps_search,
    LedgerOperation.MAPS_SEARCH,
    searchLabel
  );
  if (!ok) return c.json(insufficientBalanceResponse(balance_mc, MC_COSTS.maps_search), 402);
  const driver = new BrowserDriver();
  const extractor = new MapsSearchExtractor(driver);
  try {
    const result = await extractor.extract(parsed.data);
    await logRequestEvent({
      userId: user.id,
      source: "maps_search",
      status: "done",
      query: result.searchQuery,
      location: parsed.data.location,
      resultCount: result.resultCount,
      result
    });
    return c.json(result);
  } catch (err) {
    // Refund the debit taken above before reporting the failure.
    await creditMc(user.id, MC_COSTS.maps_search, LedgerOperation.REFUND, "failed maps_search call");
    const msg = err instanceof Error ? err.message : String(err);
    await logRequestEvent({
      userId: user.id,
      source: "maps_search",
      status: "failed",
      query: searchLabel,
      location: parsed.data.location,
      error: msg
    });
    return mapsErrorResponse(c, msg, "maps_search_failed");
  } finally {
    await driver.close();
  }
});
|
|
12246
12554
|
mapsApp.post("/place", createApiKeyAuth(), async (c) => {
|
|
12247
12555
|
const user = c.get("user");
|
|
12248
12556
|
const body = await c.req.json().catch(() => ({}));
|
|
@@ -12309,10 +12617,7 @@ var init_maps_routes = __esm({
|
|
|
12309
12617
|
location: parsed.data.location,
|
|
12310
12618
|
error: msg
|
|
12311
12619
|
});
|
|
12312
|
-
|
|
12313
|
-
return c.json({ error: msg }, 503);
|
|
12314
|
-
}
|
|
12315
|
-
return c.json({ error: msg }, 500);
|
|
12620
|
+
return mapsErrorResponse(c, msg, "maps_place_failed");
|
|
12316
12621
|
} finally {
|
|
12317
12622
|
await driver.close();
|
|
12318
12623
|
}
|
|
@@ -12670,8 +12975,19 @@ function addCandidate(candidates, city, region, example) {
|
|
|
12670
12975
|
}
|
|
12671
12976
|
candidates.set(key, { city: normalizedCity, regionCode, count: 1, examples: [example] });
|
|
12672
12977
|
}
|
|
12978
|
+
/**
 * Percent-decodes SERP text as far as possible without throwing.
 *
 * Two candidates are tried in order: the text as given, then the text with
 * every `%` that is not followed by two hex digits escaped as `%25`. If both
 * fail to decode, the input is returned unchanged.
 *
 * @param {string} text Possibly percent-encoded text.
 * @returns {string} Decoded text, or the original `text` when it cannot be decoded.
 */
function decodeSerpText(text) {
  const candidates = [
    () => text,
    () => text.replace(/%(?![0-9a-fA-F]{2})/g, "%25")
  ];
  for (const makeCandidate of candidates) {
    try {
      return decodeURIComponent(makeCandidate());
    } catch {
      // Malformed escape sequence: move on to the next candidate.
    }
  }
  return text;
}
|
|
12673
12989
|
function scanText(candidates, text) {
|
|
12674
|
-
const normalized =
|
|
12990
|
+
const normalized = decodeSerpText(text).replace(/[+/|_-]+/g, " ");
|
|
12675
12991
|
for (const match of normalized.matchAll(CITY_STATE_RE)) {
|
|
12676
12992
|
addCandidate(candidates, match[1] ?? "", match[2] ?? "", normalized.slice(0, 180));
|
|
12677
12993
|
}
|
|
@@ -14079,8 +14395,8 @@ async function harvest(rawOptions) {
|
|
|
14079
14395
|
const onAttemptEvent = getAttemptLogSink(rawOptions);
|
|
14080
14396
|
const requestedProxyMode = raw.proxyMode;
|
|
14081
14397
|
const proxyMode = requestedProxyMode === "none" ? "none" : requestedProxyMode === "configured" ? "configured" : "location";
|
|
14082
|
-
const kernelApiKey = typeof raw.kernelApiKey === "string" ? raw.kernelApiKey.trim() :
|
|
14083
|
-
const configuredKernelProxyId = typeof raw.kernelProxyId === "string" ? raw.kernelProxyId.trim() :
|
|
14398
|
+
const kernelApiKey = typeof raw.kernelApiKey === "string" ? raw.kernelApiKey.trim() : browserServiceApiKey();
|
|
14399
|
+
const configuredKernelProxyId = typeof raw.kernelProxyId === "string" ? raw.kernelProxyId.trim() : browserServiceProxyId();
|
|
14084
14400
|
const proxyOpts = {
|
|
14085
14401
|
kernelApiKey,
|
|
14086
14402
|
proxyMode,
|
|
@@ -14267,6 +14583,7 @@ var init_harvest = __esm({
|
|
|
14267
14583
|
"src/harvest.ts"() {
|
|
14268
14584
|
"use strict";
|
|
14269
14585
|
init_schemas3();
|
|
14586
|
+
init_browser_service_env();
|
|
14270
14587
|
init_BrowserDriver();
|
|
14271
14588
|
init_PAAExtractor();
|
|
14272
14589
|
init_OutputSerializer();
|
|
@@ -14691,8 +15008,8 @@ async function captureSerpIntelligenceSnapshot(rawInput, runtimeOptions = {}) {
|
|
|
14691
15008
|
debug,
|
|
14692
15009
|
serpOnly: true,
|
|
14693
15010
|
headless: runtimeOptions.headless ?? true,
|
|
14694
|
-
kernelApiKey: runtimeOptions.kernelApiKey ??
|
|
14695
|
-
kernelProxyId: runtimeOptions.kernelProxyId ??
|
|
15011
|
+
kernelApiKey: runtimeOptions.kernelApiKey ?? browserServiceApiKey(),
|
|
15012
|
+
kernelProxyId: runtimeOptions.kernelProxyId ?? browserServiceProxyId(),
|
|
14696
15013
|
format: "json",
|
|
14697
15014
|
outputDir: runtimeOptions.outputDir ?? "/tmp/serp-intelligence-output",
|
|
14698
15015
|
signal: runtimeOptions.signal,
|
|
@@ -14703,7 +15020,7 @@ async function captureSerpIntelligenceSnapshot(rawInput, runtimeOptions = {}) {
|
|
|
14703
15020
|
const pageSnapshotLimit = normalizePageSnapshotLimit(parsedInput);
|
|
14704
15021
|
const pageSnapshotTargets = collectPageSnapshotTargets(harvestResult, pageSnapshotLimit);
|
|
14705
15022
|
const pageSnapshotArtifacts = pageSnapshotTargets.length > 0 ? (await capturePageSnapshotsFn(pageSnapshotTargets, {
|
|
14706
|
-
kernelApiKey: runtimeOptions.kernelApiKey ??
|
|
15023
|
+
kernelApiKey: runtimeOptions.kernelApiKey ?? browserServiceApiKey(),
|
|
14707
15024
|
timeoutMs: runtimeOptions.pageSnapshotTimeoutMs,
|
|
14708
15025
|
maxConcurrency: runtimeOptions.pageSnapshotMaxConcurrency,
|
|
14709
15026
|
debug,
|
|
@@ -14725,6 +15042,7 @@ var init_serp_capture_service = __esm({
|
|
|
14725
15042
|
"src/serp-intelligence/serp-capture-service.ts"() {
|
|
14726
15043
|
"use strict";
|
|
14727
15044
|
init_harvest();
|
|
15045
|
+
init_browser_service_env();
|
|
14728
15046
|
init_harvest_problems();
|
|
14729
15047
|
init_page_snapshot_extractor();
|
|
14730
15048
|
init_schemas4();
|
|
@@ -14829,6 +15147,7 @@ var init_serp_intelligence_routes = __esm({
|
|
|
14829
15147
|
"src/api/serp-intelligence-routes.ts"() {
|
|
14830
15148
|
"use strict";
|
|
14831
15149
|
import_hono6 = require("hono");
|
|
15150
|
+
init_browser_service_env();
|
|
14832
15151
|
init_page_snapshot_extractor();
|
|
14833
15152
|
init_serp_capture_service();
|
|
14834
15153
|
init_schemas4();
|
|
@@ -14861,8 +15180,8 @@ var init_serp_intelligence_routes = __esm({
|
|
|
14861
15180
|
if (!ok) return c.json(insufficientBalanceResponse(balance_mc, cost), 402);
|
|
14862
15181
|
try {
|
|
14863
15182
|
const result = await captureSerpIntelligenceSnapshot(parsed.data, {
|
|
14864
|
-
kernelApiKey:
|
|
14865
|
-
kernelProxyId:
|
|
15183
|
+
kernelApiKey: browserServiceApiKey(),
|
|
15184
|
+
kernelProxyId: browserServiceProxyId(),
|
|
14866
15185
|
signal: c.req.raw.signal,
|
|
14867
15186
|
billing: { creditsUsed: cost / 1e3 }
|
|
14868
15187
|
});
|
|
@@ -14917,7 +15236,7 @@ var init_serp_intelligence_routes = __esm({
|
|
|
14917
15236
|
if (!ok) return c.json(insufficientBalanceResponse(balance_mc, cost), 402);
|
|
14918
15237
|
try {
|
|
14919
15238
|
const result = await capturePageSnapshots(targets, {
|
|
14920
|
-
kernelApiKey:
|
|
15239
|
+
kernelApiKey: browserServiceApiKey(),
|
|
14921
15240
|
timeoutMs: parsed.data.timeoutMs,
|
|
14922
15241
|
maxConcurrency: parsed.data.maxConcurrency,
|
|
14923
15242
|
debug: parsed.data.debug
|
|
@@ -14952,8 +15271,17 @@ var init_serp_intelligence_routes = __esm({
|
|
|
14952
15271
|
}
|
|
14953
15272
|
});
|
|
14954
15273
|
|
|
15274
|
+
// src/version.ts
|
|
15275
|
+
var PACKAGE_VERSION;
|
|
15276
|
+
var init_version = __esm({
|
|
15277
|
+
"src/version.ts"() {
|
|
15278
|
+
"use strict";
|
|
15279
|
+
PACKAGE_VERSION = "0.1.8";
|
|
15280
|
+
}
|
|
15281
|
+
});
|
|
15282
|
+
|
|
14955
15283
|
// src/mcp/mcp-tool-schemas.ts
|
|
14956
|
-
var import_zod19, HarvestPaaInputSchema, ExtractUrlInputSchema, MapSiteUrlsInputSchema, ExtractSiteInputSchema, YoutubeHarvestInputSchema, YoutubeTranscribeInputSchema, FacebookPageIntelInputSchema, FacebookAdSearchInputSchema, FacebookAdTranscribeInputSchema, MapsPlaceIntelInputSchema, CreditsInfoInputSchema, SearchSerpInputSchema, CaptureSerpSnapshotInputSchema, ScreenshotInputSchema, CaptureSerpPageSnapshotsInputSchema;
|
|
15284
|
+
var import_zod19, HarvestPaaInputSchema, ExtractUrlInputSchema, MapSiteUrlsInputSchema, ExtractSiteInputSchema, YoutubeHarvestInputSchema, YoutubeTranscribeInputSchema, FacebookPageIntelInputSchema, FacebookAdSearchInputSchema, FacebookAdTranscribeInputSchema, MapsPlaceIntelInputSchema, MapsSearchInputSchema, NullableString, MapsSearchOutputSchema, OrganicResultOutput, AiOverviewOutput, EntityIdsOutput, HarvestPaaOutputSchema, SearchSerpOutputSchema, ExtractUrlOutputSchema, ExtractSiteOutputSchema, MapsPlaceIntelOutputSchema, CreditsInfoOutputSchema, MapSiteUrlsOutputSchema, YoutubeHarvestOutputSchema, FacebookAdSearchOutputSchema, FacebookPageIntelOutputSchema, CreditsInfoInputSchema, SearchSerpInputSchema, CaptureSerpSnapshotInputSchema, ScreenshotInputSchema, CaptureSerpPageSnapshotsInputSchema;
|
|
14957
15285
|
var init_mcp_tool_schemas = __esm({
|
|
14958
15286
|
"src/mcp/mcp-tool-schemas.ts"() {
|
|
14959
15287
|
"use strict";
|
|
@@ -15018,6 +15346,207 @@ var init_mcp_tool_schemas = __esm({
|
|
|
15018
15346
|
includeReviews: import_zod19.z.boolean().default(false).describe("Whether to fetch individual review cards"),
|
|
15019
15347
|
maxReviews: import_zod19.z.number().int().min(1).max(500).default(50).describe("Max review cards to return (requires includeReviews: true)")
|
|
15020
15348
|
};
|
|
15349
|
+
MapsSearchInputSchema = {
|
|
15350
|
+
query: import_zod19.z.string().min(1).describe('Business category, niche, keyword, or search term. If the user says "roofers in Denver CO", use query="roofers" and location="Denver, CO". Do not put the location here when it can be separated.'),
|
|
15351
|
+
location: import_zod19.z.string().optional().describe('City, region, country, or service area for the Maps search, e.g. "Denver, CO". Infer from the user request when present.'),
|
|
15352
|
+
gl: import_zod19.z.string().length(2).default("us").describe("Google country code inferred from location."),
|
|
15353
|
+
hl: import_zod19.z.string().length(2).default("en").describe("Language inferred from user request."),
|
|
15354
|
+
maxResults: import_zod19.z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more.")
|
|
15355
|
+
};
|
|
15356
|
+
NullableString = import_zod19.z.string().nullable();
|
|
15357
|
+
MapsSearchOutputSchema = {
|
|
15358
|
+
query: import_zod19.z.string(),
|
|
15359
|
+
location: import_zod19.z.string().nullable(),
|
|
15360
|
+
searchQuery: import_zod19.z.string(),
|
|
15361
|
+
searchUrl: import_zod19.z.string().url(),
|
|
15362
|
+
extractedAt: import_zod19.z.string(),
|
|
15363
|
+
requestedMaxResults: import_zod19.z.number().int().min(1).max(50),
|
|
15364
|
+
resultCount: import_zod19.z.number().int().min(0).max(50),
|
|
15365
|
+
results: import_zod19.z.array(import_zod19.z.object({
|
|
15366
|
+
position: import_zod19.z.number().int().min(1),
|
|
15367
|
+
name: import_zod19.z.string(),
|
|
15368
|
+
placeUrl: import_zod19.z.string().url(),
|
|
15369
|
+
cid: NullableString,
|
|
15370
|
+
cidDecimal: NullableString,
|
|
15371
|
+
rating: NullableString,
|
|
15372
|
+
reviewCount: NullableString,
|
|
15373
|
+
category: NullableString,
|
|
15374
|
+
address: NullableString,
|
|
15375
|
+
websiteUrl: NullableString,
|
|
15376
|
+
directionsUrl: NullableString,
|
|
15377
|
+
metadata: import_zod19.z.array(import_zod19.z.string())
|
|
15378
|
+
})),
|
|
15379
|
+
durationMs: import_zod19.z.number().int().min(0)
|
|
15380
|
+
};
|
|
15381
|
+
OrganicResultOutput = import_zod19.z.object({
|
|
15382
|
+
position: import_zod19.z.number().int(),
|
|
15383
|
+
title: import_zod19.z.string(),
|
|
15384
|
+
url: import_zod19.z.string(),
|
|
15385
|
+
domain: import_zod19.z.string(),
|
|
15386
|
+
snippet: NullableString
|
|
15387
|
+
});
|
|
15388
|
+
AiOverviewOutput = import_zod19.z.object({
|
|
15389
|
+
detected: import_zod19.z.boolean(),
|
|
15390
|
+
text: NullableString
|
|
15391
|
+
}).nullable();
|
|
15392
|
+
EntityIdsOutput = import_zod19.z.object({
|
|
15393
|
+
kgIds: import_zod19.z.array(import_zod19.z.string()),
|
|
15394
|
+
cids: import_zod19.z.array(import_zod19.z.string()),
|
|
15395
|
+
gcids: import_zod19.z.array(import_zod19.z.string())
|
|
15396
|
+
}).nullable();
|
|
15397
|
+
HarvestPaaOutputSchema = {
|
|
15398
|
+
query: import_zod19.z.string(),
|
|
15399
|
+
location: NullableString,
|
|
15400
|
+
questionCount: import_zod19.z.number().int().min(0),
|
|
15401
|
+
completionStatus: NullableString,
|
|
15402
|
+
questions: import_zod19.z.array(import_zod19.z.object({
|
|
15403
|
+
question: import_zod19.z.string(),
|
|
15404
|
+
answer: NullableString,
|
|
15405
|
+
sourceTitle: NullableString,
|
|
15406
|
+
sourceSite: NullableString
|
|
15407
|
+
})),
|
|
15408
|
+
organicResults: import_zod19.z.array(OrganicResultOutput),
|
|
15409
|
+
aiOverview: AiOverviewOutput,
|
|
15410
|
+
entityIds: EntityIdsOutput,
|
|
15411
|
+
durationMs: import_zod19.z.number().min(0).nullable()
|
|
15412
|
+
};
|
|
15413
|
+
SearchSerpOutputSchema = {
|
|
15414
|
+
query: import_zod19.z.string(),
|
|
15415
|
+
location: NullableString,
|
|
15416
|
+
organicResults: import_zod19.z.array(OrganicResultOutput),
|
|
15417
|
+
localPack: import_zod19.z.array(import_zod19.z.object({
|
|
15418
|
+
position: import_zod19.z.number().int(),
|
|
15419
|
+
name: import_zod19.z.string(),
|
|
15420
|
+
rating: NullableString,
|
|
15421
|
+
reviewCount: NullableString,
|
|
15422
|
+
websiteUrl: NullableString
|
|
15423
|
+
})),
|
|
15424
|
+
aiOverview: AiOverviewOutput,
|
|
15425
|
+
entityIds: EntityIdsOutput
|
|
15426
|
+
};
|
|
15427
|
+
ExtractUrlOutputSchema = {
|
|
15428
|
+
url: import_zod19.z.string(),
|
|
15429
|
+
title: NullableString,
|
|
15430
|
+
headings: import_zod19.z.array(import_zod19.z.object({
|
|
15431
|
+
level: import_zod19.z.number().int(),
|
|
15432
|
+
text: import_zod19.z.string()
|
|
15433
|
+
})),
|
|
15434
|
+
schemaBlockCount: import_zod19.z.number().int().min(0),
|
|
15435
|
+
entityName: NullableString,
|
|
15436
|
+
entityTypes: import_zod19.z.array(import_zod19.z.string()),
|
|
15437
|
+
napScore: import_zod19.z.number().nullable(),
|
|
15438
|
+
missingSchemaFields: import_zod19.z.array(import_zod19.z.string()),
|
|
15439
|
+
screenshotSaved: NullableString
|
|
15440
|
+
};
|
|
15441
|
+
ExtractSiteOutputSchema = {
|
|
15442
|
+
url: import_zod19.z.string(),
|
|
15443
|
+
pageCount: import_zod19.z.number().int().min(0),
|
|
15444
|
+
pages: import_zod19.z.array(import_zod19.z.object({
|
|
15445
|
+
url: import_zod19.z.string(),
|
|
15446
|
+
title: NullableString,
|
|
15447
|
+
schemaTypes: import_zod19.z.array(import_zod19.z.string())
|
|
15448
|
+
})),
|
|
15449
|
+
durationMs: import_zod19.z.number().min(0)
|
|
15450
|
+
};
|
|
15451
|
+
MapsPlaceIntelOutputSchema = {
|
|
15452
|
+
name: import_zod19.z.string(),
|
|
15453
|
+
rating: NullableString,
|
|
15454
|
+
reviewCount: NullableString,
|
|
15455
|
+
category: NullableString,
|
|
15456
|
+
address: NullableString,
|
|
15457
|
+
phone: NullableString,
|
|
15458
|
+
website: NullableString,
|
|
15459
|
+
hoursSummary: NullableString,
|
|
15460
|
+
bookingUrl: NullableString,
|
|
15461
|
+
kgmid: NullableString,
|
|
15462
|
+
cidDecimal: NullableString,
|
|
15463
|
+
cidUrl: NullableString,
|
|
15464
|
+
lat: import_zod19.z.number().nullable(),
|
|
15465
|
+
lng: import_zod19.z.number().nullable(),
|
|
15466
|
+
reviewsStatus: import_zod19.z.string(),
|
|
15467
|
+
reviewsCollected: import_zod19.z.number().int().min(0),
|
|
15468
|
+
reviewTopics: import_zod19.z.array(import_zod19.z.object({
|
|
15469
|
+
label: import_zod19.z.string(),
|
|
15470
|
+
count: import_zod19.z.string()
|
|
15471
|
+
}))
|
|
15472
|
+
};
|
|
15473
|
+
CreditsInfoOutputSchema = {
|
|
15474
|
+
balanceCredits: import_zod19.z.number().nullable(),
|
|
15475
|
+
matchedCost: import_zod19.z.object({
|
|
15476
|
+
label: import_zod19.z.string(),
|
|
15477
|
+
credits: import_zod19.z.number(),
|
|
15478
|
+
unit: import_zod19.z.string(),
|
|
15479
|
+
notes: NullableString
|
|
15480
|
+
}).nullable(),
|
|
15481
|
+
costs: import_zod19.z.array(import_zod19.z.object({
|
|
15482
|
+
key: import_zod19.z.string(),
|
|
15483
|
+
label: import_zod19.z.string(),
|
|
15484
|
+
credits: import_zod19.z.number(),
|
|
15485
|
+
unit: import_zod19.z.string(),
|
|
15486
|
+
notes: NullableString
|
|
15487
|
+
})),
|
|
15488
|
+
ledger: import_zod19.z.array(import_zod19.z.object({
|
|
15489
|
+
createdAt: import_zod19.z.string(),
|
|
15490
|
+
operation: import_zod19.z.string(),
|
|
15491
|
+
credits: import_zod19.z.number(),
|
|
15492
|
+
description: NullableString
|
|
15493
|
+
}))
|
|
15494
|
+
};
|
|
15495
|
+
MapSiteUrlsOutputSchema = {
|
|
15496
|
+
startUrl: import_zod19.z.string(),
|
|
15497
|
+
totalFound: import_zod19.z.number().int().min(0),
|
|
15498
|
+
truncated: import_zod19.z.boolean(),
|
|
15499
|
+
okCount: import_zod19.z.number().int().min(0),
|
|
15500
|
+
redirectCount: import_zod19.z.number().int().min(0),
|
|
15501
|
+
brokenCount: import_zod19.z.number().int().min(0),
|
|
15502
|
+
urls: import_zod19.z.array(import_zod19.z.object({
|
|
15503
|
+
url: import_zod19.z.string(),
|
|
15504
|
+
status: import_zod19.z.number().int().nullable()
|
|
15505
|
+
})),
|
|
15506
|
+
durationMs: import_zod19.z.number().min(0)
|
|
15507
|
+
};
|
|
15508
|
+
YoutubeHarvestOutputSchema = {
|
|
15509
|
+
mode: import_zod19.z.string(),
|
|
15510
|
+
videoCount: import_zod19.z.number().int().min(0),
|
|
15511
|
+
channel: import_zod19.z.object({
|
|
15512
|
+
title: NullableString,
|
|
15513
|
+
subscriberCount: NullableString
|
|
15514
|
+
}).nullable(),
|
|
15515
|
+
videos: import_zod19.z.array(import_zod19.z.object({
|
|
15516
|
+
videoId: import_zod19.z.string(),
|
|
15517
|
+
title: import_zod19.z.string(),
|
|
15518
|
+
channelName: NullableString,
|
|
15519
|
+
views: NullableString,
|
|
15520
|
+
duration: NullableString,
|
|
15521
|
+
url: NullableString
|
|
15522
|
+
}))
|
|
15523
|
+
};
|
|
15524
|
+
FacebookAdSearchOutputSchema = {
|
|
15525
|
+
query: import_zod19.z.string(),
|
|
15526
|
+
advertiserCount: import_zod19.z.number().int().min(0),
|
|
15527
|
+
advertisers: import_zod19.z.array(import_zod19.z.object({
|
|
15528
|
+
name: NullableString,
|
|
15529
|
+
adCount: import_zod19.z.number().int().nullable(),
|
|
15530
|
+
libraryId: NullableString
|
|
15531
|
+
}))
|
|
15532
|
+
};
|
|
15533
|
+
FacebookPageIntelOutputSchema = {
|
|
15534
|
+
advertiserName: NullableString,
|
|
15535
|
+
totalAds: import_zod19.z.number().int().min(0),
|
|
15536
|
+
activeCount: import_zod19.z.number().int().min(0),
|
|
15537
|
+
videoCount: import_zod19.z.number().int().min(0),
|
|
15538
|
+
imageCount: import_zod19.z.number().int().min(0),
|
|
15539
|
+
ads: import_zod19.z.array(import_zod19.z.object({
|
|
15540
|
+
libraryId: NullableString,
|
|
15541
|
+
status: NullableString,
|
|
15542
|
+
creativeType: NullableString,
|
|
15543
|
+
headline: NullableString,
|
|
15544
|
+
cta: NullableString,
|
|
15545
|
+
startDate: NullableString,
|
|
15546
|
+
videoUrl: NullableString,
|
|
15547
|
+
variations: import_zod19.z.number().int().nullable()
|
|
15548
|
+
}))
|
|
15549
|
+
};
|
|
15021
15550
|
CreditsInfoInputSchema = {
|
|
15022
15551
|
item: import_zod19.z.string().optional().describe('Optional tool, action, or feature to look up, e.g. "maps reviews", "extract_url", or "YouTube transcription"'),
|
|
15023
15552
|
includeLedger: import_zod19.z.boolean().default(false).describe("Whether to include recent credit ledger entries")
|
|
@@ -15066,6 +15595,14 @@ var init_mcp_tool_schemas = __esm({
|
|
|
15066
15595
|
});
|
|
15067
15596
|
|
|
15068
15597
|
// src/mcp/mcp-response-formatter.ts
|
|
15598
|
+
function configureReportSaving(enabled) {
|
|
15599
|
+
reportSavingEnabled = enabled;
|
|
15600
|
+
}
|
|
15601
|
+
function sanitizeVendorText(text) {
|
|
15602
|
+
return sanitizeVendorName(
|
|
15603
|
+
text.replace(/kernel_session_id/gi, "browser_session_id").replace(/kernel_delete_succeeded/gi, "session_cleanup_succeeded").replace(/kernel_delete_started/gi, "session_cleanup_started").replace(/kernel_delete_error/gi, "session_cleanup_error").replace(/kernelSessionId/g, "browserSessionId").replace(/kernelProxyId/g, "proxyId").replace(/KERNEL_API_KEY/g, "BROWSER_SERVICE_API_KEY").replace(/"kernel"\s*:/gi, '"browserRuntime":')
|
|
15604
|
+
);
|
|
15605
|
+
}
|
|
15069
15606
|
function slugifyReportName(input) {
|
|
15070
15607
|
return input.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80) || "mcp-scraper-report";
|
|
15071
15608
|
}
|
|
@@ -15077,7 +15614,7 @@ function outputBaseDir() {
|
|
|
15077
15614
|
return process.env.MCP_SCRAPER_OUTPUT_DIR?.trim() || (0, import_node_path6.join)((0, import_node_os3.homedir)(), "Downloads", "mcp-scraper");
|
|
15078
15615
|
}
|
|
15079
15616
|
function saveFullReport(full) {
|
|
15080
|
-
if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
15617
|
+
if (!reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
15081
15618
|
const outDir = outputBaseDir();
|
|
15082
15619
|
try {
|
|
15083
15620
|
(0, import_node_fs4.mkdirSync)(outDir, { recursive: true });
|
|
@@ -15090,7 +15627,7 @@ function saveFullReport(full) {
|
|
|
15090
15627
|
}
|
|
15091
15628
|
}
|
|
15092
15629
|
function persistScreenshotLocally(base64, url) {
|
|
15093
|
-
if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
15630
|
+
if (!reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
15094
15631
|
try {
|
|
15095
15632
|
const dir = (0, import_node_path6.join)(outputBaseDir(), "screenshots");
|
|
15096
15633
|
(0, import_node_fs4.mkdirSync)(dir, { recursive: true });
|
|
@@ -15130,11 +15667,11 @@ function parseData(raw) {
|
|
|
15130
15667
|
const text = first?.type === "text" ? first.text : "";
|
|
15131
15668
|
try {
|
|
15132
15669
|
const parsed = JSON.parse(text || "{}");
|
|
15133
|
-
if (raw.isError || parsed.error || parsed.error_code) return { error: formatStructuredError(parsed, text) };
|
|
15670
|
+
if (raw.isError || parsed.error || parsed.error_code) return { error: sanitizeVendorText(formatStructuredError(parsed, text)) };
|
|
15134
15671
|
const data = parsed.result ?? parsed;
|
|
15135
15672
|
return { data };
|
|
15136
15673
|
} catch {
|
|
15137
|
-
if (raw.isError) return { error: text || "Tool error" };
|
|
15674
|
+
if (raw.isError) return { error: sanitizeVendorText(text || "Tool error") };
|
|
15138
15675
|
return { error: "Failed to parse tool response" };
|
|
15139
15676
|
}
|
|
15140
15677
|
}
|
|
@@ -15148,15 +15685,6 @@ function entityIdsSection(ids) {
|
|
|
15148
15685
|
## Entity IDs
|
|
15149
15686
|
${lines.join("\n")}` : "";
|
|
15150
15687
|
}
|
|
15151
|
-
function entityIdsSummaryLine(ids) {
|
|
15152
|
-
if (!ids) return "";
|
|
15153
|
-
const parts = [];
|
|
15154
|
-
if (ids.kgIds?.length) parts.push(`KG MID: ${ids.kgIds[0]}`);
|
|
15155
|
-
if (ids.cids?.length) parts.push(`CID: ${ids.cids[0]}`);
|
|
15156
|
-
if (ids.gcids?.length) parts.push(`GCID: ${ids.gcids[0]}`);
|
|
15157
|
-
return parts.length ? `
|
|
15158
|
-
**Entity IDs:** ${parts.join(" \xB7 ")}` : "";
|
|
15159
|
-
}
|
|
15160
15688
|
function truncate(s, max) {
|
|
15161
15689
|
if (!s) return "";
|
|
15162
15690
|
return s.length > max ? s.slice(0, max) + "\u2026" : s;
|
|
@@ -15168,7 +15696,7 @@ function debugSection(debug) {
|
|
|
15168
15696
|
if (!debug || typeof debug !== "object") return "";
|
|
15169
15697
|
const request = debug.request ?? {};
|
|
15170
15698
|
const browser = debug.browser ?? {};
|
|
15171
|
-
const kernel = browser.kernel ?? {};
|
|
15699
|
+
const kernel = browser.browserRuntime ?? browser.kernel ?? {};
|
|
15172
15700
|
const network = browser.networkLocation ?? {};
|
|
15173
15701
|
const nav = browser.serpNavigation ?? {};
|
|
15174
15702
|
const proxyResolution = kernel.proxyResolution ?? {};
|
|
@@ -15186,7 +15714,7 @@ function debugSection(debug) {
|
|
|
15186
15714
|
if (locationEvidence) {
|
|
15187
15715
|
lines.push(`- Location evidence: ${locationEvidence.status}${locationEvidence.expected ? ` \xB7 expected ${locationEvidence.expected.city}${locationEvidence.expected.regionCode ? `, ${locationEvidence.expected.regionCode}` : ""}` : ""}${candidates ? ` \xB7 candidates ${candidates}` : ""}`);
|
|
15188
15716
|
}
|
|
15189
|
-
return lines.join("\n");
|
|
15717
|
+
return sanitizeVendorText(lines.join("\n"));
|
|
15190
15718
|
}
|
|
15191
15719
|
function errorAttemptsSection(body) {
|
|
15192
15720
|
const attempts = Array.isArray(body.attempts) ? body.attempts : [];
|
|
@@ -15194,12 +15722,14 @@ function errorAttemptsSection(body) {
|
|
|
15194
15722
|
const lines = attempts.slice(0, 5).map((attempt) => {
|
|
15195
15723
|
const debug = attempt.debug ?? {};
|
|
15196
15724
|
const browser = debug.browser ?? {};
|
|
15197
|
-
const kernel = browser.kernel ?? {};
|
|
15725
|
+
const kernel = browser.browserRuntime ?? browser.kernel ?? {};
|
|
15198
15726
|
const proxyResolution = kernel.proxyResolution ?? {};
|
|
15199
15727
|
const network = browser.networkLocation ?? {};
|
|
15200
15728
|
const nav = browser.serpNavigation ?? {};
|
|
15201
15729
|
const geo = [network.ip, network.city, network.region].filter(Boolean).join(" / ") || "geo unknown";
|
|
15202
|
-
|
|
15730
|
+
const sessionId = attempt.browser_session_id ?? attempt.kernel_session_id ?? kernel.sessionId ?? "unknown";
|
|
15731
|
+
const cleanupSucceeded2 = attempt.session_cleanup_succeeded ?? attempt.kernel_delete_succeeded;
|
|
15732
|
+
return `- Attempt ${attempt.attempt_number ?? "?"}: ${attempt.outcome ?? attempt.status ?? "unknown"} \xB7 session ${sessionId} \xB7 proxy ${debug.request?.proxyMode ?? kernel.proxyMode ?? "unknown"}${proxyResolution.source ? `/${proxyResolution.source}` : ""} \xB7 ${geo} \xB7 CAPTCHA ${nav.captchaDetected === true ? "yes" : nav.captchaDetected === false ? "no" : "unknown"} \xB7 cleanup ${cleanupSucceeded2 === true ? "yes" : cleanupSucceeded2 === false ? "no" : "unknown"}`;
|
|
15203
15733
|
});
|
|
15204
15734
|
return `
|
|
15205
15735
|
|
|
@@ -15240,27 +15770,37 @@ ${serpRows}` : "";
|
|
|
15240
15770
|
const tips = `
|
|
15241
15771
|
---
|
|
15242
15772
|
\u{1F4A1} **Tips**
|
|
15243
|
-
- Max questions: \`maxQuestions:
|
|
15773
|
+
- Max questions: \`maxQuestions: 200\` (current: ${input.maxQuestions ?? 30})
|
|
15244
15774
|
- Organic results only: use \`search_serp\`
|
|
15245
15775
|
- Dig into a result: use \`extract_url\` on any organic URL`;
|
|
15246
15776
|
const full = `# PAA Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
|
|
15247
15777
|
|
|
15248
15778
|
${paaTable}${serpTable}${entityIdsSection(entityIds)}${aiSection}${statsLine}${debugSection(diagnostics?.debug)}${tips}`;
|
|
15249
|
-
|
|
15250
|
-
|
|
15251
|
-
|
|
15252
|
-
|
|
15253
|
-
|
|
15254
|
-
|
|
15255
|
-
|
|
15256
|
-
|
|
15257
|
-
|
|
15258
|
-
|
|
15259
|
-
|
|
15260
|
-
|
|
15261
|
-
|
|
15262
|
-
|
|
15263
|
-
|
|
15779
|
+
return {
|
|
15780
|
+
...oneBlock(full),
|
|
15781
|
+
structuredContent: {
|
|
15782
|
+
query: input.query,
|
|
15783
|
+
location: input.location ?? null,
|
|
15784
|
+
questionCount: flat.length,
|
|
15785
|
+
completionStatus: diagnostics?.completionStatus ?? null,
|
|
15786
|
+
questions: flat.map((r) => ({
|
|
15787
|
+
question: String(r.question ?? ""),
|
|
15788
|
+
answer: r.answer ?? null,
|
|
15789
|
+
sourceTitle: r.source_title ?? null,
|
|
15790
|
+
sourceSite: r.source_site ?? null
|
|
15791
|
+
})),
|
|
15792
|
+
organicResults: organic.map((r) => ({
|
|
15793
|
+
position: Number(r.position) || 0,
|
|
15794
|
+
title: String(r.title ?? ""),
|
|
15795
|
+
url: String(r.url ?? ""),
|
|
15796
|
+
domain: String(r.domain ?? ""),
|
|
15797
|
+
snippet: r.snippet ?? null
|
|
15798
|
+
})),
|
|
15799
|
+
aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
|
|
15800
|
+
entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null,
|
|
15801
|
+
durationMs: durationMs ?? null
|
|
15802
|
+
}
|
|
15803
|
+
};
|
|
15264
15804
|
}
|
|
15265
15805
|
function formatSearchSerp(raw, input) {
|
|
15266
15806
|
const parsed = parseData(raw);
|
|
@@ -15298,19 +15838,29 @@ ${localRows}` : "";
|
|
|
15298
15838
|
const full = `# SERP Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
|
|
15299
15839
|
|
|
15300
15840
|
${serpTable}${localSection}${entityIdsSection(entityIds)}${aiSection}${debugSection(diagnostics?.debug)}${tips}`;
|
|
15301
|
-
|
|
15302
|
-
|
|
15303
|
-
|
|
15304
|
-
|
|
15305
|
-
|
|
15306
|
-
|
|
15307
|
-
|
|
15308
|
-
|
|
15309
|
-
|
|
15310
|
-
|
|
15311
|
-
|
|
15312
|
-
|
|
15313
|
-
|
|
15841
|
+
return {
|
|
15842
|
+
...oneBlock(full),
|
|
15843
|
+
structuredContent: {
|
|
15844
|
+
query: input.query,
|
|
15845
|
+
location: input.location ?? null,
|
|
15846
|
+
organicResults: organic.map((r) => ({
|
|
15847
|
+
position: Number(r.position) || 0,
|
|
15848
|
+
title: String(r.title ?? ""),
|
|
15849
|
+
url: String(r.url ?? ""),
|
|
15850
|
+
domain: String(r.domain ?? ""),
|
|
15851
|
+
snippet: r.snippet ?? null
|
|
15852
|
+
})),
|
|
15853
|
+
localPack: localPack.map((b) => ({
|
|
15854
|
+
position: Number(b.position) || 0,
|
|
15855
|
+
name: String(b.name ?? ""),
|
|
15856
|
+
rating: b.rating ?? null,
|
|
15857
|
+
reviewCount: b.reviewCount ?? null,
|
|
15858
|
+
websiteUrl: b.websiteUrl ?? null
|
|
15859
|
+
})),
|
|
15860
|
+
aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
|
|
15861
|
+
entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null
|
|
15862
|
+
}
|
|
15863
|
+
};
|
|
15314
15864
|
}
|
|
15315
15865
|
function formatExtractUrl(raw, input) {
|
|
15316
15866
|
const parsed = parseData(raw);
|
|
@@ -15379,15 +15929,27 @@ ${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
|
|
|
15379
15929
|
**${title}**
|
|
15380
15930
|
${headingSection}${kpoSection}${brandingSection}${bodySection}${screenshotSection}${mediaSection}${tips}`;
|
|
15381
15931
|
const textResult = oneBlock(full);
|
|
15932
|
+
const structuredContent = {
|
|
15933
|
+
url,
|
|
15934
|
+
title: d.title ?? null,
|
|
15935
|
+
headings: headings.map((h) => ({ level: Number(h.level) || 0, text: String(h.text ?? "") })),
|
|
15936
|
+
schemaBlockCount: schemaCount,
|
|
15937
|
+
entityName: kpo?.entityName ?? null,
|
|
15938
|
+
entityTypes: kpo?.type ?? [],
|
|
15939
|
+
napScore: kpo?.napScore ?? null,
|
|
15940
|
+
missingSchemaFields: kpo?.missingFields ?? [],
|
|
15941
|
+
screenshotSaved: screenshotPath ?? null
|
|
15942
|
+
};
|
|
15382
15943
|
if (screenshotMeta?.base64) {
|
|
15383
15944
|
return {
|
|
15384
15945
|
content: [
|
|
15385
15946
|
...textResult.content,
|
|
15386
15947
|
{ type: "image", data: screenshotMeta.base64, mimeType: "image/png" }
|
|
15387
|
-
]
|
|
15948
|
+
],
|
|
15949
|
+
structuredContent
|
|
15388
15950
|
};
|
|
15389
15951
|
}
|
|
15390
|
-
return textResult;
|
|
15952
|
+
return { ...textResult, structuredContent };
|
|
15391
15953
|
}
|
|
15392
15954
|
function formatMapSiteUrls(raw, input) {
|
|
15393
15955
|
const parsed = parseData(raw);
|
|
@@ -15420,15 +15982,19 @@ ${broken.map((u) => `- ${u.url} (${u.status})`).join("\n")}` : "",
|
|
|
15420
15982
|
- Extract content from all pages: use \`extract_site\`
|
|
15421
15983
|
- Scrape a single page: use \`extract_url\``
|
|
15422
15984
|
].filter(Boolean).join("\n");
|
|
15423
|
-
|
|
15424
|
-
|
|
15425
|
-
|
|
15426
|
-
|
|
15427
|
-
|
|
15428
|
-
|
|
15429
|
-
|
|
15430
|
-
|
|
15431
|
-
|
|
15985
|
+
return {
|
|
15986
|
+
...oneBlock(full),
|
|
15987
|
+
structuredContent: {
|
|
15988
|
+
startUrl: d.startUrl ?? input.url,
|
|
15989
|
+
totalFound: d.totalFound ?? urls.length,
|
|
15990
|
+
truncated: d.truncated === true,
|
|
15991
|
+
okCount: ok.length,
|
|
15992
|
+
redirectCount: redirects.length,
|
|
15993
|
+
brokenCount: broken.length,
|
|
15994
|
+
urls: urls.map((u) => ({ url: u.url, status: u.status ?? null })),
|
|
15995
|
+
durationMs: d.durationMs ?? 0
|
|
15996
|
+
}
|
|
15997
|
+
};
|
|
15432
15998
|
}
|
|
15433
15999
|
function formatExtractSite(raw, input) {
|
|
15434
16000
|
const parsed = parseData(raw);
|
|
@@ -15453,14 +16019,19 @@ ${pageRows}`,
|
|
|
15453
16019
|
- Map URLs first: use \`map_site_urls\`
|
|
15454
16020
|
- Inspect a single page: use \`extract_url\``
|
|
15455
16021
|
].join("\n");
|
|
15456
|
-
|
|
15457
|
-
|
|
15458
|
-
|
|
15459
|
-
|
|
15460
|
-
|
|
15461
|
-
|
|
15462
|
-
|
|
15463
|
-
|
|
16022
|
+
return {
|
|
16023
|
+
...oneBlock(full),
|
|
16024
|
+
structuredContent: {
|
|
16025
|
+
url: input.url,
|
|
16026
|
+
pageCount: pages.length,
|
|
16027
|
+
pages: pages.map((p) => ({
|
|
16028
|
+
url: String(p.url ?? ""),
|
|
16029
|
+
title: p.title ?? null,
|
|
16030
|
+
schemaTypes: p.kpo?.type ?? []
|
|
16031
|
+
})),
|
|
16032
|
+
durationMs: d.durationMs ?? 0
|
|
16033
|
+
}
|
|
16034
|
+
};
|
|
15464
16035
|
}
|
|
15465
16036
|
function formatYoutubeHarvest(raw, input) {
|
|
15466
16037
|
const parsed = parseData(raw);
|
|
@@ -15490,16 +16061,22 @@ ${videoRows}`,
|
|
|
15490
16061
|
- Transcribe a video: use \`youtube_transcribe\` with the \`videoId\` above
|
|
15491
16062
|
- Switch mode: \`mode: "channel"\` with \`channelHandle\` or \`mode: "search"\` with \`query\``
|
|
15492
16063
|
].filter(Boolean).join("\n");
|
|
15493
|
-
|
|
15494
|
-
|
|
15495
|
-
|
|
15496
|
-
|
|
15497
|
-
|
|
15498
|
-
|
|
15499
|
-
|
|
15500
|
-
|
|
15501
|
-
|
|
15502
|
-
|
|
16064
|
+
return {
|
|
16065
|
+
...oneBlock(full),
|
|
16066
|
+
structuredContent: {
|
|
16067
|
+
mode: input.mode,
|
|
16068
|
+
videoCount: videos.length,
|
|
16069
|
+
channel: d.channelMeta ? { title: d.channelMeta.title ?? null, subscriberCount: d.channelMeta.subscriberCount ?? null } : null,
|
|
16070
|
+
videos: videos.map((v) => ({
|
|
16071
|
+
videoId: String(v.videoId ?? ""),
|
|
16072
|
+
title: String(v.title ?? ""),
|
|
16073
|
+
channelName: v.channelName ?? null,
|
|
16074
|
+
views: v.views ?? null,
|
|
16075
|
+
duration: v.duration ?? null,
|
|
16076
|
+
url: v.url ?? null
|
|
16077
|
+
}))
|
|
16078
|
+
}
|
|
16079
|
+
};
|
|
15503
16080
|
}
|
|
15504
16081
|
function formatYoutubeTranscribe(raw, input) {
|
|
15505
16082
|
const parsed = parseData(raw);
|
|
@@ -15529,14 +16106,6 @@ ${chunkRows}` : "",
|
|
|
15529
16106
|
---
|
|
15530
16107
|
\u{1F4A1} Harvest more from this channel: use \`youtube_harvest\` with \`mode: "channel"\``
|
|
15531
16108
|
].filter(Boolean).join("\n");
|
|
15532
|
-
const summary = [
|
|
15533
|
-
`**YouTube Transcript: \`${input.videoId}\`** \u2014 ${text.split(" ").length} words \xB7 ${durSec}s`,
|
|
15534
|
-
`
|
|
15535
|
-
**Preview:**
|
|
15536
|
-
> ${truncate(text, 300)}`,
|
|
15537
|
-
`
|
|
15538
|
-
\u{1F4A1} Full transcript in artifact above`
|
|
15539
|
-
].join("\n");
|
|
15540
16109
|
return oneBlock(full);
|
|
15541
16110
|
}
|
|
15542
16111
|
function formatFacebookPageIntel(raw, input) {
|
|
@@ -15565,19 +16134,26 @@ ${adBlocks}`,
|
|
|
15565
16134
|
- Transcribe video ads: use \`facebook_ad_transcribe\` with the \`videoUrl\` above
|
|
15566
16135
|
- Find other advertisers: use \`facebook_ad_search\``
|
|
15567
16136
|
].filter(Boolean).join("\n");
|
|
15568
|
-
|
|
15569
|
-
|
|
15570
|
-
|
|
15571
|
-
|
|
15572
|
-
|
|
15573
|
-
|
|
15574
|
-
|
|
15575
|
-
|
|
15576
|
-
|
|
15577
|
-
|
|
15578
|
-
|
|
15579
|
-
|
|
15580
|
-
|
|
16137
|
+
return {
|
|
16138
|
+
...oneBlock(full),
|
|
16139
|
+
structuredContent: {
|
|
16140
|
+
advertiserName: d.advertiserName ?? null,
|
|
16141
|
+
totalAds: s.totalAds ?? 0,
|
|
16142
|
+
activeCount: s.activeCount ?? 0,
|
|
16143
|
+
videoCount: s.videoCount ?? 0,
|
|
16144
|
+
imageCount: s.imageCount ?? 0,
|
|
16145
|
+
ads: ads.map((ad) => ({
|
|
16146
|
+
libraryId: ad.libraryId ?? null,
|
|
16147
|
+
status: ad.status ?? null,
|
|
16148
|
+
creativeType: ad.creativeType ?? null,
|
|
16149
|
+
headline: ad.headline ?? null,
|
|
16150
|
+
cta: ad.cta ?? null,
|
|
16151
|
+
startDate: ad.startDate ?? null,
|
|
16152
|
+
videoUrl: ad.videoUrl ?? null,
|
|
16153
|
+
variations: typeof ad.variations === "number" ? ad.variations : null
|
|
16154
|
+
}))
|
|
16155
|
+
}
|
|
16156
|
+
};
|
|
15581
16157
|
}
|
|
15582
16158
|
function formatFacebookAdSearch(raw, input) {
|
|
15583
16159
|
const parsed = parseData(raw);
|
|
@@ -15601,15 +16177,18 @@ ${rows}`,
|
|
|
15601
16177
|
- Scan all ads: use \`facebook_page_intel\` with \`libraryId\`
|
|
15602
16178
|
- Or pass the advertiser name as \`query\` in \`facebook_page_intel\``
|
|
15603
16179
|
].join("\n");
|
|
15604
|
-
|
|
15605
|
-
|
|
15606
|
-
|
|
15607
|
-
|
|
15608
|
-
|
|
15609
|
-
|
|
15610
|
-
|
|
15611
|
-
|
|
15612
|
-
|
|
16180
|
+
return {
|
|
16181
|
+
...oneBlock(full),
|
|
16182
|
+
structuredContent: {
|
|
16183
|
+
query: input.query,
|
|
16184
|
+
advertiserCount: advertisers.length,
|
|
16185
|
+
advertisers: advertisers.map((a) => ({
|
|
16186
|
+
name: a.pageName ?? a.name ?? null,
|
|
16187
|
+
adCount: typeof a.adCount === "number" ? a.adCount : null,
|
|
16188
|
+
libraryId: a.sampleLibraryId ?? a.libraryId ?? null
|
|
16189
|
+
}))
|
|
16190
|
+
}
|
|
16191
|
+
};
|
|
15613
16192
|
}
|
|
15614
16193
|
function formatCreditsInfo(raw, input) {
|
|
15615
16194
|
const parsed = parseData(raw);
|
|
@@ -15649,14 +16228,75 @@ ${costRows}` : "",
|
|
|
15649
16228
|
|------|-----------|---------|-------------|
|
|
15650
16229
|
${ledgerRows}` : ""
|
|
15651
16230
|
].filter(Boolean).join("\n");
|
|
15652
|
-
|
|
15653
|
-
|
|
15654
|
-
|
|
15655
|
-
|
|
15656
|
-
|
|
15657
|
-
|
|
16231
|
+
return {
|
|
16232
|
+
...oneBlock(full),
|
|
16233
|
+
structuredContent: {
|
|
16234
|
+
balanceCredits: typeof balance === "number" ? balance : null,
|
|
16235
|
+
matchedCost: matched ? { label: matched.label, credits: matched.credits, unit: matched.unit, notes: matched.notes ?? null } : null,
|
|
16236
|
+
costs: costs.map((c) => ({
|
|
16237
|
+
key: c.key,
|
|
16238
|
+
label: c.label,
|
|
16239
|
+
credits: c.credits,
|
|
16240
|
+
unit: c.unit,
|
|
16241
|
+
notes: c.notes ?? null
|
|
16242
|
+
})),
|
|
16243
|
+
ledger: ledger.map((row) => ({
|
|
16244
|
+
createdAt: String(row.created_at ?? ""),
|
|
16245
|
+
operation: String(row.operation ?? ""),
|
|
16246
|
+
credits: row.amount_mc / 1e3,
|
|
16247
|
+
description: row.description ?? null
|
|
16248
|
+
}))
|
|
16249
|
+
}
|
|
16250
|
+
};
|
|
16251
|
+
}
|
|
16252
|
+
/**
 * Render a Google Maps search payload as a Markdown report plus structured content.
 *
 * @param {unknown} raw - Executor response; passed to `parseData`, whose result is
 *   treated as either `{ error }` or `{ data }`.
 * @param {{ query?: string, location?: string, maxResults?: number }} input - Tool
 *   arguments. Used as fallbacks when the payload omits the matching field.
 * @returns {object} An MCP error result (`isError: true`) when parsing failed,
 *   otherwise the `oneBlock(full)` result extended with `structuredContent`.
 */
function formatMapsSearch(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
  const d = parsed.data;
  // Guard against a malformed payload: anything that is not an array is treated as "no results".
  const results = Array.isArray(d.results) ? d.results : [];
  // Prefer the values echoed by the service; otherwise rebuild them from the request.
  const query = d.query ?? input.query;
  const searchQuery = d.searchQuery ?? [input.query, input.location].filter(Boolean).join(" ");
  const requestedMax = d.requestedMaxResults ?? input.maxResults ?? 10;
  const durationMs = d.durationMs;
  const rows = results.map((r) => {
    const rating = [r.rating, r.reviewCount ? `(${r.reviewCount})` : null].filter(Boolean).join(" ");
    const cidCell = r.cidDecimal ? `\`${r.cidDecimal}\`` : "\u2014";
    const siteCell = r.websiteUrl ? `[site](${r.websiteUrl})` : "\u2014";
    // A missing place URL gets the same em-dash placeholder as the other optional columns
    // instead of rendering a dead "[maps](undefined)" link.
    const mapsCell = r.placeUrl ? `[maps](${r.placeUrl})` : "\u2014";
    return `| ${r.position} | ${cell(r.name)} | ${cell(r.category)} | ${cell(rating)} | ${cell(r.address)} | ${cidCell} | ${siteCell} | ${mapsCell} |`;
  }).join("\n");
  // Per-candidate metadata: at most 8 entries each, or a "none" bullet.
  const metadataSection = results.length ? `
## Candidate Metadata
${results.map((r) => {
    const meta = r.metadata?.length ? r.metadata.slice(0, 8).map((m) => ` - ${m}`).join("\n") : " - none";
    return `### ${r.position}. ${r.name}
${meta}`;
  }).join("\n\n")}` : "";
  // Empty or null sections are dropped by filter(Boolean) before joining.
  const full = [
    `# Google Maps Search: "${searchQuery}"`,
    `**Returned:** ${results.length} profile candidate${results.length === 1 ? "" : "s"} \xB7 **Requested max:** ${requestedMax} \xB7 **Limit:** 50`,
    `
## Results
| # | Name | Category | Rating | Address | CID | Website | Maps |
|---|------|----------|--------|---------|-----|---------|------|
${rows}`,
    metadataSection,
    `
---
\u{1F4A1} **Next step:** use \`maps_place_intel\` with a selected business name and location to hydrate full hours, phone, review topics, and optional review cards.`,
    durationMs != null ? `
*Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
  ].filter(Boolean).join("\n");
  return {
    ...oneBlock(full),
    structuredContent: {
      // Report the same query/searchQuery the Markdown heading shows, so the two
      // representations cannot disagree when the service omits these fields.
      query,
      location: d.location ?? null,
      searchQuery,
      searchUrl: d.searchUrl,
      extractedAt: d.extractedAt,
      requestedMaxResults: requestedMax,
      resultCount: results.length,
      results,
      durationMs: durationMs ?? 0
    }
  };
}
|
|
15661
16301
|
function formatMapsPlaceIntel(raw, input) {
|
|
15662
16302
|
const parsed = parseData(raw);
|
|
@@ -15756,20 +16396,28 @@ ${entitySection}` : null,
|
|
|
15756
16396
|
---
|
|
15757
16397
|
*Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
|
|
15758
16398
|
].filter(Boolean).join("\n");
|
|
15759
|
-
|
|
15760
|
-
|
|
15761
|
-
|
|
15762
|
-
|
|
15763
|
-
|
|
15764
|
-
|
|
15765
|
-
|
|
15766
|
-
|
|
15767
|
-
|
|
15768
|
-
|
|
15769
|
-
|
|
15770
|
-
|
|
15771
|
-
|
|
15772
|
-
|
|
16399
|
+
return {
|
|
16400
|
+
...oneBlock(full),
|
|
16401
|
+
structuredContent: {
|
|
16402
|
+
name,
|
|
16403
|
+
rating: rating ?? null,
|
|
16404
|
+
reviewCount: reviewCount ?? null,
|
|
16405
|
+
category: category ?? null,
|
|
16406
|
+
address: address ?? null,
|
|
16407
|
+
phone: phone ?? null,
|
|
16408
|
+
website: website ?? null,
|
|
16409
|
+
hoursSummary: hoursSummary ?? null,
|
|
16410
|
+
bookingUrl: bookingUrl ?? null,
|
|
16411
|
+
kgmid: kgmid ?? null,
|
|
16412
|
+
cidDecimal: cidDecimal ?? null,
|
|
16413
|
+
cidUrl: cidUrl ?? null,
|
|
16414
|
+
lat: lat ?? null,
|
|
16415
|
+
lng: lng ?? null,
|
|
16416
|
+
reviewsStatus,
|
|
16417
|
+
reviewsCollected: reviews.length,
|
|
16418
|
+
reviewTopics: topics.map((t) => ({ label: String(t.label ?? ""), count: String(t.count ?? "") }))
|
|
16419
|
+
}
|
|
16420
|
+
};
|
|
15773
16421
|
}
|
|
15774
16422
|
function formatFacebookAdTranscribe(raw, input) {
|
|
15775
16423
|
const parsed = parseData(raw);
|
|
@@ -15799,76 +16447,129 @@ ${chunkRows}` : "",
|
|
|
15799
16447
|
---
|
|
15800
16448
|
\u{1F4A1} Get more ads from this advertiser: use \`facebook_page_intel\``
|
|
15801
16449
|
].filter(Boolean).join("\n");
|
|
15802
|
-
const summary = [
|
|
15803
|
-
`**Facebook Ad Transcript** \u2014 ${text.split(" ").length} words \xB7 ${durSec}s`,
|
|
15804
|
-
`
|
|
15805
|
-
**Preview:**
|
|
15806
|
-
> ${truncate(text, 300)}`,
|
|
15807
|
-
`
|
|
15808
|
-
\u{1F4A1} Full transcript in artifact above`
|
|
15809
|
-
].join("\n");
|
|
15810
16450
|
return oneBlock(full);
|
|
15811
16451
|
}
|
|
15812
|
-
var import_node_fs4, import_node_os3, import_node_path6;
|
|
16452
|
+
var import_node_fs4, import_node_os3, import_node_path6, reportSavingEnabled;
|
|
15813
16453
|
var init_mcp_response_formatter = __esm({
|
|
15814
16454
|
"src/mcp/mcp-response-formatter.ts"() {
|
|
15815
16455
|
"use strict";
|
|
15816
16456
|
import_node_fs4 = require("fs");
|
|
15817
16457
|
import_node_os3 = require("os");
|
|
15818
16458
|
import_node_path6 = require("path");
|
|
16459
|
+
init_errors();
|
|
16460
|
+
reportSavingEnabled = true;
|
|
15819
16461
|
}
|
|
15820
16462
|
});
|
|
15821
16463
|
|
|
15822
16464
|
// src/mcp/paa-mcp-server.ts
|
|
15823
|
-
function
|
|
15824
|
-
|
|
16465
|
+
/**
 * Build the annotation object attached to tools registered with
 * `annotations: liveWebToolAnnotations(...)`.
 *
 * The hints are fixed: read-only, non-destructive, not idempotent, open-world.
 *
 * @param {string} title - Display title copied into the annotations.
 * @returns {{ title: string, readOnlyHint: boolean, destructiveHint: boolean,
 *   idempotentHint: boolean, openWorldHint: boolean }} A fresh object per call.
 */
function liveWebToolAnnotations(title) {
  const liveWebHints = {
    readOnlyHint: true,
    destructiveHint: false,
    idempotentHint: false,
    openWorldHint: true
  };
  return { title, ...liveWebHints };
}
|
|
16474
|
+
/**
 * Create the "mcp-scraper" MCP server and register every scraper tool on it.
 *
 * Each tool handler calls one executor method with the tool input and passes
 * the raw result (plus the input) to the matching formatter.
 *
 * @param {object} executor - Object exposing one async method per tool
 *   (`harvestPaa`, `searchSerp`, `extractUrl`, `mapSiteUrls`, `extractSite`,
 *   `youtubeHarvest`, `youtubeTranscribe`, `facebookPageIntel`,
 *   `facebookAdSearch`, `facebookAdTranscribe`, `mapsPlaceIntel`, `mapsSearch`,
 *   `creditsInfo`).
 * @param {{ savesReportsLocally?: boolean }} [options] - Only an explicit
 *   `savesReportsLocally: false` switches the description suffix to the
 *   "returned inline" wording; any other value keeps the "saves locally" wording.
 * @returns {object} The configured `McpServer` instance.
 */
function buildPaaExtractorMcpServer(executor, options = {}) {
  const { savesReportsLocally } = options;
  const reportNote = savesReportsLocally === false
    ? " Reports are returned inline; no files are saved on this hosted endpoint."
    : " Saves a full Markdown report locally.";
  const server = new import_mcp.McpServer({ name: "mcp-scraper", version: PACKAGE_VERSION });

  // One entry per tool, in registration order.
  //   appendReportNote - whether the report-saving sentence is appended to the description.
  //   outputSchema     - omitted for tools that do not declare one.
  //   annotations      - omitted for tools that use the shared live-web annotations.
  const toolTable = [
    {
      name: "harvest_paa",
      title: "Google PAA + SERP Harvest",
      description: 'Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-150 for "full", "deep", "all", or comprehensive research. Credits are charged by extracted question; unused request hold is refunded.',
      appendReportNote: true,
      inputSchema: HarvestPaaInputSchema,
      outputSchema: HarvestPaaOutputSchema,
      format: formatHarvestPaa,
      run: (input) => executor.harvestPaa(input)
    },
    {
      name: "search_serp",
      title: "Google SERP Lookup",
      description: "Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request.",
      appendReportNote: true,
      inputSchema: SearchSerpInputSchema,
      outputSchema: SearchSerpOutputSchema,
      format: formatSearchSerp,
      run: (input) => executor.searchSerp(input)
    },
    {
      name: "extract_url",
      title: "Single URL Extract",
      description: "Extract structured data from one public URL: page content as Markdown, heading structure, JSON-LD schema, entity details, NAP score, metadata, and missing schema fields. Use when the user provides a single URL or asks to inspect/scrape one page.",
      appendReportNote: true,
      inputSchema: ExtractUrlInputSchema,
      outputSchema: ExtractUrlOutputSchema,
      format: formatExtractUrl,
      run: (input) => executor.extractUrl(input)
    },
    {
      name: "map_site_urls",
      title: "Site URL Map",
      description: "Map/crawl a public website to build a URL inventory with HTTP status codes, broken links, redirects, and site scope. Use before extract_site for audits or when the user asks for a sitemap/URL inventory.",
      appendReportNote: true,
      inputSchema: MapSiteUrlsInputSchema,
      outputSchema: MapSiteUrlsOutputSchema,
      format: formatMapSiteUrls,
      run: (input) => executor.mapSiteUrls(input)
    },
    {
      name: "extract_site",
      title: "Multi-Page Site Extract",
      description: "Run multi-page extraction across a public website. Returns per-page titles, H1s, metadata, headings, schema/entity data, canonical URLs, and content. Use for website audits, competitor audits, and full-site extraction.",
      appendReportNote: true,
      inputSchema: ExtractSiteInputSchema,
      outputSchema: ExtractSiteOutputSchema,
      format: formatExtractSite,
      run: (input) => executor.extractSite(input)
    },
    {
      name: "youtube_harvest",
      title: "YouTube Video Harvest",
      description: 'Harvest YouTube video metadata by search query or channel handle/ID/URL. Use mode "search" for keyword/topic requests and mode "channel" for @handles, channel IDs, or channel URLs. Returns titles, views, dates, durations, URLs, thumbnails, and videoIds for follow-up transcription.',
      appendReportNote: true,
      inputSchema: YoutubeHarvestInputSchema,
      outputSchema: YoutubeHarvestOutputSchema,
      format: formatYoutubeHarvest,
      run: (input) => executor.youtubeHarvest(input)
    },
    {
      name: "youtube_transcribe",
      title: "YouTube Transcription",
      description: "Fetch and transcribe captions from a YouTube video. Returns full transcript, timestamped chunks, and word count. Pass a videoId from youtube_harvest results or infer it from a YouTube URL if the user provided one.",
      appendReportNote: true,
      inputSchema: YoutubeTranscribeInputSchema,
      format: formatYoutubeTranscribe,
      run: (input) => executor.youtubeTranscribe(input)
    },
    {
      name: "facebook_page_intel",
      title: "Facebook Advertiser Ad Intel",
      description: "Harvest ads from a Facebook advertiser. Returns ad copy, headlines, CTAs, creative type, status, landing URLs, and video URLs ready for transcription. Accepts pageId, libraryId, or a brand/advertiser name as query. Use after facebook_ad_search when possible.",
      appendReportNote: true,
      inputSchema: FacebookPageIntelInputSchema,
      outputSchema: FacebookPageIntelOutputSchema,
      format: formatFacebookPageIntel,
      run: (input) => executor.facebookPageIntel(input)
    },
    {
      name: "facebook_ad_search",
      title: "Facebook Ad Library Search",
      description: "Search Facebook Ad Library by brand, advertiser, competitor, niche, or keyword. Returns advertisers with ad counts and library IDs. Use to discover competitors, then pass libraryId to facebook_page_intel.",
      appendReportNote: true,
      inputSchema: FacebookAdSearchInputSchema,
      outputSchema: FacebookAdSearchOutputSchema,
      format: formatFacebookAdSearch,
      run: (input) => executor.facebookAdSearch(input)
    },
    {
      name: "facebook_ad_transcribe",
      title: "Facebook Ad Transcription",
      description: "Transcribe audio from a Facebook ad video. Returns full transcript and timestamped chunks. Use the videoUrl value from facebook_page_intel results.",
      appendReportNote: false,
      inputSchema: FacebookAdTranscribeInputSchema,
      format: formatFacebookAdTranscribe,
      run: (input) => executor.facebookAdTranscribe(input)
    },
    {
      name: "maps_place_intel",
      title: "Google Maps Business Profile Details",
      description: 'Extract Google Maps business intelligence for one known/named business: rating, review count, category, address, phone, website, hours, booking URL, review histogram, review topics, about attributes, entity IDs, and optional review cards. Do not use this for category searches, local market prospect lists, or requests for multiple GMB/GBP profiles; use maps_search first for those. Split business name from location (e.g. "Elite Roofing Denver CO" => businessName "Elite Roofing", location "Denver, CO"). Pass includeReviews true when the user asks for reviews/customer pain.',
      appendReportNote: true,
      inputSchema: MapsPlaceIntelInputSchema,
      outputSchema: MapsPlaceIntelOutputSchema,
      format: formatMapsPlaceIntel,
      run: (input) => executor.mapsPlaceIntel(input)
    },
    {
      name: "maps_search",
      title: "Google Maps Business Search",
      description: 'Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.',
      appendReportNote: true,
      inputSchema: MapsSearchInputSchema,
      outputSchema: MapsSearchOutputSchema,
      format: formatMapsSearch,
      run: (input) => executor.mapsSearch(input)
    },
    {
      name: "credits_info",
      title: "MCP Scraper Credits & Costs",
      description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",
      appendReportNote: false,
      inputSchema: CreditsInfoInputSchema,
      outputSchema: CreditsInfoOutputSchema,
      // The only tool with its own hints: idempotent and closed-world.
      annotations: {
        title: "MCP Scraper Credits & Costs",
        readOnlyHint: true,
        destructiveHint: false,
        idempotentHint: true,
        openWorldHint: false
      },
      format: formatCreditsInfo,
      run: (input) => executor.creditsInfo(input)
    }
  ];

  for (const tool of toolTable) {
    const { format, run } = tool;
    const config = {
      title: tool.title,
      description: tool.appendReportNote ? `${tool.description}${reportNote}` : tool.description,
      inputSchema: tool.inputSchema
    };
    // Keep the key absent (rather than undefined) for tools without an output schema.
    if (tool.outputSchema !== undefined) config.outputSchema = tool.outputSchema;
    config.annotations = tool.annotations ?? liveWebToolAnnotations(tool.title);
    server.registerTool(tool.name, config, async (input) => format(await run(input), input));
  }
  return server;
}
|
|
@@ -15877,6 +16578,7 @@ var init_paa_mcp_server = __esm({
|
|
|
15877
16578
|
"src/mcp/paa-mcp-server.ts"() {
|
|
15878
16579
|
"use strict";
|
|
15879
16580
|
import_mcp = require("@modelcontextprotocol/sdk/server/mcp.js");
|
|
16581
|
+
init_version();
|
|
15880
16582
|
init_mcp_tool_schemas();
|
|
15881
16583
|
init_mcp_response_formatter();
|
|
15882
16584
|
}
|
|
@@ -15976,6 +16678,9 @@ var init_http_mcp_tool_executor = __esm({
|
|
|
15976
16678
|
mapsPlaceIntel(input) {
|
|
15977
16679
|
return this.call("/maps/place", input);
|
|
15978
16680
|
}
|
|
16681
|
+
mapsSearch(input) {
|
|
16682
|
+
return this.call("/maps/search", input);
|
|
16683
|
+
}
|
|
15979
16684
|
creditsInfo(input) {
|
|
15980
16685
|
return this.call("/billing/credits", input);
|
|
15981
16686
|
}
|
|
@@ -16015,15 +16720,18 @@ async function requireMcpCallerKey(c) {
|
|
|
16015
16720
|
return callerKey;
|
|
16016
16721
|
}
|
|
16017
16722
|
/**
 * Register the two SERP Intelligence capture tools on an existing MCP server.
 *
 * Both handlers return the executor result as-is; no formatter is applied.
 *
 * @param {object} server - MCP server exposing `registerTool`.
 * @param {object} executor - Must provide `captureSerpSnapshot` and
 *   `captureSerpPageSnapshots`.
 * @returns {void}
 */
function registerSerpIntelligenceCaptureTools(server, executor) {
  const snapshotTitle = "SERP Intelligence Snapshot";
  const snapshotConfig = {
    title: snapshotTitle,
    description: "Capture a structured SERP Intelligence Google snapshot through POST /serp-intelligence/capture, the same product capture path used by Phoenix. Split query from location, infer gl/hl, use proxyMode location for localized residential proxy evidence, configured for the static residential proxy, and none only for direct-network debugging. Set debug true when investigating location evidence, proxy behavior, CAPTCHA, or capture reliability.",
    inputSchema: CaptureSerpSnapshotInputSchema,
    annotations: liveWebToolAnnotations(snapshotTitle)
  };
  const captureSnapshot = async (input) => executor.captureSerpSnapshot(input);
  server.registerTool("capture_serp_snapshot", snapshotConfig, captureSnapshot);

  const pageSnapshotsTitle = "SERP Intelligence Page Snapshots";
  const pageSnapshotsConfig = {
    title: pageSnapshotsTitle,
    description: "Capture public ranking-page evidence through POST /serp-intelligence/page-snapshots, the same product page snapshot path used by Phoenix. Provide urls for simple captures or targets when preserving organic, AI citation, local-pack, configured target, or site-subject source metadata. Private IPs, localhost, file URLs, and internal URLs are rejected by the service. Use timeoutMs for slow pages and debug true for sanitized proxy/browser diagnostics.",
    inputSchema: CaptureSerpPageSnapshotsInputSchema,
    annotations: liveWebToolAnnotations(pageSnapshotsTitle)
  };
  const capturePageSnapshots = async (input) => executor.captureSerpPageSnapshots(input);
  server.registerTool("capture_serp_page_snapshots", pageSnapshotsConfig, capturePageSnapshots);
}
|
|
16028
16736
|
var import_hono7, import_webStandardStreamableHttp, mcpApp;
|
|
16029
16737
|
var init_mcp_routes = __esm({
|
|
@@ -16033,8 +16741,10 @@ var init_mcp_routes = __esm({
|
|
|
16033
16741
|
import_webStandardStreamableHttp = require("@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js");
|
|
16034
16742
|
init_paa_mcp_server();
|
|
16035
16743
|
init_http_mcp_tool_executor();
|
|
16744
|
+
init_mcp_response_formatter();
|
|
16036
16745
|
init_db();
|
|
16037
16746
|
init_mcp_tool_schemas();
|
|
16747
|
+
configureReportSaving(false);
|
|
16038
16748
|
mcpApp = new import_hono7.Hono();
|
|
16039
16749
|
mcpApp.all("/", async (c) => {
|
|
16040
16750
|
try {
|
|
@@ -16047,7 +16757,7 @@ var init_mcp_routes = __esm({
|
|
|
16047
16757
|
sessionIdGenerator: void 0,
|
|
16048
16758
|
enableJsonResponse: true
|
|
16049
16759
|
});
|
|
16050
|
-
const server = buildPaaExtractorMcpServer(executor);
|
|
16760
|
+
const server = buildPaaExtractorMcpServer(executor, { savesReportsLocally: false });
|
|
16051
16761
|
registerSerpIntelligenceCaptureTools(server, executor);
|
|
16052
16762
|
await server.connect(transport);
|
|
16053
16763
|
return transport.handleRequest(c.req.raw);
|
|
@@ -16432,7 +17142,7 @@ async function processJob(job) {
|
|
|
16432
17142
|
const opts = typeof job.options === "string" ? JSON.parse(job.options) : job.options;
|
|
16433
17143
|
const result = await harvest({
|
|
16434
17144
|
...opts,
|
|
16435
|
-
kernelApiKey:
|
|
17145
|
+
kernelApiKey: browserServiceApiKey(),
|
|
16436
17146
|
headless: true,
|
|
16437
17147
|
format: "json",
|
|
16438
17148
|
outputDir: "/tmp/paa-output-api",
|
|
@@ -16497,6 +17207,7 @@ var init_worker = __esm({
|
|
|
16497
17207
|
"src/api/worker.ts"() {
|
|
16498
17208
|
"use strict";
|
|
16499
17209
|
init_db();
|
|
17210
|
+
init_browser_service_env();
|
|
16500
17211
|
init_harvest();
|
|
16501
17212
|
init_webhook();
|
|
16502
17213
|
init_rates();
|
|
@@ -16599,6 +17310,8 @@ var init_server = __esm({
|
|
|
16599
17310
|
"src/api/server.ts"() {
|
|
16600
17311
|
"use strict";
|
|
16601
17312
|
init_harvest_timeout();
|
|
17313
|
+
init_browser_service_env();
|
|
17314
|
+
init_outbound_sanitize();
|
|
16602
17315
|
init_registry();
|
|
16603
17316
|
init_template();
|
|
16604
17317
|
init_og();
|
|
@@ -16915,7 +17628,7 @@ var init_server = __esm({
|
|
|
16915
17628
|
try {
|
|
16916
17629
|
const result = await harvest({
|
|
16917
17630
|
...options,
|
|
16918
|
-
kernelApiKey:
|
|
17631
|
+
kernelApiKey: browserServiceApiKey(),
|
|
16919
17632
|
headless: true,
|
|
16920
17633
|
format: "json",
|
|
16921
17634
|
outputDir: "/tmp/paa-output-api",
|
|
@@ -16930,7 +17643,7 @@ var init_server = __esm({
|
|
|
16930
17643
|
if (diff > 0) await creditMc(user.id, diff, LedgerOperation.PAA_REFUND, "overestimate refund");
|
|
16931
17644
|
else if (diff < 0) await debitMc(user.id, -diff, LedgerOperation.PAA, options.query);
|
|
16932
17645
|
}
|
|
16933
|
-
return c.json({ job_id: jobId, status: "done", result, attempts });
|
|
17646
|
+
return c.json({ job_id: jobId, status: "done", result: sanitizeHarvestResult(result), attempts: sanitizeAttempts(attempts) });
|
|
16934
17647
|
} catch (err) {
|
|
16935
17648
|
const problem = classifyHarvestProblem(err);
|
|
16936
17649
|
const response = harvestProblemResponse(problem);
|
|
@@ -16938,18 +17651,19 @@ var init_server = __esm({
|
|
|
16938
17651
|
if (problem.terminalStatus === "cancelled" || c.req.raw.signal.aborted) {
|
|
16939
17652
|
await cancelJob(jobId, serializeHarvestProblem(problem));
|
|
16940
17653
|
await creditMc(user.id, syncCost, LedgerOperation.REFUND, "cancelled call");
|
|
16941
|
-
return c.json({ job_id: jobId, status: "cancelled", ...response, attempts }, problem.httpStatus);
|
|
17654
|
+
return c.json({ job_id: jobId, status: "cancelled", ...response, attempts: sanitizeAttempts(attempts) }, problem.httpStatus);
|
|
16942
17655
|
}
|
|
16943
17656
|
await failJob(jobId, serializeHarvestProblem(problem));
|
|
16944
17657
|
await creditMc(user.id, syncCost, LedgerOperation.REFUND, "failed call");
|
|
16945
|
-
return c.json({ job_id: jobId, status: "failed", ...response, attempts }, problem.httpStatus);
|
|
17658
|
+
return c.json({ job_id: jobId, status: "failed", ...response, attempts: sanitizeAttempts(attempts) }, problem.httpStatus);
|
|
16946
17659
|
}
|
|
16947
17660
|
});
|
|
16948
17661
|
// GET /jobs/:id — return one job owned by the authenticated user, together with
// its harvest attempts. Result and attempts are passed through the outbound
// sanitizers before being serialized.
app.get("/jobs/:id", auth, async (c) => {
  const requestedId = c.req.param("id");
  const job = await getJob(requestedId, c.get("user").id);
  if (!job) {
    return c.json({ error: "Job not found" }, 404);
  }
  const attempts = await listHarvestAttempts(job.id, c.get("user").id);
  // Only object results are sanitized; null/undefined and primitives pass through.
  // NOTE(review): a string-typed `job.result` is returned unsanitized — confirm
  // results are always stored/returned as objects.
  let result = job.result;
  if (result && typeof result === "object") {
    result = sanitizeHarvestResult(result);
  }
  return c.json({ ...job, result, attempts: sanitizeAttempts(attempts) });
});
|
|
16954
17668
|
app.get("/jobs", auth, async (c) => {
|
|
16955
17669
|
return c.json(await listJobs(c.get("user").id));
|
|
@@ -17048,7 +17762,7 @@ var init_server = __esm({
|
|
|
17048
17762
|
const { ok: euOk, balance_mc: euBal } = await debitMc(user.id, MC_COSTS.page_scrape, LedgerOperation.EXTRACT_URL, new URL(canonicalUrl).hostname);
|
|
17049
17763
|
if (!euOk) return c.json(insufficientBalanceResponse(euBal, MC_COSTS.page_scrape), 402);
|
|
17050
17764
|
try {
|
|
17051
|
-
const kernelApiKey =
|
|
17765
|
+
const kernelApiKey = browserServiceApiKey();
|
|
17052
17766
|
const device = screenshotDevice === "mobile" ? "mobile" : "desktop";
|
|
17053
17767
|
const [result, pageData] = await Promise.all([
|
|
17054
17768
|
extractKpo({ url: canonicalUrl, kernelApiKey }),
|
|
@@ -17086,7 +17800,7 @@ var init_server = __esm({
|
|
|
17086
17800
|
startUrl: parsed.href,
|
|
17087
17801
|
maxUrls: Math.min(2e3, Math.max(1, body.maxUrls ?? 500)),
|
|
17088
17802
|
concurrency: Math.min(20, Math.max(1, body.concurrency ?? 12)),
|
|
17089
|
-
kernelApiKey: body.browserFallback ?? body.kernelFallback ?
|
|
17803
|
+
kernelApiKey: body.browserFallback ?? body.kernelFallback ? browserServiceApiKey() : void 0
|
|
17090
17804
|
});
|
|
17091
17805
|
await logRequestEvent({
|
|
17092
17806
|
userId: user.id,
|
|
@@ -17126,7 +17840,7 @@ var init_server = __esm({
|
|
|
17126
17840
|
const result = await extractSite({
|
|
17127
17841
|
startUrl: parsed.href,
|
|
17128
17842
|
maxPages: Math.min(200, Math.max(1, body.maxPages ?? 100)),
|
|
17129
|
-
kernelApiKey: body.browserFallback ?? body.kernelFallback ?
|
|
17843
|
+
kernelApiKey: body.browserFallback ?? body.kernelFallback ? browserServiceApiKey() : void 0
|
|
17130
17844
|
});
|
|
17131
17845
|
const pageCount = result.pages?.length ?? 1;
|
|
17132
17846
|
const actualSiteMc = pageCount * MC_COSTS.page_scrape;
|