mcp-scraper 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -2
- package/dist/bin/api-server.cjs +573 -172
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/mcp-stdio-server.cjs +300 -150
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +2 -1
- package/dist/bin/mcp-stdio-server.js.map +1 -1
- package/dist/bin/paa-harvest.cjs +22 -1
- package/dist/bin/paa-harvest.cjs.map +1 -1
- package/dist/bin/paa-harvest.js +2 -1
- package/dist/bin/paa-harvest.js.map +1 -1
- package/dist/{chunk-4OHPDEZM.js → chunk-3OIRNUF5.js} +303 -151
- package/dist/chunk-3OIRNUF5.js.map +1 -0
- package/dist/{chunk-W4P2U5VF.js → chunk-LUBDFS67.js} +32 -32
- package/dist/chunk-LUBDFS67.js.map +1 -0
- package/dist/{chunk-7HB7NDOY.js → chunk-ZK456YXN.js} +12 -2
- package/dist/chunk-ZK456YXN.js.map +1 -0
- package/dist/chunk-ZMOWIBMK.js +36 -0
- package/dist/chunk-ZMOWIBMK.js.map +1 -0
- package/dist/index.cjs +22 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/{server-V5XMVRYE.js → server-YNJHP5PU.js} +235 -22
- package/dist/server-YNJHP5PU.js.map +1 -0
- package/dist/{worker-UT4ZQU2T.js → worker-PBG6LGET.js} +4 -3
- package/dist/{worker-UT4ZQU2T.js.map → worker-PBG6LGET.js.map} +1 -1
- package/docs/adr/0001-in-page-graphql-interception-for-anti-bot-scraping.md +58 -0
- package/docs/adr/README.md +11 -0
- package/docs/mcp-tool-quality-spec.md +238 -0
- package/package.json +5 -4
- package/dist/chunk-4OHPDEZM.js.map +0 -1
- package/dist/chunk-7HB7NDOY.js.map +0 -1
- package/dist/chunk-W4P2U5VF.js.map +0 -1
- package/dist/server-V5XMVRYE.js.map +0 -1
package/dist/bin/api-server.cjs
CHANGED
|
@@ -3468,9 +3468,9 @@ async function extractKpo(opts) {
|
|
|
3468
3468
|
redirect: "manual"
|
|
3469
3469
|
});
|
|
3470
3470
|
if (res.status >= 300 && res.status < 400) {
|
|
3471
|
-
const
|
|
3472
|
-
if (!
|
|
3473
|
-
const next = new URL(
|
|
3471
|
+
const location2 = res.headers.get("location");
|
|
3472
|
+
if (!location2) return null;
|
|
3473
|
+
const next = new URL(location2, target).href;
|
|
3474
3474
|
const checkedRedirect = await validatePublicHttpUrl(next, { field: "redirect URL" });
|
|
3475
3475
|
if (checkedRedirect.error || !checkedRedirect.parsed) return null;
|
|
3476
3476
|
target = checkedRedirect.parsed.href;
|
|
@@ -8216,6 +8216,7 @@ var init_rates = __esm({
|
|
|
8216
8216
|
yt_channel: 50,
|
|
8217
8217
|
yt_transcription: 200,
|
|
8218
8218
|
fb_ad: 50,
|
|
8219
|
+
maps_search: 2e3,
|
|
8219
8220
|
maps_place: 2e3,
|
|
8220
8221
|
maps_review: 50,
|
|
8221
8222
|
fb_search: 50,
|
|
@@ -8277,6 +8278,14 @@ var init_rates = __esm({
|
|
|
8277
8278
|
credits: mcToCredits(MC_COSTS.fb_ad),
|
|
8278
8279
|
unit: "per call"
|
|
8279
8280
|
},
|
|
8281
|
+
{
|
|
8282
|
+
key: "maps_search",
|
|
8283
|
+
label: "Maps business search",
|
|
8284
|
+
aliases: ["maps_search", "google maps search", "gmb search", "gbp search", "business profiles"],
|
|
8285
|
+
credits: mcToCredits(MC_COSTS.maps_search),
|
|
8286
|
+
unit: "per search",
|
|
8287
|
+
notes: "Returns up to 50 Google Maps business/profile candidates. Use maps_place_intel to hydrate selected businesses."
|
|
8288
|
+
},
|
|
8280
8289
|
{
|
|
8281
8290
|
key: "maps_place",
|
|
8282
8291
|
label: "Maps business lookup",
|
|
@@ -8338,6 +8347,7 @@ var init_rates = __esm({
|
|
|
8338
8347
|
TRANSCRIPTION_REFUND: "transcription_refund",
|
|
8339
8348
|
YT_CHANNEL: "yt_channel",
|
|
8340
8349
|
FB_AD: "fb_ad",
|
|
8350
|
+
MAPS_SEARCH: "maps_search",
|
|
8341
8351
|
MAPS_PLACE: "maps_place",
|
|
8342
8352
|
MAPS_REVIEW: "maps_review",
|
|
8343
8353
|
MAPS_REVIEW_REFUND: "maps_review_refund",
|
|
@@ -11034,9 +11044,9 @@ function proxyName(country, state, city) {
|
|
|
11034
11044
|
/**
 * Builds the proxy name used for a ZIP-code-targeted residential proxy.
 *
 * @param {string|number} zip - ZIP code appended verbatim to the fixed prefix.
 * @returns {string} The prefix `mcp-serp-residential-us-zip-` followed by `zip`.
 */
function zipProxyName(zip) {
  const prefix = "mcp-serp-residential-us-zip-";
  return `${prefix}${zip}`;
}
|
|
11037
|
-
function parseKernelLocationProxyTarget(
|
|
11038
|
-
if (!
|
|
11039
|
-
const canonicalLocation = normalizeLocation(
|
|
11047
|
+
function parseKernelLocationProxyTarget(location2, gl) {
|
|
11048
|
+
if (!location2 || gl.toLowerCase() !== "us") return null;
|
|
11049
|
+
const canonicalLocation = normalizeLocation(location2);
|
|
11040
11050
|
let parts = canonicalLocation.split(",").map((part) => part.trim()).filter(Boolean);
|
|
11041
11051
|
if (parts.length > 1 && isUnitedStates(parts[parts.length - 1])) {
|
|
11042
11052
|
parts = parts.slice(0, -1);
|
|
@@ -11687,7 +11697,7 @@ var init_facebook_ad_routes = __esm({
|
|
|
11687
11697
|
});
|
|
11688
11698
|
|
|
11689
11699
|
// src/schemas.ts
|
|
11690
|
-
var import_zod16, HarvestOptionsSchema, MapsPlaceOptionsSchema, RawPAAItemSchema, RawMapsOverviewSchema, RawMapsHoursRowSchema, RawMapsReviewStatsSchema, RawMapsReviewCardSchema, RawMapsAboutAttributeSchema;
|
|
11700
|
+
var import_zod16, HarvestOptionsSchema, MapsPlaceOptionsSchema, MapsSearchOptionsSchema, RawPAAItemSchema, RawMapsOverviewSchema, RawMapsHoursRowSchema, RawMapsReviewStatsSchema, RawMapsReviewCardSchema, RawMapsAboutAttributeSchema;
|
|
11691
11701
|
var init_schemas3 = __esm({
|
|
11692
11702
|
"src/schemas.ts"() {
|
|
11693
11703
|
"use strict";
|
|
@@ -11725,6 +11735,16 @@ var init_schemas3 = __esm({
|
|
|
11725
11735
|
kernelProxyId: import_zod16.z.string().optional(),
|
|
11726
11736
|
headless: import_zod16.z.boolean().default(true)
|
|
11727
11737
|
});
|
|
11738
|
+
MapsSearchOptionsSchema = import_zod16.z.object({
|
|
11739
|
+
query: import_zod16.z.string().min(1),
|
|
11740
|
+
location: import_zod16.z.string().optional(),
|
|
11741
|
+
gl: import_zod16.z.string().length(2).default("us"),
|
|
11742
|
+
hl: import_zod16.z.string().length(2).default("en"),
|
|
11743
|
+
maxResults: import_zod16.z.number().int().min(1).max(50).default(10),
|
|
11744
|
+
kernelApiKey: import_zod16.z.string().optional(),
|
|
11745
|
+
kernelProxyId: import_zod16.z.string().optional(),
|
|
11746
|
+
headless: import_zod16.z.boolean().default(true)
|
|
11747
|
+
});
|
|
11728
11748
|
RawPAAItemSchema = import_zod16.z.object({
|
|
11729
11749
|
question: import_zod16.z.string().min(1),
|
|
11730
11750
|
answer: import_zod16.z.string().optional(),
|
|
@@ -11785,8 +11805,8 @@ var init_MapsNavigator = __esm({
|
|
|
11785
11805
|
this.page = page;
|
|
11786
11806
|
}
|
|
11787
11807
|
page;
|
|
11788
|
-
async navigateToPlacePage(businessName,
|
|
11789
|
-
const query = `${businessName} ${
|
|
11808
|
+
async navigateToPlacePage(businessName, location2) {
|
|
11809
|
+
const query = `${businessName} ${location2}`;
|
|
11790
11810
|
const searchUrl = `https://www.google.com/maps/search/${encodeURIComponent(query)}`;
|
|
11791
11811
|
await this.page.goto(searchUrl, { waitUntil: "domcontentloaded", timeout: 45e3 });
|
|
11792
11812
|
const onPlacePage = await this.page.evaluate(() => /\/maps\/place\//.test(window.location.href));
|
|
@@ -12230,7 +12250,172 @@ var init_MapsExtractor = __esm({
|
|
|
12230
12250
|
}
|
|
12231
12251
|
});
|
|
12232
12252
|
|
|
12253
|
+
// src/extractor/MapsSearchExtractor.ts
|
|
12254
|
+
var MAPS_SEARCH_SCROLL_BUDGET_MS, MAPS_SEARCH_SCROLL_STEP_MS, MAPS_SEARCH_MAX_NO_GROWTH_ROUNDS, MapsSearchExtractor;
var init_MapsSearchExtractor = __esm({
  "src/extractor/MapsSearchExtractor.ts"() {
    "use strict";
    init_errors();
    // Upper bound on the total time collectResults() spends scrolling the feed.
    MAPS_SEARCH_SCROLL_BUDGET_MS = 6e4;
    // Pause after each scroll before the feed is read again.
    MAPS_SEARCH_SCROLL_STEP_MS = 1200;
    // Give up after this many consecutive rounds that add no new result.
    MAPS_SEARCH_MAX_NO_GROWTH_ROUNDS = 4;
    /**
     * Scrapes the Google Maps search results list for a query.
     *
     * The driver passed to the constructor must expose `launch(config)`,
     * `getPage()` and `close()`; the page it returns must expose `goto`,
     * `waitForTimeout` and `evaluate`.
     */
    MapsSearchExtractor = class {
      constructor(driver) {
        this.driver = driver;
      }
      driver;
      /**
       * Runs one Maps search and returns the collected candidates.
       *
       * @param {object} options - Reads `query`, `location`, `gl`, `hl`,
       *   `maxResults`, `headless`, `kernelApiKey` and `kernelProxyId`.
       * @returns {Promise<object>} Search metadata plus the `results` array.
       * @throws {CaptchaError} When the loaded page looks like a block/CAPTCHA page.
       *
       * The driver is closed in `finally`, so it is released on success and on failure.
       */
      async extract(options) {
        const startMs = Date.now();
        // The location is optional; filter(Boolean) drops it when absent or empty.
        const searchQuery = [options.query, options.location].filter(Boolean).join(" ");
        const searchUrl = `https://www.google.com/maps/search/${encodeURIComponent(searchQuery)}?hl=${encodeURIComponent(options.hl)}`;
        const config = {
          headless: options.headless,
          kernelApiKey: options.kernelApiKey,
          kernelProxyId: options.kernelProxyId,
          viewport: { width: 1280, height: 900 },
          locale: `${options.hl}-${options.gl.toUpperCase()}`
        };
        try {
          await this.driver.launch(config);
          const page = this.driver.getPage();
          await page.goto(searchUrl, { waitUntil: "domcontentloaded", timeout: 6e4 });
          // Fixed settle delay before inspecting the page.
          await page.waitForTimeout(3e3);
          const blocked = await this.detectBlock(page);
          if (blocked) throw new CaptchaError(RECAPTCHA_INSTRUCTIONS);
          const results = await this.collectResults(page, options.maxResults);
          return {
            query: options.query,
            location: options.location ?? null,
            searchQuery,
            searchUrl,
            extractedAt: (/* @__PURE__ */ new Date()).toISOString(),
            requestedMaxResults: options.maxResults,
            resultCount: results.length,
            results,
            durationMs: Date.now() - startMs
          };
        } finally {
          await this.driver.close();
        }
      }
      /**
       * Reports whether the current page looks like a block or CAPTCHA page.
       * Checks the first 2000 characters of the body text for known phrases
       * and the URL for a `/sorry/` path segment.
       *
       * @returns {Promise<boolean>}
       */
      async detectBlock(page) {
        return page.evaluate(() => {
          const text = document.body.innerText.slice(0, 2e3);
          return /unusual traffic|captcha|recaptcha|about this page/i.test(text) || /\/sorry\//.test(location.href);
        });
      }
      /**
       * Repeatedly reads the visible results and scrolls the feed until
       * `maxResults` unique entries are collected, the scroll time budget is
       * spent, or several consecutive rounds add nothing new.
       *
       * @param {object} page - Page handle with `evaluate` and `waitForTimeout`.
       * @param {number} maxResults - Maximum number of entries to return.
       * @returns {Promise<object[]>} Unique results in first-seen order, with
       *   `position` renumbered from 1.
       */
      async collectResults(page, maxResults) {
        // A Map keeps insertion order, so first-seen order doubles as ranking.
        const seen = /* @__PURE__ */ new Map();
        const started = Date.now();
        let noGrowthRounds = 0;
        while (Date.now() - started < MAPS_SEARCH_SCROLL_BUDGET_MS) {
          const before = seen.size;
          const batch = await this.extractVisibleResults(page);
          for (const result of batch) {
            const key = this.resultKey(result);
            if (!seen.has(key)) seen.set(key, { ...result, position: seen.size + 1 });
            if (seen.size >= maxResults) break;
          }
          if (seen.size >= maxResults) break;
          if (seen.size === before) noGrowthRounds += 1;
          else noGrowthRounds = 0;
          if (noGrowthRounds >= MAPS_SEARCH_MAX_NO_GROWTH_ROUNDS) break;
          await page.evaluate(() => {
            const feed = document.querySelector('[role="feed"]');
            if (feed) {
              feed.scrollTop = feed.scrollHeight;
            } else {
              window.scrollTo(0, document.body.scrollHeight);
            }
          });
          await page.waitForTimeout(MAPS_SEARCH_SCROLL_STEP_MS);
        }
        return [...seen.values()].slice(0, maxResults);
      }
      /**
       * Returns the de-duplication key for a result: the decimal CID when
       * present, otherwise the place URL with everything from the first `?`
       * or `&` removed, otherwise the name.
       *
       * String.prototype.replace never returns null/undefined, so a `??`
       * fallback after it can never fire; the name fallback is therefore
       * applied with `||`, and a missing placeUrl no longer throws.
       *
       * @param {{cidDecimal?: string|null, placeUrl?: string, name?: string}} result
       * @returns {string}
       */
      resultKey(result) {
        if (result.cidDecimal != null) return result.cidDecimal;
        const stableUrl = typeof result.placeUrl === "string" ? result.placeUrl.replace(/[?&].*$/, "") : "";
        return stableUrl || result.name;
      }
      /**
       * Reads every `/maps/place/` anchor currently in the DOM and builds one
       * result object per distinct place URL. All field extraction below is
       * heuristic pattern matching on the text found inside the result card.
       *
       * @returns {Promise<object[]>} Results with `position` numbered within this batch.
       */
      async extractVisibleResults(page) {
        return page.evaluate(() => {
          // Collapses whitespace; returns null for missing or empty text.
          function normalizeText(value) {
            const text = value?.replace(/\s+/g, " ").trim() ?? "";
            return text || null;
          }
          // Pulls the `!1s0x…:0x…` id pair out of a place URL; the second half
          // is also rendered as a decimal string.
          function cidFromUrl(url) {
            const fid = url.match(/!1s(0x[0-9a-f]+):(0x[0-9a-f]+)/i);
            if (!fid) return { cid: null, cidDecimal: null };
            let cidDecimal = null;
            try {
              cidDecimal = BigInt(fid[2]).toString();
            } catch {
              // Best effort: keep cidDecimal null when the hex value cannot be converted.
            }
            return { cid: `${fid[1]}:${fid[2]}`, cidDecimal };
          }
          // Collects the distinct direct text-node contents (2-139 chars per
          // node) of every div/span inside the card, in document order.
          function textParts(card) {
            if (!card) return [];
            const parts = [];
            card.querySelectorAll("div, span").forEach((el2) => {
              const text = Array.from(el2.childNodes).filter((node) => node.nodeType === 3).map((node) => node.textContent?.trim() ?? "").filter((text2) => text2.length > 1 && text2.length < 140).join(" ");
              if (text && !parts.includes(text)) parts.push(text);
            });
            return parts;
          }
          // First part matching the pattern, or null.
          function firstMatching(parts, pattern) {
            const value = parts.find((part) => pattern.test(part));
            return value ?? null;
          }
          const out = [];
          const seen = /* @__PURE__ */ new Set();
          const anchors = Array.from(document.querySelectorAll('a[href*="/maps/place/"]'));
          for (const anchor of anchors) {
            const placeUrl = anchor.href;
            // Same normalisation as resultKey(): drop everything from the first ? or &.
            const stableUrl = placeUrl.replace(/[?&].*$/, "");
            if (seen.has(stableUrl)) continue;
            seen.add(stableUrl);
            const card = anchor.closest('.Nv2PK, [role="article"], .bfdHYd') ?? anchor.parentElement;
            const parts = textParts(card);
            const aria = normalizeText(anchor.getAttribute("aria-label"));
            const heading = normalizeText(card?.querySelector('.qBF1Pd, .fontHeadlineSmall, [role="heading"]')?.textContent);
            const name = aria ?? heading ?? parts[0] ?? stableUrl;
            const links = Array.from(card?.querySelectorAll("a[href]") ?? []);
            const websiteUrl = links.find((link) => link.href.startsWith("http") && !link.href.includes("google."))?.href ?? null;
            const directionsUrl = links.find((link) => /google\.[^/]+\/maps\/dir|\/dir\//i.test(link.href))?.href ?? null;
            const rating = firstMatching(parts, /^\d(?:\.\d)?$/);
            const reviewCountRaw = firstMatching(parts, /^\(?[\d,]+\)?$/);
            const category = parts.find((part) => !/^\d(?:\.\d)?$|^\(?[\d,]+\)?$|open|closed|directions|website/i.test(part)) ?? null;
            const address = parts.find((part) => /\b[A-Z]{2}\s+\d{5}\b|\b(?:St|Street|Ave|Avenue|Rd|Road|Blvd|Drive|Dr)\b/i.test(part)) ?? null;
            const { cid, cidDecimal } = cidFromUrl(placeUrl);
            out.push({
              position: out.length + 1,
              name,
              placeUrl,
              cid,
              cidDecimal,
              rating,
              reviewCount: reviewCountRaw ? reviewCountRaw.replace(/[()]/g, "") : null,
              category,
              address,
              websiteUrl,
              directionsUrl,
              metadata: parts.slice(0, 20)
            });
          }
          return out;
        });
      }
    };
  }
});
|
|
12409
|
+
|
|
12233
12410
|
// src/api/maps-routes.ts
|
|
12411
|
+
/**
 * Builds the JSON error response shared by the Maps routes.
 *
 * A message containing "CAPTCHA" or "blocked" (case-sensitive) is reported as
 * a retryable 503 with error_code "captcha_or_blocked"; anything else is a
 * non-retryable 500 carrying the caller's errorCode.
 *
 * @param {object} c - Request context; only `c.json(body, status)` is used.
 * @param {string} msg - Raw error message; passed through sanitizeVendorName before being returned.
 * @param {string} errorCode - Error code used when the failure is not a block.
 * @returns {*} Whatever `c.json` returns.
 */
function mapsErrorResponse(c, msg, errorCode) {
  const isBlocked = ["CAPTCHA", "blocked"].some((marker) => msg.includes(marker));
  const payload = {
    error: sanitizeVendorName(msg),
    error_code: isBlocked ? "captcha_or_blocked" : errorCode,
    retryable: isBlocked
  };
  const status = isBlocked ? 503 : 500;
  return c.json(payload, status);
}
|
|
12234
12419
|
var import_hono5, mapsApp;
|
|
12235
12420
|
var init_maps_routes = __esm({
|
|
12236
12421
|
"src/api/maps-routes.ts"() {
|
|
@@ -12239,10 +12424,59 @@ var init_maps_routes = __esm({
|
|
|
12239
12424
|
init_db();
|
|
12240
12425
|
init_rates();
|
|
12241
12426
|
init_MapsExtractor();
|
|
12427
|
+
init_MapsSearchExtractor();
|
|
12242
12428
|
init_BrowserDriver();
|
|
12243
12429
|
init_schemas3();
|
|
12244
12430
|
init_api_auth();
|
|
12431
|
+
init_errors();
|
|
12245
12432
|
mapsApp = new import_hono5.Hono();
|
|
12433
|
+
mapsApp.post("/search", createApiKeyAuth(), async (c) => {
|
|
12434
|
+
const user = c.get("user");
|
|
12435
|
+
const body = await c.req.json().catch(() => ({}));
|
|
12436
|
+
const parsed = MapsSearchOptionsSchema.safeParse({
|
|
12437
|
+
kernelApiKey: process.env.KERNEL_API_KEY,
|
|
12438
|
+
...body
|
|
12439
|
+
});
|
|
12440
|
+
if (!parsed.success) {
|
|
12441
|
+
return c.json({ error: parsed.error.issues[0]?.message ?? "Invalid request" }, 400);
|
|
12442
|
+
}
|
|
12443
|
+
const { ok, balance_mc } = await debitMc(
|
|
12444
|
+
user.id,
|
|
12445
|
+
MC_COSTS.maps_search,
|
|
12446
|
+
LedgerOperation.MAPS_SEARCH,
|
|
12447
|
+
[parsed.data.query, parsed.data.location].filter(Boolean).join(" ")
|
|
12448
|
+
);
|
|
12449
|
+
if (!ok) return c.json(insufficientBalanceResponse(balance_mc, MC_COSTS.maps_search), 402);
|
|
12450
|
+
const driver = new BrowserDriver();
|
|
12451
|
+
const extractor = new MapsSearchExtractor(driver);
|
|
12452
|
+
try {
|
|
12453
|
+
const result = await extractor.extract(parsed.data);
|
|
12454
|
+
await logRequestEvent({
|
|
12455
|
+
userId: user.id,
|
|
12456
|
+
source: "maps_search",
|
|
12457
|
+
status: "done",
|
|
12458
|
+
query: result.searchQuery,
|
|
12459
|
+
location: parsed.data.location,
|
|
12460
|
+
resultCount: result.resultCount,
|
|
12461
|
+
result
|
|
12462
|
+
});
|
|
12463
|
+
return c.json(result);
|
|
12464
|
+
} catch (err) {
|
|
12465
|
+
await creditMc(user.id, MC_COSTS.maps_search, LedgerOperation.REFUND, "failed maps_search call");
|
|
12466
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
12467
|
+
await logRequestEvent({
|
|
12468
|
+
userId: user.id,
|
|
12469
|
+
source: "maps_search",
|
|
12470
|
+
status: "failed",
|
|
12471
|
+
query: [parsed.data.query, parsed.data.location].filter(Boolean).join(" "),
|
|
12472
|
+
location: parsed.data.location,
|
|
12473
|
+
error: msg
|
|
12474
|
+
});
|
|
12475
|
+
return mapsErrorResponse(c, msg, "maps_search_failed");
|
|
12476
|
+
} finally {
|
|
12477
|
+
await driver.close();
|
|
12478
|
+
}
|
|
12479
|
+
});
|
|
12246
12480
|
mapsApp.post("/place", createApiKeyAuth(), async (c) => {
|
|
12247
12481
|
const user = c.get("user");
|
|
12248
12482
|
const body = await c.req.json().catch(() => ({}));
|
|
@@ -12309,10 +12543,7 @@ var init_maps_routes = __esm({
|
|
|
12309
12543
|
location: parsed.data.location,
|
|
12310
12544
|
error: msg
|
|
12311
12545
|
});
|
|
12312
|
-
|
|
12313
|
-
return c.json({ error: msg }, 503);
|
|
12314
|
-
}
|
|
12315
|
-
return c.json({ error: msg }, 500);
|
|
12546
|
+
return mapsErrorResponse(c, msg, "maps_place_failed");
|
|
12316
12547
|
} finally {
|
|
12317
12548
|
await driver.close();
|
|
12318
12549
|
}
|
|
@@ -12670,8 +12901,19 @@ function addCandidate(candidates, city, region, example) {
|
|
|
12670
12901
|
}
|
|
12671
12902
|
candidates.set(key, { city: normalizedCity, regionCode, count: 1, examples: [example] });
|
|
12672
12903
|
}
|
|
12904
|
+
/**
 * Percent-decodes SERP text on a best-effort basis and never throws on
 * malformed escapes.
 *
 * Each maximal run of `%XX` escapes is decoded on its own. A run that is not
 * valid UTF-8 (for example `%FF`) is left untouched instead of forcing the
 * whole string to be returned undecoded, and a stray `%` that is not followed
 * by two hex digits is kept as a literal percent sign.
 *
 * @param {string} text - Possibly percent-encoded text.
 * @returns {string} The text with every decodable escape run decoded.
 */
function decodeSerpText(text) {
  // String() mirrors the coercion decodeURIComponent itself applies to its argument.
  return String(text).replace(/(?:%[0-9a-fA-F]{2})+/g, (run) => {
    try {
      return decodeURIComponent(run);
    } catch {
      // Malformed UTF-8 in this run only: keep it verbatim and carry on.
      return run;
    }
  });
}
|
|
12673
12915
|
function scanText(candidates, text) {
|
|
12674
|
-
const normalized =
|
|
12916
|
+
const normalized = decodeSerpText(text).replace(/[+/|_-]+/g, " ");
|
|
12675
12917
|
for (const match of normalized.matchAll(CITY_STATE_RE)) {
|
|
12676
12918
|
addCandidate(candidates, match[1] ?? "", match[2] ?? "", normalized.slice(0, 180));
|
|
12677
12919
|
}
|
|
@@ -14952,8 +15194,17 @@ var init_serp_intelligence_routes = __esm({
|
|
|
14952
15194
|
}
|
|
14953
15195
|
});
|
|
14954
15196
|
|
|
15197
|
+
// src/version.ts
|
|
15198
|
+
// Package version string; assigned when init_version() runs.
var PACKAGE_VERSION;
var init_version = __esm({
  "src/version.ts": function () {
    "use strict";
    PACKAGE_VERSION = "0.1.7";
  }
});
|
|
15205
|
+
|
|
14955
15206
|
// src/mcp/mcp-tool-schemas.ts
|
|
14956
|
-
var import_zod19, HarvestPaaInputSchema, ExtractUrlInputSchema, MapSiteUrlsInputSchema, ExtractSiteInputSchema, YoutubeHarvestInputSchema, YoutubeTranscribeInputSchema, FacebookPageIntelInputSchema, FacebookAdSearchInputSchema, FacebookAdTranscribeInputSchema, MapsPlaceIntelInputSchema, CreditsInfoInputSchema, SearchSerpInputSchema, CaptureSerpSnapshotInputSchema, ScreenshotInputSchema, CaptureSerpPageSnapshotsInputSchema;
|
|
15207
|
+
var import_zod19, HarvestPaaInputSchema, ExtractUrlInputSchema, MapSiteUrlsInputSchema, ExtractSiteInputSchema, YoutubeHarvestInputSchema, YoutubeTranscribeInputSchema, FacebookPageIntelInputSchema, FacebookAdSearchInputSchema, FacebookAdTranscribeInputSchema, MapsPlaceIntelInputSchema, MapsSearchInputSchema, NullableString, MapsSearchOutputSchema, MapSiteUrlsOutputSchema, YoutubeHarvestOutputSchema, FacebookAdSearchOutputSchema, FacebookPageIntelOutputSchema, CreditsInfoInputSchema, SearchSerpInputSchema, CaptureSerpSnapshotInputSchema, ScreenshotInputSchema, CaptureSerpPageSnapshotsInputSchema;
|
|
14957
15208
|
var init_mcp_tool_schemas = __esm({
|
|
14958
15209
|
"src/mcp/mcp-tool-schemas.ts"() {
|
|
14959
15210
|
"use strict";
|
|
@@ -14961,7 +15212,7 @@ var init_mcp_tool_schemas = __esm({
|
|
|
14961
15212
|
HarvestPaaInputSchema = {
|
|
14962
15213
|
query: import_zod19.z.string().min(1).describe('Core search topic only. If the user says "best hvac company in Denver CO", use query="best hvac company" and location="Denver, CO". Do not include the location in query when it can be separated.'),
|
|
14963
15214
|
location: import_zod19.z.string().optional().describe('City, region, or country for geo-targeted results, inferred from the user request when present, e.g. "Denver, CO", "Tokyo, Japan", "London, UK".'),
|
|
14964
|
-
maxQuestions: import_zod19.z.number().int().min(1).max(
|
|
15215
|
+
maxQuestions: import_zod19.z.number().int().min(1).max(200).default(30).describe("Number of PAA questions to extract. Default 30. Maximum 200. Use 10 for quick probes, 30 for normal research, 100-200 when the user asks for everything/full/deep research. Larger harvests get a longer server time budget (151-200 questions \u2192 up to 280s). Credits are charged by extracted question; unused request hold is refunded."),
|
|
14965
15216
|
gl: import_zod19.z.string().length(2).default("us").describe("Google country code inferred from location or user language. Examples: United States us, United Kingdom gb, Japan jp, Canada ca, Australia au."),
|
|
14966
15217
|
hl: import_zod19.z.string().default("en").describe("Google interface/content language inferred from the user request. Use en unless the user asks for another language or locale."),
|
|
14967
15218
|
device: import_zod19.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
|
|
@@ -15018,6 +15269,93 @@ var init_mcp_tool_schemas = __esm({
|
|
|
15018
15269
|
includeReviews: import_zod19.z.boolean().default(false).describe("Whether to fetch individual review cards"),
|
|
15019
15270
|
maxReviews: import_zod19.z.number().int().min(1).max(500).default(50).describe("Max review cards to return (requires includeReviews: true)")
|
|
15020
15271
|
};
|
|
15272
|
+
MapsSearchInputSchema = {
|
|
15273
|
+
query: import_zod19.z.string().min(1).describe('Business category, niche, keyword, or search term. If the user says "roofers in Denver CO", use query="roofers" and location="Denver, CO". Do not put the location here when it can be separated.'),
|
|
15274
|
+
location: import_zod19.z.string().optional().describe('City, region, country, or service area for the Maps search, e.g. "Denver, CO". Infer from the user request when present.'),
|
|
15275
|
+
gl: import_zod19.z.string().length(2).default("us").describe("Google country code inferred from location."),
|
|
15276
|
+
hl: import_zod19.z.string().length(2).default("en").describe("Language inferred from user request."),
|
|
15277
|
+
maxResults: import_zod19.z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more.")
|
|
15278
|
+
};
|
|
15279
|
+
NullableString = import_zod19.z.string().nullable();
|
|
15280
|
+
MapsSearchOutputSchema = {
|
|
15281
|
+
query: import_zod19.z.string(),
|
|
15282
|
+
location: import_zod19.z.string().nullable(),
|
|
15283
|
+
searchQuery: import_zod19.z.string(),
|
|
15284
|
+
searchUrl: import_zod19.z.string().url(),
|
|
15285
|
+
extractedAt: import_zod19.z.string(),
|
|
15286
|
+
requestedMaxResults: import_zod19.z.number().int().min(1).max(50),
|
|
15287
|
+
resultCount: import_zod19.z.number().int().min(0).max(50),
|
|
15288
|
+
results: import_zod19.z.array(import_zod19.z.object({
|
|
15289
|
+
position: import_zod19.z.number().int().min(1),
|
|
15290
|
+
name: import_zod19.z.string(),
|
|
15291
|
+
placeUrl: import_zod19.z.string().url(),
|
|
15292
|
+
cid: NullableString,
|
|
15293
|
+
cidDecimal: NullableString,
|
|
15294
|
+
rating: NullableString,
|
|
15295
|
+
reviewCount: NullableString,
|
|
15296
|
+
category: NullableString,
|
|
15297
|
+
address: NullableString,
|
|
15298
|
+
websiteUrl: NullableString,
|
|
15299
|
+
directionsUrl: NullableString,
|
|
15300
|
+
metadata: import_zod19.z.array(import_zod19.z.string())
|
|
15301
|
+
})),
|
|
15302
|
+
durationMs: import_zod19.z.number().int().min(0)
|
|
15303
|
+
};
|
|
15304
|
+
MapSiteUrlsOutputSchema = {
|
|
15305
|
+
startUrl: import_zod19.z.string(),
|
|
15306
|
+
totalFound: import_zod19.z.number().int().min(0),
|
|
15307
|
+
truncated: import_zod19.z.boolean(),
|
|
15308
|
+
okCount: import_zod19.z.number().int().min(0),
|
|
15309
|
+
redirectCount: import_zod19.z.number().int().min(0),
|
|
15310
|
+
brokenCount: import_zod19.z.number().int().min(0),
|
|
15311
|
+
urls: import_zod19.z.array(import_zod19.z.object({
|
|
15312
|
+
url: import_zod19.z.string(),
|
|
15313
|
+
status: import_zod19.z.number().int().nullable()
|
|
15314
|
+
})),
|
|
15315
|
+
durationMs: import_zod19.z.number().min(0)
|
|
15316
|
+
};
|
|
15317
|
+
YoutubeHarvestOutputSchema = {
|
|
15318
|
+
mode: import_zod19.z.string(),
|
|
15319
|
+
videoCount: import_zod19.z.number().int().min(0),
|
|
15320
|
+
channel: import_zod19.z.object({
|
|
15321
|
+
title: NullableString,
|
|
15322
|
+
subscriberCount: NullableString
|
|
15323
|
+
}).nullable(),
|
|
15324
|
+
videos: import_zod19.z.array(import_zod19.z.object({
|
|
15325
|
+
videoId: import_zod19.z.string(),
|
|
15326
|
+
title: import_zod19.z.string(),
|
|
15327
|
+
channelName: NullableString,
|
|
15328
|
+
views: NullableString,
|
|
15329
|
+
duration: NullableString,
|
|
15330
|
+
url: NullableString
|
|
15331
|
+
}))
|
|
15332
|
+
};
|
|
15333
|
+
FacebookAdSearchOutputSchema = {
|
|
15334
|
+
query: import_zod19.z.string(),
|
|
15335
|
+
advertiserCount: import_zod19.z.number().int().min(0),
|
|
15336
|
+
advertisers: import_zod19.z.array(import_zod19.z.object({
|
|
15337
|
+
name: NullableString,
|
|
15338
|
+
adCount: import_zod19.z.number().int().nullable(),
|
|
15339
|
+
libraryId: NullableString
|
|
15340
|
+
}))
|
|
15341
|
+
};
|
|
15342
|
+
FacebookPageIntelOutputSchema = {
|
|
15343
|
+
advertiserName: NullableString,
|
|
15344
|
+
totalAds: import_zod19.z.number().int().min(0),
|
|
15345
|
+
activeCount: import_zod19.z.number().int().min(0),
|
|
15346
|
+
videoCount: import_zod19.z.number().int().min(0),
|
|
15347
|
+
imageCount: import_zod19.z.number().int().min(0),
|
|
15348
|
+
ads: import_zod19.z.array(import_zod19.z.object({
|
|
15349
|
+
libraryId: NullableString,
|
|
15350
|
+
status: NullableString,
|
|
15351
|
+
creativeType: NullableString,
|
|
15352
|
+
headline: NullableString,
|
|
15353
|
+
cta: NullableString,
|
|
15354
|
+
startDate: NullableString,
|
|
15355
|
+
videoUrl: NullableString,
|
|
15356
|
+
variations: import_zod19.z.number().int().nullable()
|
|
15357
|
+
}))
|
|
15358
|
+
};
|
|
15021
15359
|
CreditsInfoInputSchema = {
|
|
15022
15360
|
item: import_zod19.z.string().optional().describe('Optional tool, action, or feature to look up, e.g. "maps reviews", "extract_url", or "YouTube transcription"'),
|
|
15023
15361
|
includeLedger: import_zod19.z.boolean().default(false).describe("Whether to include recent credit ledger entries")
|
|
@@ -15066,6 +15404,14 @@ var init_mcp_tool_schemas = __esm({
|
|
|
15066
15404
|
});
|
|
15067
15405
|
|
|
15068
15406
|
// src/mcp/mcp-response-formatter.ts
|
|
15407
|
+
/**
 * Sets the module-level report-saving switch.
 *
 * saveFullReport() and persistScreenshotLocally() return null without
 * writing anything while this switch is falsy.
 *
 * @param {boolean} enabled - New value of the switch, stored as given.
 */
function configureReportSaving(enabled) {
  reportSavingEnabled = enabled;
}
|
|
15410
|
+
/**
 * Rewrites vendor-specific identifiers in outgoing text to neutral names and
 * then passes the result through sanitizeVendorName.
 *
 * The rewrites run in the listed order. The snake_case field names and the
 * `"kernel":` JSON key are matched case-insensitively; the camelCase names and
 * the environment-variable name are matched case-sensitively.
 *
 * @param {string} text - Text that may mention vendor-specific field names.
 * @returns {*} The value returned by sanitizeVendorName for the rewritten text.
 */
function sanitizeVendorText(text) {
  const rewrites = [
    [/kernel_session_id/gi, "browser_session_id"],
    [/kernel_delete_succeeded/gi, "session_cleanup_succeeded"],
    [/kernel_delete_started/gi, "session_cleanup_started"],
    [/kernel_delete_error/gi, "session_cleanup_error"],
    [/kernelSessionId/g, "browserSessionId"],
    [/kernelProxyId/g, "proxyId"],
    [/KERNEL_API_KEY/g, "BROWSER_SERVICE_API_KEY"],
    [/"kernel"\s*:/gi, '"browserRuntime":']
  ];
  let scrubbed = text;
  for (const [pattern, replacement] of rewrites) {
    scrubbed = scrubbed.replace(pattern, replacement);
  }
  return sanitizeVendorName(scrubbed);
}
|
|
15069
15415
|
/**
 * Turns a free-form report title into a lowercase, hyphen-separated slug of
 * at most 80 characters.
 *
 * Hyphens are trimmed again after the 80-character cut, because cutting a
 * longer slug can leave a separator as its last character.
 *
 * @param {string} input - Report title.
 * @returns {string} The slug, or "mcp-scraper-report" when nothing usable remains.
 */
function slugifyReportName(input) {
  const slug = input
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 80)
    .replace(/-+$/, "");
  return slug || "mcp-scraper-report";
}
|
|
@@ -15077,7 +15423,7 @@ function outputBaseDir() {
|
|
|
15077
15423
|
return process.env.MCP_SCRAPER_OUTPUT_DIR?.trim() || (0, import_node_path6.join)((0, import_node_os3.homedir)(), "Downloads", "mcp-scraper");
|
|
15078
15424
|
}
|
|
15079
15425
|
function saveFullReport(full) {
|
|
15080
|
-
if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
15426
|
+
if (!reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
15081
15427
|
const outDir = outputBaseDir();
|
|
15082
15428
|
try {
|
|
15083
15429
|
(0, import_node_fs4.mkdirSync)(outDir, { recursive: true });
|
|
@@ -15090,7 +15436,7 @@ function saveFullReport(full) {
|
|
|
15090
15436
|
}
|
|
15091
15437
|
}
|
|
15092
15438
|
function persistScreenshotLocally(base64, url) {
|
|
15093
|
-
if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
15439
|
+
if (!reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
15094
15440
|
try {
|
|
15095
15441
|
const dir = (0, import_node_path6.join)(outputBaseDir(), "screenshots");
|
|
15096
15442
|
(0, import_node_fs4.mkdirSync)(dir, { recursive: true });
|
|
@@ -15130,11 +15476,11 @@ function parseData(raw) {
|
|
|
15130
15476
|
const text = first?.type === "text" ? first.text : "";
|
|
15131
15477
|
try {
|
|
15132
15478
|
const parsed = JSON.parse(text || "{}");
|
|
15133
|
-
if (raw.isError || parsed.error || parsed.error_code) return { error: formatStructuredError(parsed, text) };
|
|
15479
|
+
if (raw.isError || parsed.error || parsed.error_code) return { error: sanitizeVendorText(formatStructuredError(parsed, text)) };
|
|
15134
15480
|
const data = parsed.result ?? parsed;
|
|
15135
15481
|
return { data };
|
|
15136
15482
|
} catch {
|
|
15137
|
-
if (raw.isError) return { error: text || "Tool error" };
|
|
15483
|
+
if (raw.isError) return { error: sanitizeVendorText(text || "Tool error") };
|
|
15138
15484
|
return { error: "Failed to parse tool response" };
|
|
15139
15485
|
}
|
|
15140
15486
|
}
|
|
@@ -15148,15 +15494,6 @@ function entityIdsSection(ids) {
|
|
|
15148
15494
|
## Entity IDs
|
|
15149
15495
|
${lines.join("\n")}` : "";
|
|
15150
15496
|
}
|
|
15151
|
-
/**
 * Formats a one-line summary of the first Knowledge Graph MID, CID and GCID
 * found in `ids`.
 *
 * @param {{kgIds?: string[], cids?: string[], gcids?: string[]}|null|undefined} ids
 * @returns {string} A newline followed by the "**Entity IDs:**" line, or ""
 *   when `ids` is missing or every list is empty.
 */
function entityIdsSummaryLine(ids) {
  if (!ids) return "";
  const labelled = [
    ["KG MID", ids.kgIds],
    ["CID", ids.cids],
    ["GCID", ids.gcids]
  ];
  const parts = labelled
    .filter(([, list]) => list?.length)
    .map(([label, list]) => `${label}: ${list[0]}`);
  if (parts.length === 0) return "";
  return `
**Entity IDs:** ${parts.join(" \xB7 ")}`;
}
|
|
15160
15497
|
function truncate(s, max) {
|
|
15161
15498
|
if (!s) return "";
|
|
15162
15499
|
return s.length > max ? s.slice(0, max) + "\u2026" : s;
|
|
@@ -15186,7 +15523,7 @@ function debugSection(debug) {
|
|
|
15186
15523
|
if (locationEvidence) {
|
|
15187
15524
|
lines.push(`- Location evidence: ${locationEvidence.status}${locationEvidence.expected ? ` \xB7 expected ${locationEvidence.expected.city}${locationEvidence.expected.regionCode ? `, ${locationEvidence.expected.regionCode}` : ""}` : ""}${candidates ? ` \xB7 candidates ${candidates}` : ""}`);
|
|
15188
15525
|
}
|
|
15189
|
-
return lines.join("\n");
|
|
15526
|
+
return sanitizeVendorText(lines.join("\n"));
|
|
15190
15527
|
}
|
|
15191
15528
|
function errorAttemptsSection(body) {
|
|
15192
15529
|
const attempts = Array.isArray(body.attempts) ? body.attempts : [];
|
|
@@ -15240,26 +15577,12 @@ ${serpRows}` : "";
|
|
|
15240
15577
|
const tips = `
|
|
15241
15578
|
---
|
|
15242
15579
|
\u{1F4A1} **Tips**
|
|
15243
|
-
- Max questions: \`maxQuestions:
|
|
15580
|
+
- Max questions: \`maxQuestions: 200\` (current: ${input.maxQuestions ?? 30})
|
|
15244
15581
|
- Organic results only: use \`search_serp\`
|
|
15245
15582
|
- Dig into a result: use \`extract_url\` on any organic URL`;
|
|
15246
15583
|
const full = `# PAA Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
|
|
15247
15584
|
|
|
15248
15585
|
${paaTable}${serpTable}${entityIdsSection(entityIds)}${aiSection}${statsLine}${debugSection(diagnostics?.debug)}${tips}`;
|
|
15249
|
-
const topQ = flat.slice(0, 10).map((r, i) => `${i + 1}. ${r.question}`).join("\n");
|
|
15250
|
-
const topO = organic.slice(0, 5).map((r) => `${r.position}. [${r.title}](${r.url}) \u2014 ${r.domain}`).join("\n");
|
|
15251
|
-
const summary = [
|
|
15252
|
-
`**PAA: "${input.query}"** \u2014 ${flat.length} questions extracted`,
|
|
15253
|
-
topQ ? `
|
|
15254
|
-
**Top questions:**
|
|
15255
|
-
${topQ}` : "",
|
|
15256
|
-
organic.length ? `
|
|
15257
|
-
**Top organic results:**
|
|
15258
|
-
${topO}` : "",
|
|
15259
|
-
entityIdsSummaryLine(entityIds),
|
|
15260
|
-
`
|
|
15261
|
-
\u{1F4A1} \`maxQuestions\` up to 150 \xB7Use \`extract_url\` to dig into any result`
|
|
15262
|
-
].filter(Boolean).join("\n");
|
|
15263
15586
|
return oneBlock(full);
|
|
15264
15587
|
}
|
|
15265
15588
|
function formatSearchSerp(raw, input) {
|
|
@@ -15298,18 +15621,6 @@ ${localRows}` : "";
|
|
|
15298
15621
|
const full = `# SERP Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
|
|
15299
15622
|
|
|
15300
15623
|
${serpTable}${localSection}${entityIdsSection(entityIds)}${aiSection}${debugSection(diagnostics?.debug)}${tips}`;
|
|
15301
|
-
const topO = organic.slice(0, 5).map((r) => `${r.position}. [${r.title}](${r.url}) \u2014 ${r.domain}`).join("\n");
|
|
15302
|
-
const summary = [
|
|
15303
|
-
`**SERP: "${input.query}"** \u2014 ${organic.length} organic results`,
|
|
15304
|
-
topO ? `
|
|
15305
|
-
**Top results:**
|
|
15306
|
-
${topO}` : "",
|
|
15307
|
-
localPack.length ? `
|
|
15308
|
-
**Local Pack:** ${localPack.map((b) => b.name).join(", ")}` : "",
|
|
15309
|
-
entityIdsSummaryLine(entityIds),
|
|
15310
|
-
`
|
|
15311
|
-
\u{1F4A1} Use \`harvest_paa\` for questions \xB7 \`extract_url\` to scrape any result`
|
|
15312
|
-
].filter(Boolean).join("\n");
|
|
15313
15624
|
return oneBlock(full);
|
|
15314
15625
|
}
|
|
15315
15626
|
function formatExtractUrl(raw, input) {
|
|
@@ -15420,15 +15731,19 @@ ${broken.map((u) => `- ${u.url} (${u.status})`).join("\n")}` : "",
|
|
|
15420
15731
|
- Extract content from all pages: use \`extract_site\`
|
|
15421
15732
|
- Scrape a single page: use \`extract_url\``
|
|
15422
15733
|
].filter(Boolean).join("\n");
|
|
15423
|
-
|
|
15424
|
-
|
|
15425
|
-
|
|
15426
|
-
|
|
15427
|
-
|
|
15428
|
-
|
|
15429
|
-
|
|
15430
|
-
|
|
15431
|
-
|
|
15734
|
+
return {
|
|
15735
|
+
...oneBlock(full),
|
|
15736
|
+
structuredContent: {
|
|
15737
|
+
startUrl: d.startUrl ?? input.url,
|
|
15738
|
+
totalFound: d.totalFound ?? urls.length,
|
|
15739
|
+
truncated: d.truncated === true,
|
|
15740
|
+
okCount: ok.length,
|
|
15741
|
+
redirectCount: redirects.length,
|
|
15742
|
+
brokenCount: broken.length,
|
|
15743
|
+
urls: urls.map((u) => ({ url: u.url, status: u.status ?? null })),
|
|
15744
|
+
durationMs: d.durationMs ?? 0
|
|
15745
|
+
}
|
|
15746
|
+
};
|
|
15432
15747
|
}
|
|
15433
15748
|
function formatExtractSite(raw, input) {
|
|
15434
15749
|
const parsed = parseData(raw);
|
|
@@ -15453,13 +15768,6 @@ ${pageRows}`,
|
|
|
15453
15768
|
- Map URLs first: use \`map_site_urls\`
|
|
15454
15769
|
- Inspect a single page: use \`extract_url\``
|
|
15455
15770
|
].join("\n");
|
|
15456
|
-
const summary = [
|
|
15457
|
-
`**Site Extract: ${input.url}** \u2014 ${pages.length} pages`,
|
|
15458
|
-
pages.slice(0, 5).map((p) => `- ${p.title ?? p.url}`).join("\n"),
|
|
15459
|
-
pages.length > 5 ? `- \u2026 and ${pages.length - 5} more` : "",
|
|
15460
|
-
`
|
|
15461
|
-
\u{1F4A1} Use \`extract_url\` to inspect any individual page`
|
|
15462
|
-
].filter(Boolean).join("\n");
|
|
15463
15771
|
return oneBlock(full);
|
|
15464
15772
|
}
|
|
15465
15773
|
function formatYoutubeHarvest(raw, input) {
|
|
@@ -15490,16 +15798,22 @@ ${videoRows}`,
|
|
|
15490
15798
|
- Transcribe a video: use \`youtube_transcribe\` with the \`videoId\` above
|
|
15491
15799
|
- Switch mode: \`mode: "channel"\` with \`channelHandle\` or \`mode: "search"\` with \`query\``
|
|
15492
15800
|
].filter(Boolean).join("\n");
|
|
15493
|
-
|
|
15494
|
-
|
|
15495
|
-
|
|
15496
|
-
|
|
15497
|
-
|
|
15498
|
-
|
|
15499
|
-
|
|
15500
|
-
|
|
15501
|
-
|
|
15502
|
-
|
|
15801
|
+
return {
|
|
15802
|
+
...oneBlock(full),
|
|
15803
|
+
structuredContent: {
|
|
15804
|
+
mode: input.mode,
|
|
15805
|
+
videoCount: videos.length,
|
|
15806
|
+
channel: d.channelMeta ? { title: d.channelMeta.title ?? null, subscriberCount: d.channelMeta.subscriberCount ?? null } : null,
|
|
15807
|
+
videos: videos.map((v) => ({
|
|
15808
|
+
videoId: String(v.videoId ?? ""),
|
|
15809
|
+
title: String(v.title ?? ""),
|
|
15810
|
+
channelName: v.channelName ?? null,
|
|
15811
|
+
views: v.views ?? null,
|
|
15812
|
+
duration: v.duration ?? null,
|
|
15813
|
+
url: v.url ?? null
|
|
15814
|
+
}))
|
|
15815
|
+
}
|
|
15816
|
+
};
|
|
15503
15817
|
}
|
|
15504
15818
|
function formatYoutubeTranscribe(raw, input) {
|
|
15505
15819
|
const parsed = parseData(raw);
|
|
@@ -15529,14 +15843,6 @@ ${chunkRows}` : "",
|
|
|
15529
15843
|
---
|
|
15530
15844
|
\u{1F4A1} Harvest more from this channel: use \`youtube_harvest\` with \`mode: "channel"\``
|
|
15531
15845
|
].filter(Boolean).join("\n");
|
|
15532
|
-
const summary = [
|
|
15533
|
-
`**YouTube Transcript: \`${input.videoId}\`** \u2014 ${text.split(" ").length} words \xB7 ${durSec}s`,
|
|
15534
|
-
`
|
|
15535
|
-
**Preview:**
|
|
15536
|
-
> ${truncate(text, 300)}`,
|
|
15537
|
-
`
|
|
15538
|
-
\u{1F4A1} Full transcript in artifact above`
|
|
15539
|
-
].join("\n");
|
|
15540
15846
|
return oneBlock(full);
|
|
15541
15847
|
}
|
|
15542
15848
|
function formatFacebookPageIntel(raw, input) {
|
|
@@ -15565,19 +15871,26 @@ ${adBlocks}`,
|
|
|
15565
15871
|
- Transcribe video ads: use \`facebook_ad_transcribe\` with the \`videoUrl\` above
|
|
15566
15872
|
- Find other advertisers: use \`facebook_ad_search\``
|
|
15567
15873
|
].filter(Boolean).join("\n");
|
|
15568
|
-
|
|
15569
|
-
|
|
15570
|
-
|
|
15571
|
-
|
|
15572
|
-
|
|
15573
|
-
|
|
15574
|
-
|
|
15575
|
-
|
|
15576
|
-
|
|
15577
|
-
|
|
15578
|
-
|
|
15579
|
-
|
|
15580
|
-
|
|
15874
|
+
return {
|
|
15875
|
+
...oneBlock(full),
|
|
15876
|
+
structuredContent: {
|
|
15877
|
+
advertiserName: d.advertiserName ?? null,
|
|
15878
|
+
totalAds: s.totalAds ?? 0,
|
|
15879
|
+
activeCount: s.activeCount ?? 0,
|
|
15880
|
+
videoCount: s.videoCount ?? 0,
|
|
15881
|
+
imageCount: s.imageCount ?? 0,
|
|
15882
|
+
ads: ads.map((ad) => ({
|
|
15883
|
+
libraryId: ad.libraryId ?? null,
|
|
15884
|
+
status: ad.status ?? null,
|
|
15885
|
+
creativeType: ad.creativeType ?? null,
|
|
15886
|
+
headline: ad.headline ?? null,
|
|
15887
|
+
cta: ad.cta ?? null,
|
|
15888
|
+
startDate: ad.startDate ?? null,
|
|
15889
|
+
videoUrl: ad.videoUrl ?? null,
|
|
15890
|
+
variations: typeof ad.variations === "number" ? ad.variations : null
|
|
15891
|
+
}))
|
|
15892
|
+
}
|
|
15893
|
+
};
|
|
15581
15894
|
}
|
|
15582
15895
|
function formatFacebookAdSearch(raw, input) {
|
|
15583
15896
|
const parsed = parseData(raw);
|
|
@@ -15601,15 +15914,18 @@ ${rows}`,
|
|
|
15601
15914
|
- Scan all ads: use \`facebook_page_intel\` with \`libraryId\`
|
|
15602
15915
|
- Or pass the advertiser name as \`query\` in \`facebook_page_intel\``
|
|
15603
15916
|
].join("\n");
|
|
15604
|
-
|
|
15605
|
-
|
|
15606
|
-
|
|
15607
|
-
|
|
15608
|
-
|
|
15609
|
-
|
|
15610
|
-
|
|
15611
|
-
|
|
15612
|
-
|
|
15917
|
+
return {
|
|
15918
|
+
...oneBlock(full),
|
|
15919
|
+
structuredContent: {
|
|
15920
|
+
query: input.query,
|
|
15921
|
+
advertiserCount: advertisers.length,
|
|
15922
|
+
advertisers: advertisers.map((a) => ({
|
|
15923
|
+
name: a.pageName ?? a.name ?? null,
|
|
15924
|
+
adCount: typeof a.adCount === "number" ? a.adCount : null,
|
|
15925
|
+
libraryId: a.sampleLibraryId ?? a.libraryId ?? null
|
|
15926
|
+
}))
|
|
15927
|
+
}
|
|
15928
|
+
};
|
|
15613
15929
|
}
|
|
15614
15930
|
function formatCreditsInfo(raw, input) {
|
|
15615
15931
|
const parsed = parseData(raw);
|
|
@@ -15648,16 +15964,58 @@ ${costRows}` : "",
|
|
|
15648
15964
|
| Date | Operation | Credits | Description |
|
|
15649
15965
|
|------|-----------|---------|-------------|
|
|
15650
15966
|
${ledgerRows}` : ""
|
|
15651
|
-
].filter(Boolean).join("\n");
|
|
15652
|
-
const summary = [
|
|
15653
|
-
`**Credit balance:** ${balance ?? "unknown"} credits`,
|
|
15654
|
-
matched ? `
|
|
15655
|
-
**${matched.label}:** ${matched.credits} credits ${matched.unit}` : null,
|
|
15656
|
-
input.includeLedger && ledger.length ? `
|
|
15657
|
-
Recent ledger entries included in the full report.` : null
|
|
15658
15967
|
].filter(Boolean).join("\n");
|
|
15659
15968
|
return oneBlock(full);
|
|
15660
15969
|
}
|
|
15970
|
+
/**
 * Formats a `maps_search` tool response as a Markdown report plus structured content.
 *
 * @param {object} raw - Raw tool response; handed to `parseData`.
 * @param {object} input - Tool input. `query`, `location` and `maxResults` are
 *   only used as fallbacks when the response does not echo them back.
 * @returns {object} `{ content, isError: true }` when `parseData` reports an
 *   error; otherwise the result of `oneBlock(full)` extended with
 *   `structuredContent`.
 */
function formatMapsSearch(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
  const d = parsed.data;
  // A non-array `results` would make the `.map` calls below throw.
  const results = Array.isArray(d.results) ? d.results : [];
  // Resolve the echoed fields once so the Markdown report and the structured
  // payload always agree, even when the response omits them.
  const query = d.query ?? input.query;
  const searchQuery = d.searchQuery ?? [input.query, input.location].filter(Boolean).join(" ");
  const requestedMax = d.requestedMaxResults ?? input.maxResults ?? 10;
  const durationMs = d.durationMs;
  const dash = "\u2014";
  const rows = results.map((r) => {
    const rating = [r.rating, r.reviewCount ? `(${r.reviewCount})` : null].filter(Boolean).join(" ");
    const cid = r.cidDecimal ? `\`${r.cidDecimal}\`` : dash;
    const site = r.websiteUrl ? `[site](${r.websiteUrl})` : dash;
    // Without this guard a candidate lacking a place URL renders "[maps](undefined)".
    const maps = r.placeUrl ? `[maps](${r.placeUrl})` : dash;
    return `| ${r.position} | ${cell(r.name)} | ${cell(r.category)} | ${cell(rating)} | ${cell(r.address)} | ${cid} | ${site} | ${maps} |`;
  }).join("\n");
  const metadataBlocks = results.map((r) => {
    // At most eight metadata lines are listed per candidate.
    const meta = r.metadata?.length ? r.metadata.slice(0, 8).map((m) => ` - ${m}`).join("\n") : " - none";
    return `### ${r.position}. ${r.name}\n${meta}`;
  });
  const metadataSection = results.length ? `\n## Candidate Metadata\n${metadataBlocks.join("\n\n")}` : "";
  const resultsSection = [
    "",
    "## Results",
    "| # | Name | Category | Rating | Address | CID | Website | Maps |",
    "|---|------|----------|--------|---------|-----|---------|------|",
    rows
  ].join("\n");
  const full = [
    `# Google Maps Search: "${searchQuery}"`,
    `**Returned:** ${results.length} profile candidate${results.length === 1 ? "" : "s"} \xB7 **Requested max:** ${requestedMax} \xB7 **Limit:** 50`,
    resultsSection,
    metadataSection,
    "\n---\n\u{1F4A1} **Next step:** use `maps_place_intel` with a selected business name and location to hydrate full hours, phone, review topics, and optional review cards.",
    durationMs != null ? `\n*Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
  ].filter(Boolean).join("\n");
  return {
    ...oneBlock(full),
    structuredContent: {
      query,
      location: d.location ?? null,
      // Same value the report title shows; the raw field may be missing.
      searchQuery,
      searchUrl: d.searchUrl,
      extractedAt: d.extractedAt,
      requestedMaxResults: requestedMax,
      resultCount: results.length,
      results,
      durationMs: durationMs ?? 0
    }
  };
}
|
|
15661
16019
|
function formatMapsPlaceIntel(raw, input) {
|
|
15662
16020
|
const parsed = parseData(raw);
|
|
15663
16021
|
if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
|
|
@@ -15755,19 +16113,6 @@ ${entitySection}` : null,
|
|
|
15755
16113
|
durationMs != null ? `
|
|
15756
16114
|
---
|
|
15757
16115
|
*Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
|
|
15758
|
-
].filter(Boolean).join("\n");
|
|
15759
|
-
const summary = [
|
|
15760
|
-
`**${name}** \u2014 ${category ?? "Business"} \xB7 ${ratingLine || "No rating"}`,
|
|
15761
|
-
address ? `\u{1F4CD} ${address}` : null,
|
|
15762
|
-
phone ? `\u{1F4DE} ${phone}` : null,
|
|
15763
|
-
hoursSummary ? `\u{1F550} ${hoursSummary}` : null,
|
|
15764
|
-
website ? `\u{1F310} ${website}` : null,
|
|
15765
|
-
reviewsStatus === "collected" && reviews.length ? `
|
|
15766
|
-
\u{1F4AC} ${reviews.length} reviews fetched \u2014 full list in artifact above` : null,
|
|
15767
|
-
reviewsStatus === "unavailable" ? `
|
|
15768
|
-
\u26A0\uFE0F Reviews could not be retrieved this run` : null,
|
|
15769
|
-
reviewsStatus === "none_exist" ? `
|
|
15770
|
-
\u{1F4AC} No reviews on Google Maps` : null
|
|
15771
16116
|
].filter(Boolean).join("\n");
|
|
15772
16117
|
return oneBlock(full);
|
|
15773
16118
|
}
|
|
@@ -15799,76 +16144,123 @@ ${chunkRows}` : "",
|
|
|
15799
16144
|
---
|
|
15800
16145
|
\u{1F4A1} Get more ads from this advertiser: use \`facebook_page_intel\``
|
|
15801
16146
|
].filter(Boolean).join("\n");
|
|
15802
|
-
const summary = [
|
|
15803
|
-
`**Facebook Ad Transcript** \u2014 ${text.split(" ").length} words \xB7 ${durSec}s`,
|
|
15804
|
-
`
|
|
15805
|
-
**Preview:**
|
|
15806
|
-
> ${truncate(text, 300)}`,
|
|
15807
|
-
`
|
|
15808
|
-
\u{1F4A1} Full transcript in artifact above`
|
|
15809
|
-
].join("\n");
|
|
15810
16147
|
return oneBlock(full);
|
|
15811
16148
|
}
|
|
15812
|
-
var import_node_fs4, import_node_os3, import_node_path6;
|
|
16149
|
+
var import_node_fs4, import_node_os3, import_node_path6, reportSavingEnabled;
|
|
15813
16150
|
var init_mcp_response_formatter = __esm({
|
|
15814
16151
|
"src/mcp/mcp-response-formatter.ts"() {
|
|
15815
16152
|
"use strict";
|
|
15816
16153
|
import_node_fs4 = require("fs");
|
|
15817
16154
|
import_node_os3 = require("os");
|
|
15818
16155
|
import_node_path6 = require("path");
|
|
16156
|
+
init_errors();
|
|
16157
|
+
reportSavingEnabled = true;
|
|
15819
16158
|
}
|
|
15820
16159
|
});
|
|
15821
16160
|
|
|
15822
16161
|
// src/mcp/paa-mcp-server.ts
|
|
15823
|
-
function
|
|
15824
|
-
|
|
16162
|
+
/**
 * Builds a tool annotations object with a fixed set of hints.
 *
 * @param {string} title - Display title placed on the annotations.
 * @returns {object} A fresh object carrying `title` plus the hints
 *   read-only, non-destructive, non-idempotent and open-world.
 */
function liveWebToolAnnotations(title) {
  const hints = {
    readOnlyHint: true,
    destructiveHint: false,
    idempotentHint: false,
    openWorldHint: true
  };
  return { title, ...hints };
}
|
|
16171
|
+
/**
 * Creates the "mcp-scraper" MCP server and registers every scraper tool on it.
 *
 * @param {object} executor - Object whose methods (`harvestPaa`, `searchSerp`,
 *   `extractUrl`, ...) perform the actual tool calls.
 * @param {object} [options] - `savesReportsLocally: false` switches the note
 *   appended to most tool descriptions to the "returned inline" wording.
 * @returns {object} The configured `McpServer` instance.
 */
function buildPaaExtractorMcpServer(executor, options = {}) {
  // Anything other than an explicit `false` keeps the "saves locally" wording.
  const reportSuffix = options.savesReportsLocally !== false
    ? " Saves a full Markdown report locally."
    : " Reports are returned inline; no files are saved on this hosted endpoint.";
  const withReportNote = (description) => `${description}${reportSuffix}`;
  const server = new import_mcp.McpServer({ name: "mcp-scraper", version: PACKAGE_VERSION });
  // Every tool follows the same shape: run the executor call, then hand the
  // raw result and the original input to the matching formatter.
  const register = (name, config, invoke, format) => {
    server.registerTool(name, config, async (input) => format(await invoke(input), input));
  };

  register("harvest_paa", {
    title: "Google PAA + SERP Harvest",
    description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-150 for "full", "deep", "all", or comprehensive research. Credits are charged by extracted question; unused request hold is refunded.'),
    inputSchema: HarvestPaaInputSchema,
    annotations: liveWebToolAnnotations("Google PAA + SERP Harvest")
  }, (input) => executor.harvestPaa(input), formatHarvestPaa);

  register("search_serp", {
    title: "Google SERP Lookup",
    description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request."),
    inputSchema: SearchSerpInputSchema,
    annotations: liveWebToolAnnotations("Google SERP Lookup")
  }, (input) => executor.searchSerp(input), formatSearchSerp);

  register("extract_url", {
    title: "Single URL Extract",
    description: withReportNote("Extract structured data from one public URL: page content as Markdown, heading structure, JSON-LD schema, entity details, NAP score, metadata, and missing schema fields. Use when the user provides a single URL or asks to inspect/scrape one page."),
    inputSchema: ExtractUrlInputSchema,
    annotations: liveWebToolAnnotations("Single URL Extract")
  }, (input) => executor.extractUrl(input), formatExtractUrl);

  register("map_site_urls", {
    title: "Site URL Map",
    description: withReportNote("Map/crawl a public website to build a URL inventory with HTTP status codes, broken links, redirects, and site scope. Use before extract_site for audits or when the user asks for a sitemap/URL inventory."),
    inputSchema: MapSiteUrlsInputSchema,
    outputSchema: MapSiteUrlsOutputSchema,
    annotations: liveWebToolAnnotations("Site URL Map")
  }, (input) => executor.mapSiteUrls(input), formatMapSiteUrls);

  register("extract_site", {
    title: "Multi-Page Site Extract",
    description: withReportNote("Run multi-page extraction across a public website. Returns per-page titles, H1s, metadata, headings, schema/entity data, canonical URLs, and content. Use for website audits, competitor audits, and full-site extraction."),
    inputSchema: ExtractSiteInputSchema,
    annotations: liveWebToolAnnotations("Multi-Page Site Extract")
  }, (input) => executor.extractSite(input), formatExtractSite);

  register("youtube_harvest", {
    title: "YouTube Video Harvest",
    description: withReportNote('Harvest YouTube video metadata by search query or channel handle/ID/URL. Use mode "search" for keyword/topic requests and mode "channel" for @handles, channel IDs, or channel URLs. Returns titles, views, dates, durations, URLs, thumbnails, and videoIds for follow-up transcription.'),
    inputSchema: YoutubeHarvestInputSchema,
    outputSchema: YoutubeHarvestOutputSchema,
    annotations: liveWebToolAnnotations("YouTube Video Harvest")
  }, (input) => executor.youtubeHarvest(input), formatYoutubeHarvest);

  register("youtube_transcribe", {
    title: "YouTube Transcription",
    description: withReportNote("Fetch and transcribe captions from a YouTube video. Returns full transcript, timestamped chunks, and word count. Pass a videoId from youtube_harvest results or infer it from a YouTube URL if the user provided one."),
    inputSchema: YoutubeTranscribeInputSchema,
    annotations: liveWebToolAnnotations("YouTube Transcription")
  }, (input) => executor.youtubeTranscribe(input), formatYoutubeTranscribe);

  register("facebook_page_intel", {
    title: "Facebook Advertiser Ad Intel",
    description: withReportNote("Harvest ads from a Facebook advertiser. Returns ad copy, headlines, CTAs, creative type, status, landing URLs, and video URLs ready for transcription. Accepts pageId, libraryId, or a brand/advertiser name as query. Use after facebook_ad_search when possible."),
    inputSchema: FacebookPageIntelInputSchema,
    outputSchema: FacebookPageIntelOutputSchema,
    annotations: liveWebToolAnnotations("Facebook Advertiser Ad Intel")
  }, (input) => executor.facebookPageIntel(input), formatFacebookPageIntel);

  register("facebook_ad_search", {
    title: "Facebook Ad Library Search",
    description: withReportNote("Search Facebook Ad Library by brand, advertiser, competitor, niche, or keyword. Returns advertisers with ad counts and library IDs. Use to discover competitors, then pass libraryId to facebook_page_intel."),
    inputSchema: FacebookAdSearchInputSchema,
    outputSchema: FacebookAdSearchOutputSchema,
    annotations: liveWebToolAnnotations("Facebook Ad Library Search")
  }, (input) => executor.facebookAdSearch(input), formatFacebookAdSearch);

  // This description is registered without the report note.
  register("facebook_ad_transcribe", {
    title: "Facebook Ad Transcription",
    description: "Transcribe audio from a Facebook ad video. Returns full transcript and timestamped chunks. Use the videoUrl value from facebook_page_intel results.",
    inputSchema: FacebookAdTranscribeInputSchema,
    annotations: liveWebToolAnnotations("Facebook Ad Transcription")
  }, (input) => executor.facebookAdTranscribe(input), formatFacebookAdTranscribe);

  register("maps_place_intel", {
    title: "Google Maps Business Profile Details",
    description: withReportNote('Extract Google Maps business intelligence for one known/named business: rating, review count, category, address, phone, website, hours, booking URL, review histogram, review topics, about attributes, entity IDs, and optional review cards. Do not use this for category searches, local market prospect lists, or requests for multiple GMB/GBP profiles; use maps_search first for those. Split business name from location (e.g. "Elite Roofing Denver CO" => businessName "Elite Roofing", location "Denver, CO"). Pass includeReviews true when the user asks for reviews/customer pain.'),
    inputSchema: MapsPlaceIntelInputSchema,
    annotations: liveWebToolAnnotations("Google Maps Business Profile Details")
  }, (input) => executor.mapsPlaceIntel(input), formatMapsPlaceIntel);

  register("maps_search", {
    title: "Google Maps Business Search",
    description: withReportNote('Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.'),
    inputSchema: MapsSearchInputSchema,
    outputSchema: MapsSearchOutputSchema,
    annotations: liveWebToolAnnotations("Google Maps Business Search")
  }, (input) => executor.mapsSearch(input), formatMapsSearch);

  // credits_info carries its own annotations: idempotent and not open-world.
  register("credits_info", {
    title: "MCP Scraper Credits & Costs",
    description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",
    inputSchema: CreditsInfoInputSchema,
    annotations: {
      title: "MCP Scraper Credits & Costs",
      readOnlyHint: true,
      destructiveHint: false,
      idempotentHint: true,
      openWorldHint: false
    }
  }, (input) => executor.creditsInfo(input), formatCreditsInfo);

  return server;
}
|
|
@@ -15877,6 +16269,7 @@ var init_paa_mcp_server = __esm({
|
|
|
15877
16269
|
"src/mcp/paa-mcp-server.ts"() {
|
|
15878
16270
|
"use strict";
|
|
15879
16271
|
import_mcp = require("@modelcontextprotocol/sdk/server/mcp.js");
|
|
16272
|
+
init_version();
|
|
15880
16273
|
init_mcp_tool_schemas();
|
|
15881
16274
|
init_mcp_response_formatter();
|
|
15882
16275
|
}
|
|
@@ -15976,6 +16369,9 @@ var init_http_mcp_tool_executor = __esm({
|
|
|
15976
16369
|
mapsPlaceIntel(input) {
|
|
15977
16370
|
return this.call("/maps/place", input);
|
|
15978
16371
|
}
|
|
16372
|
+
mapsSearch(input) {
|
|
16373
|
+
return this.call("/maps/search", input);
|
|
16374
|
+
}
|
|
15979
16375
|
creditsInfo(input) {
|
|
15980
16376
|
return this.call("/billing/credits", input);
|
|
15981
16377
|
}
|
|
@@ -16015,15 +16411,18 @@ async function requireMcpCallerKey(c) {
|
|
|
16015
16411
|
return callerKey;
|
|
16016
16412
|
}
|
|
16017
16413
|
/**
 * Registers the two SERP Intelligence capture tools on an existing server.
 *
 * @param {object} server - Server exposing `registerTool`.
 * @param {object} executor - Provides `captureSerpSnapshot` and
 *   `captureSerpPageSnapshots`; their results are returned unformatted.
 * @returns {void}
 */
function registerSerpIntelligenceCaptureTools(server, executor) {
  const snapshotTitle = "SERP Intelligence Snapshot";
  const snapshotConfig = {
    title: snapshotTitle,
    description: "Capture a structured SERP Intelligence Google snapshot through POST /serp-intelligence/capture, the same product capture path used by Phoenix. Split query from location, infer gl/hl, use proxyMode location for localized residential proxy evidence, configured for the static residential proxy, and none only for direct-network debugging. Set debug true when investigating location evidence, proxy behavior, CAPTCHA, or capture reliability.",
    inputSchema: CaptureSerpSnapshotInputSchema,
    annotations: liveWebToolAnnotations(snapshotTitle)
  };
  server.registerTool("capture_serp_snapshot", snapshotConfig, async (input) => executor.captureSerpSnapshot(input));

  const pageSnapshotsTitle = "SERP Intelligence Page Snapshots";
  const pageSnapshotsConfig = {
    title: pageSnapshotsTitle,
    description: "Capture public ranking-page evidence through POST /serp-intelligence/page-snapshots, the same product page snapshot path used by Phoenix. Provide urls for simple captures or targets when preserving organic, AI citation, local-pack, configured target, or site-subject source metadata. Private IPs, localhost, file URLs, and internal URLs are rejected by the service. Use timeoutMs for slow pages and debug true for sanitized proxy/browser diagnostics.",
    inputSchema: CaptureSerpPageSnapshotsInputSchema,
    annotations: liveWebToolAnnotations(pageSnapshotsTitle)
  };
  server.registerTool("capture_serp_page_snapshots", pageSnapshotsConfig, async (input) => executor.captureSerpPageSnapshots(input));
}
|
|
16028
16427
|
var import_hono7, import_webStandardStreamableHttp, mcpApp;
|
|
16029
16428
|
var init_mcp_routes = __esm({
|
|
@@ -16033,8 +16432,10 @@ var init_mcp_routes = __esm({
|
|
|
16033
16432
|
import_webStandardStreamableHttp = require("@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js");
|
|
16034
16433
|
init_paa_mcp_server();
|
|
16035
16434
|
init_http_mcp_tool_executor();
|
|
16435
|
+
init_mcp_response_formatter();
|
|
16036
16436
|
init_db();
|
|
16037
16437
|
init_mcp_tool_schemas();
|
|
16438
|
+
configureReportSaving(false);
|
|
16038
16439
|
mcpApp = new import_hono7.Hono();
|
|
16039
16440
|
mcpApp.all("/", async (c) => {
|
|
16040
16441
|
try {
|
|
@@ -16047,7 +16448,7 @@ var init_mcp_routes = __esm({
|
|
|
16047
16448
|
sessionIdGenerator: void 0,
|
|
16048
16449
|
enableJsonResponse: true
|
|
16049
16450
|
});
|
|
16050
|
-
const server = buildPaaExtractorMcpServer(executor);
|
|
16451
|
+
const server = buildPaaExtractorMcpServer(executor, { savesReportsLocally: false });
|
|
16051
16452
|
registerSerpIntelligenceCaptureTools(server, executor);
|
|
16052
16453
|
await server.connect(transport);
|
|
16053
16454
|
return transport.handleRequest(c.req.raw);
|