mcp-scraper 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/api-server.cjs +388 -75
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/mcp-stdio-server.cjs +243 -11
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +1 -1
- package/dist/bin/paa-harvest.cjs +14 -4
- package/dist/bin/paa-harvest.cjs.map +1 -1
- package/dist/bin/paa-harvest.js +4 -3
- package/dist/bin/paa-harvest.js.map +1 -1
- package/dist/{chunk-3OIRNUF5.js → chunk-RE6HCRYC.js} +244 -12
- package/dist/chunk-RE6HCRYC.js.map +1 -0
- package/dist/{chunk-LUBDFS67.js → chunk-TM22BLWP.js} +15 -3
- package/dist/chunk-TM22BLWP.js.map +1 -0
- package/dist/index.cjs +12 -2
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +1 -1
- package/dist/{server-YNJHP5PU.js → server-QXVVTKJP.js} +80 -30
- package/dist/server-QXVVTKJP.js.map +1 -0
- package/dist/{worker-PBG6LGET.js → worker-AUCXFHEL.js} +4 -3
- package/dist/worker-AUCXFHEL.js.map +1 -0
- package/package.json +1 -1
- package/dist/chunk-3OIRNUF5.js.map +0 -1
- package/dist/chunk-LUBDFS67.js.map +0 -1
- package/dist/server-YNJHP5PU.js.map +0 -1
- package/dist/worker-PBG6LGET.js.map +0 -1
package/dist/bin/api-server.js
CHANGED
|
@@ -17,8 +17,8 @@ loadDotEnv();
|
|
|
17
17
|
async function main() {
|
|
18
18
|
const [{ serve }, { app }, { startWorker }, { migrate }] = await Promise.all([
|
|
19
19
|
import("@hono/node-server"),
|
|
20
|
-
import("../server-
|
|
21
|
-
import("../worker-
|
|
20
|
+
import("../server-QXVVTKJP.js"),
|
|
21
|
+
import("../worker-AUCXFHEL.js"),
|
|
22
22
|
import("../db-YWCNHBLH.js")
|
|
23
23
|
]);
|
|
24
24
|
const PORT = parseInt(process.env.PORT ?? "3001");
|
|
@@ -128,7 +128,7 @@ var HttpMcpToolExecutor = class {
|
|
|
128
128
|
var import_mcp = require("@modelcontextprotocol/sdk/server/mcp.js");
|
|
129
129
|
|
|
130
130
|
// src/version.ts
|
|
131
|
-
var PACKAGE_VERSION = "0.1.
|
|
131
|
+
var PACKAGE_VERSION = "0.1.8";
|
|
132
132
|
|
|
133
133
|
// src/mcp/mcp-tool-schemas.ts
|
|
134
134
|
var import_zod = require("zod");
|
|
@@ -224,6 +224,120 @@ var MapsSearchOutputSchema = {
|
|
|
224
224
|
})),
|
|
225
225
|
durationMs: import_zod.z.number().int().min(0)
|
|
226
226
|
};
|
|
227
|
+
var OrganicResultOutput = import_zod.z.object({
|
|
228
|
+
position: import_zod.z.number().int(),
|
|
229
|
+
title: import_zod.z.string(),
|
|
230
|
+
url: import_zod.z.string(),
|
|
231
|
+
domain: import_zod.z.string(),
|
|
232
|
+
snippet: NullableString
|
|
233
|
+
});
|
|
234
|
+
var AiOverviewOutput = import_zod.z.object({
|
|
235
|
+
detected: import_zod.z.boolean(),
|
|
236
|
+
text: NullableString
|
|
237
|
+
}).nullable();
|
|
238
|
+
var EntityIdsOutput = import_zod.z.object({
|
|
239
|
+
kgIds: import_zod.z.array(import_zod.z.string()),
|
|
240
|
+
cids: import_zod.z.array(import_zod.z.string()),
|
|
241
|
+
gcids: import_zod.z.array(import_zod.z.string())
|
|
242
|
+
}).nullable();
|
|
243
|
+
var HarvestPaaOutputSchema = {
|
|
244
|
+
query: import_zod.z.string(),
|
|
245
|
+
location: NullableString,
|
|
246
|
+
questionCount: import_zod.z.number().int().min(0),
|
|
247
|
+
completionStatus: NullableString,
|
|
248
|
+
questions: import_zod.z.array(import_zod.z.object({
|
|
249
|
+
question: import_zod.z.string(),
|
|
250
|
+
answer: NullableString,
|
|
251
|
+
sourceTitle: NullableString,
|
|
252
|
+
sourceSite: NullableString
|
|
253
|
+
})),
|
|
254
|
+
organicResults: import_zod.z.array(OrganicResultOutput),
|
|
255
|
+
aiOverview: AiOverviewOutput,
|
|
256
|
+
entityIds: EntityIdsOutput,
|
|
257
|
+
durationMs: import_zod.z.number().min(0).nullable()
|
|
258
|
+
};
|
|
259
|
+
var SearchSerpOutputSchema = {
|
|
260
|
+
query: import_zod.z.string(),
|
|
261
|
+
location: NullableString,
|
|
262
|
+
organicResults: import_zod.z.array(OrganicResultOutput),
|
|
263
|
+
localPack: import_zod.z.array(import_zod.z.object({
|
|
264
|
+
position: import_zod.z.number().int(),
|
|
265
|
+
name: import_zod.z.string(),
|
|
266
|
+
rating: NullableString,
|
|
267
|
+
reviewCount: NullableString,
|
|
268
|
+
websiteUrl: NullableString
|
|
269
|
+
})),
|
|
270
|
+
aiOverview: AiOverviewOutput,
|
|
271
|
+
entityIds: EntityIdsOutput
|
|
272
|
+
};
|
|
273
|
+
var ExtractUrlOutputSchema = {
|
|
274
|
+
url: import_zod.z.string(),
|
|
275
|
+
title: NullableString,
|
|
276
|
+
headings: import_zod.z.array(import_zod.z.object({
|
|
277
|
+
level: import_zod.z.number().int(),
|
|
278
|
+
text: import_zod.z.string()
|
|
279
|
+
})),
|
|
280
|
+
schemaBlockCount: import_zod.z.number().int().min(0),
|
|
281
|
+
entityName: NullableString,
|
|
282
|
+
entityTypes: import_zod.z.array(import_zod.z.string()),
|
|
283
|
+
napScore: import_zod.z.number().nullable(),
|
|
284
|
+
missingSchemaFields: import_zod.z.array(import_zod.z.string()),
|
|
285
|
+
screenshotSaved: NullableString
|
|
286
|
+
};
|
|
287
|
+
var ExtractSiteOutputSchema = {
|
|
288
|
+
url: import_zod.z.string(),
|
|
289
|
+
pageCount: import_zod.z.number().int().min(0),
|
|
290
|
+
pages: import_zod.z.array(import_zod.z.object({
|
|
291
|
+
url: import_zod.z.string(),
|
|
292
|
+
title: NullableString,
|
|
293
|
+
schemaTypes: import_zod.z.array(import_zod.z.string())
|
|
294
|
+
})),
|
|
295
|
+
durationMs: import_zod.z.number().min(0)
|
|
296
|
+
};
|
|
297
|
+
var MapsPlaceIntelOutputSchema = {
|
|
298
|
+
name: import_zod.z.string(),
|
|
299
|
+
rating: NullableString,
|
|
300
|
+
reviewCount: NullableString,
|
|
301
|
+
category: NullableString,
|
|
302
|
+
address: NullableString,
|
|
303
|
+
phone: NullableString,
|
|
304
|
+
website: NullableString,
|
|
305
|
+
hoursSummary: NullableString,
|
|
306
|
+
bookingUrl: NullableString,
|
|
307
|
+
kgmid: NullableString,
|
|
308
|
+
cidDecimal: NullableString,
|
|
309
|
+
cidUrl: NullableString,
|
|
310
|
+
lat: import_zod.z.number().nullable(),
|
|
311
|
+
lng: import_zod.z.number().nullable(),
|
|
312
|
+
reviewsStatus: import_zod.z.string(),
|
|
313
|
+
reviewsCollected: import_zod.z.number().int().min(0),
|
|
314
|
+
reviewTopics: import_zod.z.array(import_zod.z.object({
|
|
315
|
+
label: import_zod.z.string(),
|
|
316
|
+
count: import_zod.z.string()
|
|
317
|
+
}))
|
|
318
|
+
};
|
|
319
|
+
var CreditsInfoOutputSchema = {
|
|
320
|
+
balanceCredits: import_zod.z.number().nullable(),
|
|
321
|
+
matchedCost: import_zod.z.object({
|
|
322
|
+
label: import_zod.z.string(),
|
|
323
|
+
credits: import_zod.z.number(),
|
|
324
|
+
unit: import_zod.z.string(),
|
|
325
|
+
notes: NullableString
|
|
326
|
+
}).nullable(),
|
|
327
|
+
costs: import_zod.z.array(import_zod.z.object({
|
|
328
|
+
key: import_zod.z.string(),
|
|
329
|
+
label: import_zod.z.string(),
|
|
330
|
+
credits: import_zod.z.number(),
|
|
331
|
+
unit: import_zod.z.string(),
|
|
332
|
+
notes: NullableString
|
|
333
|
+
})),
|
|
334
|
+
ledger: import_zod.z.array(import_zod.z.object({
|
|
335
|
+
createdAt: import_zod.z.string(),
|
|
336
|
+
operation: import_zod.z.string(),
|
|
337
|
+
credits: import_zod.z.number(),
|
|
338
|
+
description: NullableString
|
|
339
|
+
}))
|
|
340
|
+
};
|
|
227
341
|
var MapSiteUrlsOutputSchema = {
|
|
228
342
|
startUrl: import_zod.z.string(),
|
|
229
343
|
totalFound: import_zod.z.number().int().min(0),
|
|
@@ -434,7 +548,7 @@ function debugSection(debug) {
|
|
|
434
548
|
if (!debug || typeof debug !== "object") return "";
|
|
435
549
|
const request = debug.request ?? {};
|
|
436
550
|
const browser = debug.browser ?? {};
|
|
437
|
-
const kernel = browser.kernel ?? {};
|
|
551
|
+
const kernel = browser.browserRuntime ?? browser.kernel ?? {};
|
|
438
552
|
const network = browser.networkLocation ?? {};
|
|
439
553
|
const nav = browser.serpNavigation ?? {};
|
|
440
554
|
const proxyResolution = kernel.proxyResolution ?? {};
|
|
@@ -460,12 +574,14 @@ function errorAttemptsSection(body) {
|
|
|
460
574
|
const lines = attempts.slice(0, 5).map((attempt) => {
|
|
461
575
|
const debug = attempt.debug ?? {};
|
|
462
576
|
const browser = debug.browser ?? {};
|
|
463
|
-
const kernel = browser.kernel ?? {};
|
|
577
|
+
const kernel = browser.browserRuntime ?? browser.kernel ?? {};
|
|
464
578
|
const proxyResolution = kernel.proxyResolution ?? {};
|
|
465
579
|
const network = browser.networkLocation ?? {};
|
|
466
580
|
const nav = browser.serpNavigation ?? {};
|
|
467
581
|
const geo = [network.ip, network.city, network.region].filter(Boolean).join(" / ") || "geo unknown";
|
|
468
|
-
|
|
582
|
+
const sessionId = attempt.browser_session_id ?? attempt.kernel_session_id ?? kernel.sessionId ?? "unknown";
|
|
583
|
+
const cleanupSucceeded = attempt.session_cleanup_succeeded ?? attempt.kernel_delete_succeeded;
|
|
584
|
+
return `- Attempt ${attempt.attempt_number ?? "?"}: ${attempt.outcome ?? attempt.status ?? "unknown"} \xB7 session ${sessionId} \xB7 proxy ${debug.request?.proxyMode ?? kernel.proxyMode ?? "unknown"}${proxyResolution.source ? `/${proxyResolution.source}` : ""} \xB7 ${geo} \xB7 CAPTCHA ${nav.captchaDetected === true ? "yes" : nav.captchaDetected === false ? "no" : "unknown"} \xB7 cleanup ${cleanupSucceeded === true ? "yes" : cleanupSucceeded === false ? "no" : "unknown"}`;
|
|
469
585
|
});
|
|
470
586
|
return `
|
|
471
587
|
|
|
@@ -512,7 +628,31 @@ ${serpRows}` : "";
|
|
|
512
628
|
const full = `# PAA Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
|
|
513
629
|
|
|
514
630
|
${paaTable}${serpTable}${entityIdsSection(entityIds)}${aiSection}${statsLine}${debugSection(diagnostics?.debug)}${tips}`;
|
|
515
|
-
return
|
|
631
|
+
return {
|
|
632
|
+
...oneBlock(full),
|
|
633
|
+
structuredContent: {
|
|
634
|
+
query: input.query,
|
|
635
|
+
location: input.location ?? null,
|
|
636
|
+
questionCount: flat.length,
|
|
637
|
+
completionStatus: diagnostics?.completionStatus ?? null,
|
|
638
|
+
questions: flat.map((r) => ({
|
|
639
|
+
question: String(r.question ?? ""),
|
|
640
|
+
answer: r.answer ?? null,
|
|
641
|
+
sourceTitle: r.source_title ?? null,
|
|
642
|
+
sourceSite: r.source_site ?? null
|
|
643
|
+
})),
|
|
644
|
+
organicResults: organic.map((r) => ({
|
|
645
|
+
position: Number(r.position) || 0,
|
|
646
|
+
title: String(r.title ?? ""),
|
|
647
|
+
url: String(r.url ?? ""),
|
|
648
|
+
domain: String(r.domain ?? ""),
|
|
649
|
+
snippet: r.snippet ?? null
|
|
650
|
+
})),
|
|
651
|
+
aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
|
|
652
|
+
entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null,
|
|
653
|
+
durationMs: durationMs ?? null
|
|
654
|
+
}
|
|
655
|
+
};
|
|
516
656
|
}
|
|
517
657
|
function formatSearchSerp(raw, input) {
|
|
518
658
|
const parsed = parseData(raw);
|
|
@@ -550,7 +690,29 @@ ${localRows}` : "";
|
|
|
550
690
|
const full = `# SERP Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
|
|
551
691
|
|
|
552
692
|
${serpTable}${localSection}${entityIdsSection(entityIds)}${aiSection}${debugSection(diagnostics?.debug)}${tips}`;
|
|
553
|
-
return
|
|
693
|
+
return {
|
|
694
|
+
...oneBlock(full),
|
|
695
|
+
structuredContent: {
|
|
696
|
+
query: input.query,
|
|
697
|
+
location: input.location ?? null,
|
|
698
|
+
organicResults: organic.map((r) => ({
|
|
699
|
+
position: Number(r.position) || 0,
|
|
700
|
+
title: String(r.title ?? ""),
|
|
701
|
+
url: String(r.url ?? ""),
|
|
702
|
+
domain: String(r.domain ?? ""),
|
|
703
|
+
snippet: r.snippet ?? null
|
|
704
|
+
})),
|
|
705
|
+
localPack: localPack.map((b) => ({
|
|
706
|
+
position: Number(b.position) || 0,
|
|
707
|
+
name: String(b.name ?? ""),
|
|
708
|
+
rating: b.rating ?? null,
|
|
709
|
+
reviewCount: b.reviewCount ?? null,
|
|
710
|
+
websiteUrl: b.websiteUrl ?? null
|
|
711
|
+
})),
|
|
712
|
+
aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
|
|
713
|
+
entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null
|
|
714
|
+
}
|
|
715
|
+
};
|
|
554
716
|
}
|
|
555
717
|
function formatExtractUrl(raw, input) {
|
|
556
718
|
const parsed = parseData(raw);
|
|
@@ -619,15 +781,27 @@ ${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
|
|
|
619
781
|
**${title}**
|
|
620
782
|
${headingSection}${kpoSection}${brandingSection}${bodySection}${screenshotSection}${mediaSection}${tips}`;
|
|
621
783
|
const textResult = oneBlock(full);
|
|
784
|
+
const structuredContent = {
|
|
785
|
+
url,
|
|
786
|
+
title: d.title ?? null,
|
|
787
|
+
headings: headings.map((h) => ({ level: Number(h.level) || 0, text: String(h.text ?? "") })),
|
|
788
|
+
schemaBlockCount: schemaCount,
|
|
789
|
+
entityName: kpo?.entityName ?? null,
|
|
790
|
+
entityTypes: kpo?.type ?? [],
|
|
791
|
+
napScore: kpo?.napScore ?? null,
|
|
792
|
+
missingSchemaFields: kpo?.missingFields ?? [],
|
|
793
|
+
screenshotSaved: screenshotPath ?? null
|
|
794
|
+
};
|
|
622
795
|
if (screenshotMeta?.base64) {
|
|
623
796
|
return {
|
|
624
797
|
content: [
|
|
625
798
|
...textResult.content,
|
|
626
799
|
{ type: "image", data: screenshotMeta.base64, mimeType: "image/png" }
|
|
627
|
-
]
|
|
800
|
+
],
|
|
801
|
+
structuredContent
|
|
628
802
|
};
|
|
629
803
|
}
|
|
630
|
-
return textResult;
|
|
804
|
+
return { ...textResult, structuredContent };
|
|
631
805
|
}
|
|
632
806
|
function formatMapSiteUrls(raw, input) {
|
|
633
807
|
const parsed = parseData(raw);
|
|
@@ -697,7 +871,19 @@ ${pageRows}`,
|
|
|
697
871
|
- Map URLs first: use \`map_site_urls\`
|
|
698
872
|
- Inspect a single page: use \`extract_url\``
|
|
699
873
|
].join("\n");
|
|
700
|
-
return
|
|
874
|
+
return {
|
|
875
|
+
...oneBlock(full),
|
|
876
|
+
structuredContent: {
|
|
877
|
+
url: input.url,
|
|
878
|
+
pageCount: pages.length,
|
|
879
|
+
pages: pages.map((p) => ({
|
|
880
|
+
url: String(p.url ?? ""),
|
|
881
|
+
title: p.title ?? null,
|
|
882
|
+
schemaTypes: p.kpo?.type ?? []
|
|
883
|
+
})),
|
|
884
|
+
durationMs: d.durationMs ?? 0
|
|
885
|
+
}
|
|
886
|
+
};
|
|
701
887
|
}
|
|
702
888
|
function formatYoutubeHarvest(raw, input) {
|
|
703
889
|
const parsed = parseData(raw);
|
|
@@ -894,7 +1080,26 @@ ${costRows}` : "",
|
|
|
894
1080
|
|------|-----------|---------|-------------|
|
|
895
1081
|
${ledgerRows}` : ""
|
|
896
1082
|
].filter(Boolean).join("\n");
|
|
897
|
-
return
|
|
1083
|
+
return {
|
|
1084
|
+
...oneBlock(full),
|
|
1085
|
+
structuredContent: {
|
|
1086
|
+
balanceCredits: typeof balance === "number" ? balance : null,
|
|
1087
|
+
matchedCost: matched ? { label: matched.label, credits: matched.credits, unit: matched.unit, notes: matched.notes ?? null } : null,
|
|
1088
|
+
costs: costs.map((c) => ({
|
|
1089
|
+
key: c.key,
|
|
1090
|
+
label: c.label,
|
|
1091
|
+
credits: c.credits,
|
|
1092
|
+
unit: c.unit,
|
|
1093
|
+
notes: c.notes ?? null
|
|
1094
|
+
})),
|
|
1095
|
+
ledger: ledger.map((row) => ({
|
|
1096
|
+
createdAt: String(row.created_at ?? ""),
|
|
1097
|
+
operation: String(row.operation ?? ""),
|
|
1098
|
+
credits: row.amount_mc / 1e3,
|
|
1099
|
+
description: row.description ?? null
|
|
1100
|
+
}))
|
|
1101
|
+
}
|
|
1102
|
+
};
|
|
898
1103
|
}
|
|
899
1104
|
function formatMapsSearch(raw, input) {
|
|
900
1105
|
const parsed = parseData(raw);
|
|
@@ -1043,7 +1248,28 @@ ${entitySection}` : null,
|
|
|
1043
1248
|
---
|
|
1044
1249
|
*Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
|
|
1045
1250
|
].filter(Boolean).join("\n");
|
|
1046
|
-
return
|
|
1251
|
+
return {
|
|
1252
|
+
...oneBlock(full),
|
|
1253
|
+
structuredContent: {
|
|
1254
|
+
name,
|
|
1255
|
+
rating: rating ?? null,
|
|
1256
|
+
reviewCount: reviewCount ?? null,
|
|
1257
|
+
category: category ?? null,
|
|
1258
|
+
address: address ?? null,
|
|
1259
|
+
phone: phone ?? null,
|
|
1260
|
+
website: website ?? null,
|
|
1261
|
+
hoursSummary: hoursSummary ?? null,
|
|
1262
|
+
bookingUrl: bookingUrl ?? null,
|
|
1263
|
+
kgmid: kgmid ?? null,
|
|
1264
|
+
cidDecimal: cidDecimal ?? null,
|
|
1265
|
+
cidUrl: cidUrl ?? null,
|
|
1266
|
+
lat: lat ?? null,
|
|
1267
|
+
lng: lng ?? null,
|
|
1268
|
+
reviewsStatus,
|
|
1269
|
+
reviewsCollected: reviews.length,
|
|
1270
|
+
reviewTopics: topics.map((t) => ({ label: String(t.label ?? ""), count: String(t.count ?? "") }))
|
|
1271
|
+
}
|
|
1272
|
+
};
|
|
1047
1273
|
}
|
|
1048
1274
|
function formatFacebookAdTranscribe(raw, input) {
|
|
1049
1275
|
const parsed = parseData(raw);
|
|
@@ -1095,18 +1321,21 @@ function buildPaaExtractorMcpServer(executor2, options = {}) {
|
|
|
1095
1321
|
title: "Google PAA + SERP Harvest",
|
|
1096
1322
|
description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-150 for "full", "deep", "all", or comprehensive research. Credits are charged by extracted question; unused request hold is refunded.'),
|
|
1097
1323
|
inputSchema: HarvestPaaInputSchema,
|
|
1324
|
+
outputSchema: HarvestPaaOutputSchema,
|
|
1098
1325
|
annotations: liveWebToolAnnotations("Google PAA + SERP Harvest")
|
|
1099
1326
|
}, async (input) => formatHarvestPaa(await executor2.harvestPaa(input), input));
|
|
1100
1327
|
server2.registerTool("search_serp", {
|
|
1101
1328
|
title: "Google SERP Lookup",
|
|
1102
1329
|
description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request."),
|
|
1103
1330
|
inputSchema: SearchSerpInputSchema,
|
|
1331
|
+
outputSchema: SearchSerpOutputSchema,
|
|
1104
1332
|
annotations: liveWebToolAnnotations("Google SERP Lookup")
|
|
1105
1333
|
}, async (input) => formatSearchSerp(await executor2.searchSerp(input), input));
|
|
1106
1334
|
server2.registerTool("extract_url", {
|
|
1107
1335
|
title: "Single URL Extract",
|
|
1108
1336
|
description: withReportNote("Extract structured data from one public URL: page content as Markdown, heading structure, JSON-LD schema, entity details, NAP score, metadata, and missing schema fields. Use when the user provides a single URL or asks to inspect/scrape one page."),
|
|
1109
1337
|
inputSchema: ExtractUrlInputSchema,
|
|
1338
|
+
outputSchema: ExtractUrlOutputSchema,
|
|
1110
1339
|
annotations: liveWebToolAnnotations("Single URL Extract")
|
|
1111
1340
|
}, async (input) => formatExtractUrl(await executor2.extractUrl(input), input));
|
|
1112
1341
|
server2.registerTool("map_site_urls", {
|
|
@@ -1120,6 +1349,7 @@ function buildPaaExtractorMcpServer(executor2, options = {}) {
|
|
|
1120
1349
|
title: "Multi-Page Site Extract",
|
|
1121
1350
|
description: withReportNote("Run multi-page extraction across a public website. Returns per-page titles, H1s, metadata, headings, schema/entity data, canonical URLs, and content. Use for website audits, competitor audits, and full-site extraction."),
|
|
1122
1351
|
inputSchema: ExtractSiteInputSchema,
|
|
1352
|
+
outputSchema: ExtractSiteOutputSchema,
|
|
1123
1353
|
annotations: liveWebToolAnnotations("Multi-Page Site Extract")
|
|
1124
1354
|
}, async (input) => formatExtractSite(await executor2.extractSite(input), input));
|
|
1125
1355
|
server2.registerTool("youtube_harvest", {
|
|
@@ -1159,6 +1389,7 @@ function buildPaaExtractorMcpServer(executor2, options = {}) {
|
|
|
1159
1389
|
title: "Google Maps Business Profile Details",
|
|
1160
1390
|
description: withReportNote('Extract Google Maps business intelligence for one known/named business: rating, review count, category, address, phone, website, hours, booking URL, review histogram, review topics, about attributes, entity IDs, and optional review cards. Do not use this for category searches, local market prospect lists, or requests for multiple GMB/GBP profiles; use maps_search first for those. Split business name from location (e.g. "Elite Roofing Denver CO" => businessName "Elite Roofing", location "Denver, CO"). Pass includeReviews true when the user asks for reviews/customer pain.'),
|
|
1161
1391
|
inputSchema: MapsPlaceIntelInputSchema,
|
|
1392
|
+
outputSchema: MapsPlaceIntelOutputSchema,
|
|
1162
1393
|
annotations: liveWebToolAnnotations("Google Maps Business Profile Details")
|
|
1163
1394
|
}, async (input) => formatMapsPlaceIntel(await executor2.mapsPlaceIntel(input), input));
|
|
1164
1395
|
server2.registerTool("maps_search", {
|
|
@@ -1172,6 +1403,7 @@ function buildPaaExtractorMcpServer(executor2, options = {}) {
|
|
|
1172
1403
|
title: "MCP Scraper Credits & Costs",
|
|
1173
1404
|
description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",
|
|
1174
1405
|
inputSchema: CreditsInfoInputSchema,
|
|
1406
|
+
outputSchema: CreditsInfoOutputSchema,
|
|
1175
1407
|
annotations: {
|
|
1176
1408
|
title: "MCP Scraper Credits & Costs",
|
|
1177
1409
|
readOnlyHint: true,
|