mcp-scraper 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -2
- package/dist/bin/api-server.cjs +957 -243
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/mcp-stdio-server.cjs +540 -158
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +2 -1
- package/dist/bin/mcp-stdio-server.js.map +1 -1
- package/dist/bin/paa-harvest.cjs +36 -5
- package/dist/bin/paa-harvest.cjs.map +1 -1
- package/dist/bin/paa-harvest.js +5 -3
- package/dist/bin/paa-harvest.js.map +1 -1
- package/dist/{chunk-6TWZS2FQ.js → chunk-RE6HCRYC.js} +543 -159
- package/dist/chunk-RE6HCRYC.js.map +1 -0
- package/dist/{chunk-W4P2U5VF.js → chunk-TM22BLWP.js} +46 -34
- package/dist/chunk-TM22BLWP.js.map +1 -0
- package/dist/{chunk-7HB7NDOY.js → chunk-ZK456YXN.js} +12 -2
- package/dist/chunk-ZK456YXN.js.map +1 -0
- package/dist/chunk-ZMOWIBMK.js +36 -0
- package/dist/chunk-ZMOWIBMK.js.map +1 -0
- package/dist/index.cjs +34 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/{server-2Y27U4TO.js → server-QXVVTKJP.js} +311 -48
- package/dist/server-QXVVTKJP.js.map +1 -0
- package/dist/{worker-UT4ZQU2T.js → worker-AUCXFHEL.js} +6 -4
- package/dist/worker-AUCXFHEL.js.map +1 -0
- package/docs/adr/0001-in-page-graphql-interception-for-anti-bot-scraping.md +58 -0
- package/docs/adr/README.md +11 -0
- package/docs/mcp-tool-quality-spec.md +238 -0
- package/package.json +5 -4
- package/dist/chunk-6TWZS2FQ.js.map +0 -1
- package/dist/chunk-7HB7NDOY.js.map +0 -1
- package/dist/chunk-W4P2U5VF.js.map +0 -1
- package/dist/server-2Y27U4TO.js.map +0 -1
- package/dist/worker-UT4ZQU2T.js.map +0 -1
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import {
|
|
2
|
+
sanitizeVendorName
|
|
3
|
+
} from "./chunk-ZMOWIBMK.js";
|
|
4
|
+
|
|
1
5
|
// src/harvest-timeout.ts
|
|
2
6
|
var VERCEL_FUNCTION_MAX_MS = 3e5;
|
|
3
7
|
var CLIENT_OVER_SERVER_MARGIN_MS = 15e3;
|
|
@@ -15,6 +19,9 @@ function harvestTimeoutBudget(maxQuestions, serpOnly = false) {
|
|
|
15
19
|
// src/mcp/paa-mcp-server.ts
|
|
16
20
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
17
21
|
|
|
22
|
+
// src/version.ts
|
|
23
|
+
var PACKAGE_VERSION = "0.1.8";
|
|
24
|
+
|
|
18
25
|
// src/mcp/mcp-tool-schemas.ts
|
|
19
26
|
import { z } from "zod";
|
|
20
27
|
var HarvestPaaInputSchema = {
|
|
@@ -77,6 +84,207 @@ var MapsPlaceIntelInputSchema = {
|
|
|
77
84
|
includeReviews: z.boolean().default(false).describe("Whether to fetch individual review cards"),
|
|
78
85
|
maxReviews: z.number().int().min(1).max(500).default(50).describe("Max review cards to return (requires includeReviews: true)")
|
|
79
86
|
};
|
|
87
|
+
var MapsSearchInputSchema = {
|
|
88
|
+
query: z.string().min(1).describe('Business category, niche, keyword, or search term. If the user says "roofers in Denver CO", use query="roofers" and location="Denver, CO". Do not put the location here when it can be separated.'),
|
|
89
|
+
location: z.string().optional().describe('City, region, country, or service area for the Maps search, e.g. "Denver, CO". Infer from the user request when present.'),
|
|
90
|
+
gl: z.string().length(2).default("us").describe("Google country code inferred from location."),
|
|
91
|
+
hl: z.string().length(2).default("en").describe("Language inferred from user request."),
|
|
92
|
+
maxResults: z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more.")
|
|
93
|
+
};
|
|
94
|
+
var NullableString = z.string().nullable();
|
|
95
|
+
var MapsSearchOutputSchema = {
|
|
96
|
+
query: z.string(),
|
|
97
|
+
location: z.string().nullable(),
|
|
98
|
+
searchQuery: z.string(),
|
|
99
|
+
searchUrl: z.string().url(),
|
|
100
|
+
extractedAt: z.string(),
|
|
101
|
+
requestedMaxResults: z.number().int().min(1).max(50),
|
|
102
|
+
resultCount: z.number().int().min(0).max(50),
|
|
103
|
+
results: z.array(z.object({
|
|
104
|
+
position: z.number().int().min(1),
|
|
105
|
+
name: z.string(),
|
|
106
|
+
placeUrl: z.string().url(),
|
|
107
|
+
cid: NullableString,
|
|
108
|
+
cidDecimal: NullableString,
|
|
109
|
+
rating: NullableString,
|
|
110
|
+
reviewCount: NullableString,
|
|
111
|
+
category: NullableString,
|
|
112
|
+
address: NullableString,
|
|
113
|
+
websiteUrl: NullableString,
|
|
114
|
+
directionsUrl: NullableString,
|
|
115
|
+
metadata: z.array(z.string())
|
|
116
|
+
})),
|
|
117
|
+
durationMs: z.number().int().min(0)
|
|
118
|
+
};
|
|
119
|
+
var OrganicResultOutput = z.object({
|
|
120
|
+
position: z.number().int(),
|
|
121
|
+
title: z.string(),
|
|
122
|
+
url: z.string(),
|
|
123
|
+
domain: z.string(),
|
|
124
|
+
snippet: NullableString
|
|
125
|
+
});
|
|
126
|
+
var AiOverviewOutput = z.object({
|
|
127
|
+
detected: z.boolean(),
|
|
128
|
+
text: NullableString
|
|
129
|
+
}).nullable();
|
|
130
|
+
var EntityIdsOutput = z.object({
|
|
131
|
+
kgIds: z.array(z.string()),
|
|
132
|
+
cids: z.array(z.string()),
|
|
133
|
+
gcids: z.array(z.string())
|
|
134
|
+
}).nullable();
|
|
135
|
+
var HarvestPaaOutputSchema = {
|
|
136
|
+
query: z.string(),
|
|
137
|
+
location: NullableString,
|
|
138
|
+
questionCount: z.number().int().min(0),
|
|
139
|
+
completionStatus: NullableString,
|
|
140
|
+
questions: z.array(z.object({
|
|
141
|
+
question: z.string(),
|
|
142
|
+
answer: NullableString,
|
|
143
|
+
sourceTitle: NullableString,
|
|
144
|
+
sourceSite: NullableString
|
|
145
|
+
})),
|
|
146
|
+
organicResults: z.array(OrganicResultOutput),
|
|
147
|
+
aiOverview: AiOverviewOutput,
|
|
148
|
+
entityIds: EntityIdsOutput,
|
|
149
|
+
durationMs: z.number().min(0).nullable()
|
|
150
|
+
};
|
|
151
|
+
var SearchSerpOutputSchema = {
|
|
152
|
+
query: z.string(),
|
|
153
|
+
location: NullableString,
|
|
154
|
+
organicResults: z.array(OrganicResultOutput),
|
|
155
|
+
localPack: z.array(z.object({
|
|
156
|
+
position: z.number().int(),
|
|
157
|
+
name: z.string(),
|
|
158
|
+
rating: NullableString,
|
|
159
|
+
reviewCount: NullableString,
|
|
160
|
+
websiteUrl: NullableString
|
|
161
|
+
})),
|
|
162
|
+
aiOverview: AiOverviewOutput,
|
|
163
|
+
entityIds: EntityIdsOutput
|
|
164
|
+
};
|
|
165
|
+
var ExtractUrlOutputSchema = {
|
|
166
|
+
url: z.string(),
|
|
167
|
+
title: NullableString,
|
|
168
|
+
headings: z.array(z.object({
|
|
169
|
+
level: z.number().int(),
|
|
170
|
+
text: z.string()
|
|
171
|
+
})),
|
|
172
|
+
schemaBlockCount: z.number().int().min(0),
|
|
173
|
+
entityName: NullableString,
|
|
174
|
+
entityTypes: z.array(z.string()),
|
|
175
|
+
napScore: z.number().nullable(),
|
|
176
|
+
missingSchemaFields: z.array(z.string()),
|
|
177
|
+
screenshotSaved: NullableString
|
|
178
|
+
};
|
|
179
|
+
var ExtractSiteOutputSchema = {
|
|
180
|
+
url: z.string(),
|
|
181
|
+
pageCount: z.number().int().min(0),
|
|
182
|
+
pages: z.array(z.object({
|
|
183
|
+
url: z.string(),
|
|
184
|
+
title: NullableString,
|
|
185
|
+
schemaTypes: z.array(z.string())
|
|
186
|
+
})),
|
|
187
|
+
durationMs: z.number().min(0)
|
|
188
|
+
};
|
|
189
|
+
var MapsPlaceIntelOutputSchema = {
|
|
190
|
+
name: z.string(),
|
|
191
|
+
rating: NullableString,
|
|
192
|
+
reviewCount: NullableString,
|
|
193
|
+
category: NullableString,
|
|
194
|
+
address: NullableString,
|
|
195
|
+
phone: NullableString,
|
|
196
|
+
website: NullableString,
|
|
197
|
+
hoursSummary: NullableString,
|
|
198
|
+
bookingUrl: NullableString,
|
|
199
|
+
kgmid: NullableString,
|
|
200
|
+
cidDecimal: NullableString,
|
|
201
|
+
cidUrl: NullableString,
|
|
202
|
+
lat: z.number().nullable(),
|
|
203
|
+
lng: z.number().nullable(),
|
|
204
|
+
reviewsStatus: z.string(),
|
|
205
|
+
reviewsCollected: z.number().int().min(0),
|
|
206
|
+
reviewTopics: z.array(z.object({
|
|
207
|
+
label: z.string(),
|
|
208
|
+
count: z.string()
|
|
209
|
+
}))
|
|
210
|
+
};
|
|
211
|
+
var CreditsInfoOutputSchema = {
|
|
212
|
+
balanceCredits: z.number().nullable(),
|
|
213
|
+
matchedCost: z.object({
|
|
214
|
+
label: z.string(),
|
|
215
|
+
credits: z.number(),
|
|
216
|
+
unit: z.string(),
|
|
217
|
+
notes: NullableString
|
|
218
|
+
}).nullable(),
|
|
219
|
+
costs: z.array(z.object({
|
|
220
|
+
key: z.string(),
|
|
221
|
+
label: z.string(),
|
|
222
|
+
credits: z.number(),
|
|
223
|
+
unit: z.string(),
|
|
224
|
+
notes: NullableString
|
|
225
|
+
})),
|
|
226
|
+
ledger: z.array(z.object({
|
|
227
|
+
createdAt: z.string(),
|
|
228
|
+
operation: z.string(),
|
|
229
|
+
credits: z.number(),
|
|
230
|
+
description: NullableString
|
|
231
|
+
}))
|
|
232
|
+
};
|
|
233
|
+
var MapSiteUrlsOutputSchema = {
|
|
234
|
+
startUrl: z.string(),
|
|
235
|
+
totalFound: z.number().int().min(0),
|
|
236
|
+
truncated: z.boolean(),
|
|
237
|
+
okCount: z.number().int().min(0),
|
|
238
|
+
redirectCount: z.number().int().min(0),
|
|
239
|
+
brokenCount: z.number().int().min(0),
|
|
240
|
+
urls: z.array(z.object({
|
|
241
|
+
url: z.string(),
|
|
242
|
+
status: z.number().int().nullable()
|
|
243
|
+
})),
|
|
244
|
+
durationMs: z.number().min(0)
|
|
245
|
+
};
|
|
246
|
+
var YoutubeHarvestOutputSchema = {
|
|
247
|
+
mode: z.string(),
|
|
248
|
+
videoCount: z.number().int().min(0),
|
|
249
|
+
channel: z.object({
|
|
250
|
+
title: NullableString,
|
|
251
|
+
subscriberCount: NullableString
|
|
252
|
+
}).nullable(),
|
|
253
|
+
videos: z.array(z.object({
|
|
254
|
+
videoId: z.string(),
|
|
255
|
+
title: z.string(),
|
|
256
|
+
channelName: NullableString,
|
|
257
|
+
views: NullableString,
|
|
258
|
+
duration: NullableString,
|
|
259
|
+
url: NullableString
|
|
260
|
+
}))
|
|
261
|
+
};
|
|
262
|
+
var FacebookAdSearchOutputSchema = {
|
|
263
|
+
query: z.string(),
|
|
264
|
+
advertiserCount: z.number().int().min(0),
|
|
265
|
+
advertisers: z.array(z.object({
|
|
266
|
+
name: NullableString,
|
|
267
|
+
adCount: z.number().int().nullable(),
|
|
268
|
+
libraryId: NullableString
|
|
269
|
+
}))
|
|
270
|
+
};
|
|
271
|
+
var FacebookPageIntelOutputSchema = {
|
|
272
|
+
advertiserName: NullableString,
|
|
273
|
+
totalAds: z.number().int().min(0),
|
|
274
|
+
activeCount: z.number().int().min(0),
|
|
275
|
+
videoCount: z.number().int().min(0),
|
|
276
|
+
imageCount: z.number().int().min(0),
|
|
277
|
+
ads: z.array(z.object({
|
|
278
|
+
libraryId: NullableString,
|
|
279
|
+
status: NullableString,
|
|
280
|
+
creativeType: NullableString,
|
|
281
|
+
headline: NullableString,
|
|
282
|
+
cta: NullableString,
|
|
283
|
+
startDate: NullableString,
|
|
284
|
+
videoUrl: NullableString,
|
|
285
|
+
variations: z.number().int().nullable()
|
|
286
|
+
}))
|
|
287
|
+
};
|
|
80
288
|
var CreditsInfoInputSchema = {
|
|
81
289
|
item: z.string().optional().describe('Optional tool, action, or feature to look up, e.g. "maps reviews", "extract_url", or "YouTube transcription"'),
|
|
82
290
|
includeLedger: z.boolean().default(false).describe("Whether to include recent credit ledger entries")
|
|
@@ -126,6 +334,15 @@ var CaptureSerpPageSnapshotsInputSchema = {
|
|
|
126
334
|
import { mkdirSync, writeFileSync } from "fs";
|
|
127
335
|
import { homedir } from "os";
|
|
128
336
|
import { join } from "path";
|
|
337
|
+
var reportSavingEnabled = true;
|
|
338
|
+
function configureReportSaving(enabled) {
|
|
339
|
+
reportSavingEnabled = enabled;
|
|
340
|
+
}
|
|
341
|
+
function sanitizeVendorText(text) {
|
|
342
|
+
return sanitizeVendorName(
|
|
343
|
+
text.replace(/kernel_session_id/gi, "browser_session_id").replace(/kernel_delete_succeeded/gi, "session_cleanup_succeeded").replace(/kernel_delete_started/gi, "session_cleanup_started").replace(/kernel_delete_error/gi, "session_cleanup_error").replace(/kernelSessionId/g, "browserSessionId").replace(/kernelProxyId/g, "proxyId").replace(/KERNEL_API_KEY/g, "BROWSER_SERVICE_API_KEY").replace(/"kernel"\s*:/gi, '"browserRuntime":')
|
|
344
|
+
);
|
|
345
|
+
}
|
|
129
346
|
function slugifyReportName(input) {
|
|
130
347
|
return input.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80) || "mcp-scraper-report";
|
|
131
348
|
}
|
|
@@ -137,7 +354,7 @@ function outputBaseDir() {
|
|
|
137
354
|
return process.env.MCP_SCRAPER_OUTPUT_DIR?.trim() || join(homedir(), "Downloads", "mcp-scraper");
|
|
138
355
|
}
|
|
139
356
|
function saveFullReport(full) {
|
|
140
|
-
if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
357
|
+
if (!reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
141
358
|
const outDir = outputBaseDir();
|
|
142
359
|
try {
|
|
143
360
|
mkdirSync(outDir, { recursive: true });
|
|
@@ -150,7 +367,7 @@ function saveFullReport(full) {
|
|
|
150
367
|
}
|
|
151
368
|
}
|
|
152
369
|
function persistScreenshotLocally(base64, url) {
|
|
153
|
-
if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
370
|
+
if (!reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
154
371
|
try {
|
|
155
372
|
const dir = join(outputBaseDir(), "screenshots");
|
|
156
373
|
mkdirSync(dir, { recursive: true });
|
|
@@ -190,11 +407,11 @@ function parseData(raw) {
|
|
|
190
407
|
const text = first?.type === "text" ? first.text : "";
|
|
191
408
|
try {
|
|
192
409
|
const parsed = JSON.parse(text || "{}");
|
|
193
|
-
if (raw.isError || parsed.error || parsed.error_code) return { error: formatStructuredError(parsed, text) };
|
|
410
|
+
if (raw.isError || parsed.error || parsed.error_code) return { error: sanitizeVendorText(formatStructuredError(parsed, text)) };
|
|
194
411
|
const data = parsed.result ?? parsed;
|
|
195
412
|
return { data };
|
|
196
413
|
} catch {
|
|
197
|
-
if (raw.isError) return { error: text || "Tool error" };
|
|
414
|
+
if (raw.isError) return { error: sanitizeVendorText(text || "Tool error") };
|
|
198
415
|
return { error: "Failed to parse tool response" };
|
|
199
416
|
}
|
|
200
417
|
}
|
|
@@ -208,15 +425,6 @@ function entityIdsSection(ids) {
|
|
|
208
425
|
## Entity IDs
|
|
209
426
|
${lines.join("\n")}` : "";
|
|
210
427
|
}
|
|
211
|
-
function entityIdsSummaryLine(ids) {
|
|
212
|
-
if (!ids) return "";
|
|
213
|
-
const parts = [];
|
|
214
|
-
if (ids.kgIds?.length) parts.push(`KG MID: ${ids.kgIds[0]}`);
|
|
215
|
-
if (ids.cids?.length) parts.push(`CID: ${ids.cids[0]}`);
|
|
216
|
-
if (ids.gcids?.length) parts.push(`GCID: ${ids.gcids[0]}`);
|
|
217
|
-
return parts.length ? `
|
|
218
|
-
**Entity IDs:** ${parts.join(" \xB7 ")}` : "";
|
|
219
|
-
}
|
|
220
428
|
function truncate(s, max) {
|
|
221
429
|
if (!s) return "";
|
|
222
430
|
return s.length > max ? s.slice(0, max) + "\u2026" : s;
|
|
@@ -228,7 +436,7 @@ function debugSection(debug) {
|
|
|
228
436
|
if (!debug || typeof debug !== "object") return "";
|
|
229
437
|
const request = debug.request ?? {};
|
|
230
438
|
const browser = debug.browser ?? {};
|
|
231
|
-
const kernel = browser.kernel ?? {};
|
|
439
|
+
const kernel = browser.browserRuntime ?? browser.kernel ?? {};
|
|
232
440
|
const network = browser.networkLocation ?? {};
|
|
233
441
|
const nav = browser.serpNavigation ?? {};
|
|
234
442
|
const proxyResolution = kernel.proxyResolution ?? {};
|
|
@@ -246,7 +454,7 @@ function debugSection(debug) {
|
|
|
246
454
|
if (locationEvidence) {
|
|
247
455
|
lines.push(`- Location evidence: ${locationEvidence.status}${locationEvidence.expected ? ` \xB7 expected ${locationEvidence.expected.city}${locationEvidence.expected.regionCode ? `, ${locationEvidence.expected.regionCode}` : ""}` : ""}${candidates ? ` \xB7 candidates ${candidates}` : ""}`);
|
|
248
456
|
}
|
|
249
|
-
return lines.join("\n");
|
|
457
|
+
return sanitizeVendorText(lines.join("\n"));
|
|
250
458
|
}
|
|
251
459
|
function errorAttemptsSection(body) {
|
|
252
460
|
const attempts = Array.isArray(body.attempts) ? body.attempts : [];
|
|
@@ -254,12 +462,14 @@ function errorAttemptsSection(body) {
|
|
|
254
462
|
const lines = attempts.slice(0, 5).map((attempt) => {
|
|
255
463
|
const debug = attempt.debug ?? {};
|
|
256
464
|
const browser = debug.browser ?? {};
|
|
257
|
-
const kernel = browser.kernel ?? {};
|
|
465
|
+
const kernel = browser.browserRuntime ?? browser.kernel ?? {};
|
|
258
466
|
const proxyResolution = kernel.proxyResolution ?? {};
|
|
259
467
|
const network = browser.networkLocation ?? {};
|
|
260
468
|
const nav = browser.serpNavigation ?? {};
|
|
261
469
|
const geo = [network.ip, network.city, network.region].filter(Boolean).join(" / ") || "geo unknown";
|
|
262
|
-
|
|
470
|
+
const sessionId = attempt.browser_session_id ?? attempt.kernel_session_id ?? kernel.sessionId ?? "unknown";
|
|
471
|
+
const cleanupSucceeded = attempt.session_cleanup_succeeded ?? attempt.kernel_delete_succeeded;
|
|
472
|
+
return `- Attempt ${attempt.attempt_number ?? "?"}: ${attempt.outcome ?? attempt.status ?? "unknown"} \xB7 session ${sessionId} \xB7 proxy ${debug.request?.proxyMode ?? kernel.proxyMode ?? "unknown"}${proxyResolution.source ? `/${proxyResolution.source}` : ""} \xB7 ${geo} \xB7 CAPTCHA ${nav.captchaDetected === true ? "yes" : nav.captchaDetected === false ? "no" : "unknown"} \xB7 cleanup ${cleanupSucceeded === true ? "yes" : cleanupSucceeded === false ? "no" : "unknown"}`;
|
|
263
473
|
});
|
|
264
474
|
return `
|
|
265
475
|
|
|
@@ -300,27 +510,37 @@ ${serpRows}` : "";
|
|
|
300
510
|
const tips = `
|
|
301
511
|
---
|
|
302
512
|
\u{1F4A1} **Tips**
|
|
303
|
-
- Max questions: \`maxQuestions:
|
|
513
|
+
- Max questions: \`maxQuestions: 200\` (current: ${input.maxQuestions ?? 30})
|
|
304
514
|
- Organic results only: use \`search_serp\`
|
|
305
515
|
- Dig into a result: use \`extract_url\` on any organic URL`;
|
|
306
516
|
const full = `# PAA Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
|
|
307
517
|
|
|
308
518
|
${paaTable}${serpTable}${entityIdsSection(entityIds)}${aiSection}${statsLine}${debugSection(diagnostics?.debug)}${tips}`;
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
519
|
+
return {
|
|
520
|
+
...oneBlock(full),
|
|
521
|
+
structuredContent: {
|
|
522
|
+
query: input.query,
|
|
523
|
+
location: input.location ?? null,
|
|
524
|
+
questionCount: flat.length,
|
|
525
|
+
completionStatus: diagnostics?.completionStatus ?? null,
|
|
526
|
+
questions: flat.map((r) => ({
|
|
527
|
+
question: String(r.question ?? ""),
|
|
528
|
+
answer: r.answer ?? null,
|
|
529
|
+
sourceTitle: r.source_title ?? null,
|
|
530
|
+
sourceSite: r.source_site ?? null
|
|
531
|
+
})),
|
|
532
|
+
organicResults: organic.map((r) => ({
|
|
533
|
+
position: Number(r.position) || 0,
|
|
534
|
+
title: String(r.title ?? ""),
|
|
535
|
+
url: String(r.url ?? ""),
|
|
536
|
+
domain: String(r.domain ?? ""),
|
|
537
|
+
snippet: r.snippet ?? null
|
|
538
|
+
})),
|
|
539
|
+
aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
|
|
540
|
+
entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null,
|
|
541
|
+
durationMs: durationMs ?? null
|
|
542
|
+
}
|
|
543
|
+
};
|
|
324
544
|
}
|
|
325
545
|
function formatSearchSerp(raw, input) {
|
|
326
546
|
const parsed = parseData(raw);
|
|
@@ -358,19 +578,29 @@ ${localRows}` : "";
|
|
|
358
578
|
const full = `# SERP Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
|
|
359
579
|
|
|
360
580
|
${serpTable}${localSection}${entityIdsSection(entityIds)}${aiSection}${debugSection(diagnostics?.debug)}${tips}`;
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
581
|
+
return {
|
|
582
|
+
...oneBlock(full),
|
|
583
|
+
structuredContent: {
|
|
584
|
+
query: input.query,
|
|
585
|
+
location: input.location ?? null,
|
|
586
|
+
organicResults: organic.map((r) => ({
|
|
587
|
+
position: Number(r.position) || 0,
|
|
588
|
+
title: String(r.title ?? ""),
|
|
589
|
+
url: String(r.url ?? ""),
|
|
590
|
+
domain: String(r.domain ?? ""),
|
|
591
|
+
snippet: r.snippet ?? null
|
|
592
|
+
})),
|
|
593
|
+
localPack: localPack.map((b) => ({
|
|
594
|
+
position: Number(b.position) || 0,
|
|
595
|
+
name: String(b.name ?? ""),
|
|
596
|
+
rating: b.rating ?? null,
|
|
597
|
+
reviewCount: b.reviewCount ?? null,
|
|
598
|
+
websiteUrl: b.websiteUrl ?? null
|
|
599
|
+
})),
|
|
600
|
+
aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
|
|
601
|
+
entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null
|
|
602
|
+
}
|
|
603
|
+
};
|
|
374
604
|
}
|
|
375
605
|
function formatExtractUrl(raw, input) {
|
|
376
606
|
const parsed = parseData(raw);
|
|
@@ -439,15 +669,27 @@ ${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
|
|
|
439
669
|
**${title}**
|
|
440
670
|
${headingSection}${kpoSection}${brandingSection}${bodySection}${screenshotSection}${mediaSection}${tips}`;
|
|
441
671
|
const textResult = oneBlock(full);
|
|
672
|
+
const structuredContent = {
|
|
673
|
+
url,
|
|
674
|
+
title: d.title ?? null,
|
|
675
|
+
headings: headings.map((h) => ({ level: Number(h.level) || 0, text: String(h.text ?? "") })),
|
|
676
|
+
schemaBlockCount: schemaCount,
|
|
677
|
+
entityName: kpo?.entityName ?? null,
|
|
678
|
+
entityTypes: kpo?.type ?? [],
|
|
679
|
+
napScore: kpo?.napScore ?? null,
|
|
680
|
+
missingSchemaFields: kpo?.missingFields ?? [],
|
|
681
|
+
screenshotSaved: screenshotPath ?? null
|
|
682
|
+
};
|
|
442
683
|
if (screenshotMeta?.base64) {
|
|
443
684
|
return {
|
|
444
685
|
content: [
|
|
445
686
|
...textResult.content,
|
|
446
687
|
{ type: "image", data: screenshotMeta.base64, mimeType: "image/png" }
|
|
447
|
-
]
|
|
688
|
+
],
|
|
689
|
+
structuredContent
|
|
448
690
|
};
|
|
449
691
|
}
|
|
450
|
-
return textResult;
|
|
692
|
+
return { ...textResult, structuredContent };
|
|
451
693
|
}
|
|
452
694
|
function formatMapSiteUrls(raw, input) {
|
|
453
695
|
const parsed = parseData(raw);
|
|
@@ -480,15 +722,19 @@ ${broken.map((u) => `- ${u.url} (${u.status})`).join("\n")}` : "",
|
|
|
480
722
|
- Extract content from all pages: use \`extract_site\`
|
|
481
723
|
- Scrape a single page: use \`extract_url\``
|
|
482
724
|
].filter(Boolean).join("\n");
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
725
|
+
return {
|
|
726
|
+
...oneBlock(full),
|
|
727
|
+
structuredContent: {
|
|
728
|
+
startUrl: d.startUrl ?? input.url,
|
|
729
|
+
totalFound: d.totalFound ?? urls.length,
|
|
730
|
+
truncated: d.truncated === true,
|
|
731
|
+
okCount: ok.length,
|
|
732
|
+
redirectCount: redirects.length,
|
|
733
|
+
brokenCount: broken.length,
|
|
734
|
+
urls: urls.map((u) => ({ url: u.url, status: u.status ?? null })),
|
|
735
|
+
durationMs: d.durationMs ?? 0
|
|
736
|
+
}
|
|
737
|
+
};
|
|
492
738
|
}
|
|
493
739
|
function formatExtractSite(raw, input) {
|
|
494
740
|
const parsed = parseData(raw);
|
|
@@ -513,14 +759,19 @@ ${pageRows}`,
|
|
|
513
759
|
- Map URLs first: use \`map_site_urls\`
|
|
514
760
|
- Inspect a single page: use \`extract_url\``
|
|
515
761
|
].join("\n");
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
762
|
+
return {
|
|
763
|
+
...oneBlock(full),
|
|
764
|
+
structuredContent: {
|
|
765
|
+
url: input.url,
|
|
766
|
+
pageCount: pages.length,
|
|
767
|
+
pages: pages.map((p) => ({
|
|
768
|
+
url: String(p.url ?? ""),
|
|
769
|
+
title: p.title ?? null,
|
|
770
|
+
schemaTypes: p.kpo?.type ?? []
|
|
771
|
+
})),
|
|
772
|
+
durationMs: d.durationMs ?? 0
|
|
773
|
+
}
|
|
774
|
+
};
|
|
524
775
|
}
|
|
525
776
|
function formatYoutubeHarvest(raw, input) {
|
|
526
777
|
const parsed = parseData(raw);
|
|
@@ -550,16 +801,22 @@ ${videoRows}`,
|
|
|
550
801
|
- Transcribe a video: use \`youtube_transcribe\` with the \`videoId\` above
|
|
551
802
|
- Switch mode: \`mode: "channel"\` with \`channelHandle\` or \`mode: "search"\` with \`query\``
|
|
552
803
|
].filter(Boolean).join("\n");
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
804
|
+
return {
|
|
805
|
+
...oneBlock(full),
|
|
806
|
+
structuredContent: {
|
|
807
|
+
mode: input.mode,
|
|
808
|
+
videoCount: videos.length,
|
|
809
|
+
channel: d.channelMeta ? { title: d.channelMeta.title ?? null, subscriberCount: d.channelMeta.subscriberCount ?? null } : null,
|
|
810
|
+
videos: videos.map((v) => ({
|
|
811
|
+
videoId: String(v.videoId ?? ""),
|
|
812
|
+
title: String(v.title ?? ""),
|
|
813
|
+
channelName: v.channelName ?? null,
|
|
814
|
+
views: v.views ?? null,
|
|
815
|
+
duration: v.duration ?? null,
|
|
816
|
+
url: v.url ?? null
|
|
817
|
+
}))
|
|
818
|
+
}
|
|
819
|
+
};
|
|
563
820
|
}
|
|
564
821
|
function formatYoutubeTranscribe(raw, input) {
|
|
565
822
|
const parsed = parseData(raw);
|
|
@@ -589,14 +846,6 @@ ${chunkRows}` : "",
|
|
|
589
846
|
---
|
|
590
847
|
\u{1F4A1} Harvest more from this channel: use \`youtube_harvest\` with \`mode: "channel"\``
|
|
591
848
|
].filter(Boolean).join("\n");
|
|
592
|
-
const summary = [
|
|
593
|
-
`**YouTube Transcript: \`${input.videoId}\`** \u2014 ${text.split(" ").length} words \xB7 ${durSec}s`,
|
|
594
|
-
`
|
|
595
|
-
**Preview:**
|
|
596
|
-
> ${truncate(text, 300)}`,
|
|
597
|
-
`
|
|
598
|
-
\u{1F4A1} Full transcript in artifact above`
|
|
599
|
-
].join("\n");
|
|
600
849
|
return oneBlock(full);
|
|
601
850
|
}
|
|
602
851
|
function formatFacebookPageIntel(raw, input) {
|
|
@@ -625,19 +874,26 @@ ${adBlocks}`,
|
|
|
625
874
|
- Transcribe video ads: use \`facebook_ad_transcribe\` with the \`videoUrl\` above
|
|
626
875
|
- Find other advertisers: use \`facebook_ad_search\``
|
|
627
876
|
].filter(Boolean).join("\n");
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
877
|
+
return {
|
|
878
|
+
...oneBlock(full),
|
|
879
|
+
structuredContent: {
|
|
880
|
+
advertiserName: d.advertiserName ?? null,
|
|
881
|
+
totalAds: s.totalAds ?? 0,
|
|
882
|
+
activeCount: s.activeCount ?? 0,
|
|
883
|
+
videoCount: s.videoCount ?? 0,
|
|
884
|
+
imageCount: s.imageCount ?? 0,
|
|
885
|
+
ads: ads.map((ad) => ({
|
|
886
|
+
libraryId: ad.libraryId ?? null,
|
|
887
|
+
status: ad.status ?? null,
|
|
888
|
+
creativeType: ad.creativeType ?? null,
|
|
889
|
+
headline: ad.headline ?? null,
|
|
890
|
+
cta: ad.cta ?? null,
|
|
891
|
+
startDate: ad.startDate ?? null,
|
|
892
|
+
videoUrl: ad.videoUrl ?? null,
|
|
893
|
+
variations: typeof ad.variations === "number" ? ad.variations : null
|
|
894
|
+
}))
|
|
895
|
+
}
|
|
896
|
+
};
|
|
641
897
|
}
|
|
642
898
|
function formatFacebookAdSearch(raw, input) {
|
|
643
899
|
const parsed = parseData(raw);
|
|
@@ -661,15 +917,18 @@ ${rows}`,
|
|
|
661
917
|
- Scan all ads: use \`facebook_page_intel\` with \`libraryId\`
|
|
662
918
|
- Or pass the advertiser name as \`query\` in \`facebook_page_intel\``
|
|
663
919
|
].join("\n");
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
920
|
+
return {
|
|
921
|
+
...oneBlock(full),
|
|
922
|
+
structuredContent: {
|
|
923
|
+
query: input.query,
|
|
924
|
+
advertiserCount: advertisers.length,
|
|
925
|
+
advertisers: advertisers.map((a) => ({
|
|
926
|
+
name: a.pageName ?? a.name ?? null,
|
|
927
|
+
adCount: typeof a.adCount === "number" ? a.adCount : null,
|
|
928
|
+
libraryId: a.sampleLibraryId ?? a.libraryId ?? null
|
|
929
|
+
}))
|
|
930
|
+
}
|
|
931
|
+
};
|
|
673
932
|
}
|
|
674
933
|
function formatCreditsInfo(raw, input) {
|
|
675
934
|
const parsed = parseData(raw);
|
|
@@ -709,14 +968,75 @@ ${costRows}` : "",
|
|
|
709
968
|
|------|-----------|---------|-------------|
|
|
710
969
|
${ledgerRows}` : ""
|
|
711
970
|
].filter(Boolean).join("\n");
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
971
|
+
return {
|
|
972
|
+
...oneBlock(full),
|
|
973
|
+
structuredContent: {
|
|
974
|
+
balanceCredits: typeof balance === "number" ? balance : null,
|
|
975
|
+
matchedCost: matched ? { label: matched.label, credits: matched.credits, unit: matched.unit, notes: matched.notes ?? null } : null,
|
|
976
|
+
costs: costs.map((c) => ({
|
|
977
|
+
key: c.key,
|
|
978
|
+
label: c.label,
|
|
979
|
+
credits: c.credits,
|
|
980
|
+
unit: c.unit,
|
|
981
|
+
notes: c.notes ?? null
|
|
982
|
+
})),
|
|
983
|
+
ledger: ledger.map((row) => ({
|
|
984
|
+
createdAt: String(row.created_at ?? ""),
|
|
985
|
+
operation: String(row.operation ?? ""),
|
|
986
|
+
credits: row.amount_mc / 1e3,
|
|
987
|
+
description: row.description ?? null
|
|
988
|
+
}))
|
|
989
|
+
}
|
|
990
|
+
};
|
|
991
|
+
}
|
|
992
|
+
function formatMapsSearch(raw, input) {
|
|
993
|
+
const parsed = parseData(raw);
|
|
994
|
+
if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
|
|
995
|
+
const d = parsed.data;
|
|
996
|
+
const results = d.results ?? [];
|
|
997
|
+
const searchQuery = d.searchQuery ?? [input.query, input.location].filter(Boolean).join(" ");
|
|
998
|
+
const requestedMax = d.requestedMaxResults ?? input.maxResults ?? 10;
|
|
999
|
+
const durationMs = d.durationMs;
|
|
1000
|
+
const rows = results.map((r) => {
|
|
1001
|
+
const rating = [r.rating, r.reviewCount ? `(${r.reviewCount})` : null].filter(Boolean).join(" ");
|
|
1002
|
+
return `| ${r.position} | ${cell(r.name)} | ${cell(r.category)} | ${cell(rating)} | ${cell(r.address)} | ${r.cidDecimal ? `\`${r.cidDecimal}\`` : "\u2014"} | ${r.websiteUrl ? `[site](${r.websiteUrl})` : "\u2014"} | [maps](${r.placeUrl}) |`;
|
|
1003
|
+
}).join("\n");
|
|
1004
|
+
const metadataSection = results.length ? `
|
|
1005
|
+
## Candidate Metadata
|
|
1006
|
+
${results.map((r) => {
|
|
1007
|
+
const meta = r.metadata?.length ? r.metadata.slice(0, 8).map((m) => ` - ${m}`).join("\n") : " - none";
|
|
1008
|
+
return `### ${r.position}. ${r.name}
|
|
1009
|
+
${meta}`;
|
|
1010
|
+
}).join("\n\n")}` : "";
|
|
1011
|
+
const full = [
|
|
1012
|
+
`# Google Maps Search: "${searchQuery}"`,
|
|
1013
|
+
`**Returned:** ${results.length} profile candidate${results.length === 1 ? "" : "s"} \xB7 **Requested max:** ${requestedMax} \xB7 **Limit:** 50`,
|
|
1014
|
+
`
|
|
1015
|
+
## Results
|
|
1016
|
+
| # | Name | Category | Rating | Address | CID | Website | Maps |
|
|
1017
|
+
|---|------|----------|--------|---------|-----|---------|------|
|
|
1018
|
+
${rows}`,
|
|
1019
|
+
metadataSection,
|
|
1020
|
+
`
|
|
1021
|
+
---
|
|
1022
|
+
\u{1F4A1} **Next step:** use \`maps_place_intel\` with a selected business name and location to hydrate full hours, phone, review topics, and optional review cards.`,
|
|
1023
|
+
durationMs != null ? `
|
|
1024
|
+
*Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
|
|
718
1025
|
].filter(Boolean).join("\n");
|
|
719
|
-
return
|
|
1026
|
+
return {
|
|
1027
|
+
...oneBlock(full),
|
|
1028
|
+
structuredContent: {
|
|
1029
|
+
query: d.query,
|
|
1030
|
+
location: d.location ?? null,
|
|
1031
|
+
searchQuery: d.searchQuery,
|
|
1032
|
+
searchUrl: d.searchUrl,
|
|
1033
|
+
extractedAt: d.extractedAt,
|
|
1034
|
+
requestedMaxResults: requestedMax,
|
|
1035
|
+
resultCount: results.length,
|
|
1036
|
+
results,
|
|
1037
|
+
durationMs: durationMs ?? 0
|
|
1038
|
+
}
|
|
1039
|
+
};
|
|
720
1040
|
}
|
|
721
1041
|
function formatMapsPlaceIntel(raw, input) {
|
|
722
1042
|
const parsed = parseData(raw);
|
|
@@ -816,20 +1136,28 @@ ${entitySection}` : null,
|
|
|
816
1136
|
---
|
|
817
1137
|
*Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
|
|
818
1138
|
].filter(Boolean).join("\n");
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
1139
|
+
return {
|
|
1140
|
+
...oneBlock(full),
|
|
1141
|
+
structuredContent: {
|
|
1142
|
+
name,
|
|
1143
|
+
rating: rating ?? null,
|
|
1144
|
+
reviewCount: reviewCount ?? null,
|
|
1145
|
+
category: category ?? null,
|
|
1146
|
+
address: address ?? null,
|
|
1147
|
+
phone: phone ?? null,
|
|
1148
|
+
website: website ?? null,
|
|
1149
|
+
hoursSummary: hoursSummary ?? null,
|
|
1150
|
+
bookingUrl: bookingUrl ?? null,
|
|
1151
|
+
kgmid: kgmid ?? null,
|
|
1152
|
+
cidDecimal: cidDecimal ?? null,
|
|
1153
|
+
cidUrl: cidUrl ?? null,
|
|
1154
|
+
lat: lat ?? null,
|
|
1155
|
+
lng: lng ?? null,
|
|
1156
|
+
reviewsStatus,
|
|
1157
|
+
reviewsCollected: reviews.length,
|
|
1158
|
+
reviewTopics: topics.map((t) => ({ label: String(t.label ?? ""), count: String(t.count ?? "") }))
|
|
1159
|
+
}
|
|
1160
|
+
};
|
|
833
1161
|
}
|
|
834
1162
|
function formatFacebookAdTranscribe(raw, input) {
|
|
835
1163
|
const parsed = parseData(raw);
|
|
@@ -859,67 +1187,118 @@ ${chunkRows}` : "",
|
|
|
859
1187
|
---
|
|
860
1188
|
\u{1F4A1} Get more ads from this advertiser: use \`facebook_page_intel\``
|
|
861
1189
|
].filter(Boolean).join("\n");
|
|
862
|
-
const summary = [
|
|
863
|
-
`**Facebook Ad Transcript** \u2014 ${text.split(" ").length} words \xB7 ${durSec}s`,
|
|
864
|
-
`
|
|
865
|
-
**Preview:**
|
|
866
|
-
> ${truncate(text, 300)}`,
|
|
867
|
-
`
|
|
868
|
-
\u{1F4A1} Full transcript in artifact above`
|
|
869
|
-
].join("\n");
|
|
870
1190
|
return oneBlock(full);
|
|
871
1191
|
}
|
|
872
1192
|
|
|
873
1193
|
// src/mcp/paa-mcp-server.ts
|
|
874
|
-
function
|
|
875
|
-
|
|
1194
|
+
function liveWebToolAnnotations(title) {
|
|
1195
|
+
return {
|
|
1196
|
+
title,
|
|
1197
|
+
readOnlyHint: true,
|
|
1198
|
+
destructiveHint: false,
|
|
1199
|
+
idempotentHint: false,
|
|
1200
|
+
openWorldHint: true
|
|
1201
|
+
};
|
|
1202
|
+
}
|
|
1203
|
+
function buildPaaExtractorMcpServer(executor, options = {}) {
|
|
1204
|
+
const savesReports = options.savesReportsLocally !== false;
|
|
1205
|
+
const reportNote = savesReports ? " Saves a full Markdown report locally." : " Reports are returned inline; no files are saved on this hosted endpoint.";
|
|
1206
|
+
const withReportNote = (description) => `${description}${reportNote}`;
|
|
1207
|
+
const server = new McpServer({ name: "mcp-scraper", version: PACKAGE_VERSION });
|
|
876
1208
|
server.registerTool("harvest_paa", {
|
|
877
|
-
|
|
878
|
-
|
|
1209
|
+
title: "Google PAA + SERP Harvest",
|
|
1210
|
+
description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-150 for "full", "deep", "all", or comprehensive research. Credits are charged by extracted question; unused request hold is refunded.'),
|
|
1211
|
+
inputSchema: HarvestPaaInputSchema,
|
|
1212
|
+
outputSchema: HarvestPaaOutputSchema,
|
|
1213
|
+
annotations: liveWebToolAnnotations("Google PAA + SERP Harvest")
|
|
879
1214
|
}, async (input) => formatHarvestPaa(await executor.harvestPaa(input), input));
|
|
880
1215
|
server.registerTool("search_serp", {
|
|
881
|
-
|
|
882
|
-
|
|
1216
|
+
title: "Google SERP Lookup",
|
|
1217
|
+
description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request."),
|
|
1218
|
+
inputSchema: SearchSerpInputSchema,
|
|
1219
|
+
outputSchema: SearchSerpOutputSchema,
|
|
1220
|
+
annotations: liveWebToolAnnotations("Google SERP Lookup")
|
|
883
1221
|
}, async (input) => formatSearchSerp(await executor.searchSerp(input), input));
|
|
884
1222
|
server.registerTool("extract_url", {
|
|
885
|
-
|
|
886
|
-
|
|
1223
|
+
title: "Single URL Extract",
|
|
1224
|
+
description: withReportNote("Extract structured data from one public URL: page content as Markdown, heading structure, JSON-LD schema, entity details, NAP score, metadata, and missing schema fields. Use when the user provides a single URL or asks to inspect/scrape one page."),
|
|
1225
|
+
inputSchema: ExtractUrlInputSchema,
|
|
1226
|
+
outputSchema: ExtractUrlOutputSchema,
|
|
1227
|
+
annotations: liveWebToolAnnotations("Single URL Extract")
|
|
887
1228
|
}, async (input) => formatExtractUrl(await executor.extractUrl(input), input));
|
|
888
1229
|
server.registerTool("map_site_urls", {
|
|
889
|
-
|
|
890
|
-
|
|
1230
|
+
title: "Site URL Map",
|
|
1231
|
+
description: withReportNote("Map/crawl a public website to build a URL inventory with HTTP status codes, broken links, redirects, and site scope. Use before extract_site for audits or when the user asks for a sitemap/URL inventory."),
|
|
1232
|
+
inputSchema: MapSiteUrlsInputSchema,
|
|
1233
|
+
outputSchema: MapSiteUrlsOutputSchema,
|
|
1234
|
+
annotations: liveWebToolAnnotations("Site URL Map")
|
|
891
1235
|
}, async (input) => formatMapSiteUrls(await executor.mapSiteUrls(input), input));
|
|
892
1236
|
server.registerTool("extract_site", {
|
|
893
|
-
|
|
894
|
-
|
|
1237
|
+
title: "Multi-Page Site Extract",
|
|
1238
|
+
description: withReportNote("Run multi-page extraction across a public website. Returns per-page titles, H1s, metadata, headings, schema/entity data, canonical URLs, and content. Use for website audits, competitor audits, and full-site extraction."),
|
|
1239
|
+
inputSchema: ExtractSiteInputSchema,
|
|
1240
|
+
outputSchema: ExtractSiteOutputSchema,
|
|
1241
|
+
annotations: liveWebToolAnnotations("Multi-Page Site Extract")
|
|
895
1242
|
}, async (input) => formatExtractSite(await executor.extractSite(input), input));
|
|
896
1243
|
server.registerTool("youtube_harvest", {
|
|
897
|
-
|
|
898
|
-
|
|
1244
|
+
title: "YouTube Video Harvest",
|
|
1245
|
+
description: withReportNote('Harvest YouTube video metadata by search query or channel handle/ID/URL. Use mode "search" for keyword/topic requests and mode "channel" for @handles, channel IDs, or channel URLs. Returns titles, views, dates, durations, URLs, thumbnails, and videoIds for follow-up transcription.'),
|
|
1246
|
+
inputSchema: YoutubeHarvestInputSchema,
|
|
1247
|
+
outputSchema: YoutubeHarvestOutputSchema,
|
|
1248
|
+
annotations: liveWebToolAnnotations("YouTube Video Harvest")
|
|
899
1249
|
}, async (input) => formatYoutubeHarvest(await executor.youtubeHarvest(input), input));
|
|
900
1250
|
server.registerTool("youtube_transcribe", {
|
|
901
|
-
|
|
902
|
-
|
|
1251
|
+
title: "YouTube Transcription",
|
|
1252
|
+
description: withReportNote("Fetch and transcribe captions from a YouTube video. Returns full transcript, timestamped chunks, and word count. Pass a videoId from youtube_harvest results or infer it from a YouTube URL if the user provided one."),
|
|
1253
|
+
inputSchema: YoutubeTranscribeInputSchema,
|
|
1254
|
+
annotations: liveWebToolAnnotations("YouTube Transcription")
|
|
903
1255
|
}, async (input) => formatYoutubeTranscribe(await executor.youtubeTranscribe(input), input));
|
|
904
1256
|
server.registerTool("facebook_page_intel", {
|
|
905
|
-
|
|
906
|
-
|
|
1257
|
+
title: "Facebook Advertiser Ad Intel",
|
|
1258
|
+
description: withReportNote("Harvest ads from a Facebook advertiser. Returns ad copy, headlines, CTAs, creative type, status, landing URLs, and video URLs ready for transcription. Accepts pageId, libraryId, or a brand/advertiser name as query. Use after facebook_ad_search when possible."),
|
|
1259
|
+
inputSchema: FacebookPageIntelInputSchema,
|
|
1260
|
+
outputSchema: FacebookPageIntelOutputSchema,
|
|
1261
|
+
annotations: liveWebToolAnnotations("Facebook Advertiser Ad Intel")
|
|
907
1262
|
}, async (input) => formatFacebookPageIntel(await executor.facebookPageIntel(input), input));
|
|
908
1263
|
server.registerTool("facebook_ad_search", {
|
|
909
|
-
|
|
910
|
-
|
|
1264
|
+
title: "Facebook Ad Library Search",
|
|
1265
|
+
description: withReportNote("Search Facebook Ad Library by brand, advertiser, competitor, niche, or keyword. Returns advertisers with ad counts and library IDs. Use to discover competitors, then pass libraryId to facebook_page_intel."),
|
|
1266
|
+
inputSchema: FacebookAdSearchInputSchema,
|
|
1267
|
+
outputSchema: FacebookAdSearchOutputSchema,
|
|
1268
|
+
annotations: liveWebToolAnnotations("Facebook Ad Library Search")
|
|
911
1269
|
}, async (input) => formatFacebookAdSearch(await executor.facebookAdSearch(input), input));
|
|
912
1270
|
server.registerTool("facebook_ad_transcribe", {
|
|
1271
|
+
title: "Facebook Ad Transcription",
|
|
913
1272
|
description: "Transcribe audio from a Facebook ad video. Returns full transcript and timestamped chunks. Use the videoUrl value from facebook_page_intel results.",
|
|
914
|
-
inputSchema: FacebookAdTranscribeInputSchema
|
|
1273
|
+
inputSchema: FacebookAdTranscribeInputSchema,
|
|
1274
|
+
annotations: liveWebToolAnnotations("Facebook Ad Transcription")
|
|
915
1275
|
}, async (input) => formatFacebookAdTranscribe(await executor.facebookAdTranscribe(input), input));
|
|
916
1276
|
server.registerTool("maps_place_intel", {
|
|
917
|
-
|
|
918
|
-
|
|
1277
|
+
title: "Google Maps Business Profile Details",
|
|
1278
|
+
description: withReportNote('Extract Google Maps business intelligence for one known/named business: rating, review count, category, address, phone, website, hours, booking URL, review histogram, review topics, about attributes, entity IDs, and optional review cards. Do not use this for category searches, local market prospect lists, or requests for multiple GMB/GBP profiles; use maps_search first for those. Split business name from location (e.g. "Elite Roofing Denver CO" => businessName "Elite Roofing", location "Denver, CO"). Pass includeReviews true when the user asks for reviews/customer pain.'),
|
|
1279
|
+
inputSchema: MapsPlaceIntelInputSchema,
|
|
1280
|
+
outputSchema: MapsPlaceIntelOutputSchema,
|
|
1281
|
+
annotations: liveWebToolAnnotations("Google Maps Business Profile Details")
|
|
919
1282
|
}, async (input) => formatMapsPlaceIntel(await executor.mapsPlaceIntel(input), input));
|
|
1283
|
+
server.registerTool("maps_search", {
|
|
1284
|
+
title: "Google Maps Business Search",
|
|
1285
|
+
description: withReportNote('Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.'),
|
|
1286
|
+
inputSchema: MapsSearchInputSchema,
|
|
1287
|
+
outputSchema: MapsSearchOutputSchema,
|
|
1288
|
+
annotations: liveWebToolAnnotations("Google Maps Business Search")
|
|
1289
|
+
}, async (input) => formatMapsSearch(await executor.mapsSearch(input), input));
|
|
920
1290
|
server.registerTool("credits_info", {
|
|
1291
|
+
title: "MCP Scraper Credits & Costs",
|
|
921
1292
|
description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",
|
|
922
|
-
inputSchema: CreditsInfoInputSchema
|
|
1293
|
+
inputSchema: CreditsInfoInputSchema,
|
|
1294
|
+
outputSchema: CreditsInfoOutputSchema,
|
|
1295
|
+
annotations: {
|
|
1296
|
+
title: "MCP Scraper Credits & Costs",
|
|
1297
|
+
readOnlyHint: true,
|
|
1298
|
+
destructiveHint: false,
|
|
1299
|
+
idempotentHint: true,
|
|
1300
|
+
openWorldHint: false
|
|
1301
|
+
}
|
|
923
1302
|
}, async (input) => formatCreditsInfo(await executor.creditsInfo(input), input));
|
|
924
1303
|
return server;
|
|
925
1304
|
}
|
|
@@ -1013,6 +1392,9 @@ var HttpMcpToolExecutor = class {
|
|
|
1013
1392
|
mapsPlaceIntel(input) {
|
|
1014
1393
|
return this.call("/maps/place", input);
|
|
1015
1394
|
}
|
|
1395
|
+
mapsSearch(input) {
|
|
1396
|
+
return this.call("/maps/search", input);
|
|
1397
|
+
}
|
|
1016
1398
|
creditsInfo(input) {
|
|
1017
1399
|
return this.call("/billing/credits", input);
|
|
1018
1400
|
}
|
|
@@ -1028,7 +1410,9 @@ export {
|
|
|
1028
1410
|
harvestTimeoutBudget,
|
|
1029
1411
|
CaptureSerpSnapshotInputSchema,
|
|
1030
1412
|
CaptureSerpPageSnapshotsInputSchema,
|
|
1413
|
+
configureReportSaving,
|
|
1414
|
+
liveWebToolAnnotations,
|
|
1031
1415
|
buildPaaExtractorMcpServer,
|
|
1032
1416
|
HttpMcpToolExecutor
|
|
1033
1417
|
};
|
|
1034
|
-
//# sourceMappingURL=chunk-
|
|
1418
|
+
//# sourceMappingURL=chunk-RE6HCRYC.js.map
|