mcp-scraper 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -2
- package/dist/bin/api-server.cjs +957 -243
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/mcp-stdio-server.cjs +540 -158
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +2 -1
- package/dist/bin/mcp-stdio-server.js.map +1 -1
- package/dist/bin/paa-harvest.cjs +36 -5
- package/dist/bin/paa-harvest.cjs.map +1 -1
- package/dist/bin/paa-harvest.js +5 -3
- package/dist/bin/paa-harvest.js.map +1 -1
- package/dist/{chunk-6TWZS2FQ.js → chunk-RE6HCRYC.js} +543 -159
- package/dist/chunk-RE6HCRYC.js.map +1 -0
- package/dist/{chunk-W4P2U5VF.js → chunk-TM22BLWP.js} +46 -34
- package/dist/chunk-TM22BLWP.js.map +1 -0
- package/dist/{chunk-7HB7NDOY.js → chunk-ZK456YXN.js} +12 -2
- package/dist/chunk-ZK456YXN.js.map +1 -0
- package/dist/chunk-ZMOWIBMK.js +36 -0
- package/dist/chunk-ZMOWIBMK.js.map +1 -0
- package/dist/index.cjs +34 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/{server-2Y27U4TO.js → server-QXVVTKJP.js} +311 -48
- package/dist/server-QXVVTKJP.js.map +1 -0
- package/dist/{worker-UT4ZQU2T.js → worker-AUCXFHEL.js} +6 -4
- package/dist/worker-AUCXFHEL.js.map +1 -0
- package/docs/adr/0001-in-page-graphql-interception-for-anti-bot-scraping.md +58 -0
- package/docs/adr/README.md +11 -0
- package/docs/mcp-tool-quality-spec.md +238 -0
- package/package.json +5 -4
- package/dist/chunk-6TWZS2FQ.js.map +0 -1
- package/dist/chunk-7HB7NDOY.js.map +0 -1
- package/dist/chunk-W4P2U5VF.js.map +0 -1
- package/dist/server-2Y27U4TO.js.map +0 -1
- package/dist/worker-UT4ZQU2T.js.map +0 -1
|
@@ -110,6 +110,9 @@ var HttpMcpToolExecutor = class {
|
|
|
110
110
|
mapsPlaceIntel(input) {
|
|
111
111
|
return this.call("/maps/place", input);
|
|
112
112
|
}
|
|
113
|
+
mapsSearch(input) {
|
|
114
|
+
return this.call("/maps/search", input);
|
|
115
|
+
}
|
|
113
116
|
creditsInfo(input) {
|
|
114
117
|
return this.call("/billing/credits", input);
|
|
115
118
|
}
|
|
@@ -124,6 +127,9 @@ var HttpMcpToolExecutor = class {
|
|
|
124
127
|
// src/mcp/paa-mcp-server.ts
|
|
125
128
|
var import_mcp = require("@modelcontextprotocol/sdk/server/mcp.js");
|
|
126
129
|
|
|
130
|
+
// src/version.ts
|
|
131
|
+
var PACKAGE_VERSION = "0.1.8";
|
|
132
|
+
|
|
127
133
|
// src/mcp/mcp-tool-schemas.ts
|
|
128
134
|
var import_zod = require("zod");
|
|
129
135
|
var HarvestPaaInputSchema = {
|
|
@@ -186,6 +192,207 @@ var MapsPlaceIntelInputSchema = {
|
|
|
186
192
|
includeReviews: import_zod.z.boolean().default(false).describe("Whether to fetch individual review cards"),
|
|
187
193
|
maxReviews: import_zod.z.number().int().min(1).max(500).default(50).describe("Max review cards to return (requires includeReviews: true)")
|
|
188
194
|
};
|
|
195
|
+
var MapsSearchInputSchema = {
|
|
196
|
+
query: import_zod.z.string().min(1).describe('Business category, niche, keyword, or search term. If the user says "roofers in Denver CO", use query="roofers" and location="Denver, CO". Do not put the location here when it can be separated.'),
|
|
197
|
+
location: import_zod.z.string().optional().describe('City, region, country, or service area for the Maps search, e.g. "Denver, CO". Infer from the user request when present.'),
|
|
198
|
+
gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location."),
|
|
199
|
+
hl: import_zod.z.string().length(2).default("en").describe("Language inferred from user request."),
|
|
200
|
+
maxResults: import_zod.z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more.")
|
|
201
|
+
};
|
|
202
|
+
var NullableString = import_zod.z.string().nullable();
|
|
203
|
+
var MapsSearchOutputSchema = {
|
|
204
|
+
query: import_zod.z.string(),
|
|
205
|
+
location: import_zod.z.string().nullable(),
|
|
206
|
+
searchQuery: import_zod.z.string(),
|
|
207
|
+
searchUrl: import_zod.z.string().url(),
|
|
208
|
+
extractedAt: import_zod.z.string(),
|
|
209
|
+
requestedMaxResults: import_zod.z.number().int().min(1).max(50),
|
|
210
|
+
resultCount: import_zod.z.number().int().min(0).max(50),
|
|
211
|
+
results: import_zod.z.array(import_zod.z.object({
|
|
212
|
+
position: import_zod.z.number().int().min(1),
|
|
213
|
+
name: import_zod.z.string(),
|
|
214
|
+
placeUrl: import_zod.z.string().url(),
|
|
215
|
+
cid: NullableString,
|
|
216
|
+
cidDecimal: NullableString,
|
|
217
|
+
rating: NullableString,
|
|
218
|
+
reviewCount: NullableString,
|
|
219
|
+
category: NullableString,
|
|
220
|
+
address: NullableString,
|
|
221
|
+
websiteUrl: NullableString,
|
|
222
|
+
directionsUrl: NullableString,
|
|
223
|
+
metadata: import_zod.z.array(import_zod.z.string())
|
|
224
|
+
})),
|
|
225
|
+
durationMs: import_zod.z.number().int().min(0)
|
|
226
|
+
};
|
|
227
|
+
var OrganicResultOutput = import_zod.z.object({
|
|
228
|
+
position: import_zod.z.number().int(),
|
|
229
|
+
title: import_zod.z.string(),
|
|
230
|
+
url: import_zod.z.string(),
|
|
231
|
+
domain: import_zod.z.string(),
|
|
232
|
+
snippet: NullableString
|
|
233
|
+
});
|
|
234
|
+
var AiOverviewOutput = import_zod.z.object({
|
|
235
|
+
detected: import_zod.z.boolean(),
|
|
236
|
+
text: NullableString
|
|
237
|
+
}).nullable();
|
|
238
|
+
var EntityIdsOutput = import_zod.z.object({
|
|
239
|
+
kgIds: import_zod.z.array(import_zod.z.string()),
|
|
240
|
+
cids: import_zod.z.array(import_zod.z.string()),
|
|
241
|
+
gcids: import_zod.z.array(import_zod.z.string())
|
|
242
|
+
}).nullable();
|
|
243
|
+
var HarvestPaaOutputSchema = {
|
|
244
|
+
query: import_zod.z.string(),
|
|
245
|
+
location: NullableString,
|
|
246
|
+
questionCount: import_zod.z.number().int().min(0),
|
|
247
|
+
completionStatus: NullableString,
|
|
248
|
+
questions: import_zod.z.array(import_zod.z.object({
|
|
249
|
+
question: import_zod.z.string(),
|
|
250
|
+
answer: NullableString,
|
|
251
|
+
sourceTitle: NullableString,
|
|
252
|
+
sourceSite: NullableString
|
|
253
|
+
})),
|
|
254
|
+
organicResults: import_zod.z.array(OrganicResultOutput),
|
|
255
|
+
aiOverview: AiOverviewOutput,
|
|
256
|
+
entityIds: EntityIdsOutput,
|
|
257
|
+
durationMs: import_zod.z.number().min(0).nullable()
|
|
258
|
+
};
|
|
259
|
+
var SearchSerpOutputSchema = {
|
|
260
|
+
query: import_zod.z.string(),
|
|
261
|
+
location: NullableString,
|
|
262
|
+
organicResults: import_zod.z.array(OrganicResultOutput),
|
|
263
|
+
localPack: import_zod.z.array(import_zod.z.object({
|
|
264
|
+
position: import_zod.z.number().int(),
|
|
265
|
+
name: import_zod.z.string(),
|
|
266
|
+
rating: NullableString,
|
|
267
|
+
reviewCount: NullableString,
|
|
268
|
+
websiteUrl: NullableString
|
|
269
|
+
})),
|
|
270
|
+
aiOverview: AiOverviewOutput,
|
|
271
|
+
entityIds: EntityIdsOutput
|
|
272
|
+
};
|
|
273
|
+
var ExtractUrlOutputSchema = {
|
|
274
|
+
url: import_zod.z.string(),
|
|
275
|
+
title: NullableString,
|
|
276
|
+
headings: import_zod.z.array(import_zod.z.object({
|
|
277
|
+
level: import_zod.z.number().int(),
|
|
278
|
+
text: import_zod.z.string()
|
|
279
|
+
})),
|
|
280
|
+
schemaBlockCount: import_zod.z.number().int().min(0),
|
|
281
|
+
entityName: NullableString,
|
|
282
|
+
entityTypes: import_zod.z.array(import_zod.z.string()),
|
|
283
|
+
napScore: import_zod.z.number().nullable(),
|
|
284
|
+
missingSchemaFields: import_zod.z.array(import_zod.z.string()),
|
|
285
|
+
screenshotSaved: NullableString
|
|
286
|
+
};
|
|
287
|
+
var ExtractSiteOutputSchema = {
|
|
288
|
+
url: import_zod.z.string(),
|
|
289
|
+
pageCount: import_zod.z.number().int().min(0),
|
|
290
|
+
pages: import_zod.z.array(import_zod.z.object({
|
|
291
|
+
url: import_zod.z.string(),
|
|
292
|
+
title: NullableString,
|
|
293
|
+
schemaTypes: import_zod.z.array(import_zod.z.string())
|
|
294
|
+
})),
|
|
295
|
+
durationMs: import_zod.z.number().min(0)
|
|
296
|
+
};
|
|
297
|
+
var MapsPlaceIntelOutputSchema = {
|
|
298
|
+
name: import_zod.z.string(),
|
|
299
|
+
rating: NullableString,
|
|
300
|
+
reviewCount: NullableString,
|
|
301
|
+
category: NullableString,
|
|
302
|
+
address: NullableString,
|
|
303
|
+
phone: NullableString,
|
|
304
|
+
website: NullableString,
|
|
305
|
+
hoursSummary: NullableString,
|
|
306
|
+
bookingUrl: NullableString,
|
|
307
|
+
kgmid: NullableString,
|
|
308
|
+
cidDecimal: NullableString,
|
|
309
|
+
cidUrl: NullableString,
|
|
310
|
+
lat: import_zod.z.number().nullable(),
|
|
311
|
+
lng: import_zod.z.number().nullable(),
|
|
312
|
+
reviewsStatus: import_zod.z.string(),
|
|
313
|
+
reviewsCollected: import_zod.z.number().int().min(0),
|
|
314
|
+
reviewTopics: import_zod.z.array(import_zod.z.object({
|
|
315
|
+
label: import_zod.z.string(),
|
|
316
|
+
count: import_zod.z.string()
|
|
317
|
+
}))
|
|
318
|
+
};
|
|
319
|
+
var CreditsInfoOutputSchema = {
|
|
320
|
+
balanceCredits: import_zod.z.number().nullable(),
|
|
321
|
+
matchedCost: import_zod.z.object({
|
|
322
|
+
label: import_zod.z.string(),
|
|
323
|
+
credits: import_zod.z.number(),
|
|
324
|
+
unit: import_zod.z.string(),
|
|
325
|
+
notes: NullableString
|
|
326
|
+
}).nullable(),
|
|
327
|
+
costs: import_zod.z.array(import_zod.z.object({
|
|
328
|
+
key: import_zod.z.string(),
|
|
329
|
+
label: import_zod.z.string(),
|
|
330
|
+
credits: import_zod.z.number(),
|
|
331
|
+
unit: import_zod.z.string(),
|
|
332
|
+
notes: NullableString
|
|
333
|
+
})),
|
|
334
|
+
ledger: import_zod.z.array(import_zod.z.object({
|
|
335
|
+
createdAt: import_zod.z.string(),
|
|
336
|
+
operation: import_zod.z.string(),
|
|
337
|
+
credits: import_zod.z.number(),
|
|
338
|
+
description: NullableString
|
|
339
|
+
}))
|
|
340
|
+
};
|
|
341
|
+
var MapSiteUrlsOutputSchema = {
|
|
342
|
+
startUrl: import_zod.z.string(),
|
|
343
|
+
totalFound: import_zod.z.number().int().min(0),
|
|
344
|
+
truncated: import_zod.z.boolean(),
|
|
345
|
+
okCount: import_zod.z.number().int().min(0),
|
|
346
|
+
redirectCount: import_zod.z.number().int().min(0),
|
|
347
|
+
brokenCount: import_zod.z.number().int().min(0),
|
|
348
|
+
urls: import_zod.z.array(import_zod.z.object({
|
|
349
|
+
url: import_zod.z.string(),
|
|
350
|
+
status: import_zod.z.number().int().nullable()
|
|
351
|
+
})),
|
|
352
|
+
durationMs: import_zod.z.number().min(0)
|
|
353
|
+
};
|
|
354
|
+
var YoutubeHarvestOutputSchema = {
|
|
355
|
+
mode: import_zod.z.string(),
|
|
356
|
+
videoCount: import_zod.z.number().int().min(0),
|
|
357
|
+
channel: import_zod.z.object({
|
|
358
|
+
title: NullableString,
|
|
359
|
+
subscriberCount: NullableString
|
|
360
|
+
}).nullable(),
|
|
361
|
+
videos: import_zod.z.array(import_zod.z.object({
|
|
362
|
+
videoId: import_zod.z.string(),
|
|
363
|
+
title: import_zod.z.string(),
|
|
364
|
+
channelName: NullableString,
|
|
365
|
+
views: NullableString,
|
|
366
|
+
duration: NullableString,
|
|
367
|
+
url: NullableString
|
|
368
|
+
}))
|
|
369
|
+
};
|
|
370
|
+
var FacebookAdSearchOutputSchema = {
|
|
371
|
+
query: import_zod.z.string(),
|
|
372
|
+
advertiserCount: import_zod.z.number().int().min(0),
|
|
373
|
+
advertisers: import_zod.z.array(import_zod.z.object({
|
|
374
|
+
name: NullableString,
|
|
375
|
+
adCount: import_zod.z.number().int().nullable(),
|
|
376
|
+
libraryId: NullableString
|
|
377
|
+
}))
|
|
378
|
+
};
|
|
379
|
+
var FacebookPageIntelOutputSchema = {
|
|
380
|
+
advertiserName: NullableString,
|
|
381
|
+
totalAds: import_zod.z.number().int().min(0),
|
|
382
|
+
activeCount: import_zod.z.number().int().min(0),
|
|
383
|
+
videoCount: import_zod.z.number().int().min(0),
|
|
384
|
+
imageCount: import_zod.z.number().int().min(0),
|
|
385
|
+
ads: import_zod.z.array(import_zod.z.object({
|
|
386
|
+
libraryId: NullableString,
|
|
387
|
+
status: NullableString,
|
|
388
|
+
creativeType: NullableString,
|
|
389
|
+
headline: NullableString,
|
|
390
|
+
cta: NullableString,
|
|
391
|
+
startDate: NullableString,
|
|
392
|
+
videoUrl: NullableString,
|
|
393
|
+
variations: import_zod.z.number().int().nullable()
|
|
394
|
+
}))
|
|
395
|
+
};
|
|
189
396
|
var CreditsInfoInputSchema = {
|
|
190
397
|
item: import_zod.z.string().optional().describe('Optional tool, action, or feature to look up, e.g. "maps reviews", "extract_url", or "YouTube transcription"'),
|
|
191
398
|
includeLedger: import_zod.z.boolean().default(false).describe("Whether to include recent credit ledger entries")
|
|
@@ -235,6 +442,19 @@ var CaptureSerpPageSnapshotsInputSchema = {
|
|
|
235
442
|
var import_node_fs = require("fs");
|
|
236
443
|
var import_node_os = require("os");
|
|
237
444
|
var import_node_path = require("path");
|
|
445
|
+
|
|
446
|
+
// src/errors.ts
|
|
447
|
+
function sanitizeVendorName(message) {
|
|
448
|
+
return message.replace(/kernel\.sh\s+sessions?/gi, "sessions").replace(/kernel\.sh\s+session/gi, "this session").replace(/kernel\.sh/gi, "the service").replace(/kernel\s+sessions?/gi, "sessions").replace(/kernel\s+session/gi, "this session").replace(/\bkernel\b/gi, "the service").replace(/ +/g, " ").trim();
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
// src/mcp/mcp-response-formatter.ts
|
|
452
|
+
var reportSavingEnabled = true;
|
|
453
|
+
function sanitizeVendorText(text) {
|
|
454
|
+
return sanitizeVendorName(
|
|
455
|
+
text.replace(/kernel_session_id/gi, "browser_session_id").replace(/kernel_delete_succeeded/gi, "session_cleanup_succeeded").replace(/kernel_delete_started/gi, "session_cleanup_started").replace(/kernel_delete_error/gi, "session_cleanup_error").replace(/kernelSessionId/g, "browserSessionId").replace(/kernelProxyId/g, "proxyId").replace(/KERNEL_API_KEY/g, "BROWSER_SERVICE_API_KEY").replace(/"kernel"\s*:/gi, '"browserRuntime":')
|
|
456
|
+
);
|
|
457
|
+
}
|
|
238
458
|
function slugifyReportName(input) {
|
|
239
459
|
return input.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "").slice(0, 80) || "mcp-scraper-report";
|
|
240
460
|
}
|
|
@@ -246,7 +466,7 @@ function outputBaseDir() {
|
|
|
246
466
|
return process.env.MCP_SCRAPER_OUTPUT_DIR?.trim() || (0, import_node_path.join)((0, import_node_os.homedir)(), "Downloads", "mcp-scraper");
|
|
247
467
|
}
|
|
248
468
|
function saveFullReport(full) {
|
|
249
|
-
if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
469
|
+
if (!reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
250
470
|
const outDir = outputBaseDir();
|
|
251
471
|
try {
|
|
252
472
|
(0, import_node_fs.mkdirSync)(outDir, { recursive: true });
|
|
@@ -259,7 +479,7 @@ function saveFullReport(full) {
|
|
|
259
479
|
}
|
|
260
480
|
}
|
|
261
481
|
function persistScreenshotLocally(base64, url) {
|
|
262
|
-
if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
482
|
+
if (!reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false") return null;
|
|
263
483
|
try {
|
|
264
484
|
const dir = (0, import_node_path.join)(outputBaseDir(), "screenshots");
|
|
265
485
|
(0, import_node_fs.mkdirSync)(dir, { recursive: true });
|
|
@@ -299,11 +519,11 @@ function parseData(raw) {
|
|
|
299
519
|
const text = first?.type === "text" ? first.text : "";
|
|
300
520
|
try {
|
|
301
521
|
const parsed = JSON.parse(text || "{}");
|
|
302
|
-
if (raw.isError || parsed.error || parsed.error_code) return { error: formatStructuredError(parsed, text) };
|
|
522
|
+
if (raw.isError || parsed.error || parsed.error_code) return { error: sanitizeVendorText(formatStructuredError(parsed, text)) };
|
|
303
523
|
const data = parsed.result ?? parsed;
|
|
304
524
|
return { data };
|
|
305
525
|
} catch {
|
|
306
|
-
if (raw.isError) return { error: text || "Tool error" };
|
|
526
|
+
if (raw.isError) return { error: sanitizeVendorText(text || "Tool error") };
|
|
307
527
|
return { error: "Failed to parse tool response" };
|
|
308
528
|
}
|
|
309
529
|
}
|
|
@@ -317,15 +537,6 @@ function entityIdsSection(ids) {
|
|
|
317
537
|
## Entity IDs
|
|
318
538
|
${lines.join("\n")}` : "";
|
|
319
539
|
}
|
|
320
|
-
function entityIdsSummaryLine(ids) {
|
|
321
|
-
if (!ids) return "";
|
|
322
|
-
const parts = [];
|
|
323
|
-
if (ids.kgIds?.length) parts.push(`KG MID: ${ids.kgIds[0]}`);
|
|
324
|
-
if (ids.cids?.length) parts.push(`CID: ${ids.cids[0]}`);
|
|
325
|
-
if (ids.gcids?.length) parts.push(`GCID: ${ids.gcids[0]}`);
|
|
326
|
-
return parts.length ? `
|
|
327
|
-
**Entity IDs:** ${parts.join(" \xB7 ")}` : "";
|
|
328
|
-
}
|
|
329
540
|
function truncate(s, max) {
|
|
330
541
|
if (!s) return "";
|
|
331
542
|
return s.length > max ? s.slice(0, max) + "\u2026" : s;
|
|
@@ -337,7 +548,7 @@ function debugSection(debug) {
|
|
|
337
548
|
if (!debug || typeof debug !== "object") return "";
|
|
338
549
|
const request = debug.request ?? {};
|
|
339
550
|
const browser = debug.browser ?? {};
|
|
340
|
-
const kernel = browser.kernel ?? {};
|
|
551
|
+
const kernel = browser.browserRuntime ?? browser.kernel ?? {};
|
|
341
552
|
const network = browser.networkLocation ?? {};
|
|
342
553
|
const nav = browser.serpNavigation ?? {};
|
|
343
554
|
const proxyResolution = kernel.proxyResolution ?? {};
|
|
@@ -355,7 +566,7 @@ function debugSection(debug) {
|
|
|
355
566
|
if (locationEvidence) {
|
|
356
567
|
lines.push(`- Location evidence: ${locationEvidence.status}${locationEvidence.expected ? ` \xB7 expected ${locationEvidence.expected.city}${locationEvidence.expected.regionCode ? `, ${locationEvidence.expected.regionCode}` : ""}` : ""}${candidates ? ` \xB7 candidates ${candidates}` : ""}`);
|
|
357
568
|
}
|
|
358
|
-
return lines.join("\n");
|
|
569
|
+
return sanitizeVendorText(lines.join("\n"));
|
|
359
570
|
}
|
|
360
571
|
function errorAttemptsSection(body) {
|
|
361
572
|
const attempts = Array.isArray(body.attempts) ? body.attempts : [];
|
|
@@ -363,12 +574,14 @@ function errorAttemptsSection(body) {
|
|
|
363
574
|
const lines = attempts.slice(0, 5).map((attempt) => {
|
|
364
575
|
const debug = attempt.debug ?? {};
|
|
365
576
|
const browser = debug.browser ?? {};
|
|
366
|
-
const kernel = browser.kernel ?? {};
|
|
577
|
+
const kernel = browser.browserRuntime ?? browser.kernel ?? {};
|
|
367
578
|
const proxyResolution = kernel.proxyResolution ?? {};
|
|
368
579
|
const network = browser.networkLocation ?? {};
|
|
369
580
|
const nav = browser.serpNavigation ?? {};
|
|
370
581
|
const geo = [network.ip, network.city, network.region].filter(Boolean).join(" / ") || "geo unknown";
|
|
371
|
-
|
|
582
|
+
const sessionId = attempt.browser_session_id ?? attempt.kernel_session_id ?? kernel.sessionId ?? "unknown";
|
|
583
|
+
const cleanupSucceeded = attempt.session_cleanup_succeeded ?? attempt.kernel_delete_succeeded;
|
|
584
|
+
return `- Attempt ${attempt.attempt_number ?? "?"}: ${attempt.outcome ?? attempt.status ?? "unknown"} \xB7 session ${sessionId} \xB7 proxy ${debug.request?.proxyMode ?? kernel.proxyMode ?? "unknown"}${proxyResolution.source ? `/${proxyResolution.source}` : ""} \xB7 ${geo} \xB7 CAPTCHA ${nav.captchaDetected === true ? "yes" : nav.captchaDetected === false ? "no" : "unknown"} \xB7 cleanup ${cleanupSucceeded === true ? "yes" : cleanupSucceeded === false ? "no" : "unknown"}`;
|
|
372
585
|
});
|
|
373
586
|
return `
|
|
374
587
|
|
|
@@ -409,27 +622,37 @@ ${serpRows}` : "";
|
|
|
409
622
|
const tips = `
|
|
410
623
|
---
|
|
411
624
|
\u{1F4A1} **Tips**
|
|
412
|
-
- Max questions: \`maxQuestions:
|
|
625
|
+
- Max questions: \`maxQuestions: 200\` (current: ${input.maxQuestions ?? 30})
|
|
413
626
|
- Organic results only: use \`search_serp\`
|
|
414
627
|
- Dig into a result: use \`extract_url\` on any organic URL`;
|
|
415
628
|
const full = `# PAA Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
|
|
416
629
|
|
|
417
630
|
${paaTable}${serpTable}${entityIdsSection(entityIds)}${aiSection}${statsLine}${debugSection(diagnostics?.debug)}${tips}`;
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
631
|
+
return {
|
|
632
|
+
...oneBlock(full),
|
|
633
|
+
structuredContent: {
|
|
634
|
+
query: input.query,
|
|
635
|
+
location: input.location ?? null,
|
|
636
|
+
questionCount: flat.length,
|
|
637
|
+
completionStatus: diagnostics?.completionStatus ?? null,
|
|
638
|
+
questions: flat.map((r) => ({
|
|
639
|
+
question: String(r.question ?? ""),
|
|
640
|
+
answer: r.answer ?? null,
|
|
641
|
+
sourceTitle: r.source_title ?? null,
|
|
642
|
+
sourceSite: r.source_site ?? null
|
|
643
|
+
})),
|
|
644
|
+
organicResults: organic.map((r) => ({
|
|
645
|
+
position: Number(r.position) || 0,
|
|
646
|
+
title: String(r.title ?? ""),
|
|
647
|
+
url: String(r.url ?? ""),
|
|
648
|
+
domain: String(r.domain ?? ""),
|
|
649
|
+
snippet: r.snippet ?? null
|
|
650
|
+
})),
|
|
651
|
+
aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
|
|
652
|
+
entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null,
|
|
653
|
+
durationMs: durationMs ?? null
|
|
654
|
+
}
|
|
655
|
+
};
|
|
433
656
|
}
|
|
434
657
|
function formatSearchSerp(raw, input) {
|
|
435
658
|
const parsed = parseData(raw);
|
|
@@ -467,19 +690,29 @@ ${localRows}` : "";
|
|
|
467
690
|
const full = `# SERP Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}
|
|
468
691
|
|
|
469
692
|
${serpTable}${localSection}${entityIdsSection(entityIds)}${aiSection}${debugSection(diagnostics?.debug)}${tips}`;
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
693
|
+
return {
|
|
694
|
+
...oneBlock(full),
|
|
695
|
+
structuredContent: {
|
|
696
|
+
query: input.query,
|
|
697
|
+
location: input.location ?? null,
|
|
698
|
+
organicResults: organic.map((r) => ({
|
|
699
|
+
position: Number(r.position) || 0,
|
|
700
|
+
title: String(r.title ?? ""),
|
|
701
|
+
url: String(r.url ?? ""),
|
|
702
|
+
domain: String(r.domain ?? ""),
|
|
703
|
+
snippet: r.snippet ?? null
|
|
704
|
+
})),
|
|
705
|
+
localPack: localPack.map((b) => ({
|
|
706
|
+
position: Number(b.position) || 0,
|
|
707
|
+
name: String(b.name ?? ""),
|
|
708
|
+
rating: b.rating ?? null,
|
|
709
|
+
reviewCount: b.reviewCount ?? null,
|
|
710
|
+
websiteUrl: b.websiteUrl ?? null
|
|
711
|
+
})),
|
|
712
|
+
aiOverview: aiOvw ? { detected: aiOvw.detected === true, text: aiOvw.text ?? null } : null,
|
|
713
|
+
entityIds: entityIds ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] } : null
|
|
714
|
+
}
|
|
715
|
+
};
|
|
483
716
|
}
|
|
484
717
|
function formatExtractUrl(raw, input) {
|
|
485
718
|
const parsed = parseData(raw);
|
|
@@ -548,15 +781,27 @@ ${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}` : "";
|
|
|
548
781
|
**${title}**
|
|
549
782
|
${headingSection}${kpoSection}${brandingSection}${bodySection}${screenshotSection}${mediaSection}${tips}`;
|
|
550
783
|
const textResult = oneBlock(full);
|
|
784
|
+
const structuredContent = {
|
|
785
|
+
url,
|
|
786
|
+
title: d.title ?? null,
|
|
787
|
+
headings: headings.map((h) => ({ level: Number(h.level) || 0, text: String(h.text ?? "") })),
|
|
788
|
+
schemaBlockCount: schemaCount,
|
|
789
|
+
entityName: kpo?.entityName ?? null,
|
|
790
|
+
entityTypes: kpo?.type ?? [],
|
|
791
|
+
napScore: kpo?.napScore ?? null,
|
|
792
|
+
missingSchemaFields: kpo?.missingFields ?? [],
|
|
793
|
+
screenshotSaved: screenshotPath ?? null
|
|
794
|
+
};
|
|
551
795
|
if (screenshotMeta?.base64) {
|
|
552
796
|
return {
|
|
553
797
|
content: [
|
|
554
798
|
...textResult.content,
|
|
555
799
|
{ type: "image", data: screenshotMeta.base64, mimeType: "image/png" }
|
|
556
|
-
]
|
|
800
|
+
],
|
|
801
|
+
structuredContent
|
|
557
802
|
};
|
|
558
803
|
}
|
|
559
|
-
return textResult;
|
|
804
|
+
return { ...textResult, structuredContent };
|
|
560
805
|
}
|
|
561
806
|
function formatMapSiteUrls(raw, input) {
|
|
562
807
|
const parsed = parseData(raw);
|
|
@@ -589,15 +834,19 @@ ${broken.map((u) => `- ${u.url} (${u.status})`).join("\n")}` : "",
|
|
|
589
834
|
- Extract content from all pages: use \`extract_site\`
|
|
590
835
|
- Scrape a single page: use \`extract_url\``
|
|
591
836
|
].filter(Boolean).join("\n");
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
837
|
+
return {
|
|
838
|
+
...oneBlock(full),
|
|
839
|
+
structuredContent: {
|
|
840
|
+
startUrl: d.startUrl ?? input.url,
|
|
841
|
+
totalFound: d.totalFound ?? urls.length,
|
|
842
|
+
truncated: d.truncated === true,
|
|
843
|
+
okCount: ok.length,
|
|
844
|
+
redirectCount: redirects.length,
|
|
845
|
+
brokenCount: broken.length,
|
|
846
|
+
urls: urls.map((u) => ({ url: u.url, status: u.status ?? null })),
|
|
847
|
+
durationMs: d.durationMs ?? 0
|
|
848
|
+
}
|
|
849
|
+
};
|
|
601
850
|
}
|
|
602
851
|
function formatExtractSite(raw, input) {
|
|
603
852
|
const parsed = parseData(raw);
|
|
@@ -622,14 +871,19 @@ ${pageRows}`,
|
|
|
622
871
|
- Map URLs first: use \`map_site_urls\`
|
|
623
872
|
- Inspect a single page: use \`extract_url\``
|
|
624
873
|
].join("\n");
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
874
|
+
return {
|
|
875
|
+
...oneBlock(full),
|
|
876
|
+
structuredContent: {
|
|
877
|
+
url: input.url,
|
|
878
|
+
pageCount: pages.length,
|
|
879
|
+
pages: pages.map((p) => ({
|
|
880
|
+
url: String(p.url ?? ""),
|
|
881
|
+
title: p.title ?? null,
|
|
882
|
+
schemaTypes: p.kpo?.type ?? []
|
|
883
|
+
})),
|
|
884
|
+
durationMs: d.durationMs ?? 0
|
|
885
|
+
}
|
|
886
|
+
};
|
|
633
887
|
}
|
|
634
888
|
function formatYoutubeHarvest(raw, input) {
|
|
635
889
|
const parsed = parseData(raw);
|
|
@@ -659,16 +913,22 @@ ${videoRows}`,
|
|
|
659
913
|
- Transcribe a video: use \`youtube_transcribe\` with the \`videoId\` above
|
|
660
914
|
- Switch mode: \`mode: "channel"\` with \`channelHandle\` or \`mode: "search"\` with \`query\``
|
|
661
915
|
].filter(Boolean).join("\n");
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
916
|
+
return {
|
|
917
|
+
...oneBlock(full),
|
|
918
|
+
structuredContent: {
|
|
919
|
+
mode: input.mode,
|
|
920
|
+
videoCount: videos.length,
|
|
921
|
+
channel: d.channelMeta ? { title: d.channelMeta.title ?? null, subscriberCount: d.channelMeta.subscriberCount ?? null } : null,
|
|
922
|
+
videos: videos.map((v) => ({
|
|
923
|
+
videoId: String(v.videoId ?? ""),
|
|
924
|
+
title: String(v.title ?? ""),
|
|
925
|
+
channelName: v.channelName ?? null,
|
|
926
|
+
views: v.views ?? null,
|
|
927
|
+
duration: v.duration ?? null,
|
|
928
|
+
url: v.url ?? null
|
|
929
|
+
}))
|
|
930
|
+
}
|
|
931
|
+
};
|
|
672
932
|
}
|
|
673
933
|
function formatYoutubeTranscribe(raw, input) {
|
|
674
934
|
const parsed = parseData(raw);
|
|
@@ -698,14 +958,6 @@ ${chunkRows}` : "",
|
|
|
698
958
|
---
|
|
699
959
|
\u{1F4A1} Harvest more from this channel: use \`youtube_harvest\` with \`mode: "channel"\``
|
|
700
960
|
].filter(Boolean).join("\n");
|
|
701
|
-
const summary = [
|
|
702
|
-
`**YouTube Transcript: \`${input.videoId}\`** \u2014 ${text.split(" ").length} words \xB7 ${durSec}s`,
|
|
703
|
-
`
|
|
704
|
-
**Preview:**
|
|
705
|
-
> ${truncate(text, 300)}`,
|
|
706
|
-
`
|
|
707
|
-
\u{1F4A1} Full transcript in artifact above`
|
|
708
|
-
].join("\n");
|
|
709
961
|
return oneBlock(full);
|
|
710
962
|
}
|
|
711
963
|
function formatFacebookPageIntel(raw, input) {
|
|
@@ -734,19 +986,26 @@ ${adBlocks}`,
|
|
|
734
986
|
- Transcribe video ads: use \`facebook_ad_transcribe\` with the \`videoUrl\` above
|
|
735
987
|
- Find other advertisers: use \`facebook_ad_search\``
|
|
736
988
|
].filter(Boolean).join("\n");
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
989
|
+
return {
|
|
990
|
+
...oneBlock(full),
|
|
991
|
+
structuredContent: {
|
|
992
|
+
advertiserName: d.advertiserName ?? null,
|
|
993
|
+
totalAds: s.totalAds ?? 0,
|
|
994
|
+
activeCount: s.activeCount ?? 0,
|
|
995
|
+
videoCount: s.videoCount ?? 0,
|
|
996
|
+
imageCount: s.imageCount ?? 0,
|
|
997
|
+
ads: ads.map((ad) => ({
|
|
998
|
+
libraryId: ad.libraryId ?? null,
|
|
999
|
+
status: ad.status ?? null,
|
|
1000
|
+
creativeType: ad.creativeType ?? null,
|
|
1001
|
+
headline: ad.headline ?? null,
|
|
1002
|
+
cta: ad.cta ?? null,
|
|
1003
|
+
startDate: ad.startDate ?? null,
|
|
1004
|
+
videoUrl: ad.videoUrl ?? null,
|
|
1005
|
+
variations: typeof ad.variations === "number" ? ad.variations : null
|
|
1006
|
+
}))
|
|
1007
|
+
}
|
|
1008
|
+
};
|
|
750
1009
|
}
|
|
751
1010
|
function formatFacebookAdSearch(raw, input) {
|
|
752
1011
|
const parsed = parseData(raw);
|
|
@@ -770,15 +1029,18 @@ ${rows}`,
|
|
|
770
1029
|
- Scan all ads: use \`facebook_page_intel\` with \`libraryId\`
|
|
771
1030
|
- Or pass the advertiser name as \`query\` in \`facebook_page_intel\``
|
|
772
1031
|
].join("\n");
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
1032
|
+
return {
|
|
1033
|
+
...oneBlock(full),
|
|
1034
|
+
structuredContent: {
|
|
1035
|
+
query: input.query,
|
|
1036
|
+
advertiserCount: advertisers.length,
|
|
1037
|
+
advertisers: advertisers.map((a) => ({
|
|
1038
|
+
name: a.pageName ?? a.name ?? null,
|
|
1039
|
+
adCount: typeof a.adCount === "number" ? a.adCount : null,
|
|
1040
|
+
libraryId: a.sampleLibraryId ?? a.libraryId ?? null
|
|
1041
|
+
}))
|
|
1042
|
+
}
|
|
1043
|
+
};
|
|
782
1044
|
}
|
|
783
1045
|
function formatCreditsInfo(raw, input) {
|
|
784
1046
|
const parsed = parseData(raw);
|
|
@@ -818,14 +1080,75 @@ ${costRows}` : "",
|
|
|
818
1080
|
|------|-----------|---------|-------------|
|
|
819
1081
|
${ledgerRows}` : ""
|
|
820
1082
|
].filter(Boolean).join("\n");
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
1083
|
+
return {
|
|
1084
|
+
...oneBlock(full),
|
|
1085
|
+
structuredContent: {
|
|
1086
|
+
balanceCredits: typeof balance === "number" ? balance : null,
|
|
1087
|
+
matchedCost: matched ? { label: matched.label, credits: matched.credits, unit: matched.unit, notes: matched.notes ?? null } : null,
|
|
1088
|
+
costs: costs.map((c) => ({
|
|
1089
|
+
key: c.key,
|
|
1090
|
+
label: c.label,
|
|
1091
|
+
credits: c.credits,
|
|
1092
|
+
unit: c.unit,
|
|
1093
|
+
notes: c.notes ?? null
|
|
1094
|
+
})),
|
|
1095
|
+
ledger: ledger.map((row) => ({
|
|
1096
|
+
createdAt: String(row.created_at ?? ""),
|
|
1097
|
+
operation: String(row.operation ?? ""),
|
|
1098
|
+
credits: row.amount_mc / 1e3,
|
|
1099
|
+
description: row.description ?? null
|
|
1100
|
+
}))
|
|
1101
|
+
}
|
|
1102
|
+
};
|
|
1103
|
+
}
|
|
1104
|
+
/**
 * Render a `maps_search` executor response as an MCP tool result.
 *
 * The Markdown report contains a results table, a per-candidate metadata list
 * (at most 8 entries per candidate), a next-step hint, and the extraction time
 * when the payload reports one. The same data is mirrored in
 * `structuredContent`.
 *
 * @param {unknown} raw - Executor response; handed to `parseData`.
 * @param {{ query?: string, location?: string, maxResults?: number }} input -
 *   Tool arguments, used as a fallback for fields the payload does not echo.
 * @returns {object} An `isError` text result when `parseData` yields an
 *   `error`; otherwise the fields of `oneBlock(report)` plus `structuredContent`.
 */
function formatMapsSearch(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
  const d = parsed.data;
  const results = d.results ?? [];
  // Resolve the echoed request fields once so the Markdown heading and the
  // structured payload always agree, even when the backend omits them.
  const query = d.query ?? input.query;
  const location = d.location ?? input.location ?? null;
  const searchQuery = d.searchQuery ?? [input.query, input.location].filter(Boolean).join(" ");
  const requestedMax = d.requestedMaxResults ?? input.maxResults ?? 10;
  const durationMs = d.durationMs;
  const emDash = "\u2014";
  // NOTE(review): `cell` is presumably the table-cell sanitizer defined earlier
  // in this file; confirm before relying on it for escaping.
  const rows = results.map((r) => {
    const rating = [r.rating, r.reviewCount ? `(${r.reviewCount})` : null].filter(Boolean).join(" ");
    const cid = r.cidDecimal ? `\`${r.cidDecimal}\`` : emDash;
    const website = r.websiteUrl ? `[site](${r.websiteUrl})` : emDash;
    // A missing place URL used to render as "[maps](undefined)".
    const maps = r.placeUrl ? `[maps](${r.placeUrl})` : emDash;
    return `| ${r.position} | ${cell(r.name)} | ${cell(r.category)} | ${cell(rating)} | ${cell(r.address)} | ${cid} | ${website} | ${maps} |`;
  }).join("\n");
  const metadataBlocks = results.map((r) => {
    const meta = r.metadata?.length ? r.metadata.slice(0, 8).map((m) => ` - ${m}`).join("\n") : " - none";
    return `### ${r.position}. ${r.name}\n${meta}`;
  }).join("\n\n");
  // Sections that start with "" get a blank line before them once joined.
  const metadataSection = results.length ? ["", "## Candidate Metadata", metadataBlocks].join("\n") : "";
  const resultsSection = [
    "",
    "## Results",
    "| # | Name | Category | Rating | Address | CID | Website | Maps |",
    "|---|------|----------|--------|---------|-----|---------|------|",
    rows
  ].join("\n");
  const full = [
    `# Google Maps Search: "${searchQuery}"`,
    `**Returned:** ${results.length} profile candidate${results.length === 1 ? "" : "s"} \xB7 **Requested max:** ${requestedMax} \xB7 **Limit:** 50`,
    resultsSection,
    metadataSection,
    "\n---\n\u{1F4A1} **Next step:** use `maps_place_intel` with a selected business name and location to hydrate full hours, phone, review topics, and optional review cards.",
    durationMs != null ? `\n*Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
  ].filter(Boolean).join("\n");
  return {
    ...oneBlock(full),
    structuredContent: {
      query,
      location,
      searchQuery,
      searchUrl: d.searchUrl,
      extractedAt: d.extractedAt,
      requestedMaxResults: requestedMax,
      resultCount: results.length,
      results,
      durationMs: durationMs ?? 0
    }
  };
}
|
|
830
1153
|
function formatMapsPlaceIntel(raw, input) {
|
|
831
1154
|
const parsed = parseData(raw);
|
|
@@ -925,20 +1248,28 @@ ${entitySection}` : null,
|
|
|
925
1248
|
---
|
|
926
1249
|
*Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
|
|
927
1250
|
].filter(Boolean).join("\n");
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
1251
|
+
return {
|
|
1252
|
+
...oneBlock(full),
|
|
1253
|
+
structuredContent: {
|
|
1254
|
+
name,
|
|
1255
|
+
rating: rating ?? null,
|
|
1256
|
+
reviewCount: reviewCount ?? null,
|
|
1257
|
+
category: category ?? null,
|
|
1258
|
+
address: address ?? null,
|
|
1259
|
+
phone: phone ?? null,
|
|
1260
|
+
website: website ?? null,
|
|
1261
|
+
hoursSummary: hoursSummary ?? null,
|
|
1262
|
+
bookingUrl: bookingUrl ?? null,
|
|
1263
|
+
kgmid: kgmid ?? null,
|
|
1264
|
+
cidDecimal: cidDecimal ?? null,
|
|
1265
|
+
cidUrl: cidUrl ?? null,
|
|
1266
|
+
lat: lat ?? null,
|
|
1267
|
+
lng: lng ?? null,
|
|
1268
|
+
reviewsStatus,
|
|
1269
|
+
reviewsCollected: reviews.length,
|
|
1270
|
+
reviewTopics: topics.map((t) => ({ label: String(t.label ?? ""), count: String(t.count ?? "") }))
|
|
1271
|
+
}
|
|
1272
|
+
};
|
|
942
1273
|
}
|
|
943
1274
|
function formatFacebookAdTranscribe(raw, input) {
|
|
944
1275
|
const parsed = parseData(raw);
|
|
@@ -968,67 +1299,118 @@ ${chunkRows}` : "",
|
|
|
968
1299
|
---
|
|
969
1300
|
\u{1F4A1} Get more ads from this advertiser: use \`facebook_page_intel\``
|
|
970
1301
|
].filter(Boolean).join("\n");
|
|
971
|
-
const summary = [
|
|
972
|
-
`**Facebook Ad Transcript** \u2014 ${text.split(" ").length} words \xB7 ${durSec}s`,
|
|
973
|
-
`
|
|
974
|
-
**Preview:**
|
|
975
|
-
> ${truncate(text, 300)}`,
|
|
976
|
-
`
|
|
977
|
-
\u{1F4A1} Full transcript in artifact above`
|
|
978
|
-
].join("\n");
|
|
979
1302
|
return oneBlock(full);
|
|
980
1303
|
}
|
|
981
1304
|
|
|
982
1305
|
// src/mcp/paa-mcp-server.ts
|
|
983
|
-
function
|
|
984
|
-
|
|
1306
|
+
/**
 * Build the MCP annotation object shared by the tools registered through it.
 *
 * The hints are fixed: read-only, non-destructive, not idempotent, open-world.
 * Only the title varies.
 *
 * @param {string} title - Display title placed first in the annotations.
 * @returns {object} A fresh annotations object on every call.
 */
function liveWebToolAnnotations(title) {
  const liveWebHints = {
    readOnlyHint: true,
    destructiveHint: false,
    idempotentHint: false,
    openWorldHint: true
  };
  // Title first, then the hints, so the serialized key order is unchanged.
  return { title, ...liveWebHints };
}
|
|
1315
|
+
/**
 * Create the "mcp-scraper" MCP server and register its thirteen tools.
 *
 * Each tool handler awaits the matching `executor2` method and passes the raw
 * response, together with the tool input, to the corresponding formatter.
 *
 * @param {object} executor2 - Object exposing one async method per tool
 *   (`harvestPaa`, `searchSerp`, ..., `creditsInfo`).
 * @param {{ savesReportsLocally?: boolean }} [options] - When
 *   `savesReportsLocally` is exactly `false`, tool descriptions say reports are
 *   returned inline; any other value advertises local report saving.
 * @returns {object} The configured `McpServer` instance.
 */
function buildPaaExtractorMcpServer(executor2, options = {}) {
  const { savesReportsLocally } = options;
  const reportNote = savesReportsLocally !== false
    ? " Saves a full Markdown report locally."
    : " Reports are returned inline; no files are saved on this hosted endpoint.";
  const withReportNote = (description) => `${description}${reportNote}`;
  const server2 = new import_mcp.McpServer({ name: "mcp-scraper", version: PACKAGE_VERSION });

  // Registers a tool that uses the shared live-web annotations. `schemas` is
  // spread as-is, so tools without an `outputSchema` simply omit that key.
  const registerLiveWebTool = (toolName, title, description, schemas, handler) => {
    server2.registerTool(toolName, {
      title,
      description,
      ...schemas,
      annotations: liveWebToolAnnotations(title)
    }, handler);
  };

  registerLiveWebTool(
    "harvest_paa",
    "Google PAA + SERP Harvest",
    withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-150 for "full", "deep", "all", or comprehensive research. Credits are charged by extracted question; unused request hold is refunded.'),
    { inputSchema: HarvestPaaInputSchema, outputSchema: HarvestPaaOutputSchema },
    async (input) => formatHarvestPaa(await executor2.harvestPaa(input), input)
  );
  registerLiveWebTool(
    "search_serp",
    "Google SERP Lookup",
    withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request."),
    { inputSchema: SearchSerpInputSchema, outputSchema: SearchSerpOutputSchema },
    async (input) => formatSearchSerp(await executor2.searchSerp(input), input)
  );
  registerLiveWebTool(
    "extract_url",
    "Single URL Extract",
    withReportNote("Extract structured data from one public URL: page content as Markdown, heading structure, JSON-LD schema, entity details, NAP score, metadata, and missing schema fields. Use when the user provides a single URL or asks to inspect/scrape one page."),
    { inputSchema: ExtractUrlInputSchema, outputSchema: ExtractUrlOutputSchema },
    async (input) => formatExtractUrl(await executor2.extractUrl(input), input)
  );
  registerLiveWebTool(
    "map_site_urls",
    "Site URL Map",
    withReportNote("Map/crawl a public website to build a URL inventory with HTTP status codes, broken links, redirects, and site scope. Use before extract_site for audits or when the user asks for a sitemap/URL inventory."),
    { inputSchema: MapSiteUrlsInputSchema, outputSchema: MapSiteUrlsOutputSchema },
    async (input) => formatMapSiteUrls(await executor2.mapSiteUrls(input), input)
  );
  registerLiveWebTool(
    "extract_site",
    "Multi-Page Site Extract",
    withReportNote("Run multi-page extraction across a public website. Returns per-page titles, H1s, metadata, headings, schema/entity data, canonical URLs, and content. Use for website audits, competitor audits, and full-site extraction."),
    { inputSchema: ExtractSiteInputSchema, outputSchema: ExtractSiteOutputSchema },
    async (input) => formatExtractSite(await executor2.extractSite(input), input)
  );
  registerLiveWebTool(
    "youtube_harvest",
    "YouTube Video Harvest",
    withReportNote('Harvest YouTube video metadata by search query or channel handle/ID/URL. Use mode "search" for keyword/topic requests and mode "channel" for @handles, channel IDs, or channel URLs. Returns titles, views, dates, durations, URLs, thumbnails, and videoIds for follow-up transcription.'),
    { inputSchema: YoutubeHarvestInputSchema, outputSchema: YoutubeHarvestOutputSchema },
    async (input) => formatYoutubeHarvest(await executor2.youtubeHarvest(input), input)
  );
  registerLiveWebTool(
    "youtube_transcribe",
    "YouTube Transcription",
    withReportNote("Fetch and transcribe captions from a YouTube video. Returns full transcript, timestamped chunks, and word count. Pass a videoId from youtube_harvest results or infer it from a YouTube URL if the user provided one."),
    { inputSchema: YoutubeTranscribeInputSchema },
    async (input) => formatYoutubeTranscribe(await executor2.youtubeTranscribe(input), input)
  );
  registerLiveWebTool(
    "facebook_page_intel",
    "Facebook Advertiser Ad Intel",
    withReportNote("Harvest ads from a Facebook advertiser. Returns ad copy, headlines, CTAs, creative type, status, landing URLs, and video URLs ready for transcription. Accepts pageId, libraryId, or a brand/advertiser name as query. Use after facebook_ad_search when possible."),
    { inputSchema: FacebookPageIntelInputSchema, outputSchema: FacebookPageIntelOutputSchema },
    async (input) => formatFacebookPageIntel(await executor2.facebookPageIntel(input), input)
  );
  registerLiveWebTool(
    "facebook_ad_search",
    "Facebook Ad Library Search",
    withReportNote("Search Facebook Ad Library by brand, advertiser, competitor, niche, or keyword. Returns advertisers with ad counts and library IDs. Use to discover competitors, then pass libraryId to facebook_page_intel."),
    { inputSchema: FacebookAdSearchInputSchema, outputSchema: FacebookAdSearchOutputSchema },
    async (input) => formatFacebookAdSearch(await executor2.facebookAdSearch(input), input)
  );
  // This description is registered without the report note.
  registerLiveWebTool(
    "facebook_ad_transcribe",
    "Facebook Ad Transcription",
    "Transcribe audio from a Facebook ad video. Returns full transcript and timestamped chunks. Use the videoUrl value from facebook_page_intel results.",
    { inputSchema: FacebookAdTranscribeInputSchema },
    async (input) => formatFacebookAdTranscribe(await executor2.facebookAdTranscribe(input), input)
  );
  registerLiveWebTool(
    "maps_place_intel",
    "Google Maps Business Profile Details",
    withReportNote('Extract Google Maps business intelligence for one known/named business: rating, review count, category, address, phone, website, hours, booking URL, review histogram, review topics, about attributes, entity IDs, and optional review cards. Do not use this for category searches, local market prospect lists, or requests for multiple GMB/GBP profiles; use maps_search first for those. Split business name from location (e.g. "Elite Roofing Denver CO" => businessName "Elite Roofing", location "Denver, CO"). Pass includeReviews true when the user asks for reviews/customer pain.'),
    { inputSchema: MapsPlaceIntelInputSchema, outputSchema: MapsPlaceIntelOutputSchema },
    async (input) => formatMapsPlaceIntel(await executor2.mapsPlaceIntel(input), input)
  );
  registerLiveWebTool(
    "maps_search",
    "Google Maps Business Search",
    withReportNote('Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.'),
    { inputSchema: MapsSearchInputSchema, outputSchema: MapsSearchOutputSchema },
    async (input) => formatMapsSearch(await executor2.mapsSearch(input), input)
  );

  // credits_info carries its own annotations (idempotent, closed-world), so it
  // is registered directly rather than through the live-web helper.
  const creditsTitle = "MCP Scraper Credits & Costs";
  server2.registerTool("credits_info", {
    title: creditsTitle,
    description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",
    inputSchema: CreditsInfoInputSchema,
    outputSchema: CreditsInfoOutputSchema,
    annotations: {
      title: creditsTitle,
      readOnlyHint: true,
      destructiveHint: false,
      idempotentHint: true,
      openWorldHint: false
    }
  }, async (input) => formatCreditsInfo(await executor2.creditsInfo(input), input));
  return server2;
}
|