mcp-scraper 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -17
- package/dist/bin/api-server.cjs +6 -3
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +1 -1
- package/dist/bin/browser-agent-stdio-server.cjs +8 -5
- package/dist/bin/browser-agent-stdio-server.cjs.map +1 -1
- package/dist/bin/browser-agent-stdio-server.js +7 -356
- package/dist/bin/browser-agent-stdio-server.js.map +1 -1
- package/dist/bin/mcp-scraper-combined-stdio-server.cjs +1990 -0
- package/dist/bin/mcp-scraper-combined-stdio-server.cjs.map +1 -0
- package/dist/bin/mcp-scraper-combined-stdio-server.d.cts +1 -0
- package/dist/bin/mcp-scraper-combined-stdio-server.d.ts +1 -0
- package/dist/bin/mcp-scraper-combined-stdio-server.js +51 -0
- package/dist/bin/mcp-scraper-combined-stdio-server.js.map +1 -0
- package/dist/bin/mcp-stdio-server.cjs +7 -4
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +3 -3
- package/dist/bin/mcp-stdio-server.js.map +1 -1
- package/dist/{chunk-OR7DLLH2.js → chunk-4SIZ4G63.js} +8 -4
- package/dist/chunk-4SIZ4G63.js.map +1 -0
- package/dist/chunk-E6IQRQ2T.js +360 -0
- package/dist/chunk-E6IQRQ2T.js.map +1 -0
- package/dist/chunk-RJ6I52AM.js +7 -0
- package/dist/chunk-RJ6I52AM.js.map +1 -0
- package/dist/{server-CJMX2QUM.js → server-SUSIMF72.js} +3 -3
- package/package.json +4 -2
- package/dist/chunk-OR7DLLH2.js.map +0 -1
- package/dist/chunk-XR65SANX.js +0 -7
- package/dist/chunk-XR65SANX.js.map +0 -1
- /package/dist/{server-CJMX2QUM.js.map → server-SUSIMF72.js.map} +0 -0
|
@@ -0,0 +1,1990 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
|
|
4
|
+
// bin/mcp-scraper-combined-stdio-server.ts
|
|
5
|
+
var import_node_fs4 = require("fs");
|
|
6
|
+
var import_node_os3 = require("os");
|
|
7
|
+
var import_node_path4 = require("path");
|
|
8
|
+
var import_mcp3 = require("@modelcontextprotocol/sdk/server/mcp.js");
|
|
9
|
+
var import_stdio = require("@modelcontextprotocol/sdk/server/stdio.js");
|
|
10
|
+
|
|
11
|
+
// src/harvest-timeout.ts
|
|
12
|
+
// Upper bound (ms) used to cap the client-side wait; 3e5 ms = 5 minutes.
var VERCEL_FUNCTION_MAX_MS = 3e5;
// Extra time (ms) the client waits on top of the server budget.
var CLIENT_OVER_SERVER_MARGIN_MS = 15e3;

/**
 * Pick server and client timeout budgets for a harvest request.
 *
 * @param {number} maxQuestions - Requested question count. Non-finite or
 *   non-positive values fall back to 30; fractions are truncated.
 * @param {boolean} [serpOnly=false] - When true the smallest server budget
 *   is always used, regardless of `maxQuestions`.
 * @returns {{ serverMs: number, clientMs: number }} Budgets in milliseconds.
 *   `clientMs` is the server budget plus the margin, capped 5 s below
 *   `VERCEL_FUNCTION_MAX_MS`.
 */
function harvestTimeoutBudget(maxQuestions, serpOnly = false) {
  const usable = Number.isFinite(maxQuestions) && maxQuestions > 0;
  const questionCount = usable ? Math.trunc(maxQuestions) : 30;

  // [inclusive upper bound on question count, server budget in ms]
  const tiers = [
    [50, 11e4],
    [100, 18e4],
    [150, 24e4],
  ];
  const tier = serpOnly ? tiers[0] : tiers.find(([limit]) => questionCount <= limit);
  const serverMs = tier ? tier[1] : 28e4;

  const clientCeilingMs = VERCEL_FUNCTION_MAX_MS - 5e3;
  const clientMs = Math.min(serverMs + CLIENT_OVER_SERVER_MARGIN_MS, clientCeilingMs);
  return { serverMs, clientMs };
}
|
|
24
|
+
|
|
25
|
+
// src/mcp/http-mcp-tool-executor.ts
|
|
26
|
+
/**
 * Executes MCP Scraper tools by POSTing JSON to the HTTP API.
 *
 * Every tool method resolves to an MCP-style result:
 * `{ content: [{ type: "text", text }], isError? }`. Failures (HTTP errors,
 * timeouts, unreadable responses) are reported through `isError: true`
 * rather than by throwing.
 *
 * Environment variables read by the constructor:
 * - `MCP_SCRAPER_HTTP_TIMEOUT_MS`: positive number; overrides every
 *   per-tool timeout computed below.
 * - `MCP_SCRAPER_SERP_INTELLIGENCE_HTTP_TIMEOUT_MS`: positive number; timeout
 *   for the serp-intelligence endpoints (defaults to the general timeout).
 */
var HttpMcpToolExecutor = class {
  baseUrl;
  apiKey;
  timeoutMs;
  httpTimeoutOverrideMs;
  serpIntelligenceTimeoutMs;

  /**
   * @param {string} baseUrl2 - API base URL; one trailing slash is removed.
   * @param {string} apiKey2 - Sent as the `x-api-key` header on every call.
   */
  constructor(baseUrl2, apiKey2) {
    this.baseUrl = baseUrl2.replace(/\/$/, "");
    this.apiKey = apiKey2;
    const rawOverride = process.env.MCP_SCRAPER_HTTP_TIMEOUT_MS;
    const parsedOverride = rawOverride === void 0 ? NaN : Number(rawOverride);
    this.httpTimeoutOverrideMs = Number.isFinite(parsedOverride) && parsedOverride > 0 ? parsedOverride : null;
    this.timeoutMs = this.httpTimeoutOverrideMs ?? 11e4;
    const configuredSerpIntelligenceTimeoutMs = Number(
      process.env.MCP_SCRAPER_SERP_INTELLIGENCE_HTTP_TIMEOUT_MS ?? this.timeoutMs
    );
    this.serpIntelligenceTimeoutMs =
      Number.isFinite(configuredSerpIntelligenceTimeoutMs) && configuredSerpIntelligenceTimeoutMs > 0
        ? configuredSerpIntelligenceTimeoutMs
        : this.timeoutMs;
  }

  /**
   * POST `body` as JSON to `path` and wrap the response as a tool result.
   *
   * @param {string} path - Path appended to `baseUrl`.
   * @param {unknown} body - JSON-serialisable request payload.
   * @param {number} [timeoutMs] - Abort deadline in ms; defaults to `this.timeoutMs`.
   * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
   */
  async call(path, body, timeoutMs = this.timeoutMs) {
    try {
      const res = await fetch(`${this.baseUrl}${path}`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-api-key": this.apiKey
        },
        body: JSON.stringify(body),
        signal: AbortSignal.timeout(timeoutMs)
      });
      // Read the body as text first: gateways and proxies can answer with
      // HTML or an empty body, and res.json() would turn that into an opaque
      // "Unexpected token" SyntaxError that hides the HTTP status.
      const rawBody = await res.text();
      let data;
      try {
        data = JSON.parse(rawBody);
      } catch {
        return {
          content: [{
            type: "text",
            text: JSON.stringify({
              error: `MCP Scraper returned a non-JSON response (HTTP ${res.status}).`,
              error_type: "invalid_response",
              retryable: res.status >= 500,
              status: res.status,
              path,
              body_preview: rawBody.slice(0, 500)
            })
          }],
          isError: true
        };
      }
      if (!res.ok) {
        return { content: [{ type: "text", text: JSON.stringify(data) }], isError: true };
      }
      return { content: [{ type: "text", text: JSON.stringify(data) }] };
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      // AbortSignal.timeout() rejects fetch with a DOMException named "TimeoutError".
      if (err instanceof DOMException && err.name === "TimeoutError") {
        return {
          content: [{
            type: "text",
            text: JSON.stringify({
              error: "mcp_request_timeout",
              error_type: "timeout",
              retryable: true,
              path,
              timeoutMs,
              message: `MCP Scraper request exceeded ${Math.round(timeoutMs / 1e3)}s and was cancelled. Retry with fewer results or use the async API for deep harvests.`
            })
          }],
          isError: true
        };
      }
      return { content: [{ type: "text", text: msg }], isError: true };
    }
  }

  /** POST /harvest/sync with a timeout sized from `input.maxQuestions`. */
  harvestPaa(input) {
    const timeoutMs = this.httpTimeoutOverrideMs ?? harvestTimeoutBudget(input.maxQuestions ?? 30).clientMs;
    return this.call("/harvest/sync", input, timeoutMs);
  }

  /** POST /harvest/sync with `serpOnly: true` and the serp-only timeout budget. */
  searchSerp(input) {
    const timeoutMs = this.httpTimeoutOverrideMs ?? harvestTimeoutBudget(0, true).clientMs;
    return this.call("/harvest/sync", { ...input, serpOnly: true }, timeoutMs);
  }

  extractUrl(input) {
    return this.call("/extract-url", input);
  }

  mapSiteUrls(input) {
    return this.call("/map-urls", input);
  }

  extractSite(input) {
    return this.call("/extract-site", input);
  }

  youtubeHarvest(input) {
    return this.call("/youtube/harvest", input);
  }

  youtubeTranscribe(input) {
    return this.call("/youtube/transcribe", input);
  }

  facebookPageIntel(input) {
    return this.call("/facebook/page-intel", input);
  }

  facebookAdSearch(input) {
    return this.call("/facebook/search", input);
  }

  facebookAdTranscribe(input) {
    return this.call("/facebook/transcribe", input);
  }

  mapsPlaceIntel(input) {
    return this.call("/maps/place", input);
  }

  mapsSearch(input) {
    return this.call("/maps/search", input);
  }

  /**
   * POST /directory/run. Unless the env override is set, the timeout is
   * 120 s per batch of `concurrency` cities, clamped to [180 s, 900 s].
   */
  directoryWorkflow(input) {
    // A NaN maxCities would propagate into a NaN timeout, so treat it like a missing value.
    const cityCount = typeof input.maxCities === "number" && !Number.isNaN(input.maxCities) ? input.maxCities : 25;
    const concurrency = typeof input.concurrency === "number" && input.concurrency > 0 ? input.concurrency : 5;
    const timeoutMs = this.httpTimeoutOverrideMs ?? Math.min(9e5, Math.max(18e4, Math.ceil(cityCount / concurrency) * 12e4));
    return this.call("/directory/run", input, timeoutMs);
  }

  creditsInfo(input) {
    return this.call("/billing/credits", input);
  }

  captureSerpSnapshot(input) {
    return this.call("/serp-intelligence/capture", input, this.serpIntelligenceTimeoutMs);
  }

  captureSerpPageSnapshots(input) {
    return this.call("/serp-intelligence/page-snapshots", input, this.serpIntelligenceTimeoutMs);
  }
};
|
|
133
|
+
|
|
134
|
+
// src/mcp/browser-agent-mcp-server.ts
|
|
135
|
+
var import_mcp = require("@modelcontextprotocol/sdk/server/mcp.js");
|
|
136
|
+
var import_node_fs = require("fs");
|
|
137
|
+
var import_node_os = require("os");
|
|
138
|
+
var import_node_path = require("path");
|
|
139
|
+
|
|
140
|
+
// src/version.ts
|
|
141
|
+
var PACKAGE_VERSION = "0.2.3";
|
|
142
|
+
|
|
143
|
+
// src/mcp/browser-agent-tool-schemas.ts
|
|
144
|
+
var import_zod = require("zod");
|
|
145
|
+
// Input shape for the browser_open tool.
var BrowserOpenInputSchema = {
  label: import_zod.z
    .string()
    .optional()
    .describe("Optional human label for this session, shown in the watch console."),
  url: import_zod.z
    .string()
    .url()
    .optional()
    .describe("Optional URL to navigate to immediately after opening."),
  profile: import_zod.z
    .string()
    .optional()
    .describe("Optional saved profile name to load a logged-in session for a site."),
  // 60 seconds up to 259200 seconds (72 hours).
  timeout_seconds: import_zod.z
    .number()
    .int()
    .min(60)
    .max(259200)
    .optional()
    .describe("How long the session may live before auto-termination. Defaults to 600. The browser idles into a zero-cost standby between actions, so a longer timeout is cheap.")
};

// Input shape shared by the tools that only need a session id.
var BrowserSessionInputSchema = {
  session_id: import_zod.z
    .string()
    .describe("The session id returned by browser_open.")
};

// Input shape for the browser_goto tool.
var BrowserGotoInputSchema = {
  session_id: import_zod.z
    .string()
    .describe("The session id returned by browser_open."),
  url: import_zod.z
    .string()
    .url()
    .describe("URL to navigate the browser to.")
};

// Input shape for the browser_click tool.
var BrowserClickInputSchema = {
  session_id: import_zod.z
    .string()
    .describe("The session id returned by browser_open."),
  x: import_zod.z
    .number()
    .describe("X coordinate to click, in screenshot pixels. Use the x of an element from the latest screenshot."),
  y: import_zod.z
    .number()
    .describe("Y coordinate to click, in screenshot pixels."),
  button: import_zod.z
    .enum(["left", "right", "middle"])
    .default("left")
    .describe("Mouse button."),
  num_clicks: import_zod.z
    .number()
    .int()
    .min(1)
    .max(3)
    .optional()
    .describe("Number of clicks, e.g. 2 for double-click.")
};

// Input shape for the browser_type tool.
var BrowserTypeInputSchema = {
  session_id: import_zod.z
    .string()
    .describe("The session id returned by browser_open."),
  text: import_zod.z
    .string()
    .describe("Text to type at the current focus. Click a field first to focus it."),
  delay: import_zod.z
    .number()
    .int()
    .min(0)
    .max(500)
    .optional()
    .describe("Optional per-keystroke delay in ms for human-like typing.")
};

// Input shape for the browser_scroll tool.
var BrowserScrollInputSchema = {
  session_id: import_zod.z
    .string()
    .describe("The session id returned by browser_open."),
  delta_y: import_zod.z
    .number()
    .default(5)
    .describe("Vertical scroll in wheel units. Positive scrolls down, negative up."),
  delta_x: import_zod.z
    .number()
    .default(0)
    .describe("Horizontal scroll in wheel units."),
  x: import_zod.z
    .number()
    .optional()
    .describe("X position to scroll at. Defaults to screen center."),
  y: import_zod.z
    .number()
    .optional()
    .describe("Y position to scroll at. Defaults to screen center.")
};

// Input shape for the browser_press tool.
var BrowserPressInputSchema = {
  session_id: import_zod.z
    .string()
    .describe("The session id returned by browser_open."),
  keys: import_zod.z
    .array(import_zod.z.string())
    .min(1)
    .describe('Keys or combinations to press, e.g. ["Return"], ["Ctrl+a"], ["Ctrl+Shift+Tab"].')
};

// Input shape for the browser_replay_stop tool.
var BrowserReplayStopInputSchema = {
  session_id: import_zod.z
    .string()
    .describe("The session id returned by browser_open."),
  replay_id: import_zod.z
    .string()
    .describe("The replay id returned by browser_replay_start.")
};

// Input shape for the browser_replay_download tool.
var BrowserReplayDownloadInputSchema = {
  session_id: import_zod.z
    .string()
    .describe("The session id returned by browser_open."),
  replay_id: import_zod.z
    .string()
    .describe("The replay id returned by browser_replay_start or browser_list_replays."),
  filename: import_zod.z
    .string()
    .optional()
    .describe("Optional local MP4 filename. Defaults to a timestamped replay filename.")
};

// Input shape for the browser_list_sessions tool.
var BrowserListInputSchema = {
  include_closed: import_zod.z
    .boolean()
    .default(false)
    .describe("Include closed sessions in the list.")
};
|
|
193
|
+
|
|
194
|
+
// src/mcp/browser-agent-mcp-server.ts
|
|
195
|
+
/**
 * Wrap a value as a single-text-block MCP tool result.
 *
 * @param {unknown} value - Serialised with JSON.stringify into the text block.
 * @param {boolean} [isError=false] - Copied onto the result's `isError` flag.
 * @returns {{content: Array<{type: string, text: string}>, isError: boolean}}
 */
function textResult(value, isError = false) {
  const block = { type: "text", text: JSON.stringify(value) };
  return { content: [block], isError };
}
|
|
198
|
+
/**
 * Resolve the directory under which local output is written.
 *
 * @returns {string} The trimmed MCP_SCRAPER_OUTPUT_DIR when it is non-empty,
 *   otherwise `<home>/Downloads/mcp-scraper`.
 */
function outputBaseDir() {
  const configured = process.env.MCP_SCRAPER_OUTPUT_DIR?.trim();
  if (configured) return configured;
  return import_node_path.join(import_node_os.homedir(), "Downloads", "mcp-scraper");
}
|
|
201
|
+
/**
 * Reduce a string to a filename-safe fragment.
 *
 * Runs of characters outside [a-zA-Z0-9._-] become a single "-", leading and
 * trailing dashes are removed, and the result is cut to 120 characters.
 *
 * @param {string} value - Raw text to sanitise.
 * @returns {string} The sanitised fragment, or "replay" when nothing is left.
 */
function safeFilePart(value) {
  const dashed = value.replace(/[^a-zA-Z0-9._-]+/g, "-");
  const trimmed = dashed.replace(/^-+|-+$/g, "");
  const limited = trimmed.slice(0, 120);
  return limited || "replay";
}
|
|
204
|
+
/**
 * Build the local path for a downloaded replay video.
 *
 * @param {string} sessionId - Used in the default filename.
 * @param {string} replayId - Used in the default filename.
 * @param {string} [filename] - Optional caller-supplied name; it is sanitised
 *   and a trailing ".mp4" is dropped before the extension is re-added.
 * @returns {string} `<output dir>/browser-replays/<name>.mp4`
 */
function replayFilePath(sessionId, replayId, filename) {
  const customName = filename?.trim();
  // ISO timestamp with ":" and "." replaced so it is usable in a filename.
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
  let baseName;
  if (customName) {
    baseName = safeFilePart(customName).replace(/\.mp4$/i, "");
  } else {
    baseName = `${timestamp}-${safeFilePart(sessionId)}-${safeFilePart(replayId)}`;
  }
  return import_node_path.join(outputBaseDir(), "browser-replays", `${baseName}.mp4`);
}
|
|
210
|
+
/**
 * Register the browser_* tools on an MCP server. Each tool forwards to the
 * `/agent/sessions` HTTP API and returns the API's JSON as a text result.
 *
 * @param {object} server2 - MCP server exposing `registerTool(name, config, handler)`.
 * @param {object} opts
 * @param {string} opts.baseUrl - API base URL; one trailing slash is removed.
 * @param {string} opts.apiKey - Sent as the `x-api-key` header.
 * @param {string} [opts.consoleBaseUrl] - Base for `watch_url` links; defaults to `opts.baseUrl`.
 * @param {number} [opts.timeoutMs] - Per-request timeout in ms; defaults to 90000.
 */
function registerBrowserAgentMcpTools(server2, opts) {
  const baseUrl2 = opts.baseUrl.replace(/\/$/, "");
  const consoleBase = (opts.consoleBaseUrl ?? opts.baseUrl).replace(/\/$/, "");
  const timeoutMs = opts.timeoutMs ?? 9e4;

  // Session ids come from tool input; encode them so they cannot change the
  // request path (the replay download path is encoded the same way).
  const sessionPath = (sessionId, suffix = "") => `/agent/sessions/${encodeURIComponent(sessionId)}${suffix}`;

  // Read a response body once and decode it as JSON. A non-JSON body yields
  // {} for successful responses; for failed ones it yields an error object
  // carrying the body text (capped) or the HTTP status, so the failure is
  // not reported as an empty object.
  async function readBody(res) {
    const raw = await res.text().catch(() => "");
    try {
      return JSON.parse(raw);
    } catch {
      if (res.ok) return {};
      return { error: raw.trim().slice(0, 1e3) || `HTTP ${res.status}` };
    }
  }

  // Send a JSON request; never throws. Resolves to { ok, data }.
  async function req(method, path, body) {
    try {
      const res = await fetch(`${baseUrl2}${path}`, {
        method,
        headers: { "Content-Type": "application/json", "x-api-key": opts.apiKey },
        body: body ? JSON.stringify(body) : void 0,
        signal: AbortSignal.timeout(timeoutMs)
      });
      return { ok: res.ok, data: await readBody(res) };
    } catch (err) {
      return { ok: false, data: { error: err instanceof Error ? err.message : String(err) } };
    }
  }

  // Fetch a replay MP4 and write it under <output dir>/browser-replays.
  async function downloadReplay(sessionId, replayId, filename) {
    const path = `/agent/sessions/${encodeURIComponent(sessionId)}/replays/${encodeURIComponent(replayId)}/download`;
    try {
      const res = await fetch(`${baseUrl2}${path}`, {
        method: "GET",
        headers: { "x-api-key": opts.apiKey },
        signal: AbortSignal.timeout(timeoutMs)
      });
      if (!res.ok) {
        // A body can only be consumed once, so it is read as text and then
        // parsed, instead of calling res.text() after a failed res.json().
        return { ok: false, data: await readBody(res) };
      }
      const bytes = Buffer.from(await res.arrayBuffer());
      const filePath = replayFilePath(sessionId, replayId, filename);
      import_node_fs.mkdirSync(import_node_path.join(outputBaseDir(), "browser-replays"), { recursive: true });
      import_node_fs.writeFileSync(filePath, bytes);
      return {
        ok: true,
        data: {
          replay_id: replayId,
          file_path: filePath,
          bytes: bytes.length,
          mime_type: res.headers.get("content-type") ?? "video/mp4",
          download_url: `${baseUrl2}${path}`
        }
      };
    } catch (err) {
      return { ok: false, data: { error: err instanceof Error ? err.message : String(err) } };
    }
  }

  const annotations = (title, readOnly = false) => ({
    title,
    readOnlyHint: readOnly,
    destructiveHint: false,
    idempotentHint: false,
    openWorldHint: true
  });

  // Convert a { ok, data } response into a tool result.
  const respond = (res) => textResult(res.data, !res.ok);

  server2.registerTool(
    "browser_open",
    {
      title: "Open Browser Session",
      description: "Open a fresh cloud browser you can drive. Returns a session_id used by all other browser_* tools, and a watch_url where a human can watch live or take over. Anti-bot stealth and automatic CAPTCHA/Cloudflare solving are on by default: if a Cloudflare or CAPTCHA challenge appears, do NOT click it \u2014 wait a few seconds and call browser_screenshot again; it is solved automatically. Billing: metered per second of active browser work at ~4 credits per minute; idle and standby time are free. Call browser_close when done to stop the meter. After opening, call browser_screenshot to see the page.",
      inputSchema: BrowserOpenInputSchema,
      annotations: annotations("Open Browser Session")
    },
    async (input) => {
      const open = await req("POST", "/agent/sessions", {
        label: input.label,
        profile: input.profile,
        timeout_seconds: input.timeout_seconds
      });
      if (!open.ok) return textResult(open.data, true);
      const session = open.data;
      const result = {
        session_id: session.session_id,
        watch_url: `${consoleBase}/console/${session.session_id}`,
        live_view_url: session.live_view_url ?? null,
        hint: "Call browser_screenshot to see the page. Click by the x,y of an element from the snapshot."
      };
      if (input.url) {
        const nav = await req("POST", sessionPath(session.session_id, "/goto"), { url: input.url });
        // The session is open either way, so still return it, but surface a
        // failed initial navigation instead of discarding it.
        if (!nav.ok) result.navigation_error = nav.data;
      }
      return textResult(result);
    }
  );

  server2.registerTool(
    "browser_screenshot",
    {
      title: "See Page (Screenshot + Elements)",
      description: "Capture what the browser currently shows. Returns a screenshot image PLUS a text snapshot listing interactive elements with their center x,y coordinates, the page url and title, and visible text. This is your primary way to perceive the page. Click elements by their listed x,y. If a Cloudflare/CAPTCHA challenge is visible, wait and screenshot again rather than clicking it.",
      inputSchema: BrowserSessionInputSchema,
      annotations: annotations("See Page", true)
    },
    async (input) => {
      const res = await req("POST", sessionPath(input.session_id, "/screenshot"));
      if (!res.ok) return textResult(res.data, true);
      const { image_base64, mime_type, url, title, elements, text } = res.data;
      const content = [];
      if (image_base64) content.push({ type: "image", data: image_base64, mimeType: mime_type ?? "image/png" });
      content.push({
        type: "text",
        text: JSON.stringify({ url, title, elements, text })
      });
      return { content };
    }
  );

  server2.registerTool(
    "browser_read",
    {
      title: "Read Page Text + Elements",
      description: "Return the page url, title, visible text, and the list of interactive elements (with x,y) without an image. Cheaper than browser_screenshot when you only need to read content or find a target element to click.",
      inputSchema: BrowserSessionInputSchema,
      annotations: annotations("Read Page", true)
    },
    async (input) => respond(await req("POST", sessionPath(input.session_id, "/read")))
  );

  server2.registerTool(
    "browser_goto",
    {
      title: "Navigate To URL",
      description: "Navigate the browser to a URL. Follow with browser_screenshot to see the result.",
      inputSchema: BrowserGotoInputSchema,
      annotations: annotations("Navigate To URL")
    },
    async (input) => respond(await req("POST", sessionPath(input.session_id, "/goto"), { url: input.url }))
  );

  server2.registerTool(
    "browser_click",
    {
      title: "Click",
      description: "Click at x,y (screenshot pixel coordinates). Use the x,y of a target element from the latest browser_screenshot or browser_read.",
      inputSchema: BrowserClickInputSchema,
      annotations: annotations("Click")
    },
    async (input) => {
      const res = await req("POST", sessionPath(input.session_id, "/click"), {
        x: input.x,
        y: input.y,
        button: input.button,
        num_clicks: input.num_clicks
      });
      return respond(res);
    }
  );

  server2.registerTool(
    "browser_type",
    {
      title: "Type Text",
      description: 'Type text at the current focus. Click an input field first to focus it. Use browser_press with ["Return"] to submit.',
      inputSchema: BrowserTypeInputSchema,
      annotations: annotations("Type Text")
    },
    async (input) => {
      const res = await req("POST", sessionPath(input.session_id, "/type"), { text: input.text, delay: input.delay });
      return respond(res);
    }
  );

  server2.registerTool(
    "browser_scroll",
    {
      title: "Scroll",
      description: "Scroll the page. Positive delta_y scrolls down. Follow with browser_screenshot to see newly revealed content.",
      inputSchema: BrowserScrollInputSchema,
      annotations: annotations("Scroll")
    },
    async (input) => {
      const res = await req("POST", sessionPath(input.session_id, "/scroll"), {
        delta_y: input.delta_y,
        delta_x: input.delta_x,
        x: input.x,
        y: input.y
      });
      return respond(res);
    }
  );

  server2.registerTool(
    "browser_press",
    {
      title: "Press Keys",
      description: 'Press keys or combinations, e.g. ["Return"] to submit, ["Ctrl+a"] to select all, ["Ctrl+Shift+Tab"] to switch tabs.',
      inputSchema: BrowserPressInputSchema,
      annotations: annotations("Press Keys")
    },
    async (input) => respond(await req("POST", sessionPath(input.session_id, "/press"), { keys: input.keys }))
  );

  server2.registerTool(
    "browser_replay_start",
    {
      title: "Start Recording",
      description: "Start recording an MP4 replay of the session. Returns replay_id, view_url when available, and a download_url. Use to capture a task for later review; stop with browser_replay_stop.",
      inputSchema: BrowserSessionInputSchema,
      annotations: annotations("Start Recording")
    },
    async (input) => respond(await req("POST", sessionPath(input.session_id, "/replay/start")))
  );

  server2.registerTool(
    "browser_replay_stop",
    {
      title: "Stop Recording",
      description: "Stop a replay recording and expose the final view_url and download_url. Use browser_replay_download to save the MP4 locally.",
      inputSchema: BrowserReplayStopInputSchema,
      annotations: annotations("Stop Recording")
    },
    async (input) => {
      const res = await req("POST", sessionPath(input.session_id, "/replay/stop"), { replay_id: input.replay_id });
      return respond(res);
    }
  );

  server2.registerTool(
    "browser_list_replays",
    {
      title: "List Replay Videos",
      description: "List replay recordings for a browser session, including final view_url and authenticated download_url values when available.",
      inputSchema: BrowserSessionInputSchema,
      annotations: annotations("List Replay Videos", true)
    },
    async (input) => respond(await req("GET", sessionPath(input.session_id, "/replays")))
  );

  server2.registerTool(
    "browser_replay_download",
    {
      title: "Download Replay MP4",
      description: "Download a replay recording through MCP Scraper and save the MP4 locally under MCP_SCRAPER_OUTPUT_DIR/browser-replays. Use after browser_replay_stop or browser_list_replays.",
      inputSchema: BrowserReplayDownloadInputSchema,
      annotations: annotations("Download Replay MP4", true)
    },
    async (input) => respond(await downloadReplay(input.session_id, input.replay_id, input.filename))
  );

  server2.registerTool(
    "browser_close",
    {
      title: "Close Browser Session",
      description: "Close and release the browser session when the task is done.",
      inputSchema: BrowserSessionInputSchema,
      annotations: { title: "Close Browser Session", readOnlyHint: false, destructiveHint: true, idempotentHint: true, openWorldHint: false }
    },
    async (input) => respond(await req("DELETE", sessionPath(input.session_id)))
  );

  server2.registerTool(
    "browser_list_sessions",
    {
      title: "List Browser Sessions",
      description: "List your browser sessions and their status, with a watch_url for each.",
      inputSchema: BrowserListInputSchema,
      annotations: annotations("List Browser Sessions", true)
    },
    async (input) => {
      const res = await req("GET", `/agent/sessions${input.include_closed ? "?all=1" : ""}`);
      if (!res.ok) return textResult(res.data, true);
      const sessions = (res.data.sessions ?? []).map((s) => ({ ...s, watch_url: `${consoleBase}/console/${s.session_id}` }));
      return textResult({ sessions });
    }
  );
}
|
|
482
|
+
|
|
483
|
+
// src/mcp/paa-mcp-server.ts
|
|
484
|
+
var import_mcp2 = require("@modelcontextprotocol/sdk/server/mcp.js");
|
|
485
|
+
var import_node_fs3 = require("fs");
|
|
486
|
+
var import_node_path3 = require("path");
|
|
487
|
+
|
|
488
|
+
// src/mcp/mcp-response-formatter.ts
|
|
489
|
+
var import_node_fs2 = require("fs");
|
|
490
|
+
var import_node_os2 = require("os");
|
|
491
|
+
var import_node_path2 = require("path");
|
|
492
|
+
|
|
493
|
+
// src/errors.ts
|
|
494
|
+
/**
 * Replace mentions of "kernel" / "kernel.sh" in a message with neutral
 * wording, then collapse runs of spaces and trim.
 *
 * @param {string} message - Text to rewrite.
 * @returns {string} The rewritten text.
 */
function sanitizeVendorName(message) {
  // Applied in order; later rules see the output of earlier ones.
  const rewrites = [
    [/kernel\.sh\s+sessions?/gi, "sessions"],
    [/kernel\.sh\s+session/gi, "this session"],
    [/kernel\.sh/gi, "the service"],
    [/kernel\s+sessions?/gi, "sessions"],
    [/kernel\s+session/gi, "this session"],
    [/\bkernel\b/gi, "the service"],
    [/ +/g, " "],
  ];
  let cleaned = message;
  for (const [pattern, replacement] of rewrites) {
    cleaned = cleaned.replace(pattern, replacement);
  }
  return cleaned.trim();
}
|
|
497
|
+
|
|
498
|
+
// src/mcp/mcp-response-formatter.ts
|
|
499
|
+
var reportSavingEnabled = true;
|
|
500
|
+
/**
 * Rename kernel-specific identifiers and keys in a text payload, then pass
 * the result through sanitizeVendorName.
 *
 * @param {string} text - Raw text (typically serialised JSON or an error message).
 * @returns {string} The sanitised text.
 */
function sanitizeVendorText(text) {
  // Identifier renames run first, in this order, before the generic pass.
  const identifierRewrites = [
    [/kernel_session_id/gi, "browser_session_id"],
    [/kernel_delete_succeeded/gi, "session_cleanup_succeeded"],
    [/kernel_delete_started/gi, "session_cleanup_started"],
    [/kernel_delete_error/gi, "session_cleanup_error"],
    [/kernelSessionId/g, "browserSessionId"],
    [/kernelProxyId/g, "proxyId"],
    [/KERNEL_API_KEY/g, "BROWSER_SERVICE_API_KEY"],
    [/"kernel"\s*:/gi, '"browserRuntime":'],
  ];
  let renamed = text;
  for (const [pattern, replacement] of identifierRewrites) {
    renamed = renamed.replace(pattern, replacement);
  }
  return sanitizeVendorName(renamed);
}
|
|
505
|
+
/**
 * Turn a report title into a lowercase, dash-separated slug of at most
 * 80 characters.
 *
 * @param {string} input - Title text.
 * @returns {string} The slug, or "mcp-scraper-report" when nothing is left.
 */
function slugifyReportName(input) {
  const lowered = input.toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9]+/g, "-");
  const trimmed = dashed.replace(/^-+|-+$/g, "");
  const slug = trimmed.slice(0, 80);
  return slug || "mcp-scraper-report";
}
|
|
508
|
+
/**
 * Extract the title from a markdown report: the first line starting with "# ".
 *
 * @param {string} full - Full report text.
 * @returns {string} The heading text, or "MCP Scraper Report" when there is
 *   no such line or the heading is blank.
 */
function reportTitle(full) {
  for (const line of full.split("\n")) {
    if (!line.startsWith("# ")) continue;
    // Only the first heading counts, even if it turns out to be blank.
    return line.replace(/^#\s+/, "").trim() || "MCP Scraper Report";
  }
  return "MCP Scraper Report";
}
|
|
512
|
+
/**
 * Resolve the directory under which reports and screenshots are written.
 *
 * @returns {string} The trimmed MCP_SCRAPER_OUTPUT_DIR when it is non-empty,
 *   otherwise `<home>/Downloads/mcp-scraper`.
 */
function outputBaseDir2() {
  const configured = process.env.MCP_SCRAPER_OUTPUT_DIR?.trim();
  if (configured) return configured;
  return import_node_path2.join(import_node_os2.homedir(), "Downloads", "mcp-scraper");
}
|
|
515
|
+
/**
 * Write a report to a timestamped markdown file in the output directory.
 *
 * Saving is skipped when `reportSavingEnabled` is false or the
 * MCP_SCRAPER_SAVE_REPORTS environment variable equals "false".
 *
 * @param {string} full - Full report text; its first "# " heading names the file.
 * @returns {string|null} Path of the written file, or null when saving is
 *   disabled or the write fails (failures are deliberately swallowed).
 */
function saveFullReport(full) {
  const savingDisabled = !reportSavingEnabled || process.env.MCP_SCRAPER_SAVE_REPORTS === "false";
  if (savingDisabled) return null;
  const targetDir = outputBaseDir2();
  try {
    import_node_fs2.mkdirSync(targetDir, { recursive: true });
    // ISO timestamp with ":" and "." replaced so it is usable in a filename.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const fileName = `${timestamp}-${slugifyReportName(reportTitle(full))}.md`;
    const destination = import_node_path2.join(targetDir, fileName);
    import_node_fs2.writeFileSync(destination, full, "utf8");
    return destination;
  } catch {
    return null;
  }
}
|
|
528
|
+
/**
 * Decode a base64 screenshot and write it as a PNG under
 * `<output dir>/screenshots`, named from a timestamp and the page URL.
 *
 * Saving is skipped when `reportSavingEnabled` is false or the
 * MCP_SCRAPER_SAVE_REPORTS environment variable equals "false".
 *
 * @param {string} base64 - Base64-encoded image data.
 * @param {string} url - Page URL; used to build the filename slug (max 60 chars).
 * @returns {string|null} Path of the written file, or null when saving is
 *   disabled or anything fails (failures are deliberately swallowed).
 */
function persistScreenshotLocally(base64, url) {
  if (process.env.MCP_SCRAPER_SAVE_REPORTS === "false" || !reportSavingEnabled) return null;
  try {
    const screenshotDir = import_node_path2.join(outputBaseDir2(), "screenshots");
    import_node_fs2.mkdirSync(screenshotDir, { recursive: true });
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const withoutScheme = url.replace(/^https?:\/\//, "");
    const urlSlug = withoutScheme
      .replace(/[^a-z0-9]+/gi, "-")
      .replace(/^-+|-+$/g, "")
      .slice(0, 60);
    const destination = import_node_path2.join(screenshotDir, `${timestamp}-${urlSlug}.png`);
    import_node_fs2.writeFileSync(destination, Buffer.from(base64, "base64"));
    return destination;
  } catch {
    return null;
  }
}
|
|
542
|
+
/**
 * Wrap report text as a single-text-block tool result, saving it to disk first.
 *
 * @param {string} content - Report text.
 * @returns {{content: Array<{type: string, text: string}>}} The report, with
 *   a "Saved" footer naming the file when saveFullReport returned a path.
 */
function oneBlock(content) {
  const savedTo = saveFullReport(content);
  let text = content;
  if (savedTo) {
    text = `${content}\n\n\u{1F4C4} Saved: \`${savedTo}\``;
  }
  return { content: [{ type: "text", text }] };
}
|
|
549
|
+
/**
 * Turn a parsed error payload into a one-line, human-readable message.
 *
 * Checked in order: the "insufficient_balance" and "mcp_request_timeout"
 * error values, then payloads with a string `error_code`, then any string
 * `error`, and finally the fallback.
 *
 * @param {object} body - Parsed error payload.
 * @param {string} fallback - Used when the payload has no usable message.
 * @returns {string} The formatted message ("Tool error" if fallback is empty).
 */
function formatStructuredError(body, fallback) {
  const { error, message } = body;

  switch (error) {
    case "insufficient_balance":
      return `Insufficient credits. Balance: ${body.balance_credits} credits. This call requires ${body.required_credits} credits. Top up at ${body.topup_url}`;
    case "mcp_request_timeout":
      return typeof message === "string" ? message : "MCP Scraper request timed out and was cancelled.";
    default:
      break;
  }

  if (typeof body.error_code === "string") {
    let detail = fallback;
    if (typeof error === "string") detail = error;
    else if (typeof message === "string") detail = message;
    const retryNote = body.retryable === true ? " Retryable: yes." : "";
    return `${body.error_code}: ${detail}${retryNote}${errorAttemptsSection(body)}`;
  }

  return typeof error === "string" ? error : fallback || "Tool error";
}
|
|
564
|
+
/**
 * Decode a raw tool result into either `{ data }` or `{ error }`.
 *
 * The first text block is parsed as JSON. The result is an error when the
 * raw result is flagged `isError` or the payload carries `error` /
 * `error_code`; otherwise `data` is the payload's `result` field, or the
 * payload itself when `result` is null or undefined.
 *
 * @param {{content: Array<{type: string, text?: string}>, isError?: boolean}} raw
 * @returns {{data: unknown} | {error: string}}
 */
function parseData(raw) {
  const textBlock = raw.content.find((block) => block.type === "text");
  const text = textBlock ? textBlock.text : "";
  try {
    const parsed = JSON.parse(text || "{}");
    const failed = raw.isError || parsed.error || parsed.error_code;
    if (!failed) {
      return { data: parsed.result ?? parsed };
    }
    return { error: sanitizeVendorText(formatStructuredError(parsed, text)) };
  } catch {
    // Reached for unparseable text and for payloads that cannot be inspected (e.g. JSON null).
    return raw.isError
      ? { error: sanitizeVendorText(text || "Tool error") }
      : { error: "Failed to parse tool response" };
  }
}
|
|
577
|
+
/**
 * Build the "Entity IDs" Markdown section.
 *
 * @param {{kgIds?: string[], cids?: string[], gcids?: string[]}|null|undefined} ids
 * @returns {string} Section text starting with a newline, or "" when `ids` is
 *   missing or every list is empty.
 */
function entityIdsSection(ids) {
  if (!ids) return "";

  const groups = [
    ["- **Knowledge Graph MID:** ", ids.kgIds],
    ["- **CID:** ", ids.cids],
    ["- **GCID:** ", ids.gcids],
  ];
  const bullets = groups
    .filter(([, values]) => values?.length)
    .map(([prefix, values]) => `${prefix}${values.join(", ")}`);

  if (bullets.length === 0) return "";
  return ["", "## Entity IDs", ...bullets].join("\n");
}
|
|
587
|
+
/**
 * Shorten a string to at most `max` UTF-16 code units, appending an ellipsis
 * when anything was cut.
 *
 * The cut point is moved back by one unit when it would fall between the two
 * halves of a surrogate pair, so the result never ends in a lone high
 * surrogate (which renders as a replacement character).
 *
 * @param {string|null|undefined} s - Text to shorten; falsy values yield "".
 * @param {number} max - Maximum number of code units to keep before the ellipsis.
 * @returns {string} The original value when it fits, otherwise the shortened text plus "…".
 */
function truncate(s, max) {
  if (!s) return "";
  // Written as a negated ">" so values without a numeric length are returned unchanged.
  if (!(s.length > max)) return s;

  let end = max;
  const lastKept = s.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    // The last kept unit is a high surrogate whose partner would be cut off.
    end -= 1;
  }
  return s.slice(0, end) + "\u2026";
}
|
|
591
|
+
/**
 * Make a value safe to place inside a single Markdown table cell.
 *
 * Newlines become spaces, pipes are backslash-escaped, runs of whitespace are
 * collapsed to one space, and the result is trimmed.
 *
 * @param {*} s - Value to render; null/undefined become "".
 * @returns {string} Single-line cell text.
 */
function cell(s) {
  const raw = s == null ? "" : String(s);
  const singleLine = raw.replace(/\r?\n+/g, " ");
  const escaped = singleLine.replace(/\|/g, "\\|");
  return escaped.replace(/\s+/g, " ").trim();
}
|
|
594
|
+
/**
 * Render the "Debug" Markdown section from a diagnostics debug object.
 *
 * Reads proxy, browser-session, network-location and SERP-navigation details
 * from `debug.request` / `debug.browser`, plus optional `debug.locationEvidence`.
 * The joined text is passed through `sanitizeVendorText` before being returned.
 *
 * @param {object|null|undefined} debug - Debug payload; non-objects yield "".
 * @returns {string} Section text, or "" when there is nothing to render.
 */
function debugSection(debug) {
  if (!debug || typeof debug !== "object") return "";

  // Strict tri-state rendering: only literal true/false count as known.
  const triState = (flag) => (flag === true ? "yes" : flag === false ? "no" : "unknown");

  const request = debug.request ?? {};
  const browser = debug.browser ?? {};
  const runtime = browser.browserRuntime ?? browser.kernel ?? {};
  const network = browser.networkLocation ?? {};
  const nav = browser.serpNavigation ?? {};
  const resolution = runtime.proxyResolution ?? {};
  const evidence = debug.locationEvidence;

  let candidates = "";
  if (Array.isArray(evidence?.candidates)) {
    candidates = evidence.candidates
      .slice(0, 4)
      .map((c) => `${c.city}, ${c.regionCode} (${c.count})`)
      .join(", ");
  }

  const proxyMode = request.proxyMode ?? runtime.proxyMode ?? "unknown";
  // Requested proxy is reported as yes/no only; anything but literal true is "no".
  const requestedProxy = runtime.requestedProxyIdPresent === true
    ? `yes (${runtime.requestedProxyIdSuffix ?? "redacted"})`
    : "no";

  const target = resolution.target;
  const targetPart = target ? ` \xB7 ${target.level ?? "city"} ${target.city}, ${target.state}` : "";
  const resolutionError = resolution.error ? ` \xB7 ${truncate(resolution.error, 180)}` : "";

  const retrievedProxy = runtime.retrievedProxyIdPresent === true
    ? `yes (${runtime.retrievedProxyIdSuffix ?? "redacted"})`
    : triState(runtime.retrievedProxyIdPresent);

  const geo = [network.ip, network.city, network.region, network.country].filter(Boolean).join(" \xB7 ") || network.error || "unknown";

  const lines = [
    "\n## Debug",
    `- Proxy mode: ${proxyMode} \xB7 requested proxy: ${requestedProxy}`,
    `- Proxy resolution: ${resolution.source ?? "unknown"}${targetPart}${resolutionError}`,
    `- Browser session: ${runtime.sessionId ?? "unknown"} \xB7 retrieved proxy: ${retrievedProxy}`,
    `- Browser IP geo: ${geo}`,
    `- Google URL: ${truncate(nav.requestedUrl, 240) || "unknown"}`,
    `- Final URL: ${truncate(nav.finalUrl, 240) || "unknown"} \xB7 CAPTCHA: ${triState(nav.captchaDetected)} \xB7 redirected: ${triState(nav.redirected)}`,
  ];

  if (evidence) {
    const expected = evidence.expected;
    const expectedPart = expected
      ? ` \xB7 expected ${expected.city}${expected.regionCode ? `, ${expected.regionCode}` : ""}`
      : "";
    const candidatePart = candidates ? ` \xB7 candidates ${candidates}` : "";
    lines.push(`- Location evidence: ${evidence.status}${expectedPart}${candidatePart}`);
  }

  return sanitizeVendorText(lines.join("\n"));
}
|
|
618
|
+
/**
 * Summarise up to five entries of `body.attempts` as a bulleted "Attempts:" list.
 *
 * @param {object} body - Error payload that may carry an `attempts` array.
 * @returns {string} Text starting with two newlines, or "" when there are no attempts.
 */
function errorAttemptsSection(body) {
  if (!Array.isArray(body.attempts) || body.attempts.length === 0) return "";

  const triState = (flag) => (flag === true ? "yes" : flag === false ? "no" : "unknown");

  const describe = (attempt) => {
    const debug = attempt.debug ?? {};
    const browser = debug.browser ?? {};
    const runtime = browser.browserRuntime ?? browser.kernel ?? {};
    const resolution = runtime.proxyResolution ?? {};
    const network = browser.networkLocation ?? {};
    const nav = browser.serpNavigation ?? {};

    const number = attempt.attempt_number ?? "?";
    const outcome = attempt.outcome ?? attempt.status ?? "unknown";
    const sessionId = attempt.browser_session_id ?? attempt.kernel_session_id ?? runtime.sessionId ?? "unknown";
    const proxyMode = debug.request?.proxyMode ?? runtime.proxyMode ?? "unknown";
    const proxySource = resolution.source ? `/${resolution.source}` : "";
    const geo = [network.ip, network.city, network.region].filter(Boolean).join(" / ") || "geo unknown";
    const cleanup = attempt.session_cleanup_succeeded ?? attempt.kernel_delete_succeeded;

    return `- Attempt ${number}: ${outcome} \xB7 session ${sessionId} \xB7 proxy ${proxyMode}${proxySource} \xB7 ${geo} \xB7 CAPTCHA ${triState(nav.captchaDetected)} \xB7 cleanup ${triState(cleanup)}`;
  };

  const bullets = body.attempts.slice(0, 5).map(describe);
  return `\n\nAttempts:\n${bullets.join("\n")}`;
}
|
|
638
|
+
/**
 * Format a `harvest_paa` tool response as a Markdown report plus structured content.
 *
 * The report contains the People Also Ask table, optional organic results,
 * entity IDs, AI overview, stats and debug sections, followed by usage tips.
 *
 * @param {object} raw - Raw tool result handed to `parseData`.
 * @param {{query: string, location?: string, maxQuestions?: number}} input - Original tool arguments.
 * @returns {object} Tool result; `isError: true` with the error text when parsing fails.
 */
function formatHarvestPaa(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) {
    return { content: [{ type: "text", text: parsed.error }], isError: true };
  }

  const d = parsed.data;
  const questions = d.flat ?? [];
  const organic = d.organicResults ?? [];
  const entityIds = d.entityIds;
  const overview = d.aiOverview;
  const diagnostics = d.diagnostics;
  const durationMs = d.stats?.durationMs;

  const paaRows = questions.map((row, index) => {
    const question = cell(row.question);
    const answer = cell(truncate(row.answer, 120));
    const source = cell(row.source_title || row.source_site || "");
    return `| ${index + 1} | ${question} | ${answer} | ${source} |`;
  }).join("\n");

  let paaTable;
  if (questions.length) {
    paaTable = `## People Also Ask (${questions.length} questions)\n| # | Question | Answer | Source |\n|---|----------|--------|--------|\n${paaRows}`;
  } else {
    paaTable = "## People Also Ask\n*Google did not return a People Also Ask block for this query/location. SERP data was extracted successfully when available.*";
  }

  const serpRows = organic.map((hit) => {
    const link = `[${cell(hit.domain)}](${hit.url})`;
    return `| ${hit.position} | ${cell(hit.title)} | ${link} | ${cell(truncate(hit.snippet, 100))} |`;
  }).join("\n");
  const serpTable = organic.length
    ? `\n## Organic Results (${organic.length})\n| # | Title | URL | Snippet |\n|---|-------|-----|----------|\n${serpRows}`
    : "";

  let aiSection = "";
  if (overview?.detected && overview.text) {
    aiSection = `\n## AI Overview\n> ${truncate(overview.text, 600)}`;
  }

  // Stats are only shown when a truthy duration was reported.
  let statsLine = "";
  if (durationMs) {
    const status = diagnostics?.completionStatus ?? (questions.length ? "paa_found" : "no_paa");
    const seconds = (durationMs / 1e3).toFixed(1);
    statsLine = `\n## Stats\n- Status: ${status} \xB7 Questions: ${questions.length} \xB7 Duration: ${seconds}s`;
  }

  const tips = [
    "",
    "---",
    "\u{1F4A1} **Tips**",
    `- Max questions: \`maxQuestions: 200\` (current: ${input.maxQuestions ?? 30})`,
    "- Organic results only: use `search_serp`",
    "- Dig into a result: use `extract_url` on any organic URL",
  ].join("\n");

  const heading = `# PAA Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}`;
  const sections = `${paaTable}${serpTable}${entityIdsSection(entityIds)}${aiSection}${statsLine}${debugSection(diagnostics?.debug)}${tips}`;
  const full = `${heading}\n\n${sections}`;

  const structuredQuestions = questions.map((row) => ({
    question: String(row.question ?? ""),
    answer: row.answer ?? null,
    sourceTitle: row.source_title ?? null,
    sourceSite: row.source_site ?? null,
  }));
  const structuredOrganic = organic.map((hit) => ({
    position: Number(hit.position) || 0,
    title: String(hit.title ?? ""),
    url: String(hit.url ?? ""),
    domain: String(hit.domain ?? ""),
    snippet: hit.snippet ?? null,
  }));

  return {
    ...oneBlock(full),
    structuredContent: {
      query: input.query,
      location: input.location ?? null,
      questionCount: questions.length,
      completionStatus: diagnostics?.completionStatus ?? null,
      questions: structuredQuestions,
      organicResults: structuredOrganic,
      aiOverview: overview ? { detected: overview.detected === true, text: overview.text ?? null } : null,
      entityIds: entityIds
        ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] }
        : null,
      durationMs: durationMs ?? null,
    },
  };
}
|
|
704
|
+
/**
 * Format a `search_serp` tool response as a Markdown report plus structured content.
 *
 * Sections: organic results, optional local pack, entity IDs, AI overview,
 * debug details and usage tips.
 *
 * @param {object} raw - Raw tool result handed to `parseData`.
 * @param {{query: string, location?: string}} input - Original tool arguments.
 * @returns {object} Tool result; `isError: true` with the error text when parsing fails.
 */
function formatSearchSerp(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) {
    return { content: [{ type: "text", text: parsed.error }], isError: true };
  }

  const d = parsed.data;
  const organic = d.organicResults ?? [];
  const localPack = d.localPack ?? [];
  const entityIds = d.entityIds;
  const overview = d.aiOverview;
  const diagnostics = d.diagnostics;

  const serpRows = organic.map((hit) => {
    const link = `[${cell(hit.domain)}](${hit.url})`;
    return `| ${hit.position} | ${cell(hit.title)} | ${link} | ${cell(truncate(hit.snippet, 100))} |`;
  }).join("\n");
  let serpTable;
  if (organic.length) {
    serpTable = `## Organic Results (${organic.length})\n| # | Title | URL | Snippet |\n|---|-------|-----|----------|\n${serpRows}`;
  } else {
    serpTable = "## Organic Results\n*None found*";
  }

  const localRows = localPack.map((biz) => {
    const rating = `${biz.rating ?? "\u2014"} (${biz.reviewCount ?? "0"})`;
    const website = biz.websiteUrl ? `[link](${biz.websiteUrl})` : "\u2014";
    return `| ${biz.position} | ${cell(biz.name)} | ${rating} | ${website} |`;
  }).join("\n");
  const localSection = localPack.length
    ? `\n## Local Pack (${localPack.length})\n| # | Name | Rating | Website |\n|---|------|--------|---------|\n${localRows}`
    : "";

  let aiSection = "";
  if (overview?.detected && overview.text) {
    aiSection = `\n## AI Overview\n> ${truncate(overview.text, 600)}`;
  }

  const tips = [
    "",
    "---",
    "\u{1F4A1} **Tips**",
    "- Get PAA questions: use `harvest_paa` for this query",
    "- Scrape any result: use `extract_url`",
    "- Business entity IDs (CID/GCID/KG MID) shown above if found",
  ].join("\n");

  const heading = `# SERP Report: "${input.query}"${input.location ? ` \xB7 ${input.location}` : ""}`;
  const sections = `${serpTable}${localSection}${entityIdsSection(entityIds)}${aiSection}${debugSection(diagnostics?.debug)}${tips}`;
  const full = `${heading}\n\n${sections}`;

  const structuredOrganic = organic.map((hit) => ({
    position: Number(hit.position) || 0,
    title: String(hit.title ?? ""),
    url: String(hit.url ?? ""),
    domain: String(hit.domain ?? ""),
    snippet: hit.snippet ?? null,
  }));
  const structuredLocal = localPack.map((biz) => ({
    position: Number(biz.position) || 0,
    name: String(biz.name ?? ""),
    rating: biz.rating ?? null,
    reviewCount: biz.reviewCount ?? null,
    websiteUrl: biz.websiteUrl ?? null,
  }));

  return {
    ...oneBlock(full),
    structuredContent: {
      query: input.query,
      location: input.location ?? null,
      organicResults: structuredOrganic,
      localPack: structuredLocal,
      aiOverview: overview ? { detected: overview.detected === true, text: overview.text ?? null } : null,
      entityIds: entityIds
        ? { kgIds: entityIds.kgIds ?? [], cids: entityIds.cids ?? [], gcids: entityIds.gcids ?? [] }
        : null,
    },
  };
}
|
|
764
|
+
/**
 * Format an `extract_url` tool response as a Markdown report plus structured content.
 *
 * Sections: heading structure, entity/schema summary, branding, page content
 * (first 3000 characters), screenshot details and media assets. When the
 * response carries a base64 screenshot it is written to disk via
 * `persistScreenshotLocally` and also returned inline as an image block.
 *
 * Compared with the previous version, a `media` object without an `assets`
 * list no longer throws (it reports 0 downloaded), and a screenshot without a
 * numeric `sizeBytes` reports "unknown" instead of "NaN KB".
 *
 * @param {object} raw - Raw tool result handed to `parseData`.
 * @param {{url: string}} input - Original tool arguments.
 * @returns {object} Tool result; `isError: true` with the error text when parsing fails.
 */
function formatExtractUrl(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
  const d = parsed.data;
  const url = d.url ?? input.url;
  const title = d.title ?? "Untitled";
  const headings = d.headings ?? [];
  const kpo = d.kpo;
  const bodyMd = d.bodyMarkdown ?? "";
  const schema = d.schema;
  const screenshotMeta = d.screenshot;
  const screenshotPath = screenshotMeta?.base64 ? persistScreenshotLocally(screenshotMeta.base64, url) : null;
  const branding = d.branding;
  const media = d.media;

  const h1Lines = headings.filter((h) => h.level === 1).map((h) => `- ${h.text}`).join("\n");
  const h2Lines = headings.filter((h) => h.level === 2).map((h) => ` - ${h.text}`).join("\n");
  const headingSection = h1Lines || h2Lines
    ? `\n## Heading Structure\n${[h1Lines, h2Lines].filter(Boolean).join("\n")}`
    : "";

  const kpoSection = kpo ? [
    "\n## Entity / Schema",
    kpo.entityName ? `- **Entity:** ${kpo.entityName}` : "",
    kpo.type?.length ? `- **@type:** ${kpo.type.join(", ")}` : "",
    kpo.napScore !== void 0 ? `- **NAP Score:** ${kpo.napScore}/5` : "",
    kpo.address ? `- **Address:** ${kpo.address}` : "",
    kpo.phone ? `- **Phone:** ${kpo.phone}` : "",
    kpo.email ? `- **Email:** ${kpo.email}` : "",
    kpo.faqCount ? `- **FAQ items:** ${kpo.faqCount}` : "",
    kpo.sameAs?.length ? `- **sameAs:** ${kpo.sameAs.slice(0, 5).join(", ")}` : "",
    kpo.missingFields?.length ? `\n**Missing schema fields:** ${kpo.missingFields.slice(0, 5).join(", ")}` : ""
  ].filter(Boolean).join("\n") : "";

  const bodySection = bodyMd
    ? `\n## Page Content\n${bodyMd.slice(0, 3e3)}${bodyMd.length > 3e3 ? "\n\n*(truncated)*" : ""}`
    : "";

  // Guard against a missing/non-numeric size so the report never shows "NaN KB".
  const screenshotBytes = Number(screenshotMeta?.sizeBytes);
  const screenshotSize = Number.isFinite(screenshotBytes) ? `${(screenshotBytes / 1024).toFixed(1)} KB` : "unknown";
  const screenshotSection = screenshotMeta
    ? `\n## Screenshot\n- **File:** ${screenshotPath ?? "(returned inline only \u2014 disk write unavailable in this environment)"}\n- **Size:** ${screenshotSize}\n- **Device:** ${screenshotMeta.device}`
    : "";

  const brandingSection = branding ? [
    "\n## Branding",
    branding.colorScheme ? `- **Color scheme:** ${branding.colorScheme}` : "",
    `- **Colors:**${Object.entries(branding.colors ?? {}).filter(([, v]) => v).map(([k, v]) => ` ${k}=${v}`).join(",") || " (none extracted)"}`,
    `- **Fonts:**${Object.entries(branding.fonts ?? {}).filter(([, v]) => v).map(([k, v]) => ` ${k}=${v}`).join(",") || " (none extracted)"}`,
    branding.assets?.logo ? `- **Logo:** ${branding.assets.logo}` : "",
    branding.assets?.favicon ? `- **Favicon:** ${branding.assets.favicon}` : ""
  ].filter(Boolean).join("\n") : "";

  // `assets` may be absent even when `media` is present; treat that as nothing downloaded.
  const downloadedCount = media?.assets?.length ?? 0;
  const mediaSection = media ? [
    "\n## Media Assets",
    `- **Found:** ${media.totalFound} total, ${media.filteredCount} filtered (ads/noise), ${downloadedCount} downloaded`,
    media.outputDir ? `- **Saved to:** ${media.outputDir}` : ""
  ].filter(Boolean).join("\n") : "";

  const schemaCount = Array.isArray(schema) ? schema.length : 0;
  const tips = `\n---\n\u{1F4A1} **Tips**\n- Crawl entire site: use \`extract_site\`\n- Map all URLs: use \`map_site_urls\`\n- ${schemaCount} JSON-LD schema block(s) detected`;

  const full = `# URL Extract: ${url}\n**${title}**\n${headingSection}${kpoSection}${brandingSection}${bodySection}${screenshotSection}${mediaSection}${tips}`;
  const textResult2 = oneBlock(full);
  const structuredContent = {
    url,
    title: d.title ?? null,
    headings: headings.map((h) => ({ level: Number(h.level) || 0, text: String(h.text ?? "") })),
    schemaBlockCount: schemaCount,
    entityName: kpo?.entityName ?? null,
    entityTypes: kpo?.type ?? [],
    napScore: kpo?.napScore ?? null,
    missingSchemaFields: kpo?.missingFields ?? [],
    screenshotSaved: screenshotPath ?? null
  };

  if (screenshotMeta?.base64) {
    // Return the screenshot inline as well, after the text block(s).
    return {
      content: [
        ...textResult2.content,
        { type: "image", data: screenshotMeta.base64, mimeType: "image/png" }
      ],
      structuredContent
    };
  }
  return { ...textResult2, structuredContent };
}
|
|
853
|
+
/**
 * Format a `map_site_urls` tool response as a Markdown report plus structured content.
 *
 * The report lists a status summary (2xx / 3xx / 4xx+), an inventory table of
 * the first 200 URLs, and a list of broken URLs. `structuredContent.urls`
 * carries every URL, not just the first 200.
 *
 * The header line now uses the same fallbacks as `structuredContent`
 * (`totalFound` defaults to the number of URLs, `durationMs` to 0), so a
 * response missing those fields no longer prints "undefined URLs" or "NaNs".
 *
 * @param {object} raw - Raw tool result handed to `parseData`.
 * @param {{url: string}} input - Original tool arguments.
 * @returns {object} Tool result; `isError: true` with the error text when parsing fails.
 */
function formatMapSiteUrls(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
  const d = parsed.data;
  const urls = d.urls ?? [];
  const totalFound = d.totalFound ?? urls.length;
  const durationMs = d.durationMs ?? 0;

  // A missing status counts as neither ok, redirect nor broken.
  const ok = urls.filter((u) => (u.status ?? 0) >= 200 && (u.status ?? 0) < 300);
  const broken = urls.filter((u) => u.status !== null && u.status >= 400);
  const redirects = urls.filter((u) => u.status !== null && u.status >= 300 && u.status < 400);

  const urlRows = urls.slice(0, 200).map((u, i) => `| ${i + 1} | ${u.url} | ${u.status ?? "\u2014"} |`).join("\n");

  const full = [
    `# URL Map: ${input.url}`,
    `**${totalFound} URLs** \xB7 ${(durationMs / 1e3).toFixed(1)}s${d.truncated ? " \xB7 *truncated*" : ""}`,
    `\n## Summary\n- \u2705 2xx: ${ok.length}\n- \u{1F500} 3xx: ${redirects.length}\n- \u274C 4xx+: ${broken.length}`,
    `\n## URL Inventory\n| # | URL | Status |\n|---|-----|--------|\n${urlRows}`,
    broken.length ? `\n## Broken URLs\n${broken.map((u) => `- ${u.url} (${u.status})`).join("\n")}` : "",
    `\n---\n\u{1F4A1} **Tips**\n- Extract content from all pages: use \`extract_site\`\n- Scrape a single page: use \`extract_url\``
  ].filter(Boolean).join("\n");

  return {
    ...oneBlock(full),
    structuredContent: {
      startUrl: d.startUrl ?? input.url,
      totalFound,
      truncated: d.truncated === true,
      okCount: ok.length,
      redirectCount: redirects.length,
      brokenCount: broken.length,
      urls: urls.map((u) => ({ url: u.url, status: u.status ?? null })),
      durationMs
    }
  };
}
|
|
898
|
+
/**
 * Format an `extract_site` tool response as a Markdown report plus structured content.
 *
 * Each crawled page becomes one table row with its title, URL and a schema
 * summary: the page's `kpo.type` list when it has entries, otherwise the
 * number of schema blocks, otherwise an em dash.
 *
 * An empty `kpo.type` array previously produced a blank Schema cell, because
 * `[].join(", ")` is `""`, which `??` does not treat as missing. It now falls
 * through to the schema-block count / em dash.
 *
 * @param {object} raw - Raw tool result handed to `parseData`.
 * @param {{url: string}} input - Original tool arguments.
 * @returns {object} Tool result; `isError: true` with the error text when parsing fails.
 */
function formatExtractSite(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
  const d = parsed.data;
  const pages = d.pages ?? [];

  const describeSchema = (p) => {
    if (p.kpo?.type?.length) return p.kpo.type.join(", ");
    if (Array.isArray(p.schema) && p.schema.length) return `${p.schema.length} block(s)`;
    return "\u2014";
  };

  const pageRows = pages
    .map((p, i) => `| ${i + 1} | ${cell(p.title ?? "Untitled")} | ${p.url} | ${describeSchema(p)} |`)
    .join("\n");

  const full = [
    `# Site Extract: ${input.url}`,
    `**${pages.length} pages** \xB7 ${((d.durationMs ?? 0) / 1e3).toFixed(1)}s`,
    `\n## Pages\n| # | Title | URL | Schema |\n|---|-------|-----|--------|\n${pageRows}`,
    `\n---\n\u{1F4A1} **Tips**\n- Map URLs first: use \`map_site_urls\`\n- Inspect a single page: use \`extract_url\``
  ].join("\n");

  return {
    ...oneBlock(full),
    structuredContent: {
      url: input.url,
      pageCount: pages.length,
      pages: pages.map((p) => ({
        url: String(p.url ?? ""),
        title: p.title ?? null,
        schemaTypes: p.kpo?.type ?? []
      })),
      durationMs: d.durationMs ?? 0
    }
  };
}
|
|
935
|
+
/**
 * Format a `youtube_harvest` tool response as a Markdown report plus structured content.
 *
 * The heading shows the channel handle in channel mode, or the quoted query
 * otherwise. Optional channel metadata is followed by a table of videos.
 *
 * A response without a `stats` object previously threw on
 * `d.stats.durationMs`; the duration now falls back to 0.
 *
 * @param {object} raw - Raw tool result handed to `parseData`.
 * @param {{mode?: string, channelHandle?: string, query?: string}} input - Original tool arguments.
 * @returns {object} Tool result; `isError: true` with the error text when parsing fails.
 */
function formatYoutubeHarvest(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
  const d = parsed.data;
  const videos = d.videos ?? [];
  const durationMs = d.stats?.durationMs ?? 0;
  const label = input.mode === "channel" ? input.channelHandle ?? "channel" : `"${input.query ?? ""}"`;

  const videoRows = videos.map(
    (v, i) => `| ${i + 1} | ${cell(truncate(v.title, 70))} | ${cell(v.channelName)} | ${v.views ?? "\u2014"} | ${v.duration ?? "\u2014"} | \`${v.videoId}\` |`
  ).join("\n");

  const channelSection = d.channelMeta
    ? `\n## Channel\n- **Name:** ${d.channelMeta.title ?? "\u2014"}\n- **Subscribers:** ${d.channelMeta.subscriberCount ?? "\u2014"}`
    : "";

  const full = [
    `# YouTube Harvest: ${label}`,
    `**${videos.length} videos** \xB7 ${(durationMs / 1e3).toFixed(1)}s`,
    channelSection,
    `\n## Videos\n| # | Title | Channel | Views | Duration | Video ID |\n|---|-------|---------|-------|----------|----------|\n${videoRows}`,
    `\n---\n\u{1F4A1} **Tips**\n- Transcribe a video: use \`youtube_transcribe\` with the \`videoId\` above\n- Switch mode: \`mode: "channel"\` with \`channelHandle\` or \`mode: "search"\` with \`query\``
  ].filter(Boolean).join("\n");

  return {
    ...oneBlock(full),
    structuredContent: {
      mode: input.mode,
      videoCount: videos.length,
      channel: d.channelMeta ? { title: d.channelMeta.title ?? null, subscriberCount: d.channelMeta.subscriberCount ?? null } : null,
      videos: videos.map((v) => ({
        videoId: String(v.videoId ?? ""),
        title: String(v.title ?? ""),
        channelName: v.channelName ?? null,
        views: v.views ?? null,
        duration: v.duration ?? null,
        url: v.url ?? null
      }))
    }
  };
}
|
|
980
|
+
/**
 * Format a `youtube_transcribe` tool response as a Markdown transcript report.
 *
 * The report shows duration and word count, the full transcript, and a table
 * of the first 50 timestamped chunks (mm:ss taken from each chunk's start time).
 *
 * Fixes over the previous version:
 * - the word count splits on any whitespace and is 0 for an empty transcript
 *   (it used to report 1 and miscount newline-separated text);
 * - a chunk without a `timestamp` array is rendered at 00:00 instead of throwing;
 * - an unknown duration renders as an em dash rather than "—s".
 *
 * @param {object} raw - Raw tool result handed to `parseData`.
 * @param {{videoId: string}} input - Original tool arguments.
 * @returns {object} Tool result; `isError: true` with the error text when parsing fails.
 */
function formatYoutubeTranscribe(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
  const d = parsed.data;
  const text = d.text ?? "";
  const chunks = d.chunks ?? [];

  const durationLabel = d.durationMs ? `${(d.durationMs / 1e3).toFixed(0)}s` : "\u2014";
  const trimmed = String(text).trim();
  const wordCount = trimmed ? trimmed.split(/\s+/).length : 0;

  const chunkRows = chunks.slice(0, 50).map((c) => {
    const start = c.timestamp?.[0];
    const sec = Number.isFinite(start) ? Math.floor(start) : 0;
    const mm = String(Math.floor(sec / 60)).padStart(2, "0");
    const ss = String(sec % 60).padStart(2, "0");
    return `| ${mm}:${ss} | ${cell(truncate(c.text, 120))} |`;
  }).join("\n");

  const full = [
    `# YouTube Transcript: \`${input.videoId}\``,
    `**Duration:** ${durationLabel} \xB7 **${wordCount} words**`,
    `\n## Full Transcript\n${text}`,
    chunks.length ? `\n## Timestamped Chunks\n| Time | Text |\n|------|------|\n${chunkRows}` : "",
    `\n---\n\u{1F4A1} Harvest more from this channel: use \`youtube_harvest\` with \`mode: "channel"\``
  ].filter(Boolean).join("\n");

  return oneBlock(full);
}
|
|
1010
|
+
/**
 * Format a `facebook_page_intel` tool response as a Markdown report plus structured content.
 *
 * Each ad becomes its own block (status, creative type, start date and any
 * headline / copy / CTA / video URL / variation count), separated by rules.
 *
 * @param {object} raw - Raw tool result handed to `parseData`.
 * @param {{query?: string, pageId?: string, libraryId?: string}} input - Original tool arguments.
 * @returns {object} Tool result; `isError: true` with the error text when parsing fails.
 */
function formatFacebookPageIntel(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) {
    return { content: [{ type: "text", text: parsed.error }], isError: true };
  }

  const d = parsed.data;
  const advertiser = d.advertiserName ?? input.query ?? input.pageId ?? input.libraryId ?? "Advertiser";
  const ads = d.ads ?? [];
  const summary = d.summary ?? { totalAds: 0, activeCount: 0, videoCount: 0, imageCount: 0 };

  const describeAd = (ad, index) => {
    const idPart = ad.libraryId ? ` \xB7 \`${ad.libraryId}\`` : "";
    const lines = [
      `### Ad ${index + 1}${idPart} \u2014 ${ad.status ?? "\u2014"} \xB7 ${ad.creativeType ?? "\u2014"} \xB7 ${ad.startDate ?? "\u2014"}`,
    ];
    if (ad.headline) lines.push(`**Headline:** ${ad.headline}`);
    if (ad.primaryText) lines.push(`**Copy:** ${truncate(ad.primaryText, 200)}`);
    if (ad.cta) lines.push(`**CTA:** ${ad.cta}`);
    if (ad.videoUrl) lines.push(`**Video URL:** \`${ad.videoUrl}\``);
    if (ad.variations) lines.push(`**Variations:** ${ad.variations}`);
    return lines.join("\n");
  };
  const adBlocks = ads.map(describeAd).join("\n\n---\n\n");

  const tips = [
    "",
    "---",
    "\u{1F4A1} **Tips**",
    "- Transcribe video ads: use `facebook_ad_transcribe` with the `videoUrl` above",
    "- Find other advertisers: use `facebook_ad_search`",
  ].join("\n");

  // Every section below is a non-empty string, so no filtering is needed before joining.
  const full = [
    `# Facebook Ad Intel: ${advertiser}`,
    `**${summary.totalAds} ads** \xB7 ${summary.activeCount} active \xB7 ${summary.videoCount} video \xB7 ${summary.imageCount} image`,
    `\n${adBlocks}`,
    tips,
  ].join("\n");

  const structuredAds = ads.map((ad) => ({
    libraryId: ad.libraryId ?? null,
    status: ad.status ?? null,
    creativeType: ad.creativeType ?? null,
    headline: ad.headline ?? null,
    cta: ad.cta ?? null,
    startDate: ad.startDate ?? null,
    videoUrl: ad.videoUrl ?? null,
    variations: typeof ad.variations === "number" ? ad.variations : null,
  }));

  return {
    ...oneBlock(full),
    structuredContent: {
      advertiserName: d.advertiserName ?? null,
      totalAds: summary.totalAds ?? 0,
      activeCount: summary.activeCount ?? 0,
      videoCount: summary.videoCount ?? 0,
      imageCount: summary.imageCount ?? 0,
      ads: structuredAds,
    },
  };
}
|
|
1057
|
+
/**
 * Format a `facebook_ad_search` tool response as a Markdown report plus structured content.
 *
 * Advertisers are read from `results`, falling back to `advertisers`, and
 * listed in a table with their ad count and a library ID.
 *
 * @param {object} raw - Raw tool result handed to `parseData`.
 * @param {{query: string}} input - Original tool arguments.
 * @returns {object} Tool result; `isError: true` with the error text when parsing fails.
 */
function formatFacebookAdSearch(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) {
    return { content: [{ type: "text", text: parsed.error }], isError: true };
  }

  const d = parsed.data;
  const advertisers = d.results ?? d.advertisers ?? [];

  const rows = advertisers.map((entry, index) => {
    const name = cell(entry.pageName ?? entry.name);
    const adCount = entry.adCount ?? "\u2014";
    const libraryId = entry.sampleLibraryId ?? entry.libraryId ?? "\u2014";
    return `| ${index + 1} | ${name} | ${adCount} | \`${libraryId}\` |`;
  }).join("\n");

  const table = `\n## Advertisers\n| # | Name | Ad Count | Library ID |\n|---|------|----------|------------|\n${rows}`;
  const tips = [
    "",
    "---",
    "\u{1F4A1} **Tips**",
    "- Scan all ads: use `facebook_page_intel` with `libraryId`",
    "- Or pass the advertiser name as `query` in `facebook_page_intel`",
  ].join("\n");

  const full = [
    `# Facebook Ad Library Search: "${input.query}"`,
    `**${advertisers.length} advertisers found**`,
    table,
    tips,
  ].join("\n");

  const structuredAdvertisers = advertisers.map((entry) => ({
    name: entry.pageName ?? entry.name ?? null,
    adCount: typeof entry.adCount === "number" ? entry.adCount : null,
    libraryId: entry.sampleLibraryId ?? entry.libraryId ?? null,
  }));

  return {
    ...oneBlock(full),
    structuredContent: {
      query: input.query,
      advertiserCount: advertisers.length,
      advertisers: structuredAdvertisers,
    },
  };
}
|
|
1092
|
+
/**
 * Format a credits-info tool response as a Markdown report plus structured content.
 *
 * Shows the balance, an optional matched-cost section (or a "no match" note
 * when `input.item` was given), the cost table and the recent ledger. Ledger
 * amounts are read from `amount_mc` and divided by 1000 to give credits.
 *
 * @param {object} raw - Raw tool result handed to `parseData`.
 * @param {{item?: string}} input - Original tool arguments.
 * @returns {object} Tool result; `isError: true` with the error text when parsing fails.
 */
function formatCreditsInfo(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) {
    return { content: [{ type: "text", text: parsed.error }], isError: true };
  }

  const d = parsed.data;
  const balance = d.balance_credits;
  const costs = d.costs ?? [];
  const matched = d.matched_cost;
  const ledger = d.ledger ?? [];

  const costRows = costs
    .map((cost) => `| ${cost.label} | ${cost.credits} | ${cost.unit}${cost.notes ? ` ${cost.notes}` : ""} |`)
    .join("\n");
  const ledgerRows = ledger
    .map((entry) => `| ${entry.created_at} | ${entry.operation} | ${entry.amount_mc / 1e3} | ${entry.description ?? ""} |`)
    .join("\n");

  let matchedSection = "";
  if (matched) {
    const notes = matched.notes ? `\n\n${matched.notes}` : "";
    matchedSection = `\n## Matched Cost\n**${matched.label}:** ${matched.credits} credits ${matched.unit}${notes}`;
  } else if (input.item) {
    matchedSection = `\n## Matched Cost\nNo exact cost match found for "${input.item}". See the full cost table below.`;
  }

  const sections = [`# Credits`, `**Balance:** ${balance ?? "unknown"} credits`];
  if (matchedSection) sections.push(matchedSection);
  if (costs.length) {
    sections.push(`\n## Cost Table\n| Item | Credits | Unit |\n|------|---------|------|\n${costRows}`);
  }
  if (ledger.length) {
    sections.push(`\n## Recent Ledger\n| Date | Operation | Credits | Description |\n|------|-----------|---------|-------------|\n${ledgerRows}`);
  }
  const full = sections.join("\n");

  const structuredCosts = costs.map((cost) => ({
    key: cost.key,
    label: cost.label,
    credits: cost.credits,
    unit: cost.unit,
    notes: cost.notes ?? null,
  }));
  const structuredLedger = ledger.map((entry) => ({
    createdAt: String(entry.created_at ?? ""),
    operation: String(entry.operation ?? ""),
    credits: entry.amount_mc / 1e3,
    description: entry.description ?? null,
  }));

  return {
    ...oneBlock(full),
    structuredContent: {
      balanceCredits: typeof balance === "number" ? balance : null,
      matchedCost: matched
        ? { label: matched.label, credits: matched.credits, unit: matched.unit, notes: matched.notes ?? null }
        : null,
      costs: structuredCosts,
      ledger: structuredLedger,
    },
  };
}
|
|
1151
|
+
/**
 * Format a Google Maps search tool response as a Markdown report plus structured content.
 *
 * The report has a results table, a per-candidate metadata list (first eight
 * entries each), a next-step hint and, when a duration was reported, a timing
 * footer. Structured results are the raw results with `phone` and
 * `hoursStatus` defaulted to null.
 *
 * @param {object} raw - Raw tool result handed to `parseData`.
 * @param {{query?: string, location?: string, maxResults?: number}} input - Original tool arguments.
 * @returns {object} Tool result; `isError: true` with the error text when parsing fails.
 */
function formatMapsSearch(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) {
    return { content: [{ type: "text", text: parsed.error }], isError: true };
  }

  const d = parsed.data;
  const results = d.results ?? [];
  const normalizedResults = results.map((entry) => ({
    ...entry,
    phone: entry.phone ?? null,
    hoursStatus: entry.hoursStatus ?? null,
  }));
  const searchQuery = d.searchQuery ?? [input.query, input.location].filter(Boolean).join(" ");
  const requestedMax = d.requestedMaxResults ?? input.maxResults ?? 10;
  const durationMs = d.durationMs;

  const rows = results.map((place) => {
    const reviews = place.reviewCount ? `(${place.reviewCount})` : null;
    const rating = [place.rating, reviews].filter(Boolean).join(" ");
    const cid = place.cidDecimal ? `\`${place.cidDecimal}\`` : "\u2014";
    const website = place.websiteUrl ? `[site](${place.websiteUrl})` : "\u2014";
    return `| ${place.position} | ${cell(place.name)} | ${cell(place.category)} | ${cell(rating)} | ${cell(place.address)} | ${cid} | ${website} | [maps](${place.placeUrl}) |`;
  }).join("\n");

  let metadataSection = "";
  if (results.length) {
    const entries = results.map((place) => {
      const meta = place.metadata?.length
        ? place.metadata.slice(0, 8).map((item) => ` - ${item}`).join("\n")
        : " - none";
      return `### ${place.position}. ${place.name}\n${meta}`;
    });
    metadataSection = `\n## Candidate Metadata\n${entries.join("\n\n")}`;
  }

  const plural = results.length === 1 ? "" : "s";
  const sections = [
    `# Google Maps Search: "${searchQuery}"`,
    `**Returned:** ${results.length} profile candidate${plural} \xB7 **Requested max:** ${requestedMax} \xB7 **Limit:** 50`,
    `\n## Results\n| # | Name | Category | Rating | Address | CID | Website | Maps |\n|---|------|----------|--------|---------|-----|---------|------|\n${rows}`,
  ];
  if (metadataSection) sections.push(metadataSection);
  sections.push(
    `\n---\n\u{1F4A1} **Next step:** use \`maps_place_intel\` with a selected business name and location to hydrate full hours, phone, review topics, and optional review cards.`
  );
  // Loose null check on purpose: a duration of 0 is still reported.
  if (durationMs != null) {
    sections.push(`\n*Extracted in ${(durationMs / 1e3).toFixed(1)}s*`);
  }
  const full = sections.join("\n");

  return {
    ...oneBlock(full),
    structuredContent: {
      query: d.query,
      location: d.location ?? null,
      searchQuery: d.searchQuery,
      searchUrl: d.searchUrl,
      extractedAt: d.extractedAt,
      requestedMaxResults: requestedMax,
      resultCount: results.length,
      results: normalizedResults,
      durationMs: durationMs ?? 0,
    },
  };
}
|
|
1205
|
+
/**
 * Format a directory-workflow payload (one Maps search per selected city) as
 * an MCP tool result.
 *
 * The text block lists the markets, the first three candidates per city, any
 * warnings, the data sources and a timing footer. `structuredContent` mirrors
 * the payload with normalized city/result rows.
 *
 * @param {unknown} raw - Tool response handed to `parseData`.
 * @param {{ query?: string, state?: string, minPopulation?: number }} input -
 *   Original tool arguments, used for the heading and as fallbacks.
 * @returns {object} `{ content, isError: true }` when `parseData` reports an
 *   error; otherwise the `oneBlock(...)` result plus `structuredContent`.
 */
function formatDirectoryWorkflow(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
  const d = parsed.data;
  const cities = (d.cities ?? []).map((city) => {
    // A city entry may arrive without a results array; treat it as empty
    // instead of throwing on `.map` / `.slice`.
    const results = (city.results ?? []).map((result) => ({
      ...result,
      phone: result.phone ?? null,
      hoursStatus: result.hoursStatus ?? null
    }));
    return {
      ...city,
      // Keeps the summed total numeric when the payload omits resultCount.
      resultCount: city.resultCount ?? results.length,
      results
    };
  });
  const warnings = d.warnings ?? [];
  const csvPath = d.csvPath ?? null;
  const totalResultCount = d.totalResultCount ?? cities.reduce((sum, city) => sum + city.resultCount, 0);
  const durationMs = d.durationMs;
  const marketRows = cities.map((city) => {
    const zipCount = city.zips?.length ?? 0;
    // Show at most 8 ZIPs and summarize the remainder as "+N".
    const zips = zipCount ? city.zips.slice(0, 8).join(" ") + (zipCount > 8 ? ` +${zipCount - 8}` : "") : "\u2014";
    const population = typeof city.population === "number" ? city.population.toLocaleString() : "\u2014";
    return `| ${cell(city.city)} | ${population} | ${zipCount} | ${city.resultCount} | ${city.status} | ${cell(zips)} |`;
  }).join("\n");
  // Only the first three candidates of each city are shown in the text table.
  const businessRows = cities.flatMap((city) => city.results.slice(0, 3).map((result) => ({ city, result }))).map(({ city, result }) => {
    const rating = [result.rating, result.reviewCount ? `(${result.reviewCount})` : null].filter(Boolean).join(" ");
    const site = result.websiteUrl ? `[site](${result.websiteUrl})` : "\u2014";
    return `| ${cell(city.city)} | ${result.position} | ${cell(result.name)} | ${cell(result.category)} | ${cell(rating)} | ${site} | [maps](${result.placeUrl}) |`;
  }).join("\n");
  const warningText = warnings.length ? `\n## Warnings\n${warnings.map((w) => `- ${w}`).join("\n")}` : "";
  const csvText = csvPath ? `\n**CSV:** \`${csvPath}\`` : "";
  const full = [
    `# Directory Workflow: ${input.query}`,
    `**Markets:** ${cities.length} \xB7 **Maps results:** ${totalResultCount} \xB7 **State:** ${d.state ?? input.state ?? "US"} \xB7 **Population threshold:** ${d.minPopulation ?? input.minPopulation ?? 1e5}`,
    csvText,
    [
      "",
      "## Markets",
      "| City | Population | ZIPs | Maps Results | Status | ZIP Sample |",
      "|---|---:|---:|---:|---|---|",
      marketRows
    ].join("\n"),
    businessRows ? [
      "",
      "## Top Candidates By City",
      "| City | # | Name | Category | Rating | Website | Maps |",
      "|---|---:|---|---|---|---|---|",
      businessRows
    ].join("\n") : null,
    warningText,
    `\n## Sources\n- Population: ${d.censusSourceUrl ?? "Census Population Estimates Program"}\n- ZIP groups: ${d.usZipsSourcePath ?? "not configured"}`,
    durationMs != null ? `\n*Completed in ${(durationMs / 1e3).toFixed(1)}s*` : null
  ].filter(Boolean).join("\n");
  return {
    ...oneBlock(full),
    structuredContent: {
      query: d.query ?? input.query,
      state: d.state,
      minPopulation: d.minPopulation,
      populationYear: d.populationYear,
      maxResultsPerCity: d.maxResultsPerCity,
      concurrency: d.concurrency,
      censusSourceUrl: d.censusSourceUrl,
      usZipsSourcePath: d.usZipsSourcePath ?? null,
      warnings,
      extractedAt: d.extractedAt,
      selectedCityCount: d.selectedCityCount,
      totalResultCount,
      csvPath,
      cities,
      durationMs: durationMs ?? 0
    }
  };
}
|
|
1277
|
+
/**
 * Render a star rating as a five-glyph bar of filled and empty stars.
 *
 * The value is parsed as a base-10 integer and clamped to 0..5 so that
 * out-of-range or non-numeric input can never make `String.prototype.repeat`
 * throw a RangeError.
 *
 * @param {unknown} value - Star count as a number or numeric string.
 * @returns {string} Exactly five star glyphs.
 */
function formatPlaceStarBar(value) {
  const parsed = Number.parseInt(String(value ?? "0"), 10);
  const filled = Number.isNaN(parsed) ? 0 : Math.min(5, Math.max(0, parsed));
  return "\u2605".repeat(filled) + "\u2606".repeat(5 - filled);
}

/**
 * Format a Google Maps place-intel payload as an MCP tool result.
 *
 * The text block is a Markdown profile: heading, category, rating, contact
 * lines, hours table, rating distribution, review topics, "About" attributes
 * grouped by section, entity IDs, optional review cards and a timing footer.
 * `structuredContent` carries the scalar fields with explicit nulls.
 *
 * @param {unknown} raw - Tool response handed to `parseData`.
 * @param {{ businessName?: string }} input - Original tool arguments; the
 *   business name is the fallback heading when the payload has no `name`.
 * @returns {object} `{ content, isError: true }` when `parseData` reports an
 *   error; otherwise the `oneBlock(...)` result plus `structuredContent`.
 */
function formatMapsPlaceIntel(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
  const d = parsed.data;
  const name = d.name ?? input.businessName;
  const {
    rating,
    reviewCount,
    category,
    address,
    phoneDisplay: phone,
    website,
    hoursSummary,
    plusCode,
    bookingUrl,
    kgmid,
    cidDecimal,
    cidUrl,
    lat,
    lng,
    durationMs
  } = d;
  const histogram = d.reviewHistogram ?? [];
  const topics = d.reviewTopics ?? [];
  const about = d.aboutAttributes ?? [];
  const reviews = d.reviews ?? [];
  const reviewsStatus = d.reviewsStatus ?? "not_requested";
  const hoursTable = d.hoursTable ?? [];
  const ratingLine = [rating, reviewCount ? `(${reviewCount} reviews)` : null].filter(Boolean).join(" ");
  const basicLines = [
    address ? `- **Address:** ${address}` : null,
    phone ? `- **Phone:** ${phone}` : null,
    website ? `- **Website:** ${website}` : null,
    hoursSummary ? `- **Hours:** ${hoursSummary}` : null,
    plusCode ? `- **Plus Code:** ${plusCode}` : null,
    bookingUrl ? `- **Book:** ${bookingUrl}` : null
  ].filter(Boolean).join("\n");
  const hoursSection = hoursTable.length ? [
    "",
    "## Hours",
    "| Day | Hours |",
    "|-----|-------|",
    hoursTable.map((r) => `| ${r.day} | ${r.hours} |`).join("\n")
  ].join("\n") : "";
  const histSection = histogram.length ? [
    "",
    "## Rating Distribution",
    "| Stars | Count |",
    "|-------|-------|",
    histogram.map((r) => `| ${formatPlaceStarBar(r.stars)} | ${r.count} |`).join("\n")
  ].join("\n") : "";
  const topicsSection = topics.length ? `\n## Review Topics\n${topics.map((t) => `- **${t.label}:** ${t.count} mentions`).join("\n")}` : "";
  // A Map keeps scraped section names such as "constructor" or "__proto__"
  // from colliding with Object.prototype members.
  const aboutBySection = new Map();
  for (const a of about) {
    const attrs = aboutBySection.get(a.section) ?? [];
    attrs.push(a.attribute);
    aboutBySection.set(a.section, attrs);
  }
  const aboutSection = aboutBySection.size ? `\n## About\n${[...aboutBySection].map(([s, attrs]) => `**${s}**\n${attrs.map((a) => `- ${a}`).join("\n")}`).join("\n\n")}` : "";
  const entitySection = [
    kgmid ? `- **KGMID:** \`${kgmid}\`` : null,
    cidDecimal ? `- **CID:** \`${cidDecimal}\`` : null,
    cidUrl ? `- **Maps CID URL:** ${cidUrl}` : null,
    lat != null && lng != null ? `- **Coordinates:** ${lat}, ${lng}` : null
  ].filter(Boolean).join("\n");
  const reviewsSection = (() => {
    if (reviewsStatus === "not_requested") return "";
    if (reviewsStatus === "unavailable") return "\n## Reviews\n> Reviews could not be retrieved this run \u2014 retry with `includeReviews: true`.";
    if (reviewsStatus === "none_exist") return "\n## Reviews\n*This business has no reviews on Google Maps.*";
    if (reviews.length === 0) return "\n## Reviews\n*0 reviews collected.*";
    const cards = reviews.map((r, i) => {
      const stars = formatPlaceStarBar(r.stars);
      return `### ${i + 1}. ${r.author ?? "Anonymous"} \u2014 ${stars}\n*${r.date ?? ""}*\n\n${r.text ?? ""}`;
    }).join("\n\n");
    return `\n## Reviews (${reviews.length})\n${cards}`;
  })();
  const full = [
    `# ${name}`,
    category ? `*${category}*` : null,
    ratingLine ? `\n**Rating:** ${ratingLine}` : null,
    basicLines ? `\n${basicLines}` : null,
    hoursSection,
    histSection,
    topicsSection,
    aboutSection,
    entitySection ? `\n## Entity IDs\n${entitySection}` : null,
    reviewsSection,
    durationMs != null ? `\n---\n*Extracted in ${(durationMs / 1e3).toFixed(1)}s*` : null
  ].filter(Boolean).join("\n");
  return {
    ...oneBlock(full),
    structuredContent: {
      name,
      rating: rating ?? null,
      reviewCount: reviewCount ?? null,
      category: category ?? null,
      address: address ?? null,
      phone: phone ?? null,
      website: website ?? null,
      hoursSummary: hoursSummary ?? null,
      bookingUrl: bookingUrl ?? null,
      kgmid: kgmid ?? null,
      cidDecimal: cidDecimal ?? null,
      cidUrl: cidUrl ?? null,
      lat: lat ?? null,
      lng: lng ?? null,
      reviewsStatus,
      reviewsCollected: reviews.length,
      reviewTopics: topics.map((t) => ({ label: String(t.label ?? ""), count: String(t.count ?? "") }))
    }
  };
}
|
|
1398
|
+
/**
 * Format a Facebook ad transcription payload as a single Markdown text block.
 *
 * The block shows the duration and word count, the full transcript, a table of
 * up to 50 timestamped chunks, and a hint pointing at `facebook_page_intel`.
 *
 * @param {unknown} raw - Tool response handed to `parseData`.
 * @param {object} input - Original tool arguments (currently unused; kept for
 *   signature parity with the other formatters).
 * @returns {object} `{ content, isError: true }` when `parseData` reports an
 *   error; otherwise the `oneBlock(...)` result.
 */
function formatFacebookAdTranscribe(raw, input) {
  const parsed = parseData(raw);
  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
  const d = parsed.data;
  const text = d.text ?? "";
  const chunks = d.chunks ?? [];
  // Attach the unit only when a duration exists so a missing value renders as
  // a bare em dash rather than an em dash followed by "s".
  const durationLabel = d.durationMs ? `${(d.durationMs / 1e3).toFixed(0)}s` : "\u2014";
  // Split on whitespace runs and treat an empty transcript as zero words;
  // "".split(" ") would otherwise report one word.
  const trimmed = text.trim();
  const wordCount = trimmed ? trimmed.split(/\s+/).length : 0;
  const chunkRows = chunks.slice(0, 50).map((c) => {
    // Chunks without a usable start timestamp are shown at 00:00.
    const start = c.timestamp?.[0];
    const sec = Number.isFinite(start) ? Math.floor(start) : 0;
    const mm = String(Math.floor(sec / 60)).padStart(2, "0");
    const ss = String(sec % 60).padStart(2, "0");
    return `| ${mm}:${ss} | ${cell(truncate(c.text, 120))} |`;
  }).join("\n");
  const full = [
    `# Facebook Ad Transcript`,
    `**Duration:** ${durationLabel} \xB7 **${wordCount} word${wordCount === 1 ? "" : "s"}**`,
    `\n## Full Transcript\n${text}`,
    chunks.length ? ["", "## Timestamped Chunks", "| Time | Text |", "|------|------|", chunkRows].join("\n") : "",
    "\n---\n\u{1F4A1} Get more ads from this advertiser: use `facebook_page_intel`"
  ].filter(Boolean).join("\n");
  return oneBlock(full);
}
|
|
1428
|
+
|
|
1429
|
+
// src/mcp/mcp-tool-schemas.ts
|
|
1430
|
+
var import_zod2 = require("zod");
|
|
1431
|
+
// Zod field map for the PAA-harvest input: search topic, optional location,
// question budget, Google locale/device options, proxy targeting and a debug flag.
var HarvestPaaInputSchema = (() => {
  const { z } = import_zod2;
  return {
    query: z.string().min(1).describe('Core search topic only. If the user says "best hvac company in Denver CO", use query="best hvac company" and location="Denver, CO". Do not include the location in query when it can be separated.'),
    location: z.string().optional().describe('City, region, or country for geo-targeted results, inferred from the user request when present, e.g. "Denver, CO", "Tokyo, Japan", "London, UK".'),
    maxQuestions: z.number().int().min(1).max(200).default(30).describe("Number of PAA questions to extract. Default 30. Maximum 200. Use 10 for quick probes, 30 for normal research, 100-200 when the user asks for everything/full/deep research. Larger harvests get a longer server time budget (151-200 questions \u2192 up to 280s). Credits are charged by extracted question; unused request hold is refunded."),
    gl: z.string().length(2).default("us").describe("Google country code inferred from location or user language. Examples: United States us, United Kingdom gb, Japan jp, Canada ca, Australia au."),
    hl: z.string().default("en").describe("Google interface/content language inferred from the user request. Use en unless the user asks for another language or locale."),
    device: z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
    proxyMode: z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default for US city/state SERPs; it creates a fresh residential proxy ID per attempt and retries CAPTCHA, proxy tunnel failure, and wrong-location evidence before returning. Use configured only for the static configured proxy. Use none only for direct-network debugging."),
    proxyZip: z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use when the user gives a specific ZIP or when city-center targeting needs to be forced. With proxyMode location this ZIP is used for each fresh proxy attempt."),
    debug: z.boolean().default(false).describe("Include sanitized browser/session/location diagnostics in the response. Use true when debugging localization, CAPTCHA, or proxy behavior.")
  };
})();
|
|
1442
|
+
// Zod field map for the single-URL extraction input: the target URL plus
// opt-in screenshot, branding and media-download switches.
var ExtractUrlInputSchema = (() => {
  const { z } = import_zod2;
  return {
    url: z.string().url().describe("Public http/https URL to extract. Use this when the user provides one specific page URL."),
    screenshot: z.boolean().default(false).describe("Also capture a full-page screenshot of the URL. Saved to ~/Downloads/mcp-scraper/screenshots/ and returned inline. Use when the user asks to see or capture the page visually."),
    screenshotDevice: z.enum(["desktop", "mobile"]).default("desktop").describe("Viewport for screenshot. desktop = 1440\xD7900. mobile = 390\xD7844. Default desktop."),
    extractBranding: z.boolean().default(false).describe("Extract brand colors, fonts, logo, and favicon using a rendered browser session. Returns colorScheme (light/dark), colors (primary/accent/background/text/heading as hex), fonts (heading/body family names), and assets (logo URL, favicon URL). Use when the user asks about brand colors, site theme, or brand assets."),
    downloadMedia: z.boolean().default(false).describe("Extract and download all page media (images, video, audio) to ~/Downloads/mcp-scraper/media/. Ad networks, tracking pixels, and noise URLs are filtered automatically. Use when the user asks to download or harvest assets from a page."),
    mediaTypes: z.array(z.enum(["image", "video", "audio"])).default(["image", "video", "audio"]).describe("Which media types to download. Default all three."),
    allowLocal: z.boolean().default(false).describe("Allow localhost and private-network URLs. For local development only.")
  };
})();
|
|
1451
|
+
// Zod field map for the site URL-mapping input: start URL and an optional
// cap (1-500) on discovered URLs.
var MapSiteUrlsInputSchema = (() => {
  const { z } = import_zod2;
  return {
    url: z.string().url().describe("Public website URL or domain to crawl for internal URLs. Use before extract_site when the user asks to audit/map/crawl a site."),
    maxUrls: z.number().int().min(1).max(500).optional().describe("Maximum URLs to discover. Use 100 for normal maps, higher when the user asks for a full inventory.")
  };
})();
|
|
1455
|
+
// Zod field map for the multi-page site extraction input: start URL and an
// optional cap (1-50) on extracted pages.
var ExtractSiteInputSchema = (() => {
  const { z } = import_zod2;
  return {
    url: z.string().url().describe("Public website URL or domain to extract across multiple pages. Use when the user asks for a site audit, website crawl, or full-site content/schema extraction."),
    maxPages: z.number().int().min(1).max(50).optional().describe("Maximum pages to extract. Use 50 when the user asks for full results or a complete crawl within MCP limits.")
  };
})();
|
|
1459
|
+
// Zod field map for the YouTube harvest input. `query` and `channelHandle`
// are both optional at the schema level; the field descriptions state which
// one each mode expects.
var YoutubeHarvestInputSchema = (() => {
  const { z } = import_zod2;
  return {
    mode: z.enum(["search", "channel"]).describe("Use search for topic/keyword requests. Use channel when the user provides @handle, channel ID, or channel URL."),
    query: z.string().optional().describe("Required when mode is search. The YouTube search topic in the user\u2019s words."),
    channelHandle: z.string().optional().describe("YouTube channel handle, channel ID, or URL. Examples: @mkbhd, UC..., https://youtube.com/@mkbhd."),
    maxVideos: z.number().int().min(1).max(500).default(50).describe("Number of videos to return. Default 50. Increase when user asks for full channel/history.")
  };
})();
|
|
1465
|
+
// Zod field map for the YouTube transcription input: a single non-empty video ID.
var YoutubeTranscribeInputSchema = (() => {
  const { z } = import_zod2;
  return {
    videoId: z.string().min(1).describe("YouTube video ID, e.g. dQw4w9WgXcQ")
  };
})();
|
|
1468
|
+
// Zod field map for the Facebook page-intel input. All three identifiers are
// optional here; the `query` description says one of them is required, but
// that rule is not enforced by this field map.
var FacebookPageIntelInputSchema = (() => {
  const { z } = import_zod2;
  return {
    pageId: z.string().optional(),
    libraryId: z.string().optional(),
    query: z.string().optional().describe("Advertiser or brand name when pageId/libraryId is not known. One of pageId, libraryId, or query is required."),
    maxAds: z.number().int().min(1).max(200).default(50),
    country: z.string().length(2).default("US")
  };
})();
|
|
1475
|
+
// Zod field map for the Facebook Ad Library search input: search term,
// two-letter country (default "US") and a result cap (1-20, default 10).
var FacebookAdSearchInputSchema = (() => {
  const { z } = import_zod2;
  return {
    query: z.string().min(1).describe("Advertiser, brand, competitor, niche, or keyword to search in Facebook Ad Library."),
    country: z.string().length(2).default("US"),
    maxResults: z.number().int().min(1).max(20).default(10)
  };
})();
|
|
1480
|
+
// Zod field map for the Facebook ad transcription input: one video URL.
var FacebookAdTranscribeInputSchema = (() => {
  const { z } = import_zod2;
  return {
    videoUrl: z.string().url().describe("Facebook CDN video URL from a facebook_page_intel result")
  };
})();
|
|
1483
|
+
// Zod field map for the Maps place-intel input: business name and location
// (both required), Google locale codes, and the optional review-card fetch.
var MapsPlaceIntelInputSchema = (() => {
  const { z } = import_zod2;
  return {
    businessName: z.string().min(1).describe('Business name only. If user says "Elite Roofing Denver CO", use businessName="Elite Roofing" and location="Denver, CO".'),
    location: z.string().min(1).describe('City/region/country where the business should be searched, e.g. "Denver, CO". Infer from the user request when possible.'),
    gl: z.string().length(2).default("us").describe("Google country code inferred from location."),
    hl: z.string().length(2).default("en").describe("Language inferred from user request."),
    includeReviews: z.boolean().default(false).describe("Whether to fetch individual review cards"),
    maxReviews: z.number().int().min(1).max(500).default(50).describe("Max review cards to return (requires includeReviews: true)")
  };
})();
|
|
1491
|
+
// Zod field map for the Maps search input: search term, optional location,
// Google locale codes, result cap (1-50), proxy targeting and a debug flag.
var MapsSearchInputSchema = (() => {
  const { z } = import_zod2;
  return {
    query: z.string().min(1).describe('Business category, niche, keyword, or search term. If the user says "roofers in Denver CO", use query="roofers" and location="Denver, CO". Do not put the location here when it can be separated.'),
    location: z.string().optional().describe('City, region, country, or service area for the Maps search, e.g. "Denver, CO". Infer from the user request when present.'),
    gl: z.string().length(2).default("us").describe("Google country code inferred from location."),
    hl: z.string().length(2).default("en").describe("Language inferred from user request."),
    maxResults: z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more."),
    proxyMode: z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default for US city/state Maps searches; it creates a fresh residential proxy ID when the browser service is available. Use configured for the server proxy ID, and none only for local direct-network debugging."),
    proxyZip: z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use when the user gives a specific ZIP or city-center ZIP."),
    debug: z.boolean().default(false).describe("Include sanitized browser/proxy diagnostics when debugging Maps localization, CAPTCHA, or proxy behavior.")
  };
})();
|
|
1501
|
+
// Zod field map for the directory-workflow input: the search term, the
// Census-based market selection (state, population floor, year, city cap),
// per-city search limits, ZIP-group and CSV options, proxy targeting and debug.
var DirectoryWorkflowInputSchema = (() => {
  const { z } = import_zod2;
  return {
    query: z.string().min(1).describe("Business category, niche, or keyword to search on Google Maps for every selected market, e.g. roofers, dentists, med spas. Do not include the city here."),
    state: z.string().min(2).default("TN").describe("US state abbreviation or state name used to select Census places, e.g. TN or Tennessee."),
    minPopulation: z.number().int().min(0).default(1e5).describe('Minimum Census place population for market selection. Use 100000 for "cities above 100k population".'),
    populationYear: z.number().int().min(2020).max(2025).default(2025).describe("Census population estimate year from the 2020-2025 Population Estimates Program city/place dataset."),
    maxCities: z.number().int().min(1).max(100).default(25).describe("Maximum number of markets to process after sorting by population descending."),
    maxResultsPerCity: z.number().int().min(1).max(50).default(50).describe("Google Maps business/profile candidates to collect for each city. Maximum 50."),
    concurrency: z.number().int().min(1).max(5).default(5).describe("How many city Maps searches to run in parallel. Use 5 for broad directory batches unless debugging."),
    includeZipGroups: z.boolean().default(true).describe("Attach ZIP groups from a configured US ZIPS CSV when available. Set MCP_SCRAPER_USZIPS_CSV_PATH on the API server or pass usZipsCsvPath in local/test mode."),
    usZipsCsvPath: z.string().optional().describe("Local/test-only path to a US ZIPS CSV with state_abbr, zipcode, county, city columns, such as Lead Magician tools/analytics/data/uszips.csv. Deployed APIs should use MCP_SCRAPER_USZIPS_CSV_PATH instead."),
    saveCsv: z.boolean().default(true).describe("Save a directory-ready CSV to the MCP Scraper output directory and return its path. CSV rows include source_location, result_position, business_name, review_stars, category, address, phone, hours_status, website_url, directions_url, place_url, CID fields, population, and ZIP groups."),
    proxyMode: z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode for every city Maps search. Use location by default for US city/state batches; it creates fresh residential proxy IDs when the browser service is available. Use configured for the server proxy ID, and none only for local direct-network debugging."),
    proxyZip: z.string().regex(/^\d{5}$/).optional().describe("Optional ZIP override for proxy targeting. Normally omit it so each city can use its Lead Magician ZIP group or city/state location."),
    debug: z.boolean().default(false).describe("Include sanitized browser/proxy diagnostics in each Maps browser session when supported.")
  };
})();
|
|
1516
|
+
// Shared Zod schema for a field that is either a string or null; the output
// schemas declared after it reference this single instance.
var NullableString = import_zod2.z.string().nullable();
|
|
1517
|
+
// Zod field map for the Maps search structured output: echoed query fields,
// counts capped at 50, the candidate rows and the elapsed time in ms.
var MapsSearchOutputSchema = (() => {
  const { z } = import_zod2;
  // One business/profile candidate row.
  const candidate = z.object({
    position: z.number().int().min(1),
    name: z.string(),
    placeUrl: z.string().url(),
    cid: NullableString,
    cidDecimal: NullableString,
    rating: NullableString,
    reviewCount: NullableString,
    category: NullableString,
    address: NullableString,
    phone: NullableString,
    hoursStatus: NullableString,
    websiteUrl: NullableString,
    directionsUrl: NullableString,
    metadata: z.array(z.string())
  });
  return {
    query: z.string(),
    location: z.string().nullable(),
    searchQuery: z.string(),
    searchUrl: z.string().url(),
    extractedAt: z.string(),
    requestedMaxResults: z.number().int().min(1).max(50),
    resultCount: z.number().int().min(0).max(50),
    results: z.array(candidate),
    durationMs: z.number().int().min(0)
  };
})();
|
|
1543
|
+
// Zod object schema for one Maps business row inside a directory-workflow
// city; it has the same fields as the rows of MapsSearchOutputSchema.
var DirectoryMapsBusinessOutput = (() => {
  const { z } = import_zod2;
  return z.object({
    position: z.number().int().min(1),
    name: z.string(),
    placeUrl: z.string().url(),
    cid: NullableString,
    cidDecimal: NullableString,
    rating: NullableString,
    reviewCount: NullableString,
    category: NullableString,
    address: NullableString,
    phone: NullableString,
    hoursStatus: NullableString,
    websiteUrl: NullableString,
    directionsUrl: NullableString,
    metadata: z.array(z.string())
  });
})();
|
|
1559
|
+
// Zod field map for the directory-workflow structured output: the echoed run
// parameters, data-source info, warnings, totals, optional CSV path and one
// entry per processed city.
var DirectoryWorkflowOutputSchema = (() => {
  const { z } = import_zod2;
  // One selected market together with its Maps results.
  const cityEntry = z.object({
    city: z.string(),
    state: z.string(),
    location: z.string(),
    cityKey: z.string(),
    censusName: z.string(),
    population: z.number().int().min(0),
    populationYear: z.number().int().min(2020).max(2025),
    zips: z.array(z.string()),
    counties: z.array(z.string()),
    status: z.enum(["ok", "empty", "failed"]),
    error: NullableString,
    resultCount: z.number().int().min(0),
    durationMs: z.number().int().min(0),
    results: z.array(DirectoryMapsBusinessOutput)
  });
  return {
    query: z.string(),
    state: z.string(),
    minPopulation: z.number().int().min(0),
    populationYear: z.number().int().min(2020).max(2025),
    maxResultsPerCity: z.number().int().min(1).max(50),
    concurrency: z.number().int().min(1).max(5),
    censusSourceUrl: z.string().url(),
    usZipsSourcePath: NullableString,
    warnings: z.array(z.string()),
    extractedAt: z.string(),
    selectedCityCount: z.number().int().min(0),
    totalResultCount: z.number().int().min(0),
    csvPath: NullableString,
    cities: z.array(cityEntry),
    durationMs: z.number().int().min(0)
  };
})();
|
|
1591
|
+
// Zod object schema for one organic search result row.
var OrganicResultOutput = (() => {
  const { z } = import_zod2;
  return z.object({
    position: z.number().int(),
    title: z.string(),
    url: z.string(),
    domain: z.string(),
    snippet: NullableString
  });
})();
|
|
1598
|
+
// Zod schema for the AI-overview summary; the whole object may be null.
var AiOverviewOutput = (() => {
  const { z } = import_zod2;
  const overview = z.object({
    detected: z.boolean(),
    text: NullableString
  });
  return overview.nullable();
})();
|
|
1602
|
+
// Zod schema for the three entity-ID lists (kgIds, cids, gcids); the whole
// object may be null.
var EntityIdsOutput = (() => {
  const { z } = import_zod2;
  const ids = z.object({
    kgIds: z.array(z.string()),
    cids: z.array(z.string()),
    gcids: z.array(z.string())
  });
  return ids.nullable();
})();
|
|
1607
|
+
// Zod field map for the PAA-harvest structured output: echoed query fields,
// the harvested question rows, organic results, AI overview, entity IDs and
// a nullable elapsed time.
var HarvestPaaOutputSchema = (() => {
  const { z } = import_zod2;
  // One harvested question with its optional answer and source attribution.
  const questionRow = z.object({
    question: z.string(),
    answer: NullableString,
    sourceTitle: NullableString,
    sourceSite: NullableString
  });
  return {
    query: z.string(),
    location: NullableString,
    questionCount: z.number().int().min(0),
    completionStatus: NullableString,
    questions: z.array(questionRow),
    organicResults: z.array(OrganicResultOutput),
    aiOverview: AiOverviewOutput,
    entityIds: EntityIdsOutput,
    durationMs: z.number().min(0).nullable()
  };
})();
|
|
1623
|
+
// Zod field map for the SERP search structured output: echoed query fields,
// organic results, local-pack rows, AI overview and entity IDs.
var SearchSerpOutputSchema = (() => {
  const { z } = import_zod2;
  // One local-pack business row.
  const localPackRow = z.object({
    position: z.number().int(),
    name: z.string(),
    rating: NullableString,
    reviewCount: NullableString,
    websiteUrl: NullableString
  });
  return {
    query: z.string(),
    location: NullableString,
    organicResults: z.array(OrganicResultOutput),
    localPack: z.array(localPackRow),
    aiOverview: AiOverviewOutput,
    entityIds: EntityIdsOutput
  };
})();
|
|
1637
|
+
// Zod field map for the single-URL extraction structured output: page URL and
// title, heading outline, schema/entity summary fields and a nullable
// `screenshotSaved` string.
var ExtractUrlOutputSchema = (() => {
  const { z } = import_zod2;
  // One heading with its numeric level and text.
  const heading = z.object({
    level: z.number().int(),
    text: z.string()
  });
  return {
    url: z.string(),
    title: NullableString,
    headings: z.array(heading),
    schemaBlockCount: z.number().int().min(0),
    entityName: NullableString,
    entityTypes: z.array(z.string()),
    napScore: z.number().nullable(),
    missingSchemaFields: z.array(z.string()),
    screenshotSaved: NullableString
  };
})();
|
|
1651
|
+
// Structured-output shape for the `extract_site` tool (raw Zod shape).
var ExtractSiteOutputSchema = (() => {
  const { z } = import_zod2;
  // Per-page summary row.
  const pageSummary = z.object({
    url: z.string(),
    title: NullableString,
    schemaTypes: z.array(z.string())
  });
  return {
    url: z.string(),
    pageCount: z.number().int().min(0),
    pages: z.array(pageSummary),
    durationMs: z.number().min(0)
  };
})();
|
|
1661
|
+
// Structured-output shape for the `maps_place_intel` tool (raw Zod shape).
// Only `name`, `reviewsStatus`, `reviewsCollected` and `reviewTopics` are
// non-nullable; every other field may be null.
var MapsPlaceIntelOutputSchema = (() => {
  const { z } = import_zod2;
  // Note: `count` is validated as a string here, not a number.
  const reviewTopic = z.object({
    label: z.string(),
    count: z.string()
  });
  return {
    name: z.string(),
    rating: NullableString,
    reviewCount: NullableString,
    category: NullableString,
    address: NullableString,
    phone: NullableString,
    website: NullableString,
    hoursSummary: NullableString,
    bookingUrl: NullableString,
    kgmid: NullableString,
    cidDecimal: NullableString,
    cidUrl: NullableString,
    lat: z.number().nullable(),
    lng: z.number().nullable(),
    reviewsStatus: z.string(),
    reviewsCollected: z.number().int().min(0),
    reviewTopics: z.array(reviewTopic)
  };
})();
|
|
1683
|
+
// Structured-output shape for the `credits_info` tool (raw Zod shape).
var CreditsInfoOutputSchema = (() => {
  const { z } = import_zod2;
  // Cost entry for the single matched item; null when nothing matched.
  const matchedCost = z.object({
    label: z.string(),
    credits: z.number(),
    unit: z.string(),
    notes: NullableString
  }).nullable();
  // Row of the cost table; same fields as the matched cost plus a `key`.
  const costRow = z.object({
    key: z.string(),
    label: z.string(),
    credits: z.number(),
    unit: z.string(),
    notes: NullableString
  });
  // One credit ledger entry.
  const ledgerEntry = z.object({
    createdAt: z.string(),
    operation: z.string(),
    credits: z.number(),
    description: NullableString
  });
  return {
    balanceCredits: z.number().nullable(),
    matchedCost,
    costs: z.array(costRow),
    ledger: z.array(ledgerEntry)
  };
})();
|
|
1705
|
+
// Structured-output shape for the `map_site_urls` tool (raw Zod shape).
var MapSiteUrlsOutputSchema = (() => {
  const { z } = import_zod2;
  const nonNegativeInt = () => z.number().int().min(0);
  // A discovered URL and its integer status, which may be null.
  const mappedUrl = z.object({
    url: z.string(),
    status: z.number().int().nullable()
  });
  return {
    startUrl: z.string(),
    totalFound: nonNegativeInt(),
    truncated: z.boolean(),
    okCount: nonNegativeInt(),
    redirectCount: nonNegativeInt(),
    brokenCount: nonNegativeInt(),
    urls: z.array(mappedUrl),
    durationMs: z.number().min(0)
  };
})();
|
|
1718
|
+
// Structured-output shape for the `youtube_harvest` tool (raw Zod shape).
var YoutubeHarvestOutputSchema = (() => {
  const { z } = import_zod2;
  // Channel summary; the whole object may be null.
  const channel = z.object({
    title: NullableString,
    subscriberCount: NullableString
  }).nullable();
  // One harvested video; only videoId and title are required strings.
  const video = z.object({
    videoId: z.string(),
    title: z.string(),
    channelName: NullableString,
    views: NullableString,
    duration: NullableString,
    url: NullableString
  });
  return {
    mode: z.string(),
    videoCount: z.number().int().min(0),
    channel,
    videos: z.array(video)
  };
})();
|
|
1734
|
+
// Structured-output shape for the `facebook_ad_search` tool (raw Zod shape).
var FacebookAdSearchOutputSchema = (() => {
  const { z } = import_zod2;
  // One advertiser hit; every field may be null.
  const advertiser = z.object({
    name: NullableString,
    adCount: z.number().int().nullable(),
    libraryId: NullableString
  });
  return {
    query: z.string(),
    advertiserCount: z.number().int().min(0),
    advertisers: z.array(advertiser)
  };
})();
|
|
1743
|
+
// Structured-output shape for the `facebook_page_intel` tool (raw Zod shape).
var FacebookPageIntelOutputSchema = (() => {
  const { z } = import_zod2;
  const nonNegativeInt = () => z.number().int().min(0);
  // One ad record; every field may be null.
  const ad = z.object({
    libraryId: NullableString,
    status: NullableString,
    creativeType: NullableString,
    headline: NullableString,
    cta: NullableString,
    startDate: NullableString,
    videoUrl: NullableString,
    variations: z.number().int().nullable()
  });
  return {
    advertiserName: NullableString,
    totalAds: nonNegativeInt(),
    activeCount: nonNegativeInt(),
    videoCount: nonNegativeInt(),
    imageCount: nonNegativeInt(),
    ads: z.array(ad)
  };
})();
|
|
1760
|
+
// Input shape for the `credits_info` tool (raw Zod shape). Both fields are
// optional for the caller: `item` is `.optional()` and `includeLedger`
// defaults to false.
var CreditsInfoInputSchema = (() => {
  const { z } = import_zod2;
  return {
    item: z
      .string()
      .optional()
      .describe('Optional tool, action, or feature to look up, e.g. "maps reviews", "extract_url", or "YouTube transcription"'),
    includeLedger: z
      .boolean()
      .default(false)
      .describe("Whether to include recent credit ledger entries")
  };
})();
|
|
1764
|
+
// Input shape for the `search_serp` tool (raw Zod shape). Only `query` is
// required; every other field is optional or carries a default.
var SearchSerpInputSchema = (() => {
  const { z } = import_zod2;
  return {
    query: z
      .string()
      .min(1)
      .describe('Core search topic only. Separate location when possible. If user says "best dentist in Brooklyn NY serp", use query="best dentist" and location="Brooklyn, NY".'),
    location: z
      .string()
      .optional()
      .describe("City, region, or country for geo-targeted results, inferred from user request when present."),
    gl: z
      .string()
      .length(2)
      .default("us")
      .describe("Google country code inferred from location or user language."),
    hl: z
      .string()
      .default("en")
      .describe("Google interface/content language inferred from user request."),
    device: z
      .enum(["desktop", "mobile"])
      .default("desktop")
      .describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
    proxyMode: z
      .enum(["location", "configured", "none"])
      .default("location")
      .describe("Proxy targeting mode. Use location by default for US city/state SERPs; it creates a fresh residential proxy ID per attempt and retries CAPTCHA, proxy tunnel failure, and wrong-location evidence before returning. Use configured only for the static configured proxy. Use none only for direct-network debugging."),
    // Exactly five digits.
    proxyZip: z
      .string()
      .regex(/^\d{5}$/)
      .optional()
      .describe("Optional US ZIP override for residential location proxy targeting. Use when the user gives a specific ZIP or when city-center targeting needs to be forced. With proxyMode location this ZIP is used for each fresh proxy attempt."),
    debug: z
      .boolean()
      .default(false)
      .describe("Include sanitized browser/session/location diagnostics in the response. Use true when debugging localization, CAPTCHA, or proxy behavior."),
    pages: z
      .number()
      .int()
      .min(1)
      .max(2)
      .default(1)
      .describe("Number of result pages to fetch (1\u20132)")
  };
})();
|
|
1775
|
+
// Input shape for capturing a SERP snapshot (raw Zod shape). Only `query` is
// required; every other field is optional or carries a default.
var CaptureSerpSnapshotInputSchema = (() => {
  const { z } = import_zod2;
  return {
    query: z
      .string()
      .min(1)
      .describe("Core search query to capture as a structured SERP Intelligence snapshot. Separate the place into location when the user gives a city, region, country, or ZIP."),
    location: z
      .string()
      .optional()
      .describe("City, region, country, or service area used for localized Google results. MCP Scraper records location evidence; UULE alone is not proof of localization."),
    gl: z
      .string()
      .length(2)
      .default("us")
      .describe("Google country code inferred from the requested market, e.g. us, gb, ca, au."),
    hl: z
      .string()
      .default("en")
      .describe("Google interface/content language inferred from the user request."),
    device: z
      .enum(["desktop", "mobile"])
      .default("desktop")
      .describe("SERP device context. Use mobile only when the user asks for mobile rankings or mobile SERP evidence."),
    proxyMode: z
      .enum(["location", "configured", "none"])
      .default("location")
      .describe("Proxy behavior for capture. Use location for localized US residential evidence; it creates a fresh proxy ID per attempt and retries CAPTCHA, proxy tunnel failure, and wrong-location evidence before returning. Use configured only for the static residential proxy, and none only for direct-network debugging."),
    // Exactly five digits.
    proxyZip: z
      .string()
      .regex(/^\d{5}$/)
      .optional()
      .describe("Optional US ZIP override for residential location proxy targeting when a precise city-center or ZIP proxy is needed. With proxyMode location this ZIP is used for each fresh proxy attempt."),
    pages: z
      .number()
      .int()
      .min(1)
      .max(2)
      .default(1)
      .describe("Number of Google result pages to capture. Use 1 normally and 2 only when the user needs deeper ranking evidence."),
    debug: z
      .boolean()
      .default(false)
      .describe("Include sanitized browser, proxy, and location diagnostics. Use true when debugging localization, CAPTCHA, proxy selection, or capture reliability."),
    includePageSnapshots: z
      .boolean()
      .default(false)
      .describe("Also capture ranking-page snapshots for selected SERP URLs through the same product capture path."),
    pageSnapshotLimit: z
      .number()
      .int()
      .min(0)
      .max(10)
      .default(0)
      .describe("Maximum ranking-page snapshots to capture when includePageSnapshots is true. Use 0 when only SERP evidence is needed.")
  };
})();
|
|
1788
|
+
// Input shape for a full-page screenshot request (raw Zod shape). `url` is
// the only required field.
var ScreenshotInputSchema = (() => {
  const { z } = import_zod2;
  return {
    url: z
      .string()
      .url()
      .describe("URL to capture as a full-page screenshot. Use http or https. Pass allowLocal: true to capture localhost or private-network URLs during development."),
    device: z
      .enum(["desktop", "mobile"])
      .default("desktop")
      .describe("Viewport profile. desktop = 1440\xD7900. mobile = 390\xD7844. Use desktop by default; use mobile when the user asks for a mobile view."),
    allowLocal: z
      .boolean()
      .default(false)
      .describe("Allow localhost and private-network URLs (127.x, 192.168.x, 10.x, etc.). For local development only \u2014 not for production use.")
  };
})();
|
|
1793
|
+
// Input shape for capturing page snapshots of specific URLs (raw Zod shape).
// NOTE(review): `urls` is required (1-25 entries) while the `targets`
// description says to use it "instead of urls"; as written a caller must
// always send `urls`. Confirm against the handler whether `urls` should be
// optional when `targets` is supplied.
var CaptureSerpPageSnapshotsInputSchema = (() => {
  const { z } = import_zod2;
  // A single structured target; `.strict()` rejects unknown keys.
  const snapshotTarget = z.object({
    url: z
      .string()
      .url()
      .describe("Public HTTP/HTTPS URL to capture."),
    sourceKind: z
      .enum(["organic", "ai_citation", "local_pack_website", "configured_target", "site_subject"])
      .default("configured_target")
      .describe("Why this page is being captured for SERP Intelligence evidence."),
    sourcePosition: z
      .number()
      .int()
      .min(1)
      .optional()
      .describe("Ranking or citation position when the page came from SERP evidence.")
  }).strict();
  return {
    urls: z
      .array(z.string().url())
      .min(1)
      .max(25)
      .describe("Public HTTP/HTTPS URLs to capture as SERP Intelligence page snapshots. Do not pass localhost, private IPs, file URLs, or internal admin URLs."),
    targets: z
      .array(snapshotTarget)
      .min(1)
      .max(25)
      .optional()
      .describe("Structured page snapshot targets. Use this instead of urls when source kind or position should be preserved."),
    maxConcurrency: z
      .number()
      .int()
      .min(1)
      .max(5)
      .default(2)
      .describe("Parallel page captures. Use 2 normally; higher values can increase proxy/browser pressure."),
    // Bounds are 1 s to 60 s, default 15 s.
    timeoutMs: z
      .number()
      .int()
      .min(1000)
      .max(60000)
      .default(15000)
      .describe("Per-page capture timeout in milliseconds. Increase for slow pages; timeout artifacts are returned as structured capture failures."),
    debug: z
      .boolean()
      .default(false)
      .describe("Include sanitized browser/proxy diagnostics for page snapshot debugging. Use true for capture, network, or proxy troubleshooting.")
  };
})();
|
|
1804
|
+
|
|
1805
|
+
// src/mcp/paa-mcp-server.ts
|
|
1806
|
+
/**
 * Build the annotation object shared by the tools registered with this helper.
 *
 * The hints are fixed: read-only, non-destructive, not idempotent, open-world.
 * Only the title varies.
 *
 * @param {string} title - Display title copied into the annotations.
 * @returns {{title: string, readOnlyHint: boolean, destructiveHint: boolean, idempotentHint: boolean, openWorldHint: boolean}}
 */
function liveWebToolAnnotations(title) {
  const annotations = { title };
  annotations.readOnlyHint = true;
  annotations.destructiveHint = false;
  annotations.idempotentHint = false;
  annotations.openWorldHint = true;
  return annotations;
}
|
|
1815
|
+
/**
 * List saved Markdown reports in the output directory, newest first.
 *
 * Best-effort: if the directory cannot be resolved or read, an empty list is
 * returned. Each candidate is stat'ed on its own, so an entry that vanishes
 * between the directory read and the stat (or cannot be stat'ed for any other
 * reason) is skipped instead of discarding the whole listing. Entries that
 * are not regular files are skipped as well, since they cannot be read back
 * as a report.
 *
 * @returns {{filename: string, mtimeMs: number}[]} At most 100 entries,
 *   sorted by modification time descending.
 */
function listSavedReports() {
  let dir;
  let entries;
  try {
    dir = outputBaseDir2();
    entries = (0, import_node_fs3.readdirSync)(dir);
  } catch {
    // No output directory (yet) or it is unreadable: nothing to list.
    return [];
  }
  const reports = [];
  for (const filename of entries) {
    if (!filename.endsWith(".md")) continue;
    try {
      const stats = (0, import_node_fs3.statSync)((0, import_node_path3.join)(dir, filename));
      if (stats.isFile()) reports.push({ filename, mtimeMs: stats.mtimeMs });
    } catch {
      // Entry disappeared or is inaccessible; keep listing the others.
    }
  }
  return reports.sort((a, b) => b.mtimeMs - a.mtimeMs).slice(0, 100);
}
|
|
1823
|
+
/**
 * Register the `saved-report` resource template (`report://{filename}`) on
 * the given server.
 *
 * Listing delegates to `listSavedReports()` and URL-encodes each filename
 * into the resource URI. Reading decodes the requested filename, reduces it
 * to its basename, requires a `.md` suffix, and returns the file contents
 * from the output directory as Markdown text.
 *
 * @param {object} server2 - Server exposing `registerResource(name, template, metadata, readCallback)`.
 * @returns {void}
 */
function registerSavedReportResources(server2) {
  const listReports = () => {
    const resources = [];
    for (const report of listSavedReports()) {
      resources.push({
        uri: `report://${encodeURIComponent(report.filename)}`,
        name: report.filename,
        mimeType: "text/markdown"
      });
    }
    return { resources };
  };

  const readReport = async (uri, variables) => {
    // A template variable may arrive as an array; only the first value is used.
    const raw = variables.filename;
    const requested = Array.isArray(raw) ? raw[0] : raw;
    const decoded = decodeURIComponent(String(requested ?? ""));
    // basename() drops any directory components from the requested name.
    const filename = (0, import_node_path3.basename)(decoded);
    if (!filename.endsWith(".md")) {
      throw new Error("Only saved .md reports can be read");
    }
    const reportPath = (0, import_node_path3.join)(outputBaseDir2(), filename);
    const text = (0, import_node_fs3.readFileSync)(reportPath, "utf8");
    return { contents: [{ uri: uri.href, mimeType: "text/markdown", text }] };
  };

  const template = new import_mcp2.ResourceTemplate("report://{filename}", { list: listReports });
  const metadata = {
    title: "Saved MCP Scraper Reports",
    description: "Markdown research reports saved by previous MCP Scraper tool calls. Read a report to reuse prior research without re-scraping or spending credits.",
    mimeType: "text/markdown"
  };
  server2.registerResource("saved-report", template, metadata, readReport);
}
|
|
1849
|
+
/**
 * Register the scraper tool set on an MCP server.
 *
 * Every tool handler awaits the matching `executor` method and passes the
 * result, together with the original input, to the matching `format*`
 * function. Unless `options.savesReportsLocally` is exactly `false`, the
 * saved-report resources are registered too and the "saves locally" note is
 * appended to tool descriptions; otherwise the "returned inline" note is
 * appended instead. `facebook_ad_transcribe` and `credits_info` never get a
 * note.
 *
 * @param {object} server2 - Server exposing `registerTool(name, config, handler)`.
 * @param {object} executor - Object providing one async method per tool.
 * @param {{savesReportsLocally?: boolean}} [options] - Registration options.
 * @returns {void}
 */
function registerPaaExtractorMcpTools(server2, executor, options = {}) {
  const savesReports = options.savesReportsLocally !== false;
  let reportNote;
  if (savesReports) {
    reportNote = " Saves a full Markdown report locally.";
  } else {
    reportNote = " Reports are returned inline; no files are saved on this hosted endpoint.";
  }
  const withReportNote = (description) => `${description}${reportNote}`;

  // Registers one tool whose annotations come from liveWebToolAnnotations(title).
  // `schemas` carries `inputSchema` and, when the tool has one, `outputSchema`.
  const registerLiveTool = (name, title, description, schemas, handler) => {
    const config = {
      title,
      description,
      ...schemas,
      annotations: liveWebToolAnnotations(title)
    };
    server2.registerTool(name, config, handler);
  };

  if (savesReports) {
    registerSavedReportResources(server2);
  }

  registerLiveTool(
    "harvest_paa",
    "Google PAA + SERP Harvest",
    withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). For US local SERPs, leave proxyMode as location so the service uses fresh residential proxy IDs across retries and rejects wrong-location evidence instead of returning a bad market. Use maxQuestions 30 normally, 100-200 for "full", "deep", "all", or comprehensive research. Deep harvests above 100 questions can run for several minutes with no interim progress \u2014 warn the user before starting one and keep maxQuestions at or below 100 unless they explicitly want a deep harvest. Credits are charged by extracted question; unused request hold is refunded.'),
    { inputSchema: HarvestPaaInputSchema, outputSchema: HarvestPaaOutputSchema },
    async (input) => {
      const result = await executor.harvestPaa(input);
      return formatHarvestPaa(result, input);
    }
  );

  registerLiveTool(
    "search_serp",
    "Google SERP Lookup",
    withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request. For US city/state rankings, keep proxyMode as location and pass proxyZip when a city-center ZIP is known; location mode uses fresh residential proxy IDs and retries CAPTCHA, proxy tunnel failures, and wrong-location evidence before returning."),
    { inputSchema: SearchSerpInputSchema, outputSchema: SearchSerpOutputSchema },
    async (input) => {
      const result = await executor.searchSerp(input);
      return formatSearchSerp(result, input);
    }
  );

  registerLiveTool(
    "extract_url",
    "Single URL Extract",
    withReportNote("Extract structured data from one public URL: page content as Markdown, heading structure, JSON-LD schema, entity details, NAP score, metadata, and missing schema fields. Use when the user provides a single URL or asks to inspect/scrape one page."),
    { inputSchema: ExtractUrlInputSchema, outputSchema: ExtractUrlOutputSchema },
    async (input) => {
      const result = await executor.extractUrl(input);
      return formatExtractUrl(result, input);
    }
  );

  registerLiveTool(
    "map_site_urls",
    "Site URL Map",
    withReportNote("Map/crawl a public website to build a URL inventory with HTTP status codes, broken links, redirects, and site scope. Use before extract_site for audits or when the user asks for a sitemap/URL inventory."),
    { inputSchema: MapSiteUrlsInputSchema, outputSchema: MapSiteUrlsOutputSchema },
    async (input) => {
      const result = await executor.mapSiteUrls(input);
      return formatMapSiteUrls(result, input);
    }
  );

  registerLiveTool(
    "extract_site",
    "Multi-Page Site Extract",
    withReportNote("Run multi-page extraction across a public website. Returns per-page titles, H1s, metadata, headings, schema/entity data, canonical URLs, and content. Use for website audits, competitor audits, and full-site extraction."),
    { inputSchema: ExtractSiteInputSchema, outputSchema: ExtractSiteOutputSchema },
    async (input) => {
      const result = await executor.extractSite(input);
      return formatExtractSite(result, input);
    }
  );

  registerLiveTool(
    "youtube_harvest",
    "YouTube Video Harvest",
    withReportNote('Harvest YouTube video metadata by search query or channel handle/ID/URL. Use mode "search" for keyword/topic requests and mode "channel" for @handles, channel IDs, or channel URLs. Returns titles, views, dates, durations, URLs, thumbnails, and videoIds for follow-up transcription.'),
    { inputSchema: YoutubeHarvestInputSchema, outputSchema: YoutubeHarvestOutputSchema },
    async (input) => {
      const result = await executor.youtubeHarvest(input);
      return formatYoutubeHarvest(result, input);
    }
  );

  // No outputSchema is declared for this tool.
  registerLiveTool(
    "youtube_transcribe",
    "YouTube Transcription",
    withReportNote("Fetch and transcribe captions from a YouTube video. Returns full transcript, timestamped chunks, and word count. Pass a videoId from youtube_harvest results or infer it from a YouTube URL if the user provided one."),
    { inputSchema: YoutubeTranscribeInputSchema },
    async (input) => {
      const result = await executor.youtubeTranscribe(input);
      return formatYoutubeTranscribe(result, input);
    }
  );

  registerLiveTool(
    "facebook_page_intel",
    "Facebook Advertiser Ad Intel",
    withReportNote("Harvest ads from a Facebook advertiser. Returns ad copy, headlines, CTAs, creative type, status, landing URLs, and video URLs ready for transcription. Accepts pageId, libraryId, or a brand/advertiser name as query. Use after facebook_ad_search when possible."),
    { inputSchema: FacebookPageIntelInputSchema, outputSchema: FacebookPageIntelOutputSchema },
    async (input) => {
      const result = await executor.facebookPageIntel(input);
      return formatFacebookPageIntel(result, input);
    }
  );

  registerLiveTool(
    "facebook_ad_search",
    "Facebook Ad Library Search",
    withReportNote("Search Facebook Ad Library by brand, advertiser, competitor, niche, or keyword. Returns advertisers with ad counts and library IDs. Use to discover competitors, then pass libraryId to facebook_page_intel."),
    { inputSchema: FacebookAdSearchInputSchema, outputSchema: FacebookAdSearchOutputSchema },
    async (input) => {
      const result = await executor.facebookAdSearch(input);
      return formatFacebookAdSearch(result, input);
    }
  );

  // No report note and no outputSchema for this tool.
  registerLiveTool(
    "facebook_ad_transcribe",
    "Facebook Ad Transcription",
    "Transcribe audio from a Facebook ad video. Returns full transcript and timestamped chunks. Use the videoUrl value from facebook_page_intel results.",
    { inputSchema: FacebookAdTranscribeInputSchema },
    async (input) => {
      const result = await executor.facebookAdTranscribe(input);
      return formatFacebookAdTranscribe(result, input);
    }
  );

  registerLiveTool(
    "maps_place_intel",
    "Google Maps Business Profile Details",
    withReportNote('Extract Google Maps business intelligence for one known/named business: rating, review count, category, address, phone, website, hours, booking URL, review histogram, review topics, about attributes, entity IDs, and optional review cards. Do not use this for category searches, local market prospect lists, or requests for multiple GMB/GBP profiles; use maps_search first for those. Split business name from location (e.g. "Elite Roofing Denver CO" => businessName "Elite Roofing", location "Denver, CO"). Pass includeReviews true when the user asks for reviews/customer pain.'),
    { inputSchema: MapsPlaceIntelInputSchema, outputSchema: MapsPlaceIntelOutputSchema },
    async (input) => {
      const result = await executor.mapsPlaceIntel(input);
      return formatMapsPlaceIntel(result, input);
    }
  );

  registerLiveTool(
    "maps_search",
    "Google Maps Business Search",
    withReportNote('Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." For US city/state Maps searches, keep proxyMode as location so the browser service can create a fresh residential proxy ID for that market; pass proxyZip only when a specific ZIP or city-center ZIP is known. Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.'),
    { inputSchema: MapsSearchInputSchema, outputSchema: MapsSearchOutputSchema },
    async (input) => {
      const result = await executor.mapsSearch(input);
      return formatMapsSearch(result, input);
    }
  );

  registerLiveTool(
    "directory_workflow",
    "Directory Workflow: Markets + Maps",
    withReportNote('Build directory/prospecting datasets by selecting US city markets from the free Census Population Estimates city/place dataset, optionally joining configured US ZIPS/Lead Magician ZIP groups, then running Google Maps business searches for each city in parallel. Use this when the user wants "all cities over 100k population in a state", "build a directory CSV", "find markets then get Maps data", or similar location-database + Maps workflows. Set minPopulation, state, query, maxResultsPerCity, and concurrency. Use concurrency up to 5 for parallel city sessions. Keep proxyMode as location so each city can use a fresh residential proxy ID when the browser service is available; retryable city failures use fresh proxies across attempts. Saved CSV rows include source_location, result_position, business_name, review_stars, category, address, phone, hours_status, website_url, directions_url, place_url, cid, cid_decimal, city population, and ZIP groups. This workflow captures star ratings from Maps list cards, not profile review counts; use maps_place_intel only when a selected profile needs deeper review details. For local Lead Magician ZIP enrichment, set MCP_SCRAPER_USZIPS_CSV_PATH on the API server or pass usZipsCsvPath only in local/test mode.'),
    { inputSchema: DirectoryWorkflowInputSchema, outputSchema: DirectoryWorkflowOutputSchema },
    async (input) => {
      const result = await executor.directoryWorkflow(input);
      return formatDirectoryWorkflow(result, input);
    }
  );

  // credits_info carries its own annotations (idempotent, closed-world), so it
  // is registered directly rather than through registerLiveTool.
  server2.registerTool(
    "credits_info",
    {
      title: "MCP Scraper Credits & Costs",
      description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",
      inputSchema: CreditsInfoInputSchema,
      outputSchema: CreditsInfoOutputSchema,
      annotations: {
        title: "MCP Scraper Credits & Costs",
        readOnlyHint: true,
        destructiveHint: false,
        idempotentHint: true,
        openWorldHint: false
      }
    },
    async (input) => {
      const result = await executor.creditsInfo(input);
      return formatCreditsInfo(result, input);
    }
  );
}
|
|
1957
|
+
|
|
1958
|
+
// bin/mcp-scraper-combined-stdio-server.ts
|
|
1959
|
+
/**
 * Read the API key from a key file.
 *
 * Candidate paths are tried in order: the trimmed `MCP_SCRAPER_KEY_PATH`
 * environment variable (when non-empty), then `.mcp-scraper-key` in the
 * user's home directory. Unreadable files and files whose trimmed content is
 * empty are skipped silently.
 *
 * @returns {string | undefined} The trimmed contents of the first usable key
 *   file, or `undefined` when none is found.
 */
function readApiKeyFile() {
  const candidates = [];
  const configuredPath = process.env.MCP_SCRAPER_KEY_PATH?.trim();
  if (configuredPath) {
    candidates.push(configuredPath);
  }
  candidates.push((0, import_node_path4.join)((0, import_node_os3.homedir)(), ".mcp-scraper-key"));

  for (let i = 0; i < candidates.length; i += 1) {
    let contents;
    try {
      contents = (0, import_node_fs4.readFileSync)(candidates[i], "utf8").trim();
    } catch {
      // Missing or unreadable key file: fall through to the next candidate.
      continue;
    }
    if (contents) {
      return contents;
    }
  }
  return undefined;
}
|
|
1971
|
+
// Entry-point bootstrap: resolve credentials and endpoints, register both tool
// sets on one MCP server, and serve it over stdio.

// Resolve the API key from the first source that yields a non-blank value:
// MCP_SCRAPER_API_KEY, MCP_SCRAPER_KEY, MCP_API_KEY, then the key file.
// Each env var is trimmed before the check so that a variable exported as an
// empty or whitespace-only string does not mask a later, valid source (this
// mirrors how the base URL is resolved below).
var apiKey = process.env.MCP_SCRAPER_API_KEY?.trim() || process.env.MCP_SCRAPER_KEY?.trim() || process.env.MCP_API_KEY?.trim() || readApiKeyFile()?.trim();
if (!apiKey) {
  // stdout is handed to the stdio transport below, so diagnostics go to stderr.
  process.stderr.write("MCP_SCRAPER_API_KEY env var or ~/.mcp-scraper-key is required\n");
  process.exit(1);
}
// The first non-blank of MCP_SCRAPER_BASE_URL / MCP_BASE_URL wins, else the default host.
var baseUrl = process.env.MCP_SCRAPER_BASE_URL?.trim() || process.env.MCP_BASE_URL?.trim() || "https://mcpscraper.dev";
// The console URL falls back to the API base URL when not set separately.
var consoleBaseUrl = process.env.BROWSER_AGENT_CONSOLE_URL?.trim() || baseUrl;
var server = new import_mcp3.McpServer({ name: "mcp-scraper", version: PACKAGE_VERSION });
registerPaaExtractorMcpTools(server, new HttpMcpToolExecutor(baseUrl, apiKey));
registerBrowserAgentMcpTools(server, { baseUrl, apiKey, consoleBaseUrl });
var transport = new import_stdio.StdioServerTransport();
/**
 * Connect the configured server to the stdio transport.
 *
 * @returns {Promise<void>} Settles when `server.connect` settles.
 */
async function main() {
  await server.connect(transport);
}
main().catch((err) => {
  // Report the failure on stderr and exit non-zero.
  process.stderr.write(`${err instanceof Error ? err.message : String(err)}\n`);
  process.exit(1);
});
|
|
1990
|
+
//# sourceMappingURL=mcp-scraper-combined-stdio-server.cjs.map
|