mcp-scraper 0.1.9 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -8
- package/dist/bin/api-server.cjs +5615 -3733
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/browser-agent-stdio-server.cjs +391 -0
- package/dist/bin/browser-agent-stdio-server.cjs.map +1 -0
- package/dist/bin/browser-agent-stdio-server.d.cts +1 -0
- package/dist/bin/browser-agent-stdio-server.d.ts +1 -0
- package/dist/bin/browser-agent-stdio-server.js +390 -0
- package/dist/bin/browser-agent-stdio-server.js.map +1 -0
- package/dist/bin/mcp-stdio-server.cjs +170 -12
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +3 -2
- package/dist/bin/mcp-stdio-server.js.map +1 -1
- package/dist/bin/paa-harvest.cjs +223 -74
- package/dist/bin/paa-harvest.cjs.map +1 -1
- package/dist/bin/paa-harvest.js +2 -2
- package/dist/{chunk-ZK456YXN.js → chunk-IQOCZGJJ.js} +58 -4
- package/dist/chunk-IQOCZGJJ.js.map +1 -0
- package/dist/{chunk-ZMOWIBMK.js → chunk-M2S27J6Z.js} +9 -2
- package/dist/{chunk-ZMOWIBMK.js.map → chunk-M2S27J6Z.js.map} +1 -1
- package/dist/{chunk-TM22BLWP.js → chunk-MY3S7EX7.js} +221 -76
- package/dist/chunk-MY3S7EX7.js.map +1 -0
- package/dist/{chunk-JNC32DMS.js → chunk-OR7DLLH2.js} +175 -16
- package/dist/chunk-OR7DLLH2.js.map +1 -0
- package/dist/chunk-XR65SANX.js +7 -0
- package/dist/chunk-XR65SANX.js.map +1 -0
- package/dist/index.cjs +223 -74
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +2 -2
- package/dist/{server-MTXAJG5J.js → server-CJMX2QUM.js} +1655 -194
- package/dist/server-CJMX2QUM.js.map +1 -0
- package/dist/{worker-AUCXFHEL.js → worker-NAKGTIF5.js} +4 -4
- package/docs/specs/api-forge-spec.md +234 -0
- package/docs/specs/deferred-work-spec.md +74 -0
- package/docs/specs/oauth-mcp-spec.md +213 -0
- package/package.json +3 -2
- package/dist/chunk-JNC32DMS.js.map +0 -1
- package/dist/chunk-TM22BLWP.js.map +0 -1
- package/dist/chunk-ZK456YXN.js.map +0 -1
- package/dist/server-MTXAJG5J.js.map +0 -1
- package/dist/{worker-AUCXFHEL.js.map → worker-NAKGTIF5.js.map} +0 -0
|
@@ -113,6 +113,12 @@ var HttpMcpToolExecutor = class {
|
|
|
113
113
|
mapsSearch(input) {
|
|
114
114
|
return this.call("/maps/search", input);
|
|
115
115
|
}
|
|
116
|
+
directoryWorkflow(input) {
|
|
117
|
+
const cityCount = typeof input.maxCities === "number" ? input.maxCities : 25;
|
|
118
|
+
const concurrency = typeof input.concurrency === "number" && input.concurrency > 0 ? input.concurrency : 5;
|
|
119
|
+
const timeoutMs = this.httpTimeoutOverrideMs ?? Math.min(9e5, Math.max(18e4, Math.ceil(cityCount / concurrency) * 12e4));
|
|
120
|
+
return this.call("/directory/run", input, timeoutMs);
|
|
121
|
+
}
|
|
116
122
|
creditsInfo(input) {
|
|
117
123
|
return this.call("/billing/credits", input);
|
|
118
124
|
}
|
|
@@ -130,7 +136,7 @@ var import_node_fs2 = require("fs");
|
|
|
130
136
|
var import_node_path2 = require("path");
|
|
131
137
|
|
|
132
138
|
// src/version.ts
|
|
133
|
-
var PACKAGE_VERSION = "0.1.9";
|
|
139
|
+
var PACKAGE_VERSION = "0.2.1";
|
|
134
140
|
|
|
135
141
|
// src/mcp/mcp-response-formatter.ts
|
|
136
142
|
var import_node_fs = require("fs");
|
|
@@ -800,6 +806,11 @@ function formatMapsSearch(raw, input) {
|
|
|
800
806
|
if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
|
|
801
807
|
const d = parsed.data;
|
|
802
808
|
const results = d.results ?? [];
|
|
809
|
+
const normalizedResults = results.map((result) => ({
|
|
810
|
+
...result,
|
|
811
|
+
phone: result.phone ?? null,
|
|
812
|
+
hoursStatus: result.hoursStatus ?? null
|
|
813
|
+
}));
|
|
803
814
|
const searchQuery = d.searchQuery ?? [input.query, input.location].filter(Boolean).join(" ");
|
|
804
815
|
const requestedMax = d.requestedMaxResults ?? input.maxResults ?? 10;
|
|
805
816
|
const durationMs = d.durationMs;
|
|
@@ -839,7 +850,79 @@ ${rows}`,
|
|
|
839
850
|
extractedAt: d.extractedAt,
|
|
840
851
|
requestedMaxResults: requestedMax,
|
|
841
852
|
resultCount: results.length,
|
|
842
|
-
results,
|
|
853
|
+
results: normalizedResults,
|
|
854
|
+
durationMs: durationMs ?? 0
|
|
855
|
+
}
|
|
856
|
+
};
|
|
857
|
+
}
|
|
858
|
+
function formatDirectoryWorkflow(raw, input) {
|
|
859
|
+
const parsed = parseData(raw);
|
|
860
|
+
if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
|
|
861
|
+
const d = parsed.data;
|
|
862
|
+
const cities = (d.cities ?? []).map((city) => ({
|
|
863
|
+
...city,
|
|
864
|
+
results: city.results.map((result) => ({
|
|
865
|
+
...result,
|
|
866
|
+
phone: result.phone ?? null,
|
|
867
|
+
hoursStatus: result.hoursStatus ?? null
|
|
868
|
+
}))
|
|
869
|
+
}));
|
|
870
|
+
const warnings = d.warnings ?? [];
|
|
871
|
+
const csvPath = d.csvPath ?? null;
|
|
872
|
+
const totalResultCount = d.totalResultCount ?? cities.reduce((sum, city) => sum + city.resultCount, 0);
|
|
873
|
+
const durationMs = d.durationMs;
|
|
874
|
+
const marketRows = cities.map((city) => {
|
|
875
|
+
const zips = city.zips?.length ? city.zips.slice(0, 8).join(" ") + (city.zips.length > 8 ? ` +${city.zips.length - 8}` : "") : "\u2014";
|
|
876
|
+
return `| ${cell(city.city)} | ${city.population.toLocaleString()} | ${city.zips?.length ?? 0} | ${city.resultCount} | ${city.status} | ${cell(zips)} |`;
|
|
877
|
+
}).join("\n");
|
|
878
|
+
const businessRows = cities.flatMap((city) => city.results.slice(0, 3).map((result) => ({ city, result }))).map(({ city, result }) => {
|
|
879
|
+
const rating = [result.rating, result.reviewCount ? `(${result.reviewCount})` : null].filter(Boolean).join(" ");
|
|
880
|
+
return `| ${cell(city.city)} | ${result.position} | ${cell(result.name)} | ${cell(result.category)} | ${cell(rating)} | ${result.websiteUrl ? `[site](${result.websiteUrl})` : "\u2014"} | [maps](${result.placeUrl}) |`;
|
|
881
|
+
}).join("\n");
|
|
882
|
+
const warningText = warnings.length ? `
|
|
883
|
+
## Warnings
|
|
884
|
+
${warnings.map((w) => `- ${w}`).join("\n")}` : "";
|
|
885
|
+
const csvText = csvPath ? `
|
|
886
|
+
**CSV:** \`${csvPath}\`` : "";
|
|
887
|
+
const full = [
|
|
888
|
+
`# Directory Workflow: ${input.query}`,
|
|
889
|
+
`**Markets:** ${cities.length} \xB7 **Maps results:** ${totalResultCount} \xB7 **State:** ${d.state ?? input.state ?? "US"} \xB7 **Population threshold:** ${d.minPopulation ?? input.minPopulation ?? 1e5}`,
|
|
890
|
+
csvText,
|
|
891
|
+
`
|
|
892
|
+
## Markets
|
|
893
|
+
| City | Population | ZIPs | Maps Results | Status | ZIP Sample |
|
|
894
|
+
|---|---:|---:|---:|---|---|
|
|
895
|
+
${marketRows}`,
|
|
896
|
+
businessRows ? `
|
|
897
|
+
## Top Candidates By City
|
|
898
|
+
| City | # | Name | Category | Rating | Website | Maps |
|
|
899
|
+
|---|---:|---|---|---|---|---|
|
|
900
|
+
${businessRows}` : null,
|
|
901
|
+
warningText,
|
|
902
|
+
`
|
|
903
|
+
## Sources
|
|
904
|
+
- Population: ${d.censusSourceUrl ?? "Census Population Estimates Program"}
|
|
905
|
+
- ZIP groups: ${d.usZipsSourcePath ?? "not configured"}`,
|
|
906
|
+
durationMs != null ? `
|
|
907
|
+
*Completed in ${(durationMs / 1e3).toFixed(1)}s*` : null
|
|
908
|
+
].filter(Boolean).join("\n");
|
|
909
|
+
return {
|
|
910
|
+
...oneBlock(full),
|
|
911
|
+
structuredContent: {
|
|
912
|
+
query: d.query,
|
|
913
|
+
state: d.state,
|
|
914
|
+
minPopulation: d.minPopulation,
|
|
915
|
+
populationYear: d.populationYear,
|
|
916
|
+
maxResultsPerCity: d.maxResultsPerCity,
|
|
917
|
+
concurrency: d.concurrency,
|
|
918
|
+
censusSourceUrl: d.censusSourceUrl,
|
|
919
|
+
usZipsSourcePath: d.usZipsSourcePath ?? null,
|
|
920
|
+
warnings,
|
|
921
|
+
extractedAt: d.extractedAt,
|
|
922
|
+
selectedCityCount: d.selectedCityCount,
|
|
923
|
+
totalResultCount,
|
|
924
|
+
csvPath,
|
|
925
|
+
cities,
|
|
843
926
|
durationMs: durationMs ?? 0
|
|
844
927
|
}
|
|
845
928
|
};
|
|
@@ -1005,8 +1088,8 @@ var HarvestPaaInputSchema = {
|
|
|
1005
1088
|
gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location or user language. Examples: United States us, United Kingdom gb, Japan jp, Canada ca, Australia au."),
|
|
1006
1089
|
hl: import_zod.z.string().default("en").describe("Google interface/content language inferred from the user request. Use en unless the user asks for another language or locale."),
|
|
1007
1090
|
device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
|
|
1008
|
-
proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default
|
|
1009
|
-
proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use
|
|
1091
|
+
proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default for US city/state SERPs; it creates a fresh residential proxy ID per attempt and retries CAPTCHA, proxy tunnel failure, and wrong-location evidence before returning. Use configured only for the static configured proxy. Use none only for direct-network debugging."),
|
|
1092
|
+
proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use when the user gives a specific ZIP or when city-center targeting needs to be forced. With proxyMode location this ZIP is used for each fresh proxy attempt."),
|
|
1010
1093
|
debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/session/location diagnostics in the response. Use true when debugging localization, CAPTCHA, or proxy behavior.")
|
|
1011
1094
|
};
|
|
1012
1095
|
var ExtractUrlInputSchema = {
|
|
@@ -1063,7 +1146,25 @@ var MapsSearchInputSchema = {
|
|
|
1063
1146
|
location: import_zod.z.string().optional().describe('City, region, country, or service area for the Maps search, e.g. "Denver, CO". Infer from the user request when present.'),
|
|
1064
1147
|
gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location."),
|
|
1065
1148
|
hl: import_zod.z.string().length(2).default("en").describe("Language inferred from user request."),
|
|
1066
|
-
maxResults: import_zod.z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more.")
|
|
1149
|
+
maxResults: import_zod.z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more."),
|
|
1150
|
+
proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default for US city/state Maps searches; it creates a fresh residential proxy ID when the browser service is available. Use configured for the server proxy ID, and none only for local direct-network debugging."),
|
|
1151
|
+
proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use when the user gives a specific ZIP or city-center ZIP."),
|
|
1152
|
+
debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/proxy diagnostics when debugging Maps localization, CAPTCHA, or proxy behavior.")
|
|
1153
|
+
};
|
|
1154
|
+
var DirectoryWorkflowInputSchema = {
|
|
1155
|
+
query: import_zod.z.string().min(1).describe("Business category, niche, or keyword to search on Google Maps for every selected market, e.g. roofers, dentists, med spas. Do not include the city here."),
|
|
1156
|
+
state: import_zod.z.string().min(2).default("TN").describe("US state abbreviation or state name used to select Census places, e.g. TN or Tennessee."),
|
|
1157
|
+
minPopulation: import_zod.z.number().int().min(0).default(1e5).describe('Minimum Census place population for market selection. Use 100000 for "cities above 100k population".'),
|
|
1158
|
+
populationYear: import_zod.z.number().int().min(2020).max(2025).default(2025).describe("Census population estimate year from the 2020-2025 Population Estimates Program city/place dataset."),
|
|
1159
|
+
maxCities: import_zod.z.number().int().min(1).max(100).default(25).describe("Maximum number of markets to process after sorting by population descending."),
|
|
1160
|
+
maxResultsPerCity: import_zod.z.number().int().min(1).max(50).default(50).describe("Google Maps business/profile candidates to collect for each city. Maximum 50."),
|
|
1161
|
+
concurrency: import_zod.z.number().int().min(1).max(5).default(5).describe("How many city Maps searches to run in parallel. Use 5 for broad directory batches unless debugging."),
|
|
1162
|
+
includeZipGroups: import_zod.z.boolean().default(true).describe("Attach ZIP groups from a configured US ZIPS CSV when available. Set MCP_SCRAPER_USZIPS_CSV_PATH on the API server or pass usZipsCsvPath in local/test mode."),
|
|
1163
|
+
usZipsCsvPath: import_zod.z.string().optional().describe("Local/test-only path to a US ZIPS CSV with state_abbr, zipcode, county, city columns, such as Lead Magician tools/analytics/data/uszips.csv. Deployed APIs should use MCP_SCRAPER_USZIPS_CSV_PATH instead."),
|
|
1164
|
+
saveCsv: import_zod.z.boolean().default(true).describe("Save a directory-ready CSV to the MCP Scraper output directory and return its path. CSV rows include source_location, result_position, business_name, review_stars, category, address, phone, hours_status, website_url, directions_url, place_url, CID fields, population, and ZIP groups."),
|
|
1165
|
+
proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode for every city Maps search. Use location by default for US city/state batches; it creates fresh residential proxy IDs when the browser service is available. Use configured for the server proxy ID, and none only for local direct-network debugging."),
|
|
1166
|
+
proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional ZIP override for proxy targeting. Normally omit it so each city can use its Lead Magician ZIP group or city/state location."),
|
|
1167
|
+
debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/proxy diagnostics in each Maps browser session when supported.")
|
|
1067
1168
|
};
|
|
1068
1169
|
var NullableString = import_zod.z.string().nullable();
|
|
1069
1170
|
var MapsSearchOutputSchema = {
|
|
@@ -1084,12 +1185,62 @@ var MapsSearchOutputSchema = {
|
|
|
1084
1185
|
reviewCount: NullableString,
|
|
1085
1186
|
category: NullableString,
|
|
1086
1187
|
address: NullableString,
|
|
1188
|
+
phone: NullableString,
|
|
1189
|
+
hoursStatus: NullableString,
|
|
1087
1190
|
websiteUrl: NullableString,
|
|
1088
1191
|
directionsUrl: NullableString,
|
|
1089
1192
|
metadata: import_zod.z.array(import_zod.z.string())
|
|
1090
1193
|
})),
|
|
1091
1194
|
durationMs: import_zod.z.number().int().min(0)
|
|
1092
1195
|
};
|
|
1196
|
+
var DirectoryMapsBusinessOutput = import_zod.z.object({
|
|
1197
|
+
position: import_zod.z.number().int().min(1),
|
|
1198
|
+
name: import_zod.z.string(),
|
|
1199
|
+
placeUrl: import_zod.z.string().url(),
|
|
1200
|
+
cid: NullableString,
|
|
1201
|
+
cidDecimal: NullableString,
|
|
1202
|
+
rating: NullableString,
|
|
1203
|
+
reviewCount: NullableString,
|
|
1204
|
+
category: NullableString,
|
|
1205
|
+
address: NullableString,
|
|
1206
|
+
phone: NullableString,
|
|
1207
|
+
hoursStatus: NullableString,
|
|
1208
|
+
websiteUrl: NullableString,
|
|
1209
|
+
directionsUrl: NullableString,
|
|
1210
|
+
metadata: import_zod.z.array(import_zod.z.string())
|
|
1211
|
+
});
|
|
1212
|
+
var DirectoryWorkflowOutputSchema = {
|
|
1213
|
+
query: import_zod.z.string(),
|
|
1214
|
+
state: import_zod.z.string(),
|
|
1215
|
+
minPopulation: import_zod.z.number().int().min(0),
|
|
1216
|
+
populationYear: import_zod.z.number().int().min(2020).max(2025),
|
|
1217
|
+
maxResultsPerCity: import_zod.z.number().int().min(1).max(50),
|
|
1218
|
+
concurrency: import_zod.z.number().int().min(1).max(5),
|
|
1219
|
+
censusSourceUrl: import_zod.z.string().url(),
|
|
1220
|
+
usZipsSourcePath: NullableString,
|
|
1221
|
+
warnings: import_zod.z.array(import_zod.z.string()),
|
|
1222
|
+
extractedAt: import_zod.z.string(),
|
|
1223
|
+
selectedCityCount: import_zod.z.number().int().min(0),
|
|
1224
|
+
totalResultCount: import_zod.z.number().int().min(0),
|
|
1225
|
+
csvPath: NullableString,
|
|
1226
|
+
cities: import_zod.z.array(import_zod.z.object({
|
|
1227
|
+
city: import_zod.z.string(),
|
|
1228
|
+
state: import_zod.z.string(),
|
|
1229
|
+
location: import_zod.z.string(),
|
|
1230
|
+
cityKey: import_zod.z.string(),
|
|
1231
|
+
censusName: import_zod.z.string(),
|
|
1232
|
+
population: import_zod.z.number().int().min(0),
|
|
1233
|
+
populationYear: import_zod.z.number().int().min(2020).max(2025),
|
|
1234
|
+
zips: import_zod.z.array(import_zod.z.string()),
|
|
1235
|
+
counties: import_zod.z.array(import_zod.z.string()),
|
|
1236
|
+
status: import_zod.z.enum(["ok", "empty", "failed"]),
|
|
1237
|
+
error: NullableString,
|
|
1238
|
+
resultCount: import_zod.z.number().int().min(0),
|
|
1239
|
+
durationMs: import_zod.z.number().int().min(0),
|
|
1240
|
+
results: import_zod.z.array(DirectoryMapsBusinessOutput)
|
|
1241
|
+
})),
|
|
1242
|
+
durationMs: import_zod.z.number().int().min(0)
|
|
1243
|
+
};
|
|
1093
1244
|
var OrganicResultOutput = import_zod.z.object({
|
|
1094
1245
|
position: import_zod.z.number().int(),
|
|
1095
1246
|
title: import_zod.z.string(),
|
|
@@ -1269,8 +1420,8 @@ var SearchSerpInputSchema = {
|
|
|
1269
1420
|
gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location or user language."),
|
|
1270
1421
|
hl: import_zod.z.string().default("en").describe("Google interface/content language inferred from user request."),
|
|
1271
1422
|
device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
|
|
1272
|
-
proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default
|
|
1273
|
-
proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use
|
|
1423
|
+
proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default for US city/state SERPs; it creates a fresh residential proxy ID per attempt and retries CAPTCHA, proxy tunnel failure, and wrong-location evidence before returning. Use configured only for the static configured proxy. Use none only for direct-network debugging."),
|
|
1424
|
+
proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use when the user gives a specific ZIP or when city-center targeting needs to be forced. With proxyMode location this ZIP is used for each fresh proxy attempt."),
|
|
1274
1425
|
debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/session/location diagnostics in the response. Use true when debugging localization, CAPTCHA, or proxy behavior."),
|
|
1275
1426
|
pages: import_zod.z.number().int().min(1).max(2).default(1).describe("Number of result pages to fetch (1\u20132)")
|
|
1276
1427
|
};
|
|
@@ -1280,8 +1431,8 @@ var CaptureSerpSnapshotInputSchema = {
|
|
|
1280
1431
|
gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from the requested market, e.g. us, gb, ca, au."),
|
|
1281
1432
|
hl: import_zod.z.string().default("en").describe("Google interface/content language inferred from the user request."),
|
|
1282
1433
|
device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use mobile only when the user asks for mobile rankings or mobile SERP evidence."),
|
|
1283
|
-
proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy behavior for capture. Use location for localized residential proxy
|
|
1284
|
-
proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting when a precise city-center or ZIP proxy is needed."),
|
|
1434
|
+
proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy behavior for capture. Use location for localized US residential evidence; it creates a fresh proxy ID per attempt and retries CAPTCHA, proxy tunnel failure, and wrong-location evidence before returning. Use configured only for the static residential proxy, and none only for direct-network debugging."),
|
|
1435
|
+
proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting when a precise city-center or ZIP proxy is needed. With proxyMode location this ZIP is used for each fresh proxy attempt."),
|
|
1285
1436
|
pages: import_zod.z.number().int().min(1).max(2).default(1).describe("Number of Google result pages to capture. Use 1 normally and 2 only when the user needs deeper ranking evidence."),
|
|
1286
1437
|
debug: import_zod.z.boolean().default(false).describe("Include sanitized browser, proxy, and location diagnostics. Use true when debugging localization, CAPTCHA, proxy selection, or capture reliability."),
|
|
1287
1438
|
includePageSnapshots: import_zod.z.boolean().default(false).describe("Also capture ranking-page snapshots for selected SERP URLs through the same product capture path."),
|
|
@@ -1356,14 +1507,14 @@ function buildPaaExtractorMcpServer(executor2, options = {}) {
|
|
|
1356
1507
|
if (savesReports) registerSavedReportResources(server2);
|
|
1357
1508
|
server2.registerTool("harvest_paa", {
|
|
1358
1509
|
title: "Google PAA + SERP Harvest",
|
|
1359
|
-
description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-200 for "full", "deep", "all", or comprehensive research. Deep harvests above 100 questions can run for several minutes with no interim progress \u2014 warn the user before starting one and keep maxQuestions at or below 100 unless they explicitly want a deep harvest. Credits are charged by extracted question; unused request hold is refunded.'),
|
|
1510
|
+
description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). For US local SERPs, leave proxyMode as location so the service uses fresh residential proxy IDs across retries and rejects wrong-location evidence instead of returning a bad market. Use maxQuestions 30 normally, 100-200 for "full", "deep", "all", or comprehensive research. Deep harvests above 100 questions can run for several minutes with no interim progress \u2014 warn the user before starting one and keep maxQuestions at or below 100 unless they explicitly want a deep harvest. Credits are charged by extracted question; unused request hold is refunded.'),
|
|
1360
1511
|
inputSchema: HarvestPaaInputSchema,
|
|
1361
1512
|
outputSchema: HarvestPaaOutputSchema,
|
|
1362
1513
|
annotations: liveWebToolAnnotations("Google PAA + SERP Harvest")
|
|
1363
1514
|
}, async (input) => formatHarvestPaa(await executor2.harvestPaa(input), input));
|
|
1364
1515
|
server2.registerTool("search_serp", {
|
|
1365
1516
|
title: "Google SERP Lookup",
|
|
1366
|
-
description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request."),
|
|
1517
|
+
description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request. For US city/state rankings, keep proxyMode as location and pass proxyZip when a city-center ZIP is known; location mode uses fresh residential proxy IDs and retries CAPTCHA, proxy tunnel failures, and wrong-location evidence before returning."),
|
|
1367
1518
|
inputSchema: SearchSerpInputSchema,
|
|
1368
1519
|
outputSchema: SearchSerpOutputSchema,
|
|
1369
1520
|
annotations: liveWebToolAnnotations("Google SERP Lookup")
|
|
@@ -1431,11 +1582,18 @@ function buildPaaExtractorMcpServer(executor2, options = {}) {
|
|
|
1431
1582
|
}, async (input) => formatMapsPlaceIntel(await executor2.mapsPlaceIntel(input), input));
|
|
1432
1583
|
server2.registerTool("maps_search", {
|
|
1433
1584
|
title: "Google Maps Business Search",
|
|
1434
|
-
description: withReportNote('Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.'),
|
|
1585
|
+
description: withReportNote('Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." For US city/state Maps searches, keep proxyMode as location so the browser service can create a fresh residential proxy ID for that market; pass proxyZip only when a specific ZIP or city-center ZIP is known. Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.'),
|
|
1435
1586
|
inputSchema: MapsSearchInputSchema,
|
|
1436
1587
|
outputSchema: MapsSearchOutputSchema,
|
|
1437
1588
|
annotations: liveWebToolAnnotations("Google Maps Business Search")
|
|
1438
1589
|
}, async (input) => formatMapsSearch(await executor2.mapsSearch(input), input));
|
|
1590
|
+
server2.registerTool("directory_workflow", {
|
|
1591
|
+
title: "Directory Workflow: Markets + Maps",
|
|
1592
|
+
description: withReportNote('Build directory/prospecting datasets by selecting US city markets from the free Census Population Estimates city/place dataset, optionally joining configured US ZIPS/Lead Magician ZIP groups, then running Google Maps business searches for each city in parallel. Use this when the user wants "all cities over 100k population in a state", "build a directory CSV", "find markets then get Maps data", or similar location-database + Maps workflows. Set minPopulation, state, query, maxResultsPerCity, and concurrency. Use concurrency up to 5 for parallel city sessions. Keep proxyMode as location so each city can use a fresh residential proxy ID when the browser service is available; retryable city failures use fresh proxies across attempts. Saved CSV rows include source_location, result_position, business_name, review_stars, category, address, phone, hours_status, website_url, directions_url, place_url, cid, cid_decimal, city population, and ZIP groups. This workflow captures star ratings from Maps list cards, not profile review counts; use maps_place_intel only when a selected profile needs deeper review details. For local Lead Magician ZIP enrichment, set MCP_SCRAPER_USZIPS_CSV_PATH on the API server or pass usZipsCsvPath only in local/test mode.'),
|
|
1593
|
+
inputSchema: DirectoryWorkflowInputSchema,
|
|
1594
|
+
outputSchema: DirectoryWorkflowOutputSchema,
|
|
1595
|
+
annotations: liveWebToolAnnotations("Directory Workflow: Markets + Maps")
|
|
1596
|
+
}, async (input) => formatDirectoryWorkflow(await executor2.directoryWorkflow(input), input));
|
|
1439
1597
|
server2.registerTool("credits_info", {
|
|
1440
1598
|
title: "MCP Scraper Credits & Costs",
|
|
1441
1599
|
description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",
|