mcp-scraper 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +74 -8
  2. package/dist/bin/api-server.cjs +4691 -3614
  3. package/dist/bin/api-server.cjs.map +1 -1
  4. package/dist/bin/api-server.js +2 -2
  5. package/dist/bin/browser-agent-stdio-server.cjs +85 -8
  6. package/dist/bin/browser-agent-stdio-server.cjs.map +1 -1
  7. package/dist/bin/browser-agent-stdio-server.js +83 -6
  8. package/dist/bin/browser-agent-stdio-server.js.map +1 -1
  9. package/dist/bin/mcp-stdio-server.cjs +170 -12
  10. package/dist/bin/mcp-stdio-server.cjs.map +1 -1
  11. package/dist/bin/mcp-stdio-server.js +3 -3
  12. package/dist/bin/paa-harvest.cjs +223 -74
  13. package/dist/bin/paa-harvest.cjs.map +1 -1
  14. package/dist/bin/paa-harvest.js +2 -2
  15. package/dist/{chunk-GXBT5CDU.js → chunk-IQOCZGJJ.js} +39 -2
  16. package/dist/chunk-IQOCZGJJ.js.map +1 -0
  17. package/dist/{chunk-ZMOWIBMK.js → chunk-M2S27J6Z.js} +9 -2
  18. package/dist/{chunk-ZMOWIBMK.js.map → chunk-M2S27J6Z.js.map} +1 -1
  19. package/dist/{chunk-TM22BLWP.js → chunk-MY3S7EX7.js} +221 -76
  20. package/dist/chunk-MY3S7EX7.js.map +1 -0
  21. package/dist/{chunk-BMVQB3WN.js → chunk-OR7DLLH2.js} +173 -14
  22. package/dist/chunk-OR7DLLH2.js.map +1 -0
  23. package/dist/chunk-XR65SANX.js +7 -0
  24. package/dist/chunk-XR65SANX.js.map +1 -0
  25. package/dist/index.cjs +223 -74
  26. package/dist/index.cjs.map +1 -1
  27. package/dist/index.d.cts +1 -0
  28. package/dist/index.d.ts +1 -0
  29. package/dist/index.js +2 -2
  30. package/dist/{server-ASCMKUQ5.js → server-CJMX2QUM.js} +880 -181
  31. package/dist/server-CJMX2QUM.js.map +1 -0
  32. package/dist/{worker-KJ4A7WIR.js → worker-NAKGTIF5.js} +4 -4
  33. package/package.json +1 -1
  34. package/dist/chunk-2BS7BUEE.js +0 -7
  35. package/dist/chunk-2BS7BUEE.js.map +0 -1
  36. package/dist/chunk-BMVQB3WN.js.map +0 -1
  37. package/dist/chunk-GXBT5CDU.js.map +0 -1
  38. package/dist/chunk-TM22BLWP.js.map +0 -1
  39. package/dist/server-ASCMKUQ5.js.map +0 -1
  40. package/dist/{worker-KJ4A7WIR.js.map → worker-NAKGTIF5.js.map} +0 -0
@@ -113,6 +113,12 @@ var HttpMcpToolExecutor = class {
113
113
  mapsSearch(input) {
114
114
  return this.call("/maps/search", input);
115
115
  }
116
+ directoryWorkflow(input) {
117
+ const cityCount = typeof input.maxCities === "number" ? input.maxCities : 25;
118
+ const concurrency = typeof input.concurrency === "number" && input.concurrency > 0 ? input.concurrency : 5;
119
+ const timeoutMs = this.httpTimeoutOverrideMs ?? Math.min(9e5, Math.max(18e4, Math.ceil(cityCount / concurrency) * 12e4));
120
+ return this.call("/directory/run", input, timeoutMs);
121
+ }
116
122
  creditsInfo(input) {
117
123
  return this.call("/billing/credits", input);
118
124
  }
@@ -130,7 +136,7 @@ var import_node_fs2 = require("fs");
130
136
  var import_node_path2 = require("path");
131
137
 
132
138
  // src/version.ts
133
- var PACKAGE_VERSION = "0.2.0";
139
+ var PACKAGE_VERSION = "0.2.1";
134
140
 
135
141
  // src/mcp/mcp-response-formatter.ts
136
142
  var import_node_fs = require("fs");
@@ -800,6 +806,11 @@ function formatMapsSearch(raw, input) {
800
806
  if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
801
807
  const d = parsed.data;
802
808
  const results = d.results ?? [];
809
+ const normalizedResults = results.map((result) => ({
810
+ ...result,
811
+ phone: result.phone ?? null,
812
+ hoursStatus: result.hoursStatus ?? null
813
+ }));
803
814
  const searchQuery = d.searchQuery ?? [input.query, input.location].filter(Boolean).join(" ");
804
815
  const requestedMax = d.requestedMaxResults ?? input.maxResults ?? 10;
805
816
  const durationMs = d.durationMs;
@@ -839,7 +850,79 @@ ${rows}`,
839
850
  extractedAt: d.extractedAt,
840
851
  requestedMaxResults: requestedMax,
841
852
  resultCount: results.length,
842
- results,
853
+ results: normalizedResults,
854
+ durationMs: durationMs ?? 0
855
+ }
856
+ };
857
+ }
858
+ function formatDirectoryWorkflow(raw, input) {
859
+ const parsed = parseData(raw);
860
+ if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
861
+ const d = parsed.data;
862
+ const cities = (d.cities ?? []).map((city) => ({
863
+ ...city,
864
+ results: city.results.map((result) => ({
865
+ ...result,
866
+ phone: result.phone ?? null,
867
+ hoursStatus: result.hoursStatus ?? null
868
+ }))
869
+ }));
870
+ const warnings = d.warnings ?? [];
871
+ const csvPath = d.csvPath ?? null;
872
+ const totalResultCount = d.totalResultCount ?? cities.reduce((sum, city) => sum + city.resultCount, 0);
873
+ const durationMs = d.durationMs;
874
+ const marketRows = cities.map((city) => {
875
+ const zips = city.zips?.length ? city.zips.slice(0, 8).join(" ") + (city.zips.length > 8 ? ` +${city.zips.length - 8}` : "") : "\u2014";
876
+ return `| ${cell(city.city)} | ${city.population.toLocaleString()} | ${city.zips?.length ?? 0} | ${city.resultCount} | ${city.status} | ${cell(zips)} |`;
877
+ }).join("\n");
878
+ const businessRows = cities.flatMap((city) => city.results.slice(0, 3).map((result) => ({ city, result }))).map(({ city, result }) => {
879
+ const rating = [result.rating, result.reviewCount ? `(${result.reviewCount})` : null].filter(Boolean).join(" ");
880
+ return `| ${cell(city.city)} | ${result.position} | ${cell(result.name)} | ${cell(result.category)} | ${cell(rating)} | ${result.websiteUrl ? `[site](${result.websiteUrl})` : "\u2014"} | [maps](${result.placeUrl}) |`;
881
+ }).join("\n");
882
+ const warningText = warnings.length ? `
883
+ ## Warnings
884
+ ${warnings.map((w) => `- ${w}`).join("\n")}` : "";
885
+ const csvText = csvPath ? `
886
+ **CSV:** \`${csvPath}\`` : "";
887
+ const full = [
888
+ `# Directory Workflow: ${input.query}`,
889
+ `**Markets:** ${cities.length} \xB7 **Maps results:** ${totalResultCount} \xB7 **State:** ${d.state ?? input.state ?? "US"} \xB7 **Population threshold:** ${d.minPopulation ?? input.minPopulation ?? 1e5}`,
890
+ csvText,
891
+ `
892
+ ## Markets
893
+ | City | Population | ZIPs | Maps Results | Status | ZIP Sample |
894
+ |---|---:|---:|---:|---|---|
895
+ ${marketRows}`,
896
+ businessRows ? `
897
+ ## Top Candidates By City
898
+ | City | # | Name | Category | Rating | Website | Maps |
899
+ |---|---:|---|---|---|---|---|
900
+ ${businessRows}` : null,
901
+ warningText,
902
+ `
903
+ ## Sources
904
+ - Population: ${d.censusSourceUrl ?? "Census Population Estimates Program"}
905
+ - ZIP groups: ${d.usZipsSourcePath ?? "not configured"}`,
906
+ durationMs != null ? `
907
+ *Completed in ${(durationMs / 1e3).toFixed(1)}s*` : null
908
+ ].filter(Boolean).join("\n");
909
+ return {
910
+ ...oneBlock(full),
911
+ structuredContent: {
912
+ query: d.query,
913
+ state: d.state,
914
+ minPopulation: d.minPopulation,
915
+ populationYear: d.populationYear,
916
+ maxResultsPerCity: d.maxResultsPerCity,
917
+ concurrency: d.concurrency,
918
+ censusSourceUrl: d.censusSourceUrl,
919
+ usZipsSourcePath: d.usZipsSourcePath ?? null,
920
+ warnings,
921
+ extractedAt: d.extractedAt,
922
+ selectedCityCount: d.selectedCityCount,
923
+ totalResultCount,
924
+ csvPath,
925
+ cities,
843
926
  durationMs: durationMs ?? 0
844
927
  }
845
928
  };
@@ -1005,8 +1088,8 @@ var HarvestPaaInputSchema = {
1005
1088
  gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location or user language. Examples: United States us, United Kingdom gb, Japan jp, Canada ca, Australia au."),
1006
1089
  hl: import_zod.z.string().default("en").describe("Google interface/content language inferred from the user request. Use en unless the user asks for another language or locale."),
1007
1090
  device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
1008
- proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default so city/state searches create or reuse a matching residential proxy. Use configured for the static configured proxy. Use none only for direct-network debugging."),
1009
- proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use only when the user gives a specific ZIP or city-center proxy targeting needs to be forced."),
1091
+ proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default for US city/state SERPs; it creates a fresh residential proxy ID per attempt and retries CAPTCHA, proxy tunnel failure, and wrong-location evidence before returning. Use configured only for the static configured proxy. Use none only for direct-network debugging."),
1092
+ proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use when the user gives a specific ZIP or when city-center targeting needs to be forced. With proxyMode location this ZIP is used for each fresh proxy attempt."),
1010
1093
  debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/session/location diagnostics in the response. Use true when debugging localization, CAPTCHA, or proxy behavior.")
1011
1094
  };
1012
1095
  var ExtractUrlInputSchema = {
@@ -1063,7 +1146,25 @@ var MapsSearchInputSchema = {
1063
1146
  location: import_zod.z.string().optional().describe('City, region, country, or service area for the Maps search, e.g. "Denver, CO". Infer from the user request when present.'),
1064
1147
  gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location."),
1065
1148
  hl: import_zod.z.string().length(2).default("en").describe("Language inferred from user request."),
1066
- maxResults: import_zod.z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more.")
1149
+ maxResults: import_zod.z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more."),
1150
+ proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default for US city/state Maps searches; it creates a fresh residential proxy ID when the browser service is available. Use configured for the server proxy ID, and none only for local direct-network debugging."),
1151
+ proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use when the user gives a specific ZIP or city-center ZIP."),
1152
+ debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/proxy diagnostics when debugging Maps localization, CAPTCHA, or proxy behavior.")
1153
+ };
1154
+ var DirectoryWorkflowInputSchema = {
1155
+ query: import_zod.z.string().min(1).describe("Business category, niche, or keyword to search on Google Maps for every selected market, e.g. roofers, dentists, med spas. Do not include the city here."),
1156
+ state: import_zod.z.string().min(2).default("TN").describe("US state abbreviation or state name used to select Census places, e.g. TN or Tennessee."),
1157
+ minPopulation: import_zod.z.number().int().min(0).default(1e5).describe('Minimum Census place population for market selection. Use 100000 for "cities above 100k population".'),
1158
+ populationYear: import_zod.z.number().int().min(2020).max(2025).default(2025).describe("Census population estimate year from the 2020-2025 Population Estimates Program city/place dataset."),
1159
+ maxCities: import_zod.z.number().int().min(1).max(100).default(25).describe("Maximum number of markets to process after sorting by population descending."),
1160
+ maxResultsPerCity: import_zod.z.number().int().min(1).max(50).default(50).describe("Google Maps business/profile candidates to collect for each city. Maximum 50."),
1161
+ concurrency: import_zod.z.number().int().min(1).max(5).default(5).describe("How many city Maps searches to run in parallel. Use 5 for broad directory batches unless debugging."),
1162
+ includeZipGroups: import_zod.z.boolean().default(true).describe("Attach ZIP groups from a configured US ZIPS CSV when available. Set MCP_SCRAPER_USZIPS_CSV_PATH on the API server or pass usZipsCsvPath in local/test mode."),
1163
+ usZipsCsvPath: import_zod.z.string().optional().describe("Local/test-only path to a US ZIPS CSV with state_abbr, zipcode, county, city columns, such as Lead Magician tools/analytics/data/uszips.csv. Deployed APIs should use MCP_SCRAPER_USZIPS_CSV_PATH instead."),
1164
+ saveCsv: import_zod.z.boolean().default(true).describe("Save a directory-ready CSV to the MCP Scraper output directory and return its path. CSV rows include source_location, result_position, business_name, review_stars, category, address, phone, hours_status, website_url, directions_url, place_url, CID fields, population, and ZIP groups."),
1165
+ proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode for every city Maps search. Use location by default for US city/state batches; it creates fresh residential proxy IDs when the browser service is available. Use configured for the server proxy ID, and none only for local direct-network debugging."),
1166
+ proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional ZIP override for proxy targeting. Normally omit it so each city can use its Lead Magician ZIP group or city/state location."),
1167
+ debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/proxy diagnostics in each Maps browser session when supported.")
1067
1168
  };
1068
1169
  var NullableString = import_zod.z.string().nullable();
1069
1170
  var MapsSearchOutputSchema = {
@@ -1084,12 +1185,62 @@ var MapsSearchOutputSchema = {
1084
1185
  reviewCount: NullableString,
1085
1186
  category: NullableString,
1086
1187
  address: NullableString,
1188
+ phone: NullableString,
1189
+ hoursStatus: NullableString,
1087
1190
  websiteUrl: NullableString,
1088
1191
  directionsUrl: NullableString,
1089
1192
  metadata: import_zod.z.array(import_zod.z.string())
1090
1193
  })),
1091
1194
  durationMs: import_zod.z.number().int().min(0)
1092
1195
  };
1196
+ var DirectoryMapsBusinessOutput = import_zod.z.object({
1197
+ position: import_zod.z.number().int().min(1),
1198
+ name: import_zod.z.string(),
1199
+ placeUrl: import_zod.z.string().url(),
1200
+ cid: NullableString,
1201
+ cidDecimal: NullableString,
1202
+ rating: NullableString,
1203
+ reviewCount: NullableString,
1204
+ category: NullableString,
1205
+ address: NullableString,
1206
+ phone: NullableString,
1207
+ hoursStatus: NullableString,
1208
+ websiteUrl: NullableString,
1209
+ directionsUrl: NullableString,
1210
+ metadata: import_zod.z.array(import_zod.z.string())
1211
+ });
1212
+ var DirectoryWorkflowOutputSchema = {
1213
+ query: import_zod.z.string(),
1214
+ state: import_zod.z.string(),
1215
+ minPopulation: import_zod.z.number().int().min(0),
1216
+ populationYear: import_zod.z.number().int().min(2020).max(2025),
1217
+ maxResultsPerCity: import_zod.z.number().int().min(1).max(50),
1218
+ concurrency: import_zod.z.number().int().min(1).max(5),
1219
+ censusSourceUrl: import_zod.z.string().url(),
1220
+ usZipsSourcePath: NullableString,
1221
+ warnings: import_zod.z.array(import_zod.z.string()),
1222
+ extractedAt: import_zod.z.string(),
1223
+ selectedCityCount: import_zod.z.number().int().min(0),
1224
+ totalResultCount: import_zod.z.number().int().min(0),
1225
+ csvPath: NullableString,
1226
+ cities: import_zod.z.array(import_zod.z.object({
1227
+ city: import_zod.z.string(),
1228
+ state: import_zod.z.string(),
1229
+ location: import_zod.z.string(),
1230
+ cityKey: import_zod.z.string(),
1231
+ censusName: import_zod.z.string(),
1232
+ population: import_zod.z.number().int().min(0),
1233
+ populationYear: import_zod.z.number().int().min(2020).max(2025),
1234
+ zips: import_zod.z.array(import_zod.z.string()),
1235
+ counties: import_zod.z.array(import_zod.z.string()),
1236
+ status: import_zod.z.enum(["ok", "empty", "failed"]),
1237
+ error: NullableString,
1238
+ resultCount: import_zod.z.number().int().min(0),
1239
+ durationMs: import_zod.z.number().int().min(0),
1240
+ results: import_zod.z.array(DirectoryMapsBusinessOutput)
1241
+ })),
1242
+ durationMs: import_zod.z.number().int().min(0)
1243
+ };
1093
1244
  var OrganicResultOutput = import_zod.z.object({
1094
1245
  position: import_zod.z.number().int(),
1095
1246
  title: import_zod.z.string(),
@@ -1269,8 +1420,8 @@ var SearchSerpInputSchema = {
1269
1420
  gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from location or user language."),
1270
1421
  hl: import_zod.z.string().default("en").describe("Google interface/content language inferred from user request."),
1271
1422
  device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
1272
- proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default so city/state searches create or reuse a matching residential proxy. Use configured for the static configured proxy. Use none only for direct-network debugging."),
1273
- proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use only when the user gives a specific ZIP or city-center proxy targeting needs to be forced."),
1423
+ proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default for US city/state SERPs; it creates a fresh residential proxy ID per attempt and retries CAPTCHA, proxy tunnel failure, and wrong-location evidence before returning. Use configured only for the static configured proxy. Use none only for direct-network debugging."),
1424
+ proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use when the user gives a specific ZIP or when city-center targeting needs to be forced. With proxyMode location this ZIP is used for each fresh proxy attempt."),
1274
1425
  debug: import_zod.z.boolean().default(false).describe("Include sanitized browser/session/location diagnostics in the response. Use true when debugging localization, CAPTCHA, or proxy behavior."),
1275
1426
  pages: import_zod.z.number().int().min(1).max(2).default(1).describe("Number of result pages to fetch (1\u20132)")
1276
1427
  };
@@ -1280,8 +1431,8 @@ var CaptureSerpSnapshotInputSchema = {
1280
1431
  gl: import_zod.z.string().length(2).default("us").describe("Google country code inferred from the requested market, e.g. us, gb, ca, au."),
1281
1432
  hl: import_zod.z.string().default("en").describe("Google interface/content language inferred from the user request."),
1282
1433
  device: import_zod.z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use mobile only when the user asks for mobile rankings or mobile SERP evidence."),
1283
- proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy behavior for capture. Use location for localized residential proxy targeting, configured for the static residential proxy, and none only for direct-network debugging."),
1284
- proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting when a precise city-center or ZIP proxy is needed."),
1434
+ proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location").describe("Proxy behavior for capture. Use location for localized US residential evidence; it creates a fresh proxy ID per attempt and retries CAPTCHA, proxy tunnel failure, and wrong-location evidence before returning. Use configured only for the static residential proxy, and none only for direct-network debugging."),
1435
+ proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting when a precise city-center or ZIP proxy is needed. With proxyMode location this ZIP is used for each fresh proxy attempt."),
1285
1436
  pages: import_zod.z.number().int().min(1).max(2).default(1).describe("Number of Google result pages to capture. Use 1 normally and 2 only when the user needs deeper ranking evidence."),
1286
1437
  debug: import_zod.z.boolean().default(false).describe("Include sanitized browser, proxy, and location diagnostics. Use true when debugging localization, CAPTCHA, proxy selection, or capture reliability."),
1287
1438
  includePageSnapshots: import_zod.z.boolean().default(false).describe("Also capture ranking-page snapshots for selected SERP URLs through the same product capture path."),
@@ -1356,14 +1507,14 @@ function buildPaaExtractorMcpServer(executor2, options = {}) {
1356
1507
  if (savesReports) registerSavedReportResources(server2);
1357
1508
  server2.registerTool("harvest_paa", {
1358
1509
  title: "Google PAA + SERP Harvest",
1359
- description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-200 for "full", "deep", "all", or comprehensive research. Deep harvests above 100 questions can run for several minutes with no interim progress \u2014 warn the user before starting one and keep maxQuestions at or below 100 unless they explicitly want a deep harvest. Credits are charged by extracted question; unused request hold is refunded.'),
1510
+ description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). For US local SERPs, leave proxyMode as location so the service uses fresh residential proxy IDs across retries and rejects wrong-location evidence instead of returning a bad market. Use maxQuestions 30 normally, 100-200 for "full", "deep", "all", or comprehensive research. Deep harvests above 100 questions can run for several minutes with no interim progress \u2014 warn the user before starting one and keep maxQuestions at or below 100 unless they explicitly want a deep harvest. Credits are charged by extracted question; unused request hold is refunded.'),
1360
1511
  inputSchema: HarvestPaaInputSchema,
1361
1512
  outputSchema: HarvestPaaOutputSchema,
1362
1513
  annotations: liveWebToolAnnotations("Google PAA + SERP Harvest")
1363
1514
  }, async (input) => formatHarvestPaa(await executor2.harvestPaa(input), input));
1364
1515
  server2.registerTool("search_serp", {
1365
1516
  title: "Google SERP Lookup",
1366
- description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request."),
1517
+ description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request. For US city/state rankings, keep proxyMode as location and pass proxyZip when a city-center ZIP is known; location mode uses fresh residential proxy IDs and retries CAPTCHA, proxy tunnel failures, and wrong-location evidence before returning."),
1367
1518
  inputSchema: SearchSerpInputSchema,
1368
1519
  outputSchema: SearchSerpOutputSchema,
1369
1520
  annotations: liveWebToolAnnotations("Google SERP Lookup")
@@ -1431,11 +1582,18 @@ function buildPaaExtractorMcpServer(executor2, options = {}) {
1431
1582
  }, async (input) => formatMapsPlaceIntel(await executor2.mapsPlaceIntel(input), input));
1432
1583
  server2.registerTool("maps_search", {
1433
1584
  title: "Google Maps Business Search",
1434
- description: withReportNote('Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.'),
1585
+ description: withReportNote('Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." For US city/state Maps searches, keep proxyMode as location so the browser service can create a fresh residential proxy ID for that market; pass proxyZip only when a specific ZIP or city-center ZIP is known. Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.'),
1435
1586
  inputSchema: MapsSearchInputSchema,
1436
1587
  outputSchema: MapsSearchOutputSchema,
1437
1588
  annotations: liveWebToolAnnotations("Google Maps Business Search")
1438
1589
  }, async (input) => formatMapsSearch(await executor2.mapsSearch(input), input));
1590
+ server2.registerTool("directory_workflow", {
1591
+ title: "Directory Workflow: Markets + Maps",
1592
+ description: withReportNote('Build directory/prospecting datasets by selecting US city markets from the free Census Population Estimates city/place dataset, optionally joining configured US ZIPS/Lead Magician ZIP groups, then running Google Maps business searches for each city in parallel. Use this when the user wants "all cities over 100k population in a state", "build a directory CSV", "find markets then get Maps data", or similar location-database + Maps workflows. Set minPopulation, state, query, maxResultsPerCity, and concurrency. Use concurrency up to 5 for parallel city sessions. Keep proxyMode as location so each city can use a fresh residential proxy ID when the browser service is available; retryable city failures use fresh proxies across attempts. Saved CSV rows include source_location, result_position, business_name, review_stars, category, address, phone, hours_status, website_url, directions_url, place_url, cid, cid_decimal, city population, and ZIP groups. This workflow captures star ratings from Maps list cards, not profile review counts; use maps_place_intel only when a selected profile needs deeper review details. For local Lead Magician ZIP enrichment, set MCP_SCRAPER_USZIPS_CSV_PATH on the API server or pass usZipsCsvPath only in local/test mode.'),
1593
+ inputSchema: DirectoryWorkflowInputSchema,
1594
+ outputSchema: DirectoryWorkflowOutputSchema,
1595
+ annotations: liveWebToolAnnotations("Directory Workflow: Markets + Maps")
1596
+ }, async (input) => formatDirectoryWorkflow(await executor2.directoryWorkflow(input), input));
1439
1597
  server2.registerTool("credits_info", {
1440
1598
  title: "MCP Scraper Credits & Costs",
1441
1599
  description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",