mcp-scraper 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -8
- package/dist/bin/api-server.cjs +4691 -3614
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/browser-agent-stdio-server.cjs +85 -8
- package/dist/bin/browser-agent-stdio-server.cjs.map +1 -1
- package/dist/bin/browser-agent-stdio-server.js +83 -6
- package/dist/bin/browser-agent-stdio-server.js.map +1 -1
- package/dist/bin/mcp-stdio-server.cjs +170 -12
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +3 -3
- package/dist/bin/paa-harvest.cjs +223 -74
- package/dist/bin/paa-harvest.cjs.map +1 -1
- package/dist/bin/paa-harvest.js +2 -2
- package/dist/{chunk-GXBT5CDU.js → chunk-IQOCZGJJ.js} +39 -2
- package/dist/chunk-IQOCZGJJ.js.map +1 -0
- package/dist/{chunk-BMVQB3WN.js → chunk-KIF4PKFZ.js} +173 -14
- package/dist/chunk-KIF4PKFZ.js.map +1 -0
- package/dist/{chunk-ZMOWIBMK.js → chunk-M2S27J6Z.js} +9 -2
- package/dist/{chunk-ZMOWIBMK.js.map → chunk-M2S27J6Z.js.map} +1 -1
- package/dist/{chunk-TM22BLWP.js → chunk-MY3S7EX7.js} +221 -76
- package/dist/chunk-MY3S7EX7.js.map +1 -0
- package/dist/chunk-PYBMZ346.js +7 -0
- package/dist/chunk-PYBMZ346.js.map +1 -0
- package/dist/index.cjs +223 -74
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +2 -2
- package/dist/{server-ASCMKUQ5.js → server-3QMDOEOS.js} +880 -181
- package/dist/server-3QMDOEOS.js.map +1 -0
- package/dist/{worker-KJ4A7WIR.js → worker-NAKGTIF5.js} +4 -4
- package/package.json +1 -1
- package/dist/chunk-2BS7BUEE.js +0 -7
- package/dist/chunk-2BS7BUEE.js.map +0 -1
- package/dist/chunk-BMVQB3WN.js.map +0 -1
- package/dist/chunk-GXBT5CDU.js.map +0 -1
- package/dist/chunk-TM22BLWP.js.map +0 -1
- package/dist/server-ASCMKUQ5.js.map +0 -1
- package/dist/{worker-KJ4A7WIR.js.map → worker-NAKGTIF5.js.map} +0 -0
package/dist/bin/paa-harvest.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import {
|
|
2
2
|
CaptchaError,
|
|
3
|
+
LocationMismatchError,
|
|
3
4
|
RequestAbortedError
|
|
4
|
-
} from "./chunk-ZMOWIBMK.js";
|
|
5
|
+
} from "./chunk-M2S27J6Z.js";
|
|
5
6
|
import {
|
|
6
7
|
finishHarvestAttempt,
|
|
7
8
|
startHarvestAttempt
|
|
@@ -207,6 +208,12 @@ function looksLikeTimeout(err, message) {
|
|
|
207
208
|
function looksLikeCaptcha(message) {
|
|
208
209
|
return /captcha|recaptcha|unusual traffic|google\.com\/sorry|blocked/i.test(message);
|
|
209
210
|
}
|
|
211
|
+
function looksLikeProxyTunnelFailure(message) {
|
|
212
|
+
return /ERR_TUNNEL_CONNECTION_FAILED|ERR_PROXY_CONNECTION_FAILED|ERR_SOCKS_CONNECTION_FAILED|tunnel connection failed|proxy connection failed|transport error: proxy/i.test(message);
|
|
213
|
+
}
|
|
214
|
+
function looksLikeProxyUnavailable(message) {
|
|
215
|
+
return /proxy unavailable|proxy_unavailable|connection_test_failed|did not return a proxy id|configured fallback/i.test(message);
|
|
216
|
+
}
|
|
210
217
|
function classifyHarvestProblem(err) {
|
|
211
218
|
const message = errorMessage(err);
|
|
212
219
|
if (err instanceof RequestAbortedError) {
|
|
@@ -229,6 +236,36 @@ function classifyHarvestProblem(err) {
|
|
|
229
236
|
terminalStatus: "failed"
|
|
230
237
|
};
|
|
231
238
|
}
|
|
239
|
+
if (err instanceof LocationMismatchError) {
|
|
240
|
+
return {
|
|
241
|
+
error_code: "location_mismatch",
|
|
242
|
+
error_type: "location_mismatch",
|
|
243
|
+
message,
|
|
244
|
+
retryable: true,
|
|
245
|
+
httpStatus: 503,
|
|
246
|
+
terminalStatus: "failed"
|
|
247
|
+
};
|
|
248
|
+
}
|
|
249
|
+
if (looksLikeProxyTunnelFailure(message)) {
|
|
250
|
+
return {
|
|
251
|
+
error_code: "proxy_tunnel_failed",
|
|
252
|
+
error_type: "proxy_tunnel_failed",
|
|
253
|
+
message,
|
|
254
|
+
retryable: true,
|
|
255
|
+
httpStatus: 503,
|
|
256
|
+
terminalStatus: "failed"
|
|
257
|
+
};
|
|
258
|
+
}
|
|
259
|
+
if (looksLikeProxyUnavailable(message)) {
|
|
260
|
+
return {
|
|
261
|
+
error_code: "proxy_unavailable",
|
|
262
|
+
error_type: "proxy_unavailable",
|
|
263
|
+
message,
|
|
264
|
+
retryable: true,
|
|
265
|
+
httpStatus: 503,
|
|
266
|
+
terminalStatus: "failed"
|
|
267
|
+
};
|
|
268
|
+
}
|
|
232
269
|
if (looksLikeTimeout(err, message)) {
|
|
233
270
|
return {
|
|
234
271
|
error_code: "harvest_timeout",
|
|
@@ -319,4 +356,4 @@ export {
|
|
|
319
356
|
harvestProblemResponse,
|
|
320
357
|
createHarvestAttemptRecorder
|
|
321
358
|
};
|
|
322
|
-
//# sourceMappingURL=chunk-GXBT5CDU.js.map
|
|
359
|
+
//# sourceMappingURL=chunk-IQOCZGJJ.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/api/rates.ts","../src/api/harvest-problems.ts","../src/api/harvest-attempt-events.ts"],"sourcesContent":["export const MC_COSTS = {\n serp: 100,\n paa: 100,\n page_scrape: 100,\n url_map: 2_000,\n yt_channel: 50,\n yt_transcription: 200,\n fb_ad: 50,\n maps_search: 2_000,\n maps_place: 2_000,\n maps_review: 50,\n fb_search: 50,\n fb_transcribe: 50,\n browser_minute: 4_000,\n} as const\n\nexport type McCostKey = keyof typeof MC_COSTS\n\nexport const MC_PER_BROWSER_MS = MC_COSTS.browser_minute / 60_000\n\nexport function browserActiveCostMc(activeMs: number): number {\n return Math.round(activeMs * MC_PER_BROWSER_MS)\n}\n\nexport const BROWSER_OPEN_MIN_BALANCE_MC = 1_000\n\nexport const MC_PER_CREDIT = 1_000\n\nexport const CREDIT_COST_CATALOG: Array<{\n key: McCostKey\n label: string\n aliases: string[]\n credits: number\n unit: string\n notes?: string\n}> = [\n {\n key: 'serp',\n label: 'SERP search',\n aliases: ['search_serp', 'serp', 'google search', 'organic results'],\n credits: mcToCredits(MC_COSTS.serp),\n unit: 'per search',\n notes: 'Returns AI Overview, PAA snippet, videos, forums, and local pack.',\n },\n {\n key: 'paa',\n label: 'PAA harvest',\n aliases: ['harvest_paa', 'paa', 'people also ask', 'questions'],\n credits: mcToCredits(MC_COSTS.paa),\n unit: 'per extracted question',\n notes: 'Includes full SERP feature extraction. 
Billed on actual questions returned — no cap enforced.',\n },\n {\n key: 'page_scrape',\n label: 'Page crawl / extract',\n aliases: ['extract_url', 'extract_site', 'page scrape', 'url scrape', 'single page', 'site crawl'],\n credits: mcToCredits(MC_COSTS.page_scrape),\n unit: 'per page',\n notes: 'Applies to both single-URL extraction and per-page site crawls.',\n },\n {\n key: 'url_map',\n label: 'Site URL mapping',\n aliases: ['map_site_urls', 'url map', 'site map', 'crawl urls'],\n credits: mcToCredits(MC_COSTS.url_map),\n unit: 'per mapping operation',\n notes: 'Flat rate for the full /map-urls call regardless of URL count discovered.',\n },\n {\n key: 'yt_channel',\n label: 'YouTube search / channel harvest',\n aliases: ['youtube_harvest', 'youtube search', 'youtube channel', 'yt_channel'],\n credits: mcToCredits(MC_COSTS.yt_channel),\n unit: 'per call',\n },\n {\n key: 'yt_transcription',\n label: 'YouTube transcription',\n aliases: ['youtube_transcribe', 'youtube transcript', 'transcription', 'yt_transcription'],\n credits: mcToCredits(MC_COSTS.yt_transcription),\n unit: 'per minute',\n notes: 'A 5-minute hold is taken, then reconciled to actual video duration.',\n },\n {\n key: 'fb_ad',\n label: 'Facebook search / ad lookup',\n aliases: ['facebook_page_intel', 'facebook_ad_search', 'facebook_ad', 'facebook ads', 'fb ads'],\n credits: mcToCredits(MC_COSTS.fb_ad),\n unit: 'per call',\n },\n {\n key: 'maps_search',\n label: 'Maps business search',\n aliases: ['maps_search', 'google maps search', 'gmb search', 'gbp search', 'business profiles'],\n credits: mcToCredits(MC_COSTS.maps_search),\n unit: 'per search',\n notes: 'Returns up to 50 Google Maps business/profile candidates. Use maps_place_intel to hydrate selected businesses.',\n },\n {\n key: 'maps_place',\n label: 'Maps business lookup',\n aliases: ['maps_place_intel', 'google maps', 'maps place', 'place intel'],\n credits: mcToCredits(MC_COSTS.maps_place),\n unit: 'per business',\n notes: 'Base lookup. 
Reviews billed separately per card at maps_review rate.',\n },\n {\n key: 'maps_review',\n label: 'Maps review',\n aliases: ['maps_reviews', 'google reviews', 'review cards', 'reviews'],\n credits: mcToCredits(MC_COSTS.maps_review),\n unit: 'per review card',\n notes: 'Charged after extraction when includeReviews is true.',\n },\n {\n key: 'fb_search',\n label: 'Facebook ad library search',\n aliases: ['facebook_search', 'fb_search', 'fb ad search'],\n credits: mcToCredits(MC_COSTS.fb_search),\n unit: 'per search',\n notes: 'Browser automation to search Facebook Ads Library by keyword.',\n },\n {\n key: 'fb_transcribe',\n label: 'Facebook ad transcription',\n aliases: ['facebook_transcribe', 'fb_transcribe', 'fb ad transcript'],\n credits: mcToCredits(MC_COSTS.fb_transcribe),\n unit: 'per call',\n notes: 'Whisper transcription of Facebook ad video via fal.ai.',\n },\n {\n key: 'browser_minute',\n label: 'Interactive browser session',\n aliases: ['browser_open', 'browser agent', 'browser_agent', 'live browser', 'browse', 'browser control', 'interactive browser'],\n credits: mcToCredits(MC_COSTS.browser_minute),\n unit: 'per minute of active time',\n notes: 'Metered per second of active browser work (navigation, clicks, typing, screenshots). Idle and standby time are free. 
Billed against your balance as you act; close the session to stop the meter.',\n },\n]\n\nexport const CONCURRENCY_PRICE_ID = 'price_1Ta1NRS8aAcsk3TGwsRnYbix'\n\nexport const FREE_SIGNUP_MC = 500_000\nexport const FREE_MONTHLY_REFRESH_MC = 250_000\n\nexport const BALANCE_PRICE_IDS: Record<string, number> = {\n 'price_1TZx6rS8aAcsk3TGNMc1Vgpo': 11_000_000,\n 'price_1TZx6sS8aAcsk3TGxgqB7khO': 27_500_000,\n 'price_1TZx6tS8aAcsk3TG8PnJqHlG': 60_500_000,\n 'price_1TZx6tS8aAcsk3TGNgRMpy0e': 121_000_000,\n}\n\nexport const BALANCE_PACK_LABELS: Record<string, string> = {\n 'price_1TZx6rS8aAcsk3TGNMc1Vgpo': '$10',\n 'price_1TZx6sS8aAcsk3TGxgqB7khO': '$25',\n 'price_1TZx6tS8aAcsk3TG8PnJqHlG': '$50',\n 'price_1TZx6tS8aAcsk3TGNgRMpy0e': '$100',\n}\n\nexport function mcToCredits(mc: number): number {\n return mc / MC_PER_CREDIT\n}\n\nexport function insufficientBalanceResponse(balanceMc: number, requiredMc: number) {\n const topupUrl = process.env.TOPUP_URL ?? 'https://mcpscraper.dev/billing'\n const balanceCredits = mcToCredits(balanceMc)\n const requiredCredits = mcToCredits(requiredMc)\n return {\n error: 'insufficient_balance',\n error_code: 'insufficient_balance' as const,\n message: `Insufficient credits. Balance: ${balanceCredits} credits. This call requires ${requiredCredits} credits. 
Top up at ${topupUrl}`,\n balance_credits: balanceCredits,\n required_credits: requiredCredits,\n topup_url: topupUrl,\n }\n}\n\nexport const LedgerOperation = {\n TOPUP: 'topup',\n SIGNUP_GRANT: 'signup_grant',\n MONTHLY_REFRESH: 'monthly_free_refresh',\n PAA: 'paa',\n PAA_REFUND: 'paa_refund',\n SERP: 'serp',\n REFUND: 'refund',\n TRANSCRIPTION: 'transcription',\n TRANSCRIPTION_HOLD: 'transcription_hold',\n TRANSCRIPTION_REFUND: 'transcription_refund',\n YT_CHANNEL: 'yt_channel',\n FB_AD: 'fb_ad',\n MAPS_SEARCH: 'maps_search',\n MAPS_PLACE: 'maps_place',\n MAPS_REVIEW: 'maps_review',\n MAPS_REVIEW_REFUND: 'maps_review_refund',\n EXTRACT_SITE: 'extract_site',\n EXTRACT_SITE_REFUND: 'extract_site_refund',\n EXTRACT_URL: 'page_scrape',\n URL_MAP: 'url_map',\n EXTRACT_SITE_HOLD: 'extract_site_hold',\n YT_CHANNEL_REFUND: 'yt_channel_refund',\n FB_AD_REFUND: 'fb_ad_refund',\n URL_MAP_REFUND: 'url_map_refund',\n FB_SEARCH: 'fb_search',\n FB_TRANSCRIBE: 'fb_transcribe',\n FB_SEARCH_REFUND: 'fb_search_refund',\n FB_TRANSCRIBE_REFUND: 'fb_transcribe_refund',\n BROWSER_SESSION: 'browser_session',\n} as const\n\nexport type LedgerOperation = typeof LedgerOperation[keyof typeof LedgerOperation]\n","import { CaptchaError, LocationMismatchError, RequestAbortedError } from '../errors.js'\n\nexport type HarvestProblemCode =\n | 'request_aborted'\n | 'captcha_exhausted'\n | 'location_mismatch'\n | 'proxy_tunnel_failed'\n | 'proxy_unavailable'\n | 'harvest_timeout'\n | 'extraction_failed'\n\nexport interface HarvestProblem {\n error_code: HarvestProblemCode\n error_type: string\n message: string\n retryable: boolean\n httpStatus: number\n terminalStatus: 'cancelled' | 'failed'\n}\n\nfunction errorMessage(err: unknown): string {\n return err instanceof Error ? 
err.message : String(err)\n}\n\nfunction looksLikeTimeout(err: unknown, message: string): boolean {\n if (err instanceof DOMException && (err.name === 'TimeoutError' || err.name === 'AbortError')) return true\n return /timeout|timed out|Timeout \\d+ms exceeded|deadline/i.test(message)\n}\n\nfunction looksLikeCaptcha(message: string): boolean {\n return /captcha|recaptcha|unusual traffic|google\\.com\\/sorry|blocked/i.test(message)\n}\n\nfunction looksLikeProxyTunnelFailure(message: string): boolean {\n return /ERR_TUNNEL_CONNECTION_FAILED|ERR_PROXY_CONNECTION_FAILED|ERR_SOCKS_CONNECTION_FAILED|tunnel connection failed|proxy connection failed|transport error: proxy/i.test(message)\n}\n\nfunction looksLikeProxyUnavailable(message: string): boolean {\n return /proxy unavailable|proxy_unavailable|connection_test_failed|did not return a proxy id|configured fallback/i.test(message)\n}\n\nexport function classifyHarvestProblem(err: unknown): HarvestProblem {\n const message = errorMessage(err)\n\n if (err instanceof RequestAbortedError) {\n return {\n error_code: 'request_aborted',\n error_type: 'request_aborted',\n message,\n retryable: true,\n httpStatus: 408,\n terminalStatus: 'cancelled',\n }\n }\n\n if (err instanceof CaptchaError || looksLikeCaptcha(message)) {\n return {\n error_code: 'captcha_exhausted',\n error_type: 'captcha',\n message,\n retryable: true,\n httpStatus: 503,\n terminalStatus: 'failed',\n }\n }\n\n if (err instanceof LocationMismatchError) {\n return {\n error_code: 'location_mismatch',\n error_type: 'location_mismatch',\n message,\n retryable: true,\n httpStatus: 503,\n terminalStatus: 'failed',\n }\n }\n\n if (looksLikeProxyTunnelFailure(message)) {\n return {\n error_code: 'proxy_tunnel_failed',\n error_type: 'proxy_tunnel_failed',\n message,\n retryable: true,\n httpStatus: 503,\n terminalStatus: 'failed',\n }\n }\n\n if (looksLikeProxyUnavailable(message)) {\n return {\n error_code: 'proxy_unavailable',\n error_type: 'proxy_unavailable',\n 
message,\n retryable: true,\n httpStatus: 503,\n terminalStatus: 'failed',\n }\n }\n\n if (looksLikeTimeout(err, message)) {\n return {\n error_code: 'harvest_timeout',\n error_type: 'timeout',\n message,\n retryable: true,\n httpStatus: 504,\n terminalStatus: 'failed',\n }\n }\n\n return {\n error_code: 'extraction_failed',\n error_type: 'extraction',\n message,\n retryable: false,\n httpStatus: 500,\n terminalStatus: 'failed',\n }\n}\n\nexport function serializeHarvestProblem(problem: HarvestProblem): string {\n return JSON.stringify({\n error_code: problem.error_code,\n error_type: problem.error_type,\n message: problem.message,\n retryable: problem.retryable,\n })\n}\n\nexport function harvestProblemResponse(problem: HarvestProblem): {\n error: string\n error_code: HarvestProblemCode\n error_type: string\n retryable: boolean\n} {\n return {\n error: problem.message,\n error_code: problem.error_code,\n error_type: problem.error_type,\n retryable: problem.retryable,\n }\n}\n","import type { HarvestAttemptLogEvent } from '../harvest.js'\nimport { finishHarvestAttempt, startHarvestAttempt } from './db.js'\n\nexport function createHarvestAttemptRecorder(jobId: string, userId: number | bigint) {\n return async (event: HarvestAttemptLogEvent): Promise<void> => {\n if (event.type === 'started') {\n await startHarvestAttempt({\n jobId,\n userId,\n attemptNumber: event.attemptNumber,\n maxAttempts: event.maxAttempts,\n query: event.query,\n location: event.location,\n maxQuestions: event.maxQuestions,\n startedAt: event.startedAt,\n })\n return\n }\n\n await finishHarvestAttempt({\n jobId,\n attemptNumber: event.attemptNumber,\n outcome: event.outcome,\n kernelSessionId: event.kernelSessionId,\n questionCount: event.questionCount,\n durationMs: event.durationMs,\n error: event.error,\n willRetry: event.willRetry,\n kernelDeleteStarted: event.cleanup.kernelDeleteStarted,\n kernelDeleteSucceeded: event.cleanup.kernelDeleteSucceeded,\n kernelDeleteError: 
event.cleanup.kernelDeleteError,\n browserCloseSucceeded: event.cleanup.browserCloseSucceeded,\n browserCloseError: event.cleanup.browserCloseError,\n debug: event.debug,\n completedAt: event.completedAt,\n })\n }\n}\n"],"mappings":";;;;;;;;;;;AAAO,IAAM,WAAW;AAAA,EACtB,MAAmB;AAAA,EACnB,KAAmB;AAAA,EACnB,aAAmB;AAAA,EACnB,SAAiB;AAAA,EACjB,YAAoB;AAAA,EACpB,kBAAmB;AAAA,EACnB,OAAoB;AAAA,EACpB,aAAkB;AAAA,EAClB,YAAiB;AAAA,EACjB,aAAoB;AAAA,EACpB,WAAoB;AAAA,EACpB,eAAoB;AAAA,EACpB,gBAAiB;AACnB;AAIO,IAAM,oBAAoB,SAAS,iBAAiB;AAEpD,SAAS,oBAAoB,UAA0B;AAC5D,SAAO,KAAK,MAAM,WAAW,iBAAiB;AAChD;AAEO,IAAM,8BAA8B;AAEpC,IAAM,gBAAgB;AAEtB,IAAM,sBAOR;AAAA,EACH;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,eAAe,QAAQ,iBAAiB,iBAAiB;AAAA,IACnE,SAAS,YAAY,SAAS,IAAI;AAAA,IAClC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,eAAe,OAAO,mBAAmB,WAAW;AAAA,IAC9D,SAAS,YAAY,SAAS,GAAG;AAAA,IACjC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,eAAe,gBAAgB,eAAe,cAAc,eAAe,YAAY;AAAA,IACjG,SAAS,YAAY,SAAS,WAAW;AAAA,IACzC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,iBAAiB,WAAW,YAAY,YAAY;AAAA,IAC9D,SAAS,YAAY,SAAS,OAAO;AAAA,IACrC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,mBAAmB,kBAAkB,mBAAmB,YAAY;AAAA,IAC9E,SAAS,YAAY,SAAS,UAAU;AAAA,IACxC,MAAM;AAAA,EACR;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,sBAAsB,sBAAsB,iBAAiB,kBAAkB;AAAA,IACzF,SAAS,YAAY,SAAS,gBAAgB;AAAA,IAC9C,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,uBAAuB,sBAAsB,eAAe,gBAAgB,QAAQ;AAAA,IAC9F,SAAS,YAAY,SAAS,KAAK;AAAA,IACnC,MAAM;AAAA,EACR;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,eAAe,sBAAsB,cAAc,cAAc,mBAAmB;AAAA,IAC9F,SAAS,YAAY,SAAS,WAAW;AAAA,IACzC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,oBAAoB,eAAe,cAAc,aAAa;AAAA,IACxE,SAAS,YAAY,SAAS,UAAU;AAAA,IACxC,MA
AM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,gBAAgB,kBAAkB,gBAAgB,SAAS;AAAA,IACrE,SAAS,YAAY,SAAS,WAAW;AAAA,IACzC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,mBAAmB,aAAa,cAAc;AAAA,IACxD,SAAS,YAAY,SAAS,SAAS;AAAA,IACvC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,uBAAuB,iBAAiB,kBAAkB;AAAA,IACpE,SAAS,YAAY,SAAS,aAAa;AAAA,IAC3C,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,gBAAgB,iBAAiB,iBAAiB,gBAAgB,UAAU,mBAAmB,qBAAqB;AAAA,IAC9H,SAAS,YAAY,SAAS,cAAc;AAAA,IAC5C,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AACF;AAEO,IAAM,uBAAuB;AAE7B,IAAM,iBAAiB;AACvB,IAAM,0BAA0B;AAEhC,IAAM,oBAA4C;AAAA,EACvD,kCAAkC;AAAA,EAClC,kCAAkC;AAAA,EAClC,kCAAkC;AAAA,EAClC,kCAAkC;AACpC;AAEO,IAAM,sBAA8C;AAAA,EACzD,kCAAkC;AAAA,EAClC,kCAAkC;AAAA,EAClC,kCAAkC;AAAA,EAClC,kCAAkC;AACpC;AAEO,SAAS,YAAY,IAAoB;AAC9C,SAAO,KAAK;AACd;AAEO,SAAS,4BAA4B,WAAmB,YAAoB;AACjF,QAAM,WAAW,QAAQ,IAAI,aAAa;AAC1C,QAAM,iBAAiB,YAAY,SAAS;AAC5C,QAAM,kBAAkB,YAAY,UAAU;AAC9C,SAAO;AAAA,IACL,OAAO;AAAA,IACP,YAAY;AAAA,IACZ,SAAS,kCAAkC,cAAc,gCAAgC,eAAe,uBAAuB,QAAQ;AAAA,IACvI,iBAAiB;AAAA,IACjB,kBAAkB;AAAA,IAClB,WAAW;AAAA,EACb;AACF;AAEO,IAAM,kBAAkB;AAAA,EAC7B,OAAuB;AAAA,EACvB,cAAuB;AAAA,EACvB,iBAAuB;AAAA,EACvB,KAAuB;AAAA,EACvB,YAAuB;AAAA,EACvB,MAAuB;AAAA,EACvB,QAAuB;AAAA,EACvB,eAAuB;AAAA,EACvB,oBAAuB;AAAA,EACvB,sBAAuB;AAAA,EACvB,YAAuB;AAAA,EACvB,OAAuB;AAAA,EACvB,aAAuB;AAAA,EACvB,YAAuB;AAAA,EACvB,aAAuB;AAAA,EACvB,oBAAuB;AAAA,EACvB,cAAuB;AAAA,EACvB,qBAAuB;AAAA,EACvB,aAAuB;AAAA,EACvB,SAAuB;AAAA,EACvB,mBAAuB;AAAA,EACvB,mBAAuB;AAAA,EACvB,cAAuB;AAAA,EACvB,gBAAuB;AAAA,EACvB,WAAuB;AAAA,EACvB,eAAuB;AAAA,EACvB,kBAAuB;AAAA,EACvB,sBAAuB;AAAA,EACvB,iBAAuB;AACzB;;;AC3LA,SAAS,aAAa,KAAsB;AAC1C,SAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AACxD;AAEA,SAAS,iBAAiB,KAAc,SAA0B;AAChE,MAAI,eAAe,iBAAiB,IAAI,SAAS,kBAAkB,IAAI,SAAS,cAAe,QAAO;AACtG,SAAO,qDAAqD,KAAK,OAAO;AAC1E;AAEA,SAAS,iBAAiB,SAA0B;AAClD,SAAO,gEAAgE,KAAK,O
AAO;AACrF;AAEA,SAAS,4BAA4B,SAA0B;AAC7D,SAAO,gKAAgK,KAAK,OAAO;AACrL;AAEA,SAAS,0BAA0B,SAA0B;AAC3D,SAAO,4GAA4G,KAAK,OAAO;AACjI;AAEO,SAAS,uBAAuB,KAA8B;AACnE,QAAM,UAAU,aAAa,GAAG;AAEhC,MAAI,eAAe,qBAAqB;AACtC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,eAAe,gBAAgB,iBAAiB,OAAO,GAAG;AAC5D,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,eAAe,uBAAuB;AACxC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,4BAA4B,OAAO,GAAG;AACxC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,0BAA0B,OAAO,GAAG;AACtC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,iBAAiB,KAAK,OAAO,GAAG;AAClC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,YAAY;AAAA,IACZ,YAAY;AAAA,IACZ;AAAA,IACA,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,gBAAgB;AAAA,EAClB;AACF;AAEO,SAAS,wBAAwB,SAAiC;AACvE,SAAO,KAAK,UAAU;AAAA,IACpB,YAAY,QAAQ;AAAA,IACpB,YAAY,QAAQ;AAAA,IACpB,SAAS,QAAQ;AAAA,IACjB,WAAW,QAAQ;AAAA,EACrB,CAAC;AACH;AAEO,SAAS,uBAAuB,SAKrC;AACA,SAAO;AAAA,IACL,OAAO,QAAQ;AAAA,IACf,YAAY,QAAQ;AAAA,IACpB,YAAY,QAAQ;AAAA,IACpB,WAAW,QAAQ;AAAA,EACrB;AACF;;;AC1IO,SAAS,6BAA6B,OAAe,QAAyB;AACnF,SAAO,OAAO,UAAiD;AAC7D,QAAI,MAAM,SAAS,WAAW;AAC5B,YAAM,oBAAoB;AAAA,QACxB;AAAA,QACA;AAAA,QACA,eAAe,MAAM;AAAA,QACrB,aAAa,MAAM;AAAA,QACnB,OAAO,MAAM;AAAA,QACb,UAAU,MAAM;AAAA,QAChB,cAAc,MAAM;AAAA,QACpB,WAAW,MAAM;AAAA,MACnB,CAAC;AACD;AAAA,IACF;AAEA,UAAM,qBAAqB;AAAA,MACzB;AAAA,MACA,eAAe,MAAM;AAAA,MACrB,SAAS,MAAM;AAAA,MACf,iBAAiB,MAAM;AAAA,MACvB,eAAe,MAAM;AAAA,MACrB,YAAY,MAAM;AAAA,MAClB,OAAO,MAAM;AAAA,MACb,WAAW,MAAM;AAAA,MACjB,qBAAqB,MAAM,QAAQ;AAAA,MACnC,uBAAuB,MAAM,QAAQ;AAAA,MACrC,mBAAmB,MAAM,QAAQ;AAAA,MACjC,uBAAuB,M
AAM,QAAQ;AAAA,MACrC,mBAAmB,MAAM,QAAQ;AAAA,MACjC,OAAO,MAAM;AAAA,MACb,aAAa,MAAM;AAAA,IACrB,CAAC;AAAA,EACH;AACF;","names":[]}
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
2
|
PACKAGE_VERSION
|
|
3
|
-
} from "./chunk-2BS7BUEE.js";
|
|
3
|
+
} from "./chunk-PYBMZ346.js";
|
|
4
4
|
import {
|
|
5
5
|
sanitizeVendorName
|
|
6
|
-
} from "./chunk-ZMOWIBMK.js";
|
|
6
|
+
} from "./chunk-M2S27J6Z.js";
|
|
7
7
|
|
|
8
8
|
// src/harvest-timeout.ts
|
|
9
9
|
var VERCEL_FUNCTION_MAX_MS = 3e5;
|
|
@@ -688,6 +688,11 @@ function formatMapsSearch(raw, input) {
|
|
|
688
688
|
if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
|
|
689
689
|
const d = parsed.data;
|
|
690
690
|
const results = d.results ?? [];
|
|
691
|
+
const normalizedResults = results.map((result) => ({
|
|
692
|
+
...result,
|
|
693
|
+
phone: result.phone ?? null,
|
|
694
|
+
hoursStatus: result.hoursStatus ?? null
|
|
695
|
+
}));
|
|
691
696
|
const searchQuery = d.searchQuery ?? [input.query, input.location].filter(Boolean).join(" ");
|
|
692
697
|
const requestedMax = d.requestedMaxResults ?? input.maxResults ?? 10;
|
|
693
698
|
const durationMs = d.durationMs;
|
|
@@ -727,7 +732,79 @@ ${rows}`,
|
|
|
727
732
|
extractedAt: d.extractedAt,
|
|
728
733
|
requestedMaxResults: requestedMax,
|
|
729
734
|
resultCount: results.length,
|
|
730
|
-
results,
|
|
735
|
+
results: normalizedResults,
|
|
736
|
+
durationMs: durationMs ?? 0
|
|
737
|
+
}
|
|
738
|
+
};
|
|
739
|
+
}
|
|
740
|
+
function formatDirectoryWorkflow(raw, input) {
|
|
741
|
+
const parsed = parseData(raw);
|
|
742
|
+
if ("error" in parsed) return { content: [{ type: "text", text: parsed.error }], isError: true };
|
|
743
|
+
const d = parsed.data;
|
|
744
|
+
const cities = (d.cities ?? []).map((city) => ({
|
|
745
|
+
...city,
|
|
746
|
+
results: city.results.map((result) => ({
|
|
747
|
+
...result,
|
|
748
|
+
phone: result.phone ?? null,
|
|
749
|
+
hoursStatus: result.hoursStatus ?? null
|
|
750
|
+
}))
|
|
751
|
+
}));
|
|
752
|
+
const warnings = d.warnings ?? [];
|
|
753
|
+
const csvPath = d.csvPath ?? null;
|
|
754
|
+
const totalResultCount = d.totalResultCount ?? cities.reduce((sum, city) => sum + city.resultCount, 0);
|
|
755
|
+
const durationMs = d.durationMs;
|
|
756
|
+
const marketRows = cities.map((city) => {
|
|
757
|
+
const zips = city.zips?.length ? city.zips.slice(0, 8).join(" ") + (city.zips.length > 8 ? ` +${city.zips.length - 8}` : "") : "\u2014";
|
|
758
|
+
return `| ${cell(city.city)} | ${city.population.toLocaleString()} | ${city.zips?.length ?? 0} | ${city.resultCount} | ${city.status} | ${cell(zips)} |`;
|
|
759
|
+
}).join("\n");
|
|
760
|
+
const businessRows = cities.flatMap((city) => city.results.slice(0, 3).map((result) => ({ city, result }))).map(({ city, result }) => {
|
|
761
|
+
const rating = [result.rating, result.reviewCount ? `(${result.reviewCount})` : null].filter(Boolean).join(" ");
|
|
762
|
+
return `| ${cell(city.city)} | ${result.position} | ${cell(result.name)} | ${cell(result.category)} | ${cell(rating)} | ${result.websiteUrl ? `[site](${result.websiteUrl})` : "\u2014"} | [maps](${result.placeUrl}) |`;
|
|
763
|
+
}).join("\n");
|
|
764
|
+
const warningText = warnings.length ? `
|
|
765
|
+
## Warnings
|
|
766
|
+
${warnings.map((w) => `- ${w}`).join("\n")}` : "";
|
|
767
|
+
const csvText = csvPath ? `
|
|
768
|
+
**CSV:** \`${csvPath}\`` : "";
|
|
769
|
+
const full = [
|
|
770
|
+
`# Directory Workflow: ${input.query}`,
|
|
771
|
+
`**Markets:** ${cities.length} \xB7 **Maps results:** ${totalResultCount} \xB7 **State:** ${d.state ?? input.state ?? "US"} \xB7 **Population threshold:** ${d.minPopulation ?? input.minPopulation ?? 1e5}`,
|
|
772
|
+
csvText,
|
|
773
|
+
`
|
|
774
|
+
## Markets
|
|
775
|
+
| City | Population | ZIPs | Maps Results | Status | ZIP Sample |
|
|
776
|
+
|---|---:|---:|---:|---|---|
|
|
777
|
+
${marketRows}`,
|
|
778
|
+
businessRows ? `
|
|
779
|
+
## Top Candidates By City
|
|
780
|
+
| City | # | Name | Category | Rating | Website | Maps |
|
|
781
|
+
|---|---:|---|---|---|---|---|
|
|
782
|
+
${businessRows}` : null,
|
|
783
|
+
warningText,
|
|
784
|
+
`
|
|
785
|
+
## Sources
|
|
786
|
+
- Population: ${d.censusSourceUrl ?? "Census Population Estimates Program"}
|
|
787
|
+
- ZIP groups: ${d.usZipsSourcePath ?? "not configured"}`,
|
|
788
|
+
durationMs != null ? `
|
|
789
|
+
*Completed in ${(durationMs / 1e3).toFixed(1)}s*` : null
|
|
790
|
+
].filter(Boolean).join("\n");
|
|
791
|
+
return {
|
|
792
|
+
...oneBlock(full),
|
|
793
|
+
structuredContent: {
|
|
794
|
+
query: d.query,
|
|
795
|
+
state: d.state,
|
|
796
|
+
minPopulation: d.minPopulation,
|
|
797
|
+
populationYear: d.populationYear,
|
|
798
|
+
maxResultsPerCity: d.maxResultsPerCity,
|
|
799
|
+
concurrency: d.concurrency,
|
|
800
|
+
censusSourceUrl: d.censusSourceUrl,
|
|
801
|
+
usZipsSourcePath: d.usZipsSourcePath ?? null,
|
|
802
|
+
warnings,
|
|
803
|
+
extractedAt: d.extractedAt,
|
|
804
|
+
selectedCityCount: d.selectedCityCount,
|
|
805
|
+
totalResultCount,
|
|
806
|
+
csvPath,
|
|
807
|
+
cities,
|
|
731
808
|
durationMs: durationMs ?? 0
|
|
732
809
|
}
|
|
733
810
|
};
|
|
@@ -893,8 +970,8 @@ var HarvestPaaInputSchema = {
|
|
|
893
970
|
gl: z.string().length(2).default("us").describe("Google country code inferred from location or user language. Examples: United States us, United Kingdom gb, Japan jp, Canada ca, Australia au."),
|
|
894
971
|
hl: z.string().default("en").describe("Google interface/content language inferred from the user request. Use en unless the user asks for another language or locale."),
|
|
895
972
|
device: z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
|
|
896
|
-
proxyMode: z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default
|
|
897
|
-
proxyZip: z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use
|
|
973
|
+
proxyMode: z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default for US city/state SERPs; it creates a fresh residential proxy ID per attempt and retries CAPTCHA, proxy tunnel failure, and wrong-location evidence before returning. Use configured only for the static configured proxy. Use none only for direct-network debugging."),
|
|
974
|
+
proxyZip: z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use when the user gives a specific ZIP or when city-center targeting needs to be forced. With proxyMode location this ZIP is used for each fresh proxy attempt."),
|
|
898
975
|
debug: z.boolean().default(false).describe("Include sanitized browser/session/location diagnostics in the response. Use true when debugging localization, CAPTCHA, or proxy behavior.")
|
|
899
976
|
};
|
|
900
977
|
var ExtractUrlInputSchema = {
|
|
@@ -951,7 +1028,25 @@ var MapsSearchInputSchema = {
|
|
|
951
1028
|
location: z.string().optional().describe('City, region, country, or service area for the Maps search, e.g. "Denver, CO". Infer from the user request when present.'),
|
|
952
1029
|
gl: z.string().length(2).default("us").describe("Google country code inferred from location."),
|
|
953
1030
|
hl: z.string().length(2).default("en").describe("Language inferred from user request."),
|
|
954
|
-
maxResults: z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more.")
|
|
1031
|
+
maxResults: z.number().int().min(1).max(50).default(10).describe("Number of Google Maps business/profile candidates to return. Default 10. Maximum 50. Use 10 unless the user asks for more."),
|
|
1032
|
+
proxyMode: z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default for US city/state Maps searches; it creates a fresh residential proxy ID when the browser service is available. Use configured for the server proxy ID, and none only for local direct-network debugging."),
|
|
1033
|
+
proxyZip: z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use when the user gives a specific ZIP or city-center ZIP."),
|
|
1034
|
+
debug: z.boolean().default(false).describe("Include sanitized browser/proxy diagnostics when debugging Maps localization, CAPTCHA, or proxy behavior.")
|
|
1035
|
+
};
|
|
1036
|
+
var DirectoryWorkflowInputSchema = {
|
|
1037
|
+
query: z.string().min(1).describe("Business category, niche, or keyword to search on Google Maps for every selected market, e.g. roofers, dentists, med spas. Do not include the city here."),
|
|
1038
|
+
state: z.string().min(2).default("TN").describe("US state abbreviation or state name used to select Census places, e.g. TN or Tennessee."),
|
|
1039
|
+
minPopulation: z.number().int().min(0).default(1e5).describe('Minimum Census place population for market selection. Use 100000 for "cities above 100k population".'),
|
|
1040
|
+
populationYear: z.number().int().min(2020).max(2025).default(2025).describe("Census population estimate year from the 2020-2025 Population Estimates Program city/place dataset."),
|
|
1041
|
+
maxCities: z.number().int().min(1).max(100).default(25).describe("Maximum number of markets to process after sorting by population descending."),
|
|
1042
|
+
maxResultsPerCity: z.number().int().min(1).max(50).default(50).describe("Google Maps business/profile candidates to collect for each city. Maximum 50."),
|
|
1043
|
+
concurrency: z.number().int().min(1).max(5).default(5).describe("How many city Maps searches to run in parallel. Use 5 for broad directory batches unless debugging."),
|
|
1044
|
+
includeZipGroups: z.boolean().default(true).describe("Attach ZIP groups from a configured US ZIPS CSV when available. Set MCP_SCRAPER_USZIPS_CSV_PATH on the API server or pass usZipsCsvPath in local/test mode."),
|
|
1045
|
+
usZipsCsvPath: z.string().optional().describe("Local/test-only path to a US ZIPS CSV with state_abbr, zipcode, county, city columns, such as Lead Magician tools/analytics/data/uszips.csv. Deployed APIs should use MCP_SCRAPER_USZIPS_CSV_PATH instead."),
|
|
1046
|
+
saveCsv: z.boolean().default(true).describe("Save a directory-ready CSV to the MCP Scraper output directory and return its path. CSV rows include source_location, result_position, business_name, review_stars, category, address, phone, hours_status, website_url, directions_url, place_url, CID fields, population, and ZIP groups."),
|
|
1047
|
+
proxyMode: z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode for every city Maps search. Use location by default for US city/state batches; it creates fresh residential proxy IDs when the browser service is available. Use configured for the server proxy ID, and none only for local direct-network debugging."),
|
|
1048
|
+
proxyZip: z.string().regex(/^\d{5}$/).optional().describe("Optional ZIP override for proxy targeting. Normally omit it so each city can use its Lead Magician ZIP group or city/state location."),
|
|
1049
|
+
debug: z.boolean().default(false).describe("Include sanitized browser/proxy diagnostics in each Maps browser session when supported.")
|
|
955
1050
|
};
|
|
956
1051
|
var NullableString = z.string().nullable();
|
|
957
1052
|
var MapsSearchOutputSchema = {
|
|
@@ -972,12 +1067,62 @@ var MapsSearchOutputSchema = {
|
|
|
972
1067
|
reviewCount: NullableString,
|
|
973
1068
|
category: NullableString,
|
|
974
1069
|
address: NullableString,
|
|
1070
|
+
phone: NullableString,
|
|
1071
|
+
hoursStatus: NullableString,
|
|
975
1072
|
websiteUrl: NullableString,
|
|
976
1073
|
directionsUrl: NullableString,
|
|
977
1074
|
metadata: z.array(z.string())
|
|
978
1075
|
})),
|
|
979
1076
|
durationMs: z.number().int().min(0)
|
|
980
1077
|
};
|
|
1078
|
+
var DirectoryMapsBusinessOutput = z.object({
|
|
1079
|
+
position: z.number().int().min(1),
|
|
1080
|
+
name: z.string(),
|
|
1081
|
+
placeUrl: z.string().url(),
|
|
1082
|
+
cid: NullableString,
|
|
1083
|
+
cidDecimal: NullableString,
|
|
1084
|
+
rating: NullableString,
|
|
1085
|
+
reviewCount: NullableString,
|
|
1086
|
+
category: NullableString,
|
|
1087
|
+
address: NullableString,
|
|
1088
|
+
phone: NullableString,
|
|
1089
|
+
hoursStatus: NullableString,
|
|
1090
|
+
websiteUrl: NullableString,
|
|
1091
|
+
directionsUrl: NullableString,
|
|
1092
|
+
metadata: z.array(z.string())
|
|
1093
|
+
});
|
|
1094
|
+
var DirectoryWorkflowOutputSchema = {
|
|
1095
|
+
query: z.string(),
|
|
1096
|
+
state: z.string(),
|
|
1097
|
+
minPopulation: z.number().int().min(0),
|
|
1098
|
+
populationYear: z.number().int().min(2020).max(2025),
|
|
1099
|
+
maxResultsPerCity: z.number().int().min(1).max(50),
|
|
1100
|
+
concurrency: z.number().int().min(1).max(5),
|
|
1101
|
+
censusSourceUrl: z.string().url(),
|
|
1102
|
+
usZipsSourcePath: NullableString,
|
|
1103
|
+
warnings: z.array(z.string()),
|
|
1104
|
+
extractedAt: z.string(),
|
|
1105
|
+
selectedCityCount: z.number().int().min(0),
|
|
1106
|
+
totalResultCount: z.number().int().min(0),
|
|
1107
|
+
csvPath: NullableString,
|
|
1108
|
+
cities: z.array(z.object({
|
|
1109
|
+
city: z.string(),
|
|
1110
|
+
state: z.string(),
|
|
1111
|
+
location: z.string(),
|
|
1112
|
+
cityKey: z.string(),
|
|
1113
|
+
censusName: z.string(),
|
|
1114
|
+
population: z.number().int().min(0),
|
|
1115
|
+
populationYear: z.number().int().min(2020).max(2025),
|
|
1116
|
+
zips: z.array(z.string()),
|
|
1117
|
+
counties: z.array(z.string()),
|
|
1118
|
+
status: z.enum(["ok", "empty", "failed"]),
|
|
1119
|
+
error: NullableString,
|
|
1120
|
+
resultCount: z.number().int().min(0),
|
|
1121
|
+
durationMs: z.number().int().min(0),
|
|
1122
|
+
results: z.array(DirectoryMapsBusinessOutput)
|
|
1123
|
+
})),
|
|
1124
|
+
durationMs: z.number().int().min(0)
|
|
1125
|
+
};
|
|
981
1126
|
var OrganicResultOutput = z.object({
|
|
982
1127
|
position: z.number().int(),
|
|
983
1128
|
title: z.string(),
|
|
@@ -1157,8 +1302,8 @@ var SearchSerpInputSchema = {
|
|
|
1157
1302
|
gl: z.string().length(2).default("us").describe("Google country code inferred from location or user language."),
|
|
1158
1303
|
hl: z.string().default("en").describe("Google interface/content language inferred from user request."),
|
|
1159
1304
|
device: z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use desktop by default; use mobile only when the user asks for mobile rankings."),
|
|
1160
|
-
proxyMode: z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default
|
|
1161
|
-
proxyZip: z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use
|
|
1305
|
+
proxyMode: z.enum(["location", "configured", "none"]).default("location").describe("Proxy targeting mode. Use location by default for US city/state SERPs; it creates a fresh residential proxy ID per attempt and retries CAPTCHA, proxy tunnel failure, and wrong-location evidence before returning. Use configured only for the static configured proxy. Use none only for direct-network debugging."),
|
|
1306
|
+
proxyZip: z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting. Use when the user gives a specific ZIP or when city-center targeting needs to be forced. With proxyMode location this ZIP is used for each fresh proxy attempt."),
|
|
1162
1307
|
debug: z.boolean().default(false).describe("Include sanitized browser/session/location diagnostics in the response. Use true when debugging localization, CAPTCHA, or proxy behavior."),
|
|
1163
1308
|
pages: z.number().int().min(1).max(2).default(1).describe("Number of result pages to fetch (1\u20132)")
|
|
1164
1309
|
};
|
|
@@ -1168,8 +1313,8 @@ var CaptureSerpSnapshotInputSchema = {
|
|
|
1168
1313
|
gl: z.string().length(2).default("us").describe("Google country code inferred from the requested market, e.g. us, gb, ca, au."),
|
|
1169
1314
|
hl: z.string().default("en").describe("Google interface/content language inferred from the user request."),
|
|
1170
1315
|
device: z.enum(["desktop", "mobile"]).default("desktop").describe("SERP device context. Use mobile only when the user asks for mobile rankings or mobile SERP evidence."),
|
|
1171
|
-
proxyMode: z.enum(["location", "configured", "none"]).default("location").describe("Proxy behavior for capture. Use location for localized residential proxy
|
|
1172
|
-
proxyZip: z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting when a precise city-center or ZIP proxy is needed."),
|
|
1316
|
+
proxyMode: z.enum(["location", "configured", "none"]).default("location").describe("Proxy behavior for capture. Use location for localized US residential evidence; it creates a fresh proxy ID per attempt and retries CAPTCHA, proxy tunnel failure, and wrong-location evidence before returning. Use configured only for the static residential proxy, and none only for direct-network debugging."),
|
|
1317
|
+
proxyZip: z.string().regex(/^\d{5}$/).optional().describe("Optional US ZIP override for residential location proxy targeting when a precise city-center or ZIP proxy is needed. With proxyMode location this ZIP is used for each fresh proxy attempt."),
|
|
1173
1318
|
pages: z.number().int().min(1).max(2).default(1).describe("Number of Google result pages to capture. Use 1 normally and 2 only when the user needs deeper ranking evidence."),
|
|
1174
1319
|
debug: z.boolean().default(false).describe("Include sanitized browser, proxy, and location diagnostics. Use true when debugging localization, CAPTCHA, proxy selection, or capture reliability."),
|
|
1175
1320
|
includePageSnapshots: z.boolean().default(false).describe("Also capture ranking-page snapshots for selected SERP URLs through the same product capture path."),
|
|
@@ -1244,14 +1389,14 @@ function buildPaaExtractorMcpServer(executor, options = {}) {
|
|
|
1244
1389
|
if (savesReports) registerSavedReportResources(server);
|
|
1245
1390
|
server.registerTool("harvest_paa", {
|
|
1246
1391
|
title: "Google PAA + SERP Harvest",
|
|
1247
|
-
description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). Use maxQuestions 30 normally, 100-200 for "full", "deep", "all", or comprehensive research. Deep harvests above 100 questions can run for several minutes with no interim progress \u2014 warn the user before starting one and keep maxQuestions at or below 100 unless they explicitly want a deep harvest. Credits are charged by extracted question; unused request hold is refunded.'),
|
|
1392
|
+
description: withReportNote('Best default tool for Google search research. Extracts People Also Ask questions plus answers/source URLs, organic SERP, local pack when present, entity IDs (CID/GCID/KG MID), and AI Overview. Infer the user language: split topic from location (e.g. "best hvac company in Denver CO" => query "best hvac company", location "Denver, CO", gl "us", hl "en"). For US local SERPs, leave proxyMode as location so the service uses fresh residential proxy IDs across retries and rejects wrong-location evidence instead of returning a bad market. Use maxQuestions 30 normally, 100-200 for "full", "deep", "all", or comprehensive research. Deep harvests above 100 questions can run for several minutes with no interim progress \u2014 warn the user before starting one and keep maxQuestions at or below 100 unless they explicitly want a deep harvest. Credits are charged by extracted question; unused request hold is refunded.'),
|
|
1248
1393
|
inputSchema: HarvestPaaInputSchema,
|
|
1249
1394
|
outputSchema: HarvestPaaOutputSchema,
|
|
1250
1395
|
annotations: liveWebToolAnnotations("Google PAA + SERP Harvest")
|
|
1251
1396
|
}, async (input) => formatHarvestPaa(await executor.harvestPaa(input), input));
|
|
1252
1397
|
server.registerTool("search_serp", {
|
|
1253
1398
|
title: "Google SERP Lookup",
|
|
1254
|
-
description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request."),
|
|
1399
|
+
description: withReportNote("Fast Google SERP lookup without PAA expansion. Use when the user asks for rankings, organic results, local pack, quick SERP, or positions. Split topic from location and infer gl/hl from the user request. For US city/state rankings, keep proxyMode as location and pass proxyZip when a city-center ZIP is known; location mode uses fresh residential proxy IDs and retries CAPTCHA, proxy tunnel failures, and wrong-location evidence before returning."),
|
|
1255
1400
|
inputSchema: SearchSerpInputSchema,
|
|
1256
1401
|
outputSchema: SearchSerpOutputSchema,
|
|
1257
1402
|
annotations: liveWebToolAnnotations("Google SERP Lookup")
|
|
@@ -1319,11 +1464,18 @@ function buildPaaExtractorMcpServer(executor, options = {}) {
|
|
|
1319
1464
|
}, async (input) => formatMapsPlaceIntel(await executor.mapsPlaceIntel(input), input));
|
|
1320
1465
|
server.registerTool("maps_search", {
|
|
1321
1466
|
title: "Google Maps Business Search",
|
|
1322
|
-
description: withReportNote('Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.'),
|
|
1467
|
+
description: withReportNote('Search Google Maps for multiple businesses/profiles by category, niche, keyword, or local market. Use this when the user asks for several Google Business Profiles, GMBs, GBPs, leads, prospects, competitors, or "more than the 3-pack." For US city/state Maps searches, keep proxyMode as location so the browser service can create a fresh residential proxy ID for that market; pass proxyZip only when a specific ZIP or city-center ZIP is known. Returns up to 50 candidates with names, place URLs, CIDs when available, ratings, review counts, and profile metadata. Default maxResults is 10; maximum is 50. Use maps_place_intel afterward only when a selected business needs full details and reviews.'),
|
|
1323
1468
|
inputSchema: MapsSearchInputSchema,
|
|
1324
1469
|
outputSchema: MapsSearchOutputSchema,
|
|
1325
1470
|
annotations: liveWebToolAnnotations("Google Maps Business Search")
|
|
1326
1471
|
}, async (input) => formatMapsSearch(await executor.mapsSearch(input), input));
|
|
1472
|
+
server.registerTool("directory_workflow", {
|
|
1473
|
+
title: "Directory Workflow: Markets + Maps",
|
|
1474
|
+
description: withReportNote('Build directory/prospecting datasets by selecting US city markets from the free Census Population Estimates city/place dataset, optionally joining configured US ZIPS/Lead Magician ZIP groups, then running Google Maps business searches for each city in parallel. Use this when the user wants "all cities over 100k population in a state", "build a directory CSV", "find markets then get Maps data", or similar location-database + Maps workflows. Set minPopulation, state, query, maxResultsPerCity, and concurrency. Use concurrency up to 5 for parallel city sessions. Keep proxyMode as location so each city can use a fresh residential proxy ID when the browser service is available; retryable city failures use fresh proxies across attempts. Saved CSV rows include source_location, result_position, business_name, review_stars, category, address, phone, hours_status, website_url, directions_url, place_url, cid, cid_decimal, city population, and ZIP groups. This workflow captures star ratings from Maps list cards, not profile review counts; use maps_place_intel only when a selected profile needs deeper review details. For local Lead Magician ZIP enrichment, set MCP_SCRAPER_USZIPS_CSV_PATH on the API server or pass usZipsCsvPath only in local/test mode.'),
|
|
1475
|
+
inputSchema: DirectoryWorkflowInputSchema,
|
|
1476
|
+
outputSchema: DirectoryWorkflowOutputSchema,
|
|
1477
|
+
annotations: liveWebToolAnnotations("Directory Workflow: Markets + Maps")
|
|
1478
|
+
}, async (input) => formatDirectoryWorkflow(await executor.directoryWorkflow(input), input));
|
|
1327
1479
|
server.registerTool("credits_info", {
|
|
1328
1480
|
title: "MCP Scraper Credits & Costs",
|
|
1329
1481
|
description: "Answer questions about MCP Scraper credits: current credit balance, what a specific tool/action costs, the full cost table, and optionally recent credit ledger entries. Does not expose payment methods or credit card information.",
|
|
@@ -1432,6 +1584,12 @@ var HttpMcpToolExecutor = class {
|
|
|
1432
1584
|
mapsSearch(input) {
|
|
1433
1585
|
return this.call("/maps/search", input);
|
|
1434
1586
|
}
|
|
1587
|
+
directoryWorkflow(input) {
|
|
1588
|
+
const cityCount = typeof input.maxCities === "number" ? input.maxCities : 25;
|
|
1589
|
+
const concurrency = typeof input.concurrency === "number" && input.concurrency > 0 ? input.concurrency : 5;
|
|
1590
|
+
const timeoutMs = this.httpTimeoutOverrideMs ?? Math.min(9e5, Math.max(18e4, Math.ceil(cityCount / concurrency) * 12e4));
|
|
1591
|
+
return this.call("/directory/run", input, timeoutMs);
|
|
1592
|
+
}
|
|
1435
1593
|
creditsInfo(input) {
|
|
1436
1594
|
return this.call("/billing/credits", input);
|
|
1437
1595
|
}
|
|
@@ -1446,10 +1604,11 @@ var HttpMcpToolExecutor = class {
|
|
|
1446
1604
|
export {
|
|
1447
1605
|
harvestTimeoutBudget,
|
|
1448
1606
|
configureReportSaving,
|
|
1607
|
+
outputBaseDir,
|
|
1449
1608
|
CaptureSerpSnapshotInputSchema,
|
|
1450
1609
|
CaptureSerpPageSnapshotsInputSchema,
|
|
1451
1610
|
liveWebToolAnnotations,
|
|
1452
1611
|
buildPaaExtractorMcpServer,
|
|
1453
1612
|
HttpMcpToolExecutor
|
|
1454
1613
|
};
|
|
1455
|
-
//# sourceMappingURL=chunk-
|
|
1614
|
+
//# sourceMappingURL=chunk-KIF4PKFZ.js.map
|