mcp-scraper 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +74 -8
  2. package/dist/bin/api-server.cjs +4691 -3614
  3. package/dist/bin/api-server.cjs.map +1 -1
  4. package/dist/bin/api-server.js +2 -2
  5. package/dist/bin/browser-agent-stdio-server.cjs +85 -8
  6. package/dist/bin/browser-agent-stdio-server.cjs.map +1 -1
  7. package/dist/bin/browser-agent-stdio-server.js +83 -6
  8. package/dist/bin/browser-agent-stdio-server.js.map +1 -1
  9. package/dist/bin/mcp-stdio-server.cjs +170 -12
  10. package/dist/bin/mcp-stdio-server.cjs.map +1 -1
  11. package/dist/bin/mcp-stdio-server.js +3 -3
  12. package/dist/bin/paa-harvest.cjs +223 -74
  13. package/dist/bin/paa-harvest.cjs.map +1 -1
  14. package/dist/bin/paa-harvest.js +2 -2
  15. package/dist/{chunk-GXBT5CDU.js → chunk-IQOCZGJJ.js} +39 -2
  16. package/dist/chunk-IQOCZGJJ.js.map +1 -0
  17. package/dist/{chunk-ZMOWIBMK.js → chunk-M2S27J6Z.js} +9 -2
  18. package/dist/{chunk-ZMOWIBMK.js.map → chunk-M2S27J6Z.js.map} +1 -1
  19. package/dist/{chunk-TM22BLWP.js → chunk-MY3S7EX7.js} +221 -76
  20. package/dist/chunk-MY3S7EX7.js.map +1 -0
  21. package/dist/{chunk-BMVQB3WN.js → chunk-OR7DLLH2.js} +173 -14
  22. package/dist/chunk-OR7DLLH2.js.map +1 -0
  23. package/dist/chunk-XR65SANX.js +7 -0
  24. package/dist/chunk-XR65SANX.js.map +1 -0
  25. package/dist/index.cjs +223 -74
  26. package/dist/index.cjs.map +1 -1
  27. package/dist/index.d.cts +1 -0
  28. package/dist/index.d.ts +1 -0
  29. package/dist/index.js +2 -2
  30. package/dist/{server-ASCMKUQ5.js → server-CJMX2QUM.js} +880 -181
  31. package/dist/server-CJMX2QUM.js.map +1 -0
  32. package/dist/{worker-KJ4A7WIR.js → worker-NAKGTIF5.js} +4 -4
  33. package/package.json +1 -1
  34. package/dist/chunk-2BS7BUEE.js +0 -7
  35. package/dist/chunk-2BS7BUEE.js.map +0 -1
  36. package/dist/chunk-BMVQB3WN.js.map +0 -1
  37. package/dist/chunk-GXBT5CDU.js.map +0 -1
  38. package/dist/chunk-TM22BLWP.js.map +0 -1
  39. package/dist/server-ASCMKUQ5.js.map +0 -1
  40. /package/dist/{worker-KJ4A7WIR.js.map → worker-NAKGTIF5.js.map} +0 -0
@@ -2,8 +2,8 @@
2
2
  import {
3
3
  browserServiceApiKey,
4
4
  harvest
5
- } from "../chunk-TM22BLWP.js";
6
- import "../chunk-ZMOWIBMK.js";
5
+ } from "../chunk-MY3S7EX7.js";
6
+ import "../chunk-M2S27J6Z.js";
7
7
 
8
8
  // src/cli.ts
9
9
  import { Command } from "commander";
@@ -1,7 +1,8 @@
1
1
  import {
2
2
  CaptchaError,
3
+ LocationMismatchError,
3
4
  RequestAbortedError
4
- } from "./chunk-ZMOWIBMK.js";
5
+ } from "./chunk-M2S27J6Z.js";
5
6
  import {
6
7
  finishHarvestAttempt,
7
8
  startHarvestAttempt
@@ -207,6 +208,12 @@ function looksLikeTimeout(err, message) {
207
208
  function looksLikeCaptcha(message) {
208
209
  return /captcha|recaptcha|unusual traffic|google\.com\/sorry|blocked/i.test(message);
209
210
  }
211
+ function looksLikeProxyTunnelFailure(message) {
212
+ return /ERR_TUNNEL_CONNECTION_FAILED|ERR_PROXY_CONNECTION_FAILED|ERR_SOCKS_CONNECTION_FAILED|tunnel connection failed|proxy connection failed|transport error: proxy/i.test(message);
213
+ }
214
+ function looksLikeProxyUnavailable(message) {
215
+ return /proxy unavailable|proxy_unavailable|connection_test_failed|did not return a proxy id|configured fallback/i.test(message);
216
+ }
210
217
  function classifyHarvestProblem(err) {
211
218
  const message = errorMessage(err);
212
219
  if (err instanceof RequestAbortedError) {
@@ -229,6 +236,36 @@ function classifyHarvestProblem(err) {
229
236
  terminalStatus: "failed"
230
237
  };
231
238
  }
239
+ if (err instanceof LocationMismatchError) {
240
+ return {
241
+ error_code: "location_mismatch",
242
+ error_type: "location_mismatch",
243
+ message,
244
+ retryable: true,
245
+ httpStatus: 503,
246
+ terminalStatus: "failed"
247
+ };
248
+ }
249
+ if (looksLikeProxyTunnelFailure(message)) {
250
+ return {
251
+ error_code: "proxy_tunnel_failed",
252
+ error_type: "proxy_tunnel_failed",
253
+ message,
254
+ retryable: true,
255
+ httpStatus: 503,
256
+ terminalStatus: "failed"
257
+ };
258
+ }
259
+ if (looksLikeProxyUnavailable(message)) {
260
+ return {
261
+ error_code: "proxy_unavailable",
262
+ error_type: "proxy_unavailable",
263
+ message,
264
+ retryable: true,
265
+ httpStatus: 503,
266
+ terminalStatus: "failed"
267
+ };
268
+ }
232
269
  if (looksLikeTimeout(err, message)) {
233
270
  return {
234
271
  error_code: "harvest_timeout",
@@ -319,4 +356,4 @@ export {
319
356
  harvestProblemResponse,
320
357
  createHarvestAttemptRecorder
321
358
  };
322
- //# sourceMappingURL=chunk-GXBT5CDU.js.map
359
+ //# sourceMappingURL=chunk-IQOCZGJJ.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/api/rates.ts","../src/api/harvest-problems.ts","../src/api/harvest-attempt-events.ts"],"sourcesContent":["export const MC_COSTS = {\n serp: 100,\n paa: 100,\n page_scrape: 100,\n url_map: 2_000,\n yt_channel: 50,\n yt_transcription: 200,\n fb_ad: 50,\n maps_search: 2_000,\n maps_place: 2_000,\n maps_review: 50,\n fb_search: 50,\n fb_transcribe: 50,\n browser_minute: 4_000,\n} as const\n\nexport type McCostKey = keyof typeof MC_COSTS\n\nexport const MC_PER_BROWSER_MS = MC_COSTS.browser_minute / 60_000\n\nexport function browserActiveCostMc(activeMs: number): number {\n return Math.round(activeMs * MC_PER_BROWSER_MS)\n}\n\nexport const BROWSER_OPEN_MIN_BALANCE_MC = 1_000\n\nexport const MC_PER_CREDIT = 1_000\n\nexport const CREDIT_COST_CATALOG: Array<{\n key: McCostKey\n label: string\n aliases: string[]\n credits: number\n unit: string\n notes?: string\n}> = [\n {\n key: 'serp',\n label: 'SERP search',\n aliases: ['search_serp', 'serp', 'google search', 'organic results'],\n credits: mcToCredits(MC_COSTS.serp),\n unit: 'per search',\n notes: 'Returns AI Overview, PAA snippet, videos, forums, and local pack.',\n },\n {\n key: 'paa',\n label: 'PAA harvest',\n aliases: ['harvest_paa', 'paa', 'people also ask', 'questions'],\n credits: mcToCredits(MC_COSTS.paa),\n unit: 'per extracted question',\n notes: 'Includes full SERP feature extraction. Billed on actual questions returned — no cap enforced.',\n },\n {\n key: 'page_scrape',\n label: 'Page crawl / extract',\n aliases: ['extract_url', 'extract_site', 'page scrape', 'url scrape', 'single page', 'site crawl'],\n credits: mcToCredits(MC_COSTS.page_scrape),\n unit: 'per page',\n notes: 'Applies to both single-URL extraction and per-page site crawls.',\n },\n {\n key: 'url_map',\n label: 'Site URL mapping',\n aliases: ['map_site_urls', 'url map', 'site map', 'crawl urls'],\n credits: mcToCredits(MC_COSTS.url_map),\n unit: 'per mapping operation',\n notes: 'Flat rate for the full /map-urls call regardless of URL count discovered.',\n },\n {\n key: 'yt_channel',\n label: 'YouTube search / channel harvest',\n aliases: ['youtube_harvest', 'youtube search', 'youtube channel', 'yt_channel'],\n credits: mcToCredits(MC_COSTS.yt_channel),\n unit: 'per call',\n },\n {\n key: 'yt_transcription',\n label: 'YouTube transcription',\n aliases: ['youtube_transcribe', 'youtube transcript', 'transcription', 'yt_transcription'],\n credits: mcToCredits(MC_COSTS.yt_transcription),\n unit: 'per minute',\n notes: 'A 5-minute hold is taken, then reconciled to actual video duration.',\n },\n {\n key: 'fb_ad',\n label: 'Facebook search / ad lookup',\n aliases: ['facebook_page_intel', 'facebook_ad_search', 'facebook_ad', 'facebook ads', 'fb ads'],\n credits: mcToCredits(MC_COSTS.fb_ad),\n unit: 'per call',\n },\n {\n key: 'maps_search',\n label: 'Maps business search',\n aliases: ['maps_search', 'google maps search', 'gmb search', 'gbp search', 'business profiles'],\n credits: mcToCredits(MC_COSTS.maps_search),\n unit: 'per search',\n notes: 'Returns up to 50 Google Maps business/profile candidates. Use maps_place_intel to hydrate selected businesses.',\n },\n {\n key: 'maps_place',\n label: 'Maps business lookup',\n aliases: ['maps_place_intel', 'google maps', 'maps place', 'place intel'],\n credits: mcToCredits(MC_COSTS.maps_place),\n unit: 'per business',\n notes: 'Base lookup. Reviews billed separately per card at maps_review rate.',\n },\n {\n key: 'maps_review',\n label: 'Maps review',\n aliases: ['maps_reviews', 'google reviews', 'review cards', 'reviews'],\n credits: mcToCredits(MC_COSTS.maps_review),\n unit: 'per review card',\n notes: 'Charged after extraction when includeReviews is true.',\n },\n {\n key: 'fb_search',\n label: 'Facebook ad library search',\n aliases: ['facebook_search', 'fb_search', 'fb ad search'],\n credits: mcToCredits(MC_COSTS.fb_search),\n unit: 'per search',\n notes: 'Browser automation to search Facebook Ads Library by keyword.',\n },\n {\n key: 'fb_transcribe',\n label: 'Facebook ad transcription',\n aliases: ['facebook_transcribe', 'fb_transcribe', 'fb ad transcript'],\n credits: mcToCredits(MC_COSTS.fb_transcribe),\n unit: 'per call',\n notes: 'Whisper transcription of Facebook ad video via fal.ai.',\n },\n {\n key: 'browser_minute',\n label: 'Interactive browser session',\n aliases: ['browser_open', 'browser agent', 'browser_agent', 'live browser', 'browse', 'browser control', 'interactive browser'],\n credits: mcToCredits(MC_COSTS.browser_minute),\n unit: 'per minute of active time',\n notes: 'Metered per second of active browser work (navigation, clicks, typing, screenshots). Idle and standby time are free. Billed against your balance as you act; close the session to stop the meter.',\n },\n]\n\nexport const CONCURRENCY_PRICE_ID = 'price_1Ta1NRS8aAcsk3TGwsRnYbix'\n\nexport const FREE_SIGNUP_MC = 500_000\nexport const FREE_MONTHLY_REFRESH_MC = 250_000\n\nexport const BALANCE_PRICE_IDS: Record<string, number> = {\n 'price_1TZx6rS8aAcsk3TGNMc1Vgpo': 11_000_000,\n 'price_1TZx6sS8aAcsk3TGxgqB7khO': 27_500_000,\n 'price_1TZx6tS8aAcsk3TG8PnJqHlG': 60_500_000,\n 'price_1TZx6tS8aAcsk3TGNgRMpy0e': 121_000_000,\n}\n\nexport const BALANCE_PACK_LABELS: Record<string, string> = {\n 'price_1TZx6rS8aAcsk3TGNMc1Vgpo': '$10',\n 'price_1TZx6sS8aAcsk3TGxgqB7khO': '$25',\n 'price_1TZx6tS8aAcsk3TG8PnJqHlG': '$50',\n 'price_1TZx6tS8aAcsk3TGNgRMpy0e': '$100',\n}\n\nexport function mcToCredits(mc: number): number {\n return mc / MC_PER_CREDIT\n}\n\nexport function insufficientBalanceResponse(balanceMc: number, requiredMc: number) {\n const topupUrl = process.env.TOPUP_URL ?? 'https://mcpscraper.dev/billing'\n const balanceCredits = mcToCredits(balanceMc)\n const requiredCredits = mcToCredits(requiredMc)\n return {\n error: 'insufficient_balance',\n error_code: 'insufficient_balance' as const,\n message: `Insufficient credits. Balance: ${balanceCredits} credits. This call requires ${requiredCredits} credits. Top up at ${topupUrl}`,\n balance_credits: balanceCredits,\n required_credits: requiredCredits,\n topup_url: topupUrl,\n }\n}\n\nexport const LedgerOperation = {\n TOPUP: 'topup',\n SIGNUP_GRANT: 'signup_grant',\n MONTHLY_REFRESH: 'monthly_free_refresh',\n PAA: 'paa',\n PAA_REFUND: 'paa_refund',\n SERP: 'serp',\n REFUND: 'refund',\n TRANSCRIPTION: 'transcription',\n TRANSCRIPTION_HOLD: 'transcription_hold',\n TRANSCRIPTION_REFUND: 'transcription_refund',\n YT_CHANNEL: 'yt_channel',\n FB_AD: 'fb_ad',\n MAPS_SEARCH: 'maps_search',\n MAPS_PLACE: 'maps_place',\n MAPS_REVIEW: 'maps_review',\n MAPS_REVIEW_REFUND: 'maps_review_refund',\n EXTRACT_SITE: 'extract_site',\n EXTRACT_SITE_REFUND: 'extract_site_refund',\n EXTRACT_URL: 'page_scrape',\n URL_MAP: 'url_map',\n EXTRACT_SITE_HOLD: 'extract_site_hold',\n YT_CHANNEL_REFUND: 'yt_channel_refund',\n FB_AD_REFUND: 'fb_ad_refund',\n URL_MAP_REFUND: 'url_map_refund',\n FB_SEARCH: 'fb_search',\n FB_TRANSCRIBE: 'fb_transcribe',\n FB_SEARCH_REFUND: 'fb_search_refund',\n FB_TRANSCRIBE_REFUND: 'fb_transcribe_refund',\n BROWSER_SESSION: 'browser_session',\n} as const\n\nexport type LedgerOperation = typeof LedgerOperation[keyof typeof LedgerOperation]\n","import { CaptchaError, LocationMismatchError, RequestAbortedError } from '../errors.js'\n\nexport type HarvestProblemCode =\n | 'request_aborted'\n | 'captcha_exhausted'\n | 'location_mismatch'\n | 'proxy_tunnel_failed'\n | 'proxy_unavailable'\n | 'harvest_timeout'\n | 'extraction_failed'\n\nexport interface HarvestProblem {\n error_code: HarvestProblemCode\n error_type: string\n message: string\n retryable: boolean\n httpStatus: number\n terminalStatus: 'cancelled' | 'failed'\n}\n\nfunction errorMessage(err: unknown): string {\n return err instanceof Error ? err.message : String(err)\n}\n\nfunction looksLikeTimeout(err: unknown, message: string): boolean {\n if (err instanceof DOMException && (err.name === 'TimeoutError' || err.name === 'AbortError')) return true\n return /timeout|timed out|Timeout \\d+ms exceeded|deadline/i.test(message)\n}\n\nfunction looksLikeCaptcha(message: string): boolean {\n return /captcha|recaptcha|unusual traffic|google\\.com\\/sorry|blocked/i.test(message)\n}\n\nfunction looksLikeProxyTunnelFailure(message: string): boolean {\n return /ERR_TUNNEL_CONNECTION_FAILED|ERR_PROXY_CONNECTION_FAILED|ERR_SOCKS_CONNECTION_FAILED|tunnel connection failed|proxy connection failed|transport error: proxy/i.test(message)\n}\n\nfunction looksLikeProxyUnavailable(message: string): boolean {\n return /proxy unavailable|proxy_unavailable|connection_test_failed|did not return a proxy id|configured fallback/i.test(message)\n}\n\nexport function classifyHarvestProblem(err: unknown): HarvestProblem {\n const message = errorMessage(err)\n\n if (err instanceof RequestAbortedError) {\n return {\n error_code: 'request_aborted',\n error_type: 'request_aborted',\n message,\n retryable: true,\n httpStatus: 408,\n terminalStatus: 'cancelled',\n }\n }\n\n if (err instanceof CaptchaError || looksLikeCaptcha(message)) {\n return {\n error_code: 'captcha_exhausted',\n error_type: 'captcha',\n message,\n retryable: true,\n httpStatus: 503,\n terminalStatus: 'failed',\n }\n }\n\n if (err instanceof LocationMismatchError) {\n return {\n error_code: 'location_mismatch',\n error_type: 'location_mismatch',\n message,\n retryable: true,\n httpStatus: 503,\n terminalStatus: 'failed',\n }\n }\n\n if (looksLikeProxyTunnelFailure(message)) {\n return {\n error_code: 'proxy_tunnel_failed',\n error_type: 'proxy_tunnel_failed',\n message,\n retryable: true,\n httpStatus: 503,\n terminalStatus: 'failed',\n }\n }\n\n if (looksLikeProxyUnavailable(message)) {\n return {\n error_code: 'proxy_unavailable',\n error_type: 'proxy_unavailable',\n message,\n retryable: true,\n httpStatus: 503,\n terminalStatus: 'failed',\n }\n }\n\n if (looksLikeTimeout(err, message)) {\n return {\n error_code: 'harvest_timeout',\n error_type: 'timeout',\n message,\n retryable: true,\n httpStatus: 504,\n terminalStatus: 'failed',\n }\n }\n\n return {\n error_code: 'extraction_failed',\n error_type: 'extraction',\n message,\n retryable: false,\n httpStatus: 500,\n terminalStatus: 'failed',\n }\n}\n\nexport function serializeHarvestProblem(problem: HarvestProblem): string {\n return JSON.stringify({\n error_code: problem.error_code,\n error_type: problem.error_type,\n message: problem.message,\n retryable: problem.retryable,\n })\n}\n\nexport function harvestProblemResponse(problem: HarvestProblem): {\n error: string\n error_code: HarvestProblemCode\n error_type: string\n retryable: boolean\n} {\n return {\n error: problem.message,\n error_code: problem.error_code,\n error_type: problem.error_type,\n retryable: problem.retryable,\n }\n}\n","import type { HarvestAttemptLogEvent } from '../harvest.js'\nimport { finishHarvestAttempt, startHarvestAttempt } from './db.js'\n\nexport function createHarvestAttemptRecorder(jobId: string, userId: number | bigint) {\n return async (event: HarvestAttemptLogEvent): Promise<void> => {\n if (event.type === 'started') {\n await startHarvestAttempt({\n jobId,\n userId,\n attemptNumber: event.attemptNumber,\n maxAttempts: event.maxAttempts,\n query: event.query,\n location: event.location,\n maxQuestions: event.maxQuestions,\n startedAt: event.startedAt,\n })\n return\n }\n\n await finishHarvestAttempt({\n jobId,\n attemptNumber: event.attemptNumber,\n outcome: event.outcome,\n kernelSessionId: event.kernelSessionId,\n questionCount: event.questionCount,\n durationMs: event.durationMs,\n error: event.error,\n willRetry: event.willRetry,\n kernelDeleteStarted: event.cleanup.kernelDeleteStarted,\n kernelDeleteSucceeded: event.cleanup.kernelDeleteSucceeded,\n kernelDeleteError: event.cleanup.kernelDeleteError,\n browserCloseSucceeded: event.cleanup.browserCloseSucceeded,\n browserCloseError: event.cleanup.browserCloseError,\n debug: event.debug,\n completedAt: event.completedAt,\n })\n }\n}\n"],"mappings":";;;;;;;;;;;AAAO,IAAM,WAAW;AAAA,EACtB,MAAmB;AAAA,EACnB,KAAmB;AAAA,EACnB,aAAmB;AAAA,EACnB,SAAiB;AAAA,EACjB,YAAoB;AAAA,EACpB,kBAAmB;AAAA,EACnB,OAAoB;AAAA,EACpB,aAAkB;AAAA,EAClB,YAAiB;AAAA,EACjB,aAAoB;AAAA,EACpB,WAAoB;AAAA,EACpB,eAAoB;AAAA,EACpB,gBAAiB;AACnB;AAIO,IAAM,oBAAoB,SAAS,iBAAiB;AAEpD,SAAS,oBAAoB,UAA0B;AAC5D,SAAO,KAAK,MAAM,WAAW,iBAAiB;AAChD;AAEO,IAAM,8BAA8B;AAEpC,IAAM,gBAAgB;AAEtB,IAAM,sBAOR;AAAA,EACH;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,eAAe,QAAQ,iBAAiB,iBAAiB;AAAA,IACnE,SAAS,YAAY,SAAS,IAAI;AAAA,IAClC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,eAAe,OAAO,mBAAmB,WAAW;AAAA,IAC9D,SAAS,YAAY,SAAS,GAAG;AAAA,IACjC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,eAAe,gBAAgB,eAAe,cAAc,eAAe,YAAY;AAAA,IACjG,SAAS,YAAY,SAAS,WAAW;AAAA,IACzC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,iBAAiB,WAAW,YAAY,YAAY;AAAA,IAC9D,SAAS,YAAY,SAAS,OAAO;AAAA,IACrC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,mBAAmB,kBAAkB,mBAAmB,YAAY;AAAA,IAC9E,SAAS,YAAY,SAAS,UAAU;AAAA,IACxC,MAAM;AAAA,EACR;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,sBAAsB,sBAAsB,iBAAiB,kBAAkB;AAAA,IACzF,SAAS,YAAY,SAAS,gBAAgB;AAAA,IAC9C,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,uBAAuB,sBAAsB,eAAe,gBAAgB,QAAQ;AAAA,IAC9F,SAAS,YAAY,SAAS,KAAK;AAAA,IACnC,MAAM;AAAA,EACR;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,eAAe,sBAAsB,cAAc,cAAc,mBAAmB;AAAA,IAC9F,SAAS,YAAY,SAAS,WAAW;AAAA,IACzC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,oBAAoB,eAAe,cAAc,aAAa;AAAA,IACxE,SAAS,YAAY,SAAS,UAAU;AAAA,IACxC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,gBAAgB,kBAAkB,gBAAgB,SAAS;AAAA,IACrE,SAAS,YAAY,SAAS,WAAW;AAAA,IACzC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,mBAAmB,aAAa,cAAc;AAAA,IACxD,SAAS,YAAY,SAAS,SAAS;AAAA,IACvC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,uBAAuB,iBAAiB,kBAAkB;AAAA,IACpE,SAAS,YAAY,SAAS,aAAa;AAAA,IAC3C,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,gBAAgB,iBAAiB,iBAAiB,gBAAgB,UAAU,mBAAmB,qBAAqB;AAAA,IAC9H,SAAS,YAAY,SAAS,cAAc;AAAA,IAC5C,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AACF;AAEO,IAAM,uBAAuB;AAE7B,IAAM,iBAAiB;AACvB,IAAM,0BAA0B;AAEhC,IAAM,oBAA4C;AAAA,EACvD,kCAAkC;AAAA,EAClC,kCAAkC;AAAA,EAClC,kCAAkC;AAAA,EAClC,kCAAkC;AACpC;AAEO,IAAM,sBAA8C;AAAA,EACzD,kCAAkC;AAAA,EAClC,kCAAkC;AAAA,EAClC,kCAAkC;AAAA,EAClC,kCAAkC;AACpC;AAEO,SAAS,YAAY,IAAoB;AAC9C,SAAO,KAAK;AACd;AAEO,SAAS,4BAA4B,WAAmB,YAAoB;AACjF,QAAM,WAAW,QAAQ,IAAI,aAAa;AAC1C,QAAM,iBAAiB,YAAY,SAAS;AAC5C,QAAM,kBAAkB,YAAY,UAAU;AAC9C,SAAO;AAAA,IACL,OAAO;AAAA,IACP,YAAY;AAAA,IACZ,SAAS,kCAAkC,cAAc,gCAAgC,eAAe,uBAAuB,QAAQ;AAAA,IACvI,iBAAiB;AAAA,IACjB,kBAAkB;AAAA,IAClB,WAAW;AAAA,EACb;AACF;AAEO,IAAM,kBAAkB;AAAA,EAC7B,OAAuB;AAAA,EACvB,cAAuB;AAAA,EACvB,iBAAuB;AAAA,EACvB,KAAuB;AAAA,EACvB,YAAuB;AAAA,EACvB,MAAuB;AAAA,EACvB,QAAuB;AAAA,EACvB,eAAuB;AAAA,EACvB,oBAAuB;AAAA,EACvB,sBAAuB;AAAA,EACvB,YAAuB;AAAA,EACvB,OAAuB;AAAA,EACvB,aAAuB;AAAA,EACvB,YAAuB;AAAA,EACvB,aAAuB;AAAA,EACvB,oBAAuB;AAAA,EACvB,cAAuB;AAAA,EACvB,qBAAuB;AAAA,EACvB,aAAuB;AAAA,EACvB,SAAuB;AAAA,EACvB,mBAAuB;AAAA,EACvB,mBAAuB;AAAA,EACvB,cAAuB;AAAA,EACvB,gBAAuB;AAAA,EACvB,WAAuB;AAAA,EACvB,eAAuB;AAAA,EACvB,kBAAuB;AAAA,EACvB,sBAAuB;AAAA,EACvB,iBAAuB;AACzB;;;AC3LA,SAAS,aAAa,KAAsB;AAC1C,SAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AACxD;AAEA,SAAS,iBAAiB,KAAc,SAA0B;AAChE,MAAI,eAAe,iBAAiB,IAAI,SAAS,kBAAkB,IAAI,SAAS,cAAe,QAAO;AACtG,SAAO,qDAAqD,KAAK,OAAO;AAC1E;AAEA,SAAS,iBAAiB,SAA0B;AAClD,SAAO,gEAAgE,KAAK,OAAO;AACrF;AAEA,SAAS,4BAA4B,SAA0B;AAC7D,SAAO,gKAAgK,KAAK,OAAO;AACrL;AAEA,SAAS,0BAA0B,SAA0B;AAC3D,SAAO,4GAA4G,KAAK,OAAO;AACjI;AAEO,SAAS,uBAAuB,KAA8B;AACnE,QAAM,UAAU,aAAa,GAAG;AAEhC,MAAI,eAAe,qBAAqB;AACtC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,eAAe,gBAAgB,iBAAiB,OAAO,GAAG;AAC5D,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,eAAe,uBAAuB;AACxC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,4BAA4B,OAAO,GAAG;AACxC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,0BAA0B,OAAO,GAAG;AACtC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,iBAAiB,KAAK,OAAO,GAAG;AAClC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,YAAY;AAAA,IACZ,YAAY;AAAA,IACZ;AAAA,IACA,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,gBAAgB;AAAA,EAClB;AACF;AAEO,SAAS,wBAAwB,SAAiC;AACvE,SAAO,KAAK,UAAU;AAAA,IACpB,YAAY,QAAQ;AAAA,IACpB,YAAY,QAAQ;AAAA,IACpB,SAAS,QAAQ;AAAA,IACjB,WAAW,QAAQ;AAAA,EACrB,CAAC;AACH;AAEO,SAAS,uBAAuB,SAKrC;AACA,SAAO;AAAA,IACL,OAAO,QAAQ;AAAA,IACf,YAAY,QAAQ;AAAA,IACpB,YAAY,QAAQ;AAAA,IACpB,WAAW,QAAQ;AAAA,EACrB;AACF;;;AC1IO,SAAS,6BAA6B,OAAe,QAAyB;AACnF,SAAO,OAAO,UAAiD;AAC7D,QAAI,MAAM,SAAS,WAAW;AAC5B,YAAM,oBAAoB;AAAA,QACxB;AAAA,QACA;AAAA,QACA,eAAe,MAAM;AAAA,QACrB,aAAa,MAAM;AAAA,QACnB,OAAO,MAAM;AAAA,QACb,UAAU,MAAM;AAAA,QAChB,cAAc,MAAM;AAAA,QACpB,WAAW,MAAM;AAAA,MACnB,CAAC;AACD;AAAA,IACF;AAEA,UAAM,qBAAqB;AAAA,MACzB;AAAA,MACA,eAAe,MAAM;AAAA,MACrB,SAAS,MAAM;AAAA,MACf,iBAAiB,MAAM;AAAA,MACvB,eAAe,MAAM;AAAA,MACrB,YAAY,MAAM;AAAA,MAClB,OAAO,MAAM;AAAA,MACb,WAAW,MAAM;AAAA,MACjB,qBAAqB,MAAM,QAAQ;AAAA,MACnC,uBAAuB,MAAM,QAAQ;AAAA,MACrC,mBAAmB,MAAM,QAAQ;AAAA,MACjC,uBAAuB,MAAM,QAAQ;AAAA,MACrC,mBAAmB,MAAM,QAAQ;AAAA,MACjC,OAAO,MAAM;AAAA,MACb,aAAa,MAAM;AAAA,IACrB,CAAC;AAAA,EACH;AACF;","names":[]}
@@ -25,12 +25,19 @@ var RequestAbortedError = class extends Error {
25
25
  super(message);
26
26
  }
27
27
  };
28
+ var LocationMismatchError = class extends Error {
29
+ name = "LocationMismatchError";
30
+ constructor(message = "Google returned results for a different location than requested") {
31
+ super(message);
32
+ }
33
+ };
28
34
 
29
35
  export {
30
36
  RECAPTCHA_INSTRUCTIONS,
31
37
  sanitizeVendorName,
32
38
  CaptchaError,
33
39
  ExtractionError,
34
- RequestAbortedError
40
+ RequestAbortedError,
41
+ LocationMismatchError
35
42
  };
36
- //# sourceMappingURL=chunk-ZMOWIBMK.js.map
43
+ //# sourceMappingURL=chunk-M2S27J6Z.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/errors.ts"],"sourcesContent":["export const RECAPTCHA_INSTRUCTIONS = 'Google returned a CAPTCHA. Run with --headless=false to re-warm the browser profile, then retry.'\n\nexport function sanitizeVendorName(message: string): string {\n return message\n .replace(/kernel\\.sh\\s+sessions?/gi, 'sessions')\n .replace(/kernel\\.sh\\s+session/gi, 'this session')\n .replace(/kernel\\.sh/gi, 'the service')\n .replace(/kernel\\s+sessions?/gi, 'sessions')\n .replace(/kernel\\s+session/gi, 'this session')\n .replace(/\\bkernel\\b/gi, 'the service')\n .replace(/ +/g, ' ')\n .trim()\n}\n\nexport class CaptchaError extends Error {\n readonly name = 'CaptchaError'\n constructor(public readonly instructions: string) {\n super(`CAPTCHA detected. ${instructions}`)\n }\n}\n\nexport class ExtractionError extends Error {\n readonly name = 'ExtractionError'\n constructor(message: string, public readonly cause?: unknown) {\n super(message)\n }\n}\n\nexport class RequestAbortedError extends Error {\n readonly name = 'RequestAbortedError'\n constructor(message = 'Request aborted before harvest completed') {\n super(message)\n }\n}\n"],"mappings":";AAAO,IAAM,yBAAyB;AAE/B,SAAS,mBAAmB,SAAyB;AAC1D,SAAO,QACJ,QAAQ,4BAA4B,UAAU,EAC9C,QAAQ,0BAA0B,cAAc,EAChD,QAAQ,gBAAgB,aAAa,EACrC,QAAQ,wBAAwB,UAAU,EAC1C,QAAQ,sBAAsB,cAAc,EAC5C,QAAQ,gBAAgB,aAAa,EACrC,QAAQ,QAAQ,GAAG,EACnB,KAAK;AACV;AAEO,IAAM,eAAN,cAA2B,MAAM;AAAA,EAEtC,YAA4B,cAAsB;AAChD,UAAM,qBAAqB,YAAY,EAAE;AADf;AAAA,EAE5B;AAAA,EAF4B;AAAA,EADnB,OAAO;AAIlB;AAEO,IAAM,kBAAN,cAA8B,MAAM;AAAA,EAEzC,YAAY,SAAiC,OAAiB;AAC5D,UAAM,OAAO;AAD8B;AAAA,EAE7C;AAAA,EAF6C;AAAA,EADpC,OAAO;AAIlB;AAEO,IAAM,sBAAN,cAAkC,MAAM;AAAA,EACpC,OAAO;AAAA,EAChB,YAAY,UAAU,4CAA4C;AAChE,UAAM,OAAO;AAAA,EACf;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/errors.ts"],"sourcesContent":["export const RECAPTCHA_INSTRUCTIONS = 'Google returned a CAPTCHA. Run with --headless=false to re-warm the browser profile, then retry.'\n\nexport function sanitizeVendorName(message: string): string {\n return message\n .replace(/kernel\\.sh\\s+sessions?/gi, 'sessions')\n .replace(/kernel\\.sh\\s+session/gi, 'this session')\n .replace(/kernel\\.sh/gi, 'the service')\n .replace(/kernel\\s+sessions?/gi, 'sessions')\n .replace(/kernel\\s+session/gi, 'this session')\n .replace(/\\bkernel\\b/gi, 'the service')\n .replace(/ +/g, ' ')\n .trim()\n}\n\nexport class CaptchaError extends Error {\n readonly name = 'CaptchaError'\n constructor(public readonly instructions: string) {\n super(`CAPTCHA detected. ${instructions}`)\n }\n}\n\nexport class ExtractionError extends Error {\n readonly name = 'ExtractionError'\n constructor(message: string, public readonly cause?: unknown) {\n super(message)\n }\n}\n\nexport class RequestAbortedError extends Error {\n readonly name = 'RequestAbortedError'\n constructor(message = 'Request aborted before harvest completed') {\n super(message)\n }\n}\n\nexport class LocationMismatchError extends Error {\n readonly name = 'LocationMismatchError'\n constructor(message = 'Google returned results for a different location than requested') {\n super(message)\n }\n}\n"],"mappings":";AAAO,IAAM,yBAAyB;AAE/B,SAAS,mBAAmB,SAAyB;AAC1D,SAAO,QACJ,QAAQ,4BAA4B,UAAU,EAC9C,QAAQ,0BAA0B,cAAc,EAChD,QAAQ,gBAAgB,aAAa,EACrC,QAAQ,wBAAwB,UAAU,EAC1C,QAAQ,sBAAsB,cAAc,EAC5C,QAAQ,gBAAgB,aAAa,EACrC,QAAQ,QAAQ,GAAG,EACnB,KAAK;AACV;AAEO,IAAM,eAAN,cAA2B,MAAM;AAAA,EAEtC,YAA4B,cAAsB;AAChD,UAAM,qBAAqB,YAAY,EAAE;AADf;AAAA,EAE5B;AAAA,EAF4B;AAAA,EADnB,OAAO;AAIlB;AAEO,IAAM,kBAAN,cAA8B,MAAM;AAAA,EAEzC,YAAY,SAAiC,OAAiB;AAC5D,UAAM,OAAO;AAD8B;AAAA,EAE7C;AAAA,EAF6C;AAAA,EADpC,OAAO;AAIlB;AAEO,IAAM,sBAAN,cAAkC,MAAM;AAAA,EACpC,OAAO;AAAA,EAChB,YAAY,UAAU,4CAA4C;AAChE,UAAM,OAAO;AAAA,EACf;AACF;AAEO,IAAM,wBAAN,cAAoC,MAAM;AAAA,EACtC,OAAO;AAAA,EAChB,YAAY,UAAU,mEAAmE;AACvF,UAAM,OAAO;AAAA,EACf;AACF;","names":[]}
@@ -1,10 +1,11 @@
1
1
  import {
2
2
  CaptchaError,
3
3
  ExtractionError,
4
+ LocationMismatchError,
4
5
  RECAPTCHA_INSTRUCTIONS,
5
6
  RequestAbortedError,
6
7
  sanitizeVendorName
7
- } from "./chunk-ZMOWIBMK.js";
8
+ } from "./chunk-M2S27J6Z.js";
8
9
 
9
10
  // src/lib/browser-service-env.ts
10
11
  function browserServiceApiKey() {
@@ -57,8 +58,12 @@ var MapsSearchOptionsSchema = z.object({
57
58
  gl: z.string().length(2).default("us"),
58
59
  hl: z.string().length(2).default("en"),
59
60
  maxResults: z.number().int().min(1).max(50).default(10),
61
+ proxyMode: z.enum(["location", "configured", "none"]).default("location"),
62
+ proxyZip: z.string().regex(/^\d{5}$/).optional(),
63
+ debug: z.boolean().default(false),
60
64
  kernelApiKey: z.string().optional(),
61
65
  kernelProxyId: z.string().optional(),
66
+ kernelProxyResolution: z.unknown().optional(),
62
67
  headless: z.boolean().default(true)
63
68
  });
64
69
  var RawPAAItemSchema = z.object({
@@ -2232,16 +2237,18 @@ var US_CITY_CENTER_ZIPS = {
2232
2237
  function proxyIdSuffix2(proxyId) {
2233
2238
  return proxyId ? proxyId.slice(-6) : null;
2234
2239
  }
2235
- function resolution(source, proxyMode, proxyId, target, error) {
2240
+ function resolution(source, proxyMode, proxyId, target, error, disposable = false) {
2236
2241
  return {
2237
2242
  kernelProxyId: proxyId,
2243
+ ...disposable && proxyId ? { disposableProxyId: proxyId } : {},
2238
2244
  resolution: {
2239
2245
  source,
2240
2246
  proxyMode,
2241
2247
  proxyIdPresent: Boolean(proxyId),
2242
2248
  proxyIdSuffix: proxyIdSuffix2(proxyId),
2243
2249
  target,
2244
- error
2250
+ error,
2251
+ disposable
2245
2252
  }
2246
2253
  };
2247
2254
  }
@@ -2271,6 +2278,10 @@ function kernelCityIdentifierCandidates(city) {
2271
2278
  function proxyName(country, state, city) {
2272
2279
  return city ? `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}-${city}` : `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}`;
2273
2280
  }
2281
+ function freshProxyName(baseName, attemptIndex) {
2282
+ const stamp = `${Date.now()}-${attemptIndex ?? 0}-${Math.random().toString(36).slice(2, 8)}`;
2283
+ return `${baseName}-fresh-${stamp}`;
2284
+ }
2274
2285
  function zipProxyName(zip) {
2275
2286
  return `mcp-serp-residential-us-zip-${zip}`;
2276
2287
  }
@@ -2340,6 +2351,12 @@ function zipTarget(target, zip) {
2340
2351
  }
2341
2352
  };
2342
2353
  }
2354
+ function withProxyName(target, name) {
2355
+ return {
2356
+ ...target,
2357
+ proxyName: name
2358
+ };
2359
+ }
2343
2360
  function configMatches(config, target, city) {
2344
2361
  if (target.level === "zip") {
2345
2362
  return config?.country?.toUpperCase() === target.country && config?.zip === target.zip;
@@ -2378,6 +2395,55 @@ function escalatedTargetLevel(target, attemptIndex) {
2378
2395
  function errorText2(err) {
2379
2396
  return err instanceof Error ? err.message : String(err);
2380
2397
  }
2398
+ function freshTargetCandidates(target, explicitZip, attemptIndex) {
2399
+ const out = [];
2400
+ const zip = knownZipFor(target, explicitZip);
2401
+ if (zip) {
2402
+ const targetZip = zipTarget(target, zip);
2403
+ out.push(withProxyName(targetZip, freshProxyName(targetZip.proxyName, attemptIndex)));
2404
+ }
2405
+ for (const city of target.cityCandidates) {
2406
+ const cityTarget = {
2407
+ ...target,
2408
+ level: "city",
2409
+ city,
2410
+ proxyName: proxyName(target.country, target.state, city),
2411
+ config: {
2412
+ country: target.country,
2413
+ state: target.state,
2414
+ city
2415
+ }
2416
+ };
2417
+ out.push(withProxyName(cityTarget, freshProxyName(cityTarget.proxyName, attemptIndex)));
2418
+ }
2419
+ const fallbackTarget = stateTarget(target);
2420
+ out.push(withProxyName(fallbackTarget, freshProxyName(fallbackTarget.proxyName, attemptIndex)));
2421
+ return out;
2422
+ }
2423
+ async function createFreshLocationProxy(kernel, options, target) {
2424
+ const createErrors = [];
2425
+ for (const candidate of freshTargetCandidates(target, options.proxyZip, options.attemptIndex)) {
2426
+ try {
2427
+ const created = await kernel.proxies.create({
2428
+ type: "residential",
2429
+ name: candidate.proxyName,
2430
+ config: candidate.level === "zip" ? { country: candidate.country, zip: candidate.zip } : candidate.config
2431
+ });
2432
+ if (created.id) {
2433
+ return resolution("location_created", options.proxyMode, created.id, candidate, null, true);
2434
+ }
2435
+ createErrors.push(`${candidate.proxyName}: Kernel did not return a proxy id`);
2436
+ } catch (err) {
2437
+ createErrors.push(`${candidate.proxyName}: ${errorText2(err)}`);
2438
+ }
2439
+ }
2440
+ return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, createErrors.join(" | "));
2441
+ }
2442
+ async function deleteKernelProxyId(kernelApiKey, proxyId) {
2443
+ if (!kernelApiKey || !proxyId) return;
2444
+ const kernel = new Kernel2({ apiKey: kernelApiKey });
2445
+ await kernel.proxies.delete(proxyId);
2446
+ }
2381
2447
  async function resolveKernelProxyId(options) {
2382
2448
  if (options.proxyMode === "none") {
2383
2449
  return resolution("disabled", options.proxyMode, void 0, null, null);
@@ -2392,6 +2458,9 @@ async function resolveKernelProxyId(options) {
2392
2458
  const kernel = new Kernel2({ apiKey: options.kernelApiKey });
2393
2459
  try {
2394
2460
  const attemptIndex = options.attemptIndex ?? 0;
2461
+ if (options.fresh) {
2462
+ return await createFreshLocationProxy(kernel, options, target);
2463
+ }
2395
2464
  if (attemptIndex >= 1) {
2396
2465
  const escalatedTarget = escalatedTargetLevel(target, attemptIndex);
2397
2466
  const createErrors2 = [];
@@ -2495,6 +2564,7 @@ async function resolveKernelProxyId(options) {
2495
2564
 
2496
2565
  // src/harvest.ts
2497
2566
  var MAX_ATTEMPTS = 3;
2567
+ var LOCATION_PROXY_MAX_ATTEMPTS = 5;
2498
2568
  function abortReason(signal) {
2499
2569
  if (signal.reason instanceof DOMException && signal.reason.name === "TimeoutError") return signal.reason;
2500
2570
  return new RequestAbortedError();
@@ -2524,9 +2594,12 @@ async function emitAttemptEvent(sink, event) {
2524
2594
  }
2525
2595
  function classifyAttemptError(err) {
2526
2596
  if (err instanceof CaptchaError) return "captcha";
2597
+ if (err instanceof LocationMismatchError) return "location_mismatch";
2527
2598
  if (err instanceof RequestAbortedError) return "request_aborted";
2528
2599
  if (err instanceof DOMException && (err.name === "TimeoutError" || err.name === "AbortError")) return "timeout";
2529
2600
  const message = err instanceof Error ? err.message : String(err);
2601
+ if (looksLikeProxyTunnelFailure(message)) return "proxy_tunnel_failed";
2602
+ if (looksLikeProxyUnavailable(message)) return "proxy_unavailable";
2530
2603
  return /timeout|timed out|Timeout \d+ms exceeded|deadline/i.test(message) ? "timeout" : "error";
2531
2604
  }
2532
2605
  function classifyAttemptResult(result) {
@@ -2535,6 +2608,49 @@ function classifyAttemptResult(result) {
2535
2608
  function errorMessage(err) {
2536
2609
  return err instanceof Error ? err.message : String(err);
2537
2610
  }
2611
+ function maxAttemptsForProxyMode(proxyMode) {
2612
+ return proxyMode === "location" ? LOCATION_PROXY_MAX_ATTEMPTS : MAX_ATTEMPTS;
2613
+ }
2614
+ function looksLikeProxyTunnelFailure(message) {
2615
+ return /ERR_TUNNEL_CONNECTION_FAILED|ERR_PROXY_CONNECTION_FAILED|ERR_SOCKS_CONNECTION_FAILED|tunnel connection failed|proxy connection failed|transport error: proxy/i.test(message);
2616
+ }
2617
+ function looksLikeProxyUnavailable(message) {
2618
+ return /proxy unavailable|proxy_unavailable|connection_test_failed|did not return a proxy id|configured fallback/i.test(message);
2619
+ }
2620
+ function retryableLocationProxyError(outcome) {
2621
+ return outcome === "captcha" || outcome === "proxy_tunnel_failed" || outcome === "proxy_unavailable";
2622
+ }
2623
+ function locationMismatchMessage(result) {
2624
+ const evidence = result.diagnostics.debug?.locationEvidence;
2625
+ const expected = evidence?.expected?.canonicalLocation ?? result.location ?? "requested location";
2626
+ const candidates = evidence?.candidates.slice(0, 3).map((candidate) => `${candidate.city}, ${candidate.regionCode}`).join("; ");
2627
+ return candidates ? `Google returned results for ${candidates}, not ${expected}` : `Google returned results for a different location than ${expected}`;
2628
+ }
2629
+ function shouldRetryLocationMismatch(result, proxyMode) {
2630
+ return proxyMode === "location" && result.diagnostics.debug?.locationEvidence?.status === "mismatch";
2631
+ }
2632
+ function stripInternalDebug(result, keepDebug) {
2633
+ if (keepDebug || !result.diagnostics.debug) return result;
2634
+ const diagnostics = { ...result.diagnostics };
2635
+ delete diagnostics.debug;
2636
+ return { ...result, diagnostics };
2637
+ }
2638
+ async function cleanupDisposableProxy(kernelApiKey, proxyId) {
2639
+ if (!kernelApiKey || !proxyId) return;
2640
+ try {
2641
+ await deleteKernelProxyId(kernelApiKey, proxyId);
2642
+ console.info(JSON.stringify({
2643
+ event: "kernel_proxy_deleted",
2644
+ proxy_id_suffix: proxyId.slice(-6)
2645
+ }));
2646
+ } catch (err) {
2647
+ console.warn(JSON.stringify({
2648
+ event: "kernel_proxy_delete_failed",
2649
+ proxy_id_suffix: proxyId.slice(-6),
2650
+ message: errorMessage(err)
2651
+ }));
2652
+ }
2653
+ }
2538
2654
  async function extractOnce(options, signal) {
2539
2655
  const driver = new BrowserDriver();
2540
2656
  const reporter = new ProgressReporter();
@@ -2602,26 +2718,35 @@ async function harvest(rawOptions) {
2602
2718
  proxyZip: typeof raw.proxyZip === "string" ? raw.proxyZip : void 0,
2603
2719
  gl: typeof raw.gl === "string" ? raw.gl : "us"
2604
2720
  };
2721
+ const requestedDebug = typeof raw.debug === "boolean" ? raw.debug : false;
2722
+ const needsLocationEvidence = proxyMode === "location" && Boolean(proxyOpts.location);
2723
+ const maxAttempts = maxAttemptsForProxyMode(proxyMode);
2605
2724
  const serializer = new OutputSerializer();
2606
- for (let i = 0; i < MAX_ATTEMPTS; i++) {
2725
+ let lastError = null;
2726
+ for (let i = 0; i < maxAttempts; i++) {
2607
2727
  const attemptNumber = i + 1;
2608
2728
  const startedAtMs = Date.now();
2609
2729
  try {
2610
2730
  if (signal?.aborted) throw abortReason(signal);
2611
- const resolution2 = await resolveKernelProxyId({ ...proxyOpts, attemptIndex: i });
2731
+ const resolution2 = await resolveKernelProxyId({
2732
+ ...proxyOpts,
2733
+ attemptIndex: i,
2734
+ fresh: proxyMode === "location"
2735
+ });
2612
2736
  const mergedAttempt = {
2613
2737
  ...raw,
2614
2738
  kernelApiKey,
2615
2739
  kernelProxyId: resolution2.kernelProxyId,
2616
2740
  kernelProxyResolution: resolution2.resolution,
2617
- proxyMode
2741
+ proxyMode,
2742
+ debug: requestedDebug || needsLocationEvidence
2618
2743
  };
2619
2744
  if (proxyMode === "none") mergedAttempt.kernelProxyId = void 0;
2620
2745
  const attemptOptions = HarvestOptionsSchema.parse(mergedAttempt);
2621
2746
  await emitAttemptEvent(onAttemptEvent, {
2622
2747
  type: "started",
2623
2748
  attemptNumber,
2624
- maxAttempts: MAX_ATTEMPTS,
2749
+ maxAttempts,
2625
2750
  query: attemptOptions.query,
2626
2751
  location: attemptOptions.location ?? null,
2627
2752
  maxQuestions: attemptOptions.maxQuestions,
@@ -2630,7 +2755,7 @@ async function harvest(rawOptions) {
2630
2755
  console.info(JSON.stringify({
2631
2756
  event: "harvest_attempt_started",
2632
2757
  attempt_number: attemptNumber,
2633
- max_attempts: MAX_ATTEMPTS,
2758
+ max_attempts: maxAttempts,
2634
2759
  query: attemptOptions.query,
2635
2760
  location: attemptOptions.location ?? null,
2636
2761
  max_questions: attemptOptions.maxQuestions
@@ -2638,57 +2763,84 @@ async function harvest(rawOptions) {
2638
2763
  const attempt = await extractOnce(attemptOptions, signal);
2639
2764
  if (attempt.error) {
2640
2765
  const err = attempt.error;
2641
- if (err instanceof CaptchaError) {
2642
- const willRetry = i < MAX_ATTEMPTS - 1;
2766
+ const outcome = classifyAttemptError(err);
2767
+ const willRetry = i < maxAttempts - 1 && (outcome === "captcha" || proxyMode === "location" && retryableLocationProxyError(outcome));
2768
+ if (outcome === "captcha") {
2643
2769
  console.warn(JSON.stringify({
2644
2770
  event: "harvest_attempt_captcha",
2645
2771
  attempt_number: attemptNumber,
2646
- max_attempts: MAX_ATTEMPTS,
2647
- message: err.message,
2772
+ max_attempts: maxAttempts,
2773
+ message: errorMessage(err),
2774
+ will_retry: willRetry
2775
+ }));
2776
+ } else if (willRetry) {
2777
+ console.warn(JSON.stringify({
2778
+ event: "harvest_attempt_proxy_retry",
2779
+ attempt_number: attemptNumber,
2780
+ max_attempts: maxAttempts,
2781
+ outcome,
2782
+ message: errorMessage(err),
2648
2783
  will_retry: willRetry
2649
2784
  }));
2650
- await emitAttemptEvent(onAttemptEvent, {
2651
- type: "finished",
2652
- attemptNumber,
2653
- maxAttempts: MAX_ATTEMPTS,
2654
- outcome: "captcha",
2655
- kernelSessionId: attempt.cleanup.kernelSessionId,
2656
- questionCount: 0,
2657
- durationMs: Date.now() - startedAtMs,
2658
- error: err.message,
2659
- willRetry,
2660
- cleanup: attempt.cleanup,
2661
- debug: attempt.debug,
2662
- completedAt: (/* @__PURE__ */ new Date()).toISOString()
2663
- });
2664
- if (willRetry) continue;
2665
- break;
2666
2785
  }
2667
2786
  await emitAttemptEvent(onAttemptEvent, {
2668
2787
  type: "finished",
2669
2788
  attemptNumber,
2670
- maxAttempts: MAX_ATTEMPTS,
2671
- outcome: classifyAttemptError(err),
2789
+ maxAttempts,
2790
+ outcome,
2672
2791
  kernelSessionId: attempt.cleanup.kernelSessionId,
2673
2792
  questionCount: 0,
2674
2793
  durationMs: Date.now() - startedAtMs,
2675
2794
  error: errorMessage(err),
2676
- willRetry: false,
2795
+ willRetry,
2677
2796
  cleanup: attempt.cleanup,
2678
2797
  debug: attempt.debug,
2679
2798
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
2680
2799
  });
2681
- throw err;
2800
+ await cleanupDisposableProxy(kernelApiKey, resolution2.disposableProxyId);
2801
+ lastError = err;
2802
+ if (willRetry) continue;
2803
+ break;
2682
2804
  }
2683
2805
  const result = attempt.result;
2684
2806
  if (!result) throw new Error("Harvest attempt completed without a result");
2807
+ if (shouldRetryLocationMismatch(result, proxyMode)) {
2808
+ const err = new LocationMismatchError(locationMismatchMessage(result));
2809
+ const willRetry = i < maxAttempts - 1;
2810
+ console.warn(JSON.stringify({
2811
+ event: "harvest_attempt_location_mismatch",
2812
+ attempt_number: attemptNumber,
2813
+ max_attempts: maxAttempts,
2814
+ message: err.message,
2815
+ will_retry: willRetry
2816
+ }));
2817
+ await emitAttemptEvent(onAttemptEvent, {
2818
+ type: "finished",
2819
+ attemptNumber,
2820
+ maxAttempts,
2821
+ outcome: "location_mismatch",
2822
+ kernelSessionId: attempt.cleanup.kernelSessionId,
2823
+ questionCount: result.totalQuestions,
2824
+ durationMs: Date.now() - startedAtMs,
2825
+ error: err.message,
2826
+ willRetry,
2827
+ cleanup: attempt.cleanup,
2828
+ debug: attempt.debug,
2829
+ completedAt: (/* @__PURE__ */ new Date()).toISOString()
2830
+ });
2831
+ await cleanupDisposableProxy(kernelApiKey, resolution2.disposableProxyId);
2832
+ lastError = err;
2833
+ if (willRetry) continue;
2834
+ break;
2835
+ }
2836
+ const finalResult = stripInternalDebug(result, requestedDebug);
2685
2837
  await emitAttemptEvent(onAttemptEvent, {
2686
2838
  type: "finished",
2687
2839
  attemptNumber,
2688
- maxAttempts: MAX_ATTEMPTS,
2689
- outcome: classifyAttemptResult(result),
2840
+ maxAttempts,
2841
+ outcome: classifyAttemptResult(finalResult),
2690
2842
  kernelSessionId: attempt.cleanup.kernelSessionId,
2691
- questionCount: result.totalQuestions,
2843
+ questionCount: finalResult.totalQuestions,
2692
2844
  durationMs: Date.now() - startedAtMs,
2693
2845
  error: null,
2694
2846
  willRetry: false,
@@ -2696,64 +2848,52 @@ async function harvest(rawOptions) {
2696
2848
  debug: attempt.debug,
2697
2849
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
2698
2850
  });
2851
+ await cleanupDisposableProxy(kernelApiKey, resolution2.disposableProxyId);
2699
2852
  if (attemptOptions.format === "json" || attemptOptions.format === "both") {
2700
- await serializer.writeJSON(result, attemptOptions.outputDir);
2853
+ await serializer.writeJSON(finalResult, attemptOptions.outputDir);
2701
2854
  }
2702
2855
  if (attemptOptions.format === "csv" || attemptOptions.format === "both") {
2703
2856
  await Promise.all([
2704
- serializer.writeCSV(result.flat, attemptOptions.outputDir),
2705
- result.videos.length > 0 ? serializer.writeVideoCSV(result.videos, result.seed, attemptOptions.outputDir) : Promise.resolve(""),
2706
- result.forums.length > 0 ? serializer.writeForumCSV(result.forums, result.seed, attemptOptions.outputDir) : Promise.resolve(""),
2707
- result.aiOverview.detected ? serializer.writeAIOverviewCSV(result.aiOverview.citations, result.aiOverview.text, result.seed, attemptOptions.outputDir) : Promise.resolve(""),
2708
- result.aiMode.detected ? serializer.writeAIModeCSV(result.aiMode.citations, result.aiMode.text, result.seed, attemptOptions.outputDir) : Promise.resolve(""),
2709
- result.whatPeopleSaying.length > 0 ? serializer.writeWhatPeopleSayingCSV(result.whatPeopleSaying, result.seed, attemptOptions.outputDir) : Promise.resolve("")
2857
+ serializer.writeCSV(finalResult.flat, attemptOptions.outputDir),
2858
+ finalResult.videos.length > 0 ? serializer.writeVideoCSV(finalResult.videos, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
2859
+ finalResult.forums.length > 0 ? serializer.writeForumCSV(finalResult.forums, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
2860
+ finalResult.aiOverview.detected ? serializer.writeAIOverviewCSV(finalResult.aiOverview.citations, finalResult.aiOverview.text, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
2861
+ finalResult.aiMode.detected ? serializer.writeAIModeCSV(finalResult.aiMode.citations, finalResult.aiMode.text, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
2862
+ finalResult.whatPeopleSaying.length > 0 ? serializer.writeWhatPeopleSayingCSV(finalResult.whatPeopleSaying, finalResult.seed, attemptOptions.outputDir) : Promise.resolve("")
2710
2863
  ]);
2711
2864
  }
2712
- return result;
2865
+ return finalResult;
2713
2866
  } catch (err) {
2714
- if (err instanceof CaptchaError) {
2715
- const willRetry = i < MAX_ATTEMPTS - 1;
2867
+ const outcome = classifyAttemptError(err);
2868
+ const willRetry = i < maxAttempts - 1 && (outcome === "captcha" || proxyMode === "location" && retryableLocationProxyError(outcome));
2869
+ if (outcome === "captcha") {
2716
2870
  console.warn(JSON.stringify({
2717
2871
  event: "harvest_attempt_captcha",
2718
2872
  attempt_number: attemptNumber,
2719
- max_attempts: MAX_ATTEMPTS,
2720
- message: err.message,
2873
+ max_attempts: maxAttempts,
2874
+ message: errorMessage(err),
2875
+ will_retry: willRetry
2876
+ }));
2877
+ } else if (willRetry) {
2878
+ console.warn(JSON.stringify({
2879
+ event: "harvest_attempt_proxy_retry",
2880
+ attempt_number: attemptNumber,
2881
+ max_attempts: maxAttempts,
2882
+ outcome,
2883
+ message: errorMessage(err),
2721
2884
  will_retry: willRetry
2722
2885
  }));
2723
- await emitAttemptEvent(onAttemptEvent, {
2724
- type: "finished",
2725
- attemptNumber,
2726
- maxAttempts: MAX_ATTEMPTS,
2727
- outcome: "captcha",
2728
- kernelSessionId: null,
2729
- questionCount: 0,
2730
- durationMs: Date.now() - startedAtMs,
2731
- error: err.message,
2732
- willRetry,
2733
- cleanup: {
2734
- kernelSessionId: null,
2735
- kernelDeleteStarted: false,
2736
- kernelDeleteSucceeded: null,
2737
- kernelDeleteError: null,
2738
- browserCloseSucceeded: null,
2739
- browserCloseError: null
2740
- },
2741
- debug: null,
2742
- completedAt: (/* @__PURE__ */ new Date()).toISOString()
2743
- });
2744
- if (willRetry) continue;
2745
- break;
2746
2886
  }
2747
2887
  await emitAttemptEvent(onAttemptEvent, {
2748
2888
  type: "finished",
2749
2889
  attemptNumber,
2750
- maxAttempts: MAX_ATTEMPTS,
2751
- outcome: classifyAttemptError(err),
2890
+ maxAttempts,
2891
+ outcome,
2752
2892
  kernelSessionId: null,
2753
2893
  questionCount: 0,
2754
2894
  durationMs: Date.now() - startedAtMs,
2755
2895
  error: errorMessage(err),
2756
- willRetry: false,
2896
+ willRetry,
2757
2897
  cleanup: {
2758
2898
  kernelSessionId: null,
2759
2899
  kernelDeleteStarted: false,
@@ -2765,15 +2905,19 @@ async function harvest(rawOptions) {
2765
2905
  debug: null,
2766
2906
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
2767
2907
  });
2908
+ lastError = err;
2909
+ if (willRetry) continue;
2910
+ if (outcome === "captcha") break;
2768
2911
  throw err;
2769
2912
  }
2770
2913
  }
2914
+ if (lastError && !(lastError instanceof CaptchaError)) throw lastError;
2771
2915
  console.warn(JSON.stringify({
2772
2916
  event: "harvest_captcha_exhausted",
2773
- max_attempts: MAX_ATTEMPTS,
2917
+ max_attempts: maxAttempts,
2774
2918
  session_kind: kernelApiKey ? "kernel" : "local"
2775
2919
  }));
2776
- throw new CaptchaError(sanitizeVendorName(`CAPTCHA on all ${MAX_ATTEMPTS} fresh sessions. Try again in a few minutes.`));
2920
+ throw new CaptchaError(sanitizeVendorName(`CAPTCHA on all ${maxAttempts} fresh sessions. Try again in a few minutes.`));
2777
2921
  }
2778
2922
 
2779
2923
  export {
@@ -2788,7 +2932,8 @@ export {
2788
2932
  MapsSelectors,
2789
2933
  buildYouTubeChannelVideosUrl,
2790
2934
  BrowserDriver,
2935
+ deleteKernelProxyId,
2791
2936
  resolveKernelProxyId,
2792
2937
  harvest
2793
2938
  };
2794
- //# sourceMappingURL=chunk-TM22BLWP.js.map
2939
+ //# sourceMappingURL=chunk-MY3S7EX7.js.map