mcp-scraper 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -8
- package/dist/bin/api-server.cjs +4691 -3614
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/browser-agent-stdio-server.cjs +85 -8
- package/dist/bin/browser-agent-stdio-server.cjs.map +1 -1
- package/dist/bin/browser-agent-stdio-server.js +83 -6
- package/dist/bin/browser-agent-stdio-server.js.map +1 -1
- package/dist/bin/mcp-stdio-server.cjs +170 -12
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +3 -3
- package/dist/bin/paa-harvest.cjs +223 -74
- package/dist/bin/paa-harvest.cjs.map +1 -1
- package/dist/bin/paa-harvest.js +2 -2
- package/dist/{chunk-GXBT5CDU.js → chunk-IQOCZGJJ.js} +39 -2
- package/dist/chunk-IQOCZGJJ.js.map +1 -0
- package/dist/{chunk-ZMOWIBMK.js → chunk-M2S27J6Z.js} +9 -2
- package/dist/{chunk-ZMOWIBMK.js.map → chunk-M2S27J6Z.js.map} +1 -1
- package/dist/{chunk-TM22BLWP.js → chunk-MY3S7EX7.js} +221 -76
- package/dist/chunk-MY3S7EX7.js.map +1 -0
- package/dist/{chunk-BMVQB3WN.js → chunk-OR7DLLH2.js} +173 -14
- package/dist/chunk-OR7DLLH2.js.map +1 -0
- package/dist/chunk-XR65SANX.js +7 -0
- package/dist/chunk-XR65SANX.js.map +1 -0
- package/dist/index.cjs +223 -74
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +2 -2
- package/dist/{server-ASCMKUQ5.js → server-CJMX2QUM.js} +880 -181
- package/dist/server-CJMX2QUM.js.map +1 -0
- package/dist/{worker-KJ4A7WIR.js → worker-NAKGTIF5.js} +4 -4
- package/package.json +1 -1
- package/dist/chunk-2BS7BUEE.js +0 -7
- package/dist/chunk-2BS7BUEE.js.map +0 -1
- package/dist/chunk-BMVQB3WN.js.map +0 -1
- package/dist/chunk-GXBT5CDU.js.map +0 -1
- package/dist/chunk-TM22BLWP.js.map +0 -1
- package/dist/server-ASCMKUQ5.js.map +0 -1
- package/dist/{worker-KJ4A7WIR.js.map → worker-NAKGTIF5.js.map} +0 -0
package/dist/{chunk-GXBT5CDU.js → chunk-IQOCZGJJ.js}
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import {
|
|
2
2
|
CaptchaError,
|
|
3
|
+
LocationMismatchError,
|
|
3
4
|
RequestAbortedError
|
|
4
|
-
} from "./chunk-ZMOWIBMK.js";
|
|
5
|
+
} from "./chunk-M2S27J6Z.js";
|
|
5
6
|
import {
|
|
6
7
|
finishHarvestAttempt,
|
|
7
8
|
startHarvestAttempt
|
|
@@ -207,6 +208,12 @@ function looksLikeTimeout(err, message) {
|
|
|
207
208
|
function looksLikeCaptcha(message) {
|
|
208
209
|
return /captcha|recaptcha|unusual traffic|google\.com\/sorry|blocked/i.test(message);
|
|
209
210
|
}
|
|
211
|
+
function looksLikeProxyTunnelFailure(message) {
|
|
212
|
+
return /ERR_TUNNEL_CONNECTION_FAILED|ERR_PROXY_CONNECTION_FAILED|ERR_SOCKS_CONNECTION_FAILED|tunnel connection failed|proxy connection failed|transport error: proxy/i.test(message);
|
|
213
|
+
}
|
|
214
|
+
function looksLikeProxyUnavailable(message) {
|
|
215
|
+
return /proxy unavailable|proxy_unavailable|connection_test_failed|did not return a proxy id|configured fallback/i.test(message);
|
|
216
|
+
}
|
|
210
217
|
function classifyHarvestProblem(err) {
|
|
211
218
|
const message = errorMessage(err);
|
|
212
219
|
if (err instanceof RequestAbortedError) {
|
|
@@ -229,6 +236,36 @@ function classifyHarvestProblem(err) {
|
|
|
229
236
|
terminalStatus: "failed"
|
|
230
237
|
};
|
|
231
238
|
}
|
|
239
|
+
if (err instanceof LocationMismatchError) {
|
|
240
|
+
return {
|
|
241
|
+
error_code: "location_mismatch",
|
|
242
|
+
error_type: "location_mismatch",
|
|
243
|
+
message,
|
|
244
|
+
retryable: true,
|
|
245
|
+
httpStatus: 503,
|
|
246
|
+
terminalStatus: "failed"
|
|
247
|
+
};
|
|
248
|
+
}
|
|
249
|
+
if (looksLikeProxyTunnelFailure(message)) {
|
|
250
|
+
return {
|
|
251
|
+
error_code: "proxy_tunnel_failed",
|
|
252
|
+
error_type: "proxy_tunnel_failed",
|
|
253
|
+
message,
|
|
254
|
+
retryable: true,
|
|
255
|
+
httpStatus: 503,
|
|
256
|
+
terminalStatus: "failed"
|
|
257
|
+
};
|
|
258
|
+
}
|
|
259
|
+
if (looksLikeProxyUnavailable(message)) {
|
|
260
|
+
return {
|
|
261
|
+
error_code: "proxy_unavailable",
|
|
262
|
+
error_type: "proxy_unavailable",
|
|
263
|
+
message,
|
|
264
|
+
retryable: true,
|
|
265
|
+
httpStatus: 503,
|
|
266
|
+
terminalStatus: "failed"
|
|
267
|
+
};
|
|
268
|
+
}
|
|
232
269
|
if (looksLikeTimeout(err, message)) {
|
|
233
270
|
return {
|
|
234
271
|
error_code: "harvest_timeout",
|
|
@@ -319,4 +356,4 @@ export {
|
|
|
319
356
|
harvestProblemResponse,
|
|
320
357
|
createHarvestAttemptRecorder
|
|
321
358
|
};
|
|
322
|
-
//# sourceMappingURL=chunk-GXBT5CDU.js.map
|
|
359
|
+
//# sourceMappingURL=chunk-IQOCZGJJ.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/api/rates.ts","../src/api/harvest-problems.ts","../src/api/harvest-attempt-events.ts"],"sourcesContent":["export const MC_COSTS = {\n serp: 100,\n paa: 100,\n page_scrape: 100,\n url_map: 2_000,\n yt_channel: 50,\n yt_transcription: 200,\n fb_ad: 50,\n maps_search: 2_000,\n maps_place: 2_000,\n maps_review: 50,\n fb_search: 50,\n fb_transcribe: 50,\n browser_minute: 4_000,\n} as const\n\nexport type McCostKey = keyof typeof MC_COSTS\n\nexport const MC_PER_BROWSER_MS = MC_COSTS.browser_minute / 60_000\n\nexport function browserActiveCostMc(activeMs: number): number {\n return Math.round(activeMs * MC_PER_BROWSER_MS)\n}\n\nexport const BROWSER_OPEN_MIN_BALANCE_MC = 1_000\n\nexport const MC_PER_CREDIT = 1_000\n\nexport const CREDIT_COST_CATALOG: Array<{\n key: McCostKey\n label: string\n aliases: string[]\n credits: number\n unit: string\n notes?: string\n}> = [\n {\n key: 'serp',\n label: 'SERP search',\n aliases: ['search_serp', 'serp', 'google search', 'organic results'],\n credits: mcToCredits(MC_COSTS.serp),\n unit: 'per search',\n notes: 'Returns AI Overview, PAA snippet, videos, forums, and local pack.',\n },\n {\n key: 'paa',\n label: 'PAA harvest',\n aliases: ['harvest_paa', 'paa', 'people also ask', 'questions'],\n credits: mcToCredits(MC_COSTS.paa),\n unit: 'per extracted question',\n notes: 'Includes full SERP feature extraction. 
Billed on actual questions returned — no cap enforced.',\n },\n {\n key: 'page_scrape',\n label: 'Page crawl / extract',\n aliases: ['extract_url', 'extract_site', 'page scrape', 'url scrape', 'single page', 'site crawl'],\n credits: mcToCredits(MC_COSTS.page_scrape),\n unit: 'per page',\n notes: 'Applies to both single-URL extraction and per-page site crawls.',\n },\n {\n key: 'url_map',\n label: 'Site URL mapping',\n aliases: ['map_site_urls', 'url map', 'site map', 'crawl urls'],\n credits: mcToCredits(MC_COSTS.url_map),\n unit: 'per mapping operation',\n notes: 'Flat rate for the full /map-urls call regardless of URL count discovered.',\n },\n {\n key: 'yt_channel',\n label: 'YouTube search / channel harvest',\n aliases: ['youtube_harvest', 'youtube search', 'youtube channel', 'yt_channel'],\n credits: mcToCredits(MC_COSTS.yt_channel),\n unit: 'per call',\n },\n {\n key: 'yt_transcription',\n label: 'YouTube transcription',\n aliases: ['youtube_transcribe', 'youtube transcript', 'transcription', 'yt_transcription'],\n credits: mcToCredits(MC_COSTS.yt_transcription),\n unit: 'per minute',\n notes: 'A 5-minute hold is taken, then reconciled to actual video duration.',\n },\n {\n key: 'fb_ad',\n label: 'Facebook search / ad lookup',\n aliases: ['facebook_page_intel', 'facebook_ad_search', 'facebook_ad', 'facebook ads', 'fb ads'],\n credits: mcToCredits(MC_COSTS.fb_ad),\n unit: 'per call',\n },\n {\n key: 'maps_search',\n label: 'Maps business search',\n aliases: ['maps_search', 'google maps search', 'gmb search', 'gbp search', 'business profiles'],\n credits: mcToCredits(MC_COSTS.maps_search),\n unit: 'per search',\n notes: 'Returns up to 50 Google Maps business/profile candidates. Use maps_place_intel to hydrate selected businesses.',\n },\n {\n key: 'maps_place',\n label: 'Maps business lookup',\n aliases: ['maps_place_intel', 'google maps', 'maps place', 'place intel'],\n credits: mcToCredits(MC_COSTS.maps_place),\n unit: 'per business',\n notes: 'Base lookup. 
Reviews billed separately per card at maps_review rate.',\n },\n {\n key: 'maps_review',\n label: 'Maps review',\n aliases: ['maps_reviews', 'google reviews', 'review cards', 'reviews'],\n credits: mcToCredits(MC_COSTS.maps_review),\n unit: 'per review card',\n notes: 'Charged after extraction when includeReviews is true.',\n },\n {\n key: 'fb_search',\n label: 'Facebook ad library search',\n aliases: ['facebook_search', 'fb_search', 'fb ad search'],\n credits: mcToCredits(MC_COSTS.fb_search),\n unit: 'per search',\n notes: 'Browser automation to search Facebook Ads Library by keyword.',\n },\n {\n key: 'fb_transcribe',\n label: 'Facebook ad transcription',\n aliases: ['facebook_transcribe', 'fb_transcribe', 'fb ad transcript'],\n credits: mcToCredits(MC_COSTS.fb_transcribe),\n unit: 'per call',\n notes: 'Whisper transcription of Facebook ad video via fal.ai.',\n },\n {\n key: 'browser_minute',\n label: 'Interactive browser session',\n aliases: ['browser_open', 'browser agent', 'browser_agent', 'live browser', 'browse', 'browser control', 'interactive browser'],\n credits: mcToCredits(MC_COSTS.browser_minute),\n unit: 'per minute of active time',\n notes: 'Metered per second of active browser work (navigation, clicks, typing, screenshots). Idle and standby time are free. 
Billed against your balance as you act; close the session to stop the meter.',\n },\n]\n\nexport const CONCURRENCY_PRICE_ID = 'price_1Ta1NRS8aAcsk3TGwsRnYbix'\n\nexport const FREE_SIGNUP_MC = 500_000\nexport const FREE_MONTHLY_REFRESH_MC = 250_000\n\nexport const BALANCE_PRICE_IDS: Record<string, number> = {\n 'price_1TZx6rS8aAcsk3TGNMc1Vgpo': 11_000_000,\n 'price_1TZx6sS8aAcsk3TGxgqB7khO': 27_500_000,\n 'price_1TZx6tS8aAcsk3TG8PnJqHlG': 60_500_000,\n 'price_1TZx6tS8aAcsk3TGNgRMpy0e': 121_000_000,\n}\n\nexport const BALANCE_PACK_LABELS: Record<string, string> = {\n 'price_1TZx6rS8aAcsk3TGNMc1Vgpo': '$10',\n 'price_1TZx6sS8aAcsk3TGxgqB7khO': '$25',\n 'price_1TZx6tS8aAcsk3TG8PnJqHlG': '$50',\n 'price_1TZx6tS8aAcsk3TGNgRMpy0e': '$100',\n}\n\nexport function mcToCredits(mc: number): number {\n return mc / MC_PER_CREDIT\n}\n\nexport function insufficientBalanceResponse(balanceMc: number, requiredMc: number) {\n const topupUrl = process.env.TOPUP_URL ?? 'https://mcpscraper.dev/billing'\n const balanceCredits = mcToCredits(balanceMc)\n const requiredCredits = mcToCredits(requiredMc)\n return {\n error: 'insufficient_balance',\n error_code: 'insufficient_balance' as const,\n message: `Insufficient credits. Balance: ${balanceCredits} credits. This call requires ${requiredCredits} credits. 
Top up at ${topupUrl}`,\n balance_credits: balanceCredits,\n required_credits: requiredCredits,\n topup_url: topupUrl,\n }\n}\n\nexport const LedgerOperation = {\n TOPUP: 'topup',\n SIGNUP_GRANT: 'signup_grant',\n MONTHLY_REFRESH: 'monthly_free_refresh',\n PAA: 'paa',\n PAA_REFUND: 'paa_refund',\n SERP: 'serp',\n REFUND: 'refund',\n TRANSCRIPTION: 'transcription',\n TRANSCRIPTION_HOLD: 'transcription_hold',\n TRANSCRIPTION_REFUND: 'transcription_refund',\n YT_CHANNEL: 'yt_channel',\n FB_AD: 'fb_ad',\n MAPS_SEARCH: 'maps_search',\n MAPS_PLACE: 'maps_place',\n MAPS_REVIEW: 'maps_review',\n MAPS_REVIEW_REFUND: 'maps_review_refund',\n EXTRACT_SITE: 'extract_site',\n EXTRACT_SITE_REFUND: 'extract_site_refund',\n EXTRACT_URL: 'page_scrape',\n URL_MAP: 'url_map',\n EXTRACT_SITE_HOLD: 'extract_site_hold',\n YT_CHANNEL_REFUND: 'yt_channel_refund',\n FB_AD_REFUND: 'fb_ad_refund',\n URL_MAP_REFUND: 'url_map_refund',\n FB_SEARCH: 'fb_search',\n FB_TRANSCRIBE: 'fb_transcribe',\n FB_SEARCH_REFUND: 'fb_search_refund',\n FB_TRANSCRIBE_REFUND: 'fb_transcribe_refund',\n BROWSER_SESSION: 'browser_session',\n} as const\n\nexport type LedgerOperation = typeof LedgerOperation[keyof typeof LedgerOperation]\n","import { CaptchaError, LocationMismatchError, RequestAbortedError } from '../errors.js'\n\nexport type HarvestProblemCode =\n | 'request_aborted'\n | 'captcha_exhausted'\n | 'location_mismatch'\n | 'proxy_tunnel_failed'\n | 'proxy_unavailable'\n | 'harvest_timeout'\n | 'extraction_failed'\n\nexport interface HarvestProblem {\n error_code: HarvestProblemCode\n error_type: string\n message: string\n retryable: boolean\n httpStatus: number\n terminalStatus: 'cancelled' | 'failed'\n}\n\nfunction errorMessage(err: unknown): string {\n return err instanceof Error ? 
err.message : String(err)\n}\n\nfunction looksLikeTimeout(err: unknown, message: string): boolean {\n if (err instanceof DOMException && (err.name === 'TimeoutError' || err.name === 'AbortError')) return true\n return /timeout|timed out|Timeout \\d+ms exceeded|deadline/i.test(message)\n}\n\nfunction looksLikeCaptcha(message: string): boolean {\n return /captcha|recaptcha|unusual traffic|google\\.com\\/sorry|blocked/i.test(message)\n}\n\nfunction looksLikeProxyTunnelFailure(message: string): boolean {\n return /ERR_TUNNEL_CONNECTION_FAILED|ERR_PROXY_CONNECTION_FAILED|ERR_SOCKS_CONNECTION_FAILED|tunnel connection failed|proxy connection failed|transport error: proxy/i.test(message)\n}\n\nfunction looksLikeProxyUnavailable(message: string): boolean {\n return /proxy unavailable|proxy_unavailable|connection_test_failed|did not return a proxy id|configured fallback/i.test(message)\n}\n\nexport function classifyHarvestProblem(err: unknown): HarvestProblem {\n const message = errorMessage(err)\n\n if (err instanceof RequestAbortedError) {\n return {\n error_code: 'request_aborted',\n error_type: 'request_aborted',\n message,\n retryable: true,\n httpStatus: 408,\n terminalStatus: 'cancelled',\n }\n }\n\n if (err instanceof CaptchaError || looksLikeCaptcha(message)) {\n return {\n error_code: 'captcha_exhausted',\n error_type: 'captcha',\n message,\n retryable: true,\n httpStatus: 503,\n terminalStatus: 'failed',\n }\n }\n\n if (err instanceof LocationMismatchError) {\n return {\n error_code: 'location_mismatch',\n error_type: 'location_mismatch',\n message,\n retryable: true,\n httpStatus: 503,\n terminalStatus: 'failed',\n }\n }\n\n if (looksLikeProxyTunnelFailure(message)) {\n return {\n error_code: 'proxy_tunnel_failed',\n error_type: 'proxy_tunnel_failed',\n message,\n retryable: true,\n httpStatus: 503,\n terminalStatus: 'failed',\n }\n }\n\n if (looksLikeProxyUnavailable(message)) {\n return {\n error_code: 'proxy_unavailable',\n error_type: 'proxy_unavailable',\n 
message,\n retryable: true,\n httpStatus: 503,\n terminalStatus: 'failed',\n }\n }\n\n if (looksLikeTimeout(err, message)) {\n return {\n error_code: 'harvest_timeout',\n error_type: 'timeout',\n message,\n retryable: true,\n httpStatus: 504,\n terminalStatus: 'failed',\n }\n }\n\n return {\n error_code: 'extraction_failed',\n error_type: 'extraction',\n message,\n retryable: false,\n httpStatus: 500,\n terminalStatus: 'failed',\n }\n}\n\nexport function serializeHarvestProblem(problem: HarvestProblem): string {\n return JSON.stringify({\n error_code: problem.error_code,\n error_type: problem.error_type,\n message: problem.message,\n retryable: problem.retryable,\n })\n}\n\nexport function harvestProblemResponse(problem: HarvestProblem): {\n error: string\n error_code: HarvestProblemCode\n error_type: string\n retryable: boolean\n} {\n return {\n error: problem.message,\n error_code: problem.error_code,\n error_type: problem.error_type,\n retryable: problem.retryable,\n }\n}\n","import type { HarvestAttemptLogEvent } from '../harvest.js'\nimport { finishHarvestAttempt, startHarvestAttempt } from './db.js'\n\nexport function createHarvestAttemptRecorder(jobId: string, userId: number | bigint) {\n return async (event: HarvestAttemptLogEvent): Promise<void> => {\n if (event.type === 'started') {\n await startHarvestAttempt({\n jobId,\n userId,\n attemptNumber: event.attemptNumber,\n maxAttempts: event.maxAttempts,\n query: event.query,\n location: event.location,\n maxQuestions: event.maxQuestions,\n startedAt: event.startedAt,\n })\n return\n }\n\n await finishHarvestAttempt({\n jobId,\n attemptNumber: event.attemptNumber,\n outcome: event.outcome,\n kernelSessionId: event.kernelSessionId,\n questionCount: event.questionCount,\n durationMs: event.durationMs,\n error: event.error,\n willRetry: event.willRetry,\n kernelDeleteStarted: event.cleanup.kernelDeleteStarted,\n kernelDeleteSucceeded: event.cleanup.kernelDeleteSucceeded,\n kernelDeleteError: 
event.cleanup.kernelDeleteError,\n browserCloseSucceeded: event.cleanup.browserCloseSucceeded,\n browserCloseError: event.cleanup.browserCloseError,\n debug: event.debug,\n completedAt: event.completedAt,\n })\n }\n}\n"],"mappings":";;;;;;;;;;;AAAO,IAAM,WAAW;AAAA,EACtB,MAAmB;AAAA,EACnB,KAAmB;AAAA,EACnB,aAAmB;AAAA,EACnB,SAAiB;AAAA,EACjB,YAAoB;AAAA,EACpB,kBAAmB;AAAA,EACnB,OAAoB;AAAA,EACpB,aAAkB;AAAA,EAClB,YAAiB;AAAA,EACjB,aAAoB;AAAA,EACpB,WAAoB;AAAA,EACpB,eAAoB;AAAA,EACpB,gBAAiB;AACnB;AAIO,IAAM,oBAAoB,SAAS,iBAAiB;AAEpD,SAAS,oBAAoB,UAA0B;AAC5D,SAAO,KAAK,MAAM,WAAW,iBAAiB;AAChD;AAEO,IAAM,8BAA8B;AAEpC,IAAM,gBAAgB;AAEtB,IAAM,sBAOR;AAAA,EACH;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,eAAe,QAAQ,iBAAiB,iBAAiB;AAAA,IACnE,SAAS,YAAY,SAAS,IAAI;AAAA,IAClC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,eAAe,OAAO,mBAAmB,WAAW;AAAA,IAC9D,SAAS,YAAY,SAAS,GAAG;AAAA,IACjC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,eAAe,gBAAgB,eAAe,cAAc,eAAe,YAAY;AAAA,IACjG,SAAS,YAAY,SAAS,WAAW;AAAA,IACzC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,iBAAiB,WAAW,YAAY,YAAY;AAAA,IAC9D,SAAS,YAAY,SAAS,OAAO;AAAA,IACrC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,mBAAmB,kBAAkB,mBAAmB,YAAY;AAAA,IAC9E,SAAS,YAAY,SAAS,UAAU;AAAA,IACxC,MAAM;AAAA,EACR;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,sBAAsB,sBAAsB,iBAAiB,kBAAkB;AAAA,IACzF,SAAS,YAAY,SAAS,gBAAgB;AAAA,IAC9C,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,uBAAuB,sBAAsB,eAAe,gBAAgB,QAAQ;AAAA,IAC9F,SAAS,YAAY,SAAS,KAAK;AAAA,IACnC,MAAM;AAAA,EACR;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,eAAe,sBAAsB,cAAc,cAAc,mBAAmB;AAAA,IAC9F,SAAS,YAAY,SAAS,WAAW;AAAA,IACzC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,oBAAoB,eAAe,cAAc,aAAa;AAAA,IACxE,SAAS,YAAY,SAAS,UAAU;AAAA,IACxC,MA
AM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,gBAAgB,kBAAkB,gBAAgB,SAAS;AAAA,IACrE,SAAS,YAAY,SAAS,WAAW;AAAA,IACzC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,mBAAmB,aAAa,cAAc;AAAA,IACxD,SAAS,YAAY,SAAS,SAAS;AAAA,IACvC,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,uBAAuB,iBAAiB,kBAAkB;AAAA,IACpE,SAAS,YAAY,SAAS,aAAa;AAAA,IAC3C,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AAAA,EACA;AAAA,IACE,KAAK;AAAA,IACL,OAAO;AAAA,IACP,SAAS,CAAC,gBAAgB,iBAAiB,iBAAiB,gBAAgB,UAAU,mBAAmB,qBAAqB;AAAA,IAC9H,SAAS,YAAY,SAAS,cAAc;AAAA,IAC5C,MAAM;AAAA,IACN,OAAO;AAAA,EACT;AACF;AAEO,IAAM,uBAAuB;AAE7B,IAAM,iBAAiB;AACvB,IAAM,0BAA0B;AAEhC,IAAM,oBAA4C;AAAA,EACvD,kCAAkC;AAAA,EAClC,kCAAkC;AAAA,EAClC,kCAAkC;AAAA,EAClC,kCAAkC;AACpC;AAEO,IAAM,sBAA8C;AAAA,EACzD,kCAAkC;AAAA,EAClC,kCAAkC;AAAA,EAClC,kCAAkC;AAAA,EAClC,kCAAkC;AACpC;AAEO,SAAS,YAAY,IAAoB;AAC9C,SAAO,KAAK;AACd;AAEO,SAAS,4BAA4B,WAAmB,YAAoB;AACjF,QAAM,WAAW,QAAQ,IAAI,aAAa;AAC1C,QAAM,iBAAiB,YAAY,SAAS;AAC5C,QAAM,kBAAkB,YAAY,UAAU;AAC9C,SAAO;AAAA,IACL,OAAO;AAAA,IACP,YAAY;AAAA,IACZ,SAAS,kCAAkC,cAAc,gCAAgC,eAAe,uBAAuB,QAAQ;AAAA,IACvI,iBAAiB;AAAA,IACjB,kBAAkB;AAAA,IAClB,WAAW;AAAA,EACb;AACF;AAEO,IAAM,kBAAkB;AAAA,EAC7B,OAAuB;AAAA,EACvB,cAAuB;AAAA,EACvB,iBAAuB;AAAA,EACvB,KAAuB;AAAA,EACvB,YAAuB;AAAA,EACvB,MAAuB;AAAA,EACvB,QAAuB;AAAA,EACvB,eAAuB;AAAA,EACvB,oBAAuB;AAAA,EACvB,sBAAuB;AAAA,EACvB,YAAuB;AAAA,EACvB,OAAuB;AAAA,EACvB,aAAuB;AAAA,EACvB,YAAuB;AAAA,EACvB,aAAuB;AAAA,EACvB,oBAAuB;AAAA,EACvB,cAAuB;AAAA,EACvB,qBAAuB;AAAA,EACvB,aAAuB;AAAA,EACvB,SAAuB;AAAA,EACvB,mBAAuB;AAAA,EACvB,mBAAuB;AAAA,EACvB,cAAuB;AAAA,EACvB,gBAAuB;AAAA,EACvB,WAAuB;AAAA,EACvB,eAAuB;AAAA,EACvB,kBAAuB;AAAA,EACvB,sBAAuB;AAAA,EACvB,iBAAuB;AACzB;;;AC3LA,SAAS,aAAa,KAAsB;AAC1C,SAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AACxD;AAEA,SAAS,iBAAiB,KAAc,SAA0B;AAChE,MAAI,eAAe,iBAAiB,IAAI,SAAS,kBAAkB,IAAI,SAAS,cAAe,QAAO;AACtG,SAAO,qDAAqD,KAAK,OAAO;AAC1E;AAEA,SAAS,iBAAiB,SAA0B;AAClD,SAAO,gEAAgE,KAAK,O
AAO;AACrF;AAEA,SAAS,4BAA4B,SAA0B;AAC7D,SAAO,gKAAgK,KAAK,OAAO;AACrL;AAEA,SAAS,0BAA0B,SAA0B;AAC3D,SAAO,4GAA4G,KAAK,OAAO;AACjI;AAEO,SAAS,uBAAuB,KAA8B;AACnE,QAAM,UAAU,aAAa,GAAG;AAEhC,MAAI,eAAe,qBAAqB;AACtC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,eAAe,gBAAgB,iBAAiB,OAAO,GAAG;AAC5D,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,eAAe,uBAAuB;AACxC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,4BAA4B,OAAO,GAAG;AACxC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,0BAA0B,OAAO,GAAG;AACtC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,iBAAiB,KAAK,OAAO,GAAG;AAClC,WAAO;AAAA,MACL,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ;AAAA,MACA,WAAW;AAAA,MACX,YAAY;AAAA,MACZ,gBAAgB;AAAA,IAClB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,YAAY;AAAA,IACZ,YAAY;AAAA,IACZ;AAAA,IACA,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,gBAAgB;AAAA,EAClB;AACF;AAEO,SAAS,wBAAwB,SAAiC;AACvE,SAAO,KAAK,UAAU;AAAA,IACpB,YAAY,QAAQ;AAAA,IACpB,YAAY,QAAQ;AAAA,IACpB,SAAS,QAAQ;AAAA,IACjB,WAAW,QAAQ;AAAA,EACrB,CAAC;AACH;AAEO,SAAS,uBAAuB,SAKrC;AACA,SAAO;AAAA,IACL,OAAO,QAAQ;AAAA,IACf,YAAY,QAAQ;AAAA,IACpB,YAAY,QAAQ;AAAA,IACpB,WAAW,QAAQ;AAAA,EACrB;AACF;;;AC1IO,SAAS,6BAA6B,OAAe,QAAyB;AACnF,SAAO,OAAO,UAAiD;AAC7D,QAAI,MAAM,SAAS,WAAW;AAC5B,YAAM,oBAAoB;AAAA,QACxB;AAAA,QACA;AAAA,QACA,eAAe,MAAM;AAAA,QACrB,aAAa,MAAM;AAAA,QACnB,OAAO,MAAM;AAAA,QACb,UAAU,MAAM;AAAA,QAChB,cAAc,MAAM;AAAA,QACpB,WAAW,MAAM;AAAA,MACnB,CAAC;AACD;AAAA,IACF;AAEA,UAAM,qBAAqB;AAAA,MACzB;AAAA,MACA,eAAe,MAAM;AAAA,MACrB,SAAS,MAAM;AAAA,MACf,iBAAiB,MAAM;AAAA,MACvB,eAAe,MAAM;AAAA,MACrB,YAAY,MAAM;AAAA,MAClB,OAAO,MAAM;AAAA,MACb,WAAW,MAAM;AAAA,MACjB,qBAAqB,MAAM,QAAQ;AAAA,MACnC,uBAAuB,MAAM,QAAQ;AAAA,MACrC,mBAAmB,MAAM,QAAQ;AAAA,MACjC,uBAAuB,M
AAM,QAAQ;AAAA,MACrC,mBAAmB,MAAM,QAAQ;AAAA,MACjC,OAAO,MAAM;AAAA,MACb,aAAa,MAAM;AAAA,IACrB,CAAC;AAAA,EACH;AACF;","names":[]}
|
|
@@ -25,12 +25,19 @@ var RequestAbortedError = class extends Error {
|
|
|
25
25
|
super(message);
|
|
26
26
|
}
|
|
27
27
|
};
|
|
28
|
+
var LocationMismatchError = class extends Error {
|
|
29
|
+
name = "LocationMismatchError";
|
|
30
|
+
constructor(message = "Google returned results for a different location than requested") {
|
|
31
|
+
super(message);
|
|
32
|
+
}
|
|
33
|
+
};
|
|
28
34
|
|
|
29
35
|
export {
|
|
30
36
|
RECAPTCHA_INSTRUCTIONS,
|
|
31
37
|
sanitizeVendorName,
|
|
32
38
|
CaptchaError,
|
|
33
39
|
ExtractionError,
|
|
34
|
-
RequestAbortedError
|
|
40
|
+
RequestAbortedError,
|
|
41
|
+
LocationMismatchError
|
|
35
42
|
};
|
|
36
|
-
//# sourceMappingURL=chunk-ZMOWIBMK.js.map
|
|
43
|
+
//# sourceMappingURL=chunk-M2S27J6Z.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/errors.ts"],"sourcesContent":["export const RECAPTCHA_INSTRUCTIONS = 'Google returned a CAPTCHA. Run with --headless=false to re-warm the browser profile, then retry.'\n\nexport function sanitizeVendorName(message: string): string {\n return message\n .replace(/kernel\\.sh\\s+sessions?/gi, 'sessions')\n .replace(/kernel\\.sh\\s+session/gi, 'this session')\n .replace(/kernel\\.sh/gi, 'the service')\n .replace(/kernel\\s+sessions?/gi, 'sessions')\n .replace(/kernel\\s+session/gi, 'this session')\n .replace(/\\bkernel\\b/gi, 'the service')\n .replace(/ +/g, ' ')\n .trim()\n}\n\nexport class CaptchaError extends Error {\n readonly name = 'CaptchaError'\n constructor(public readonly instructions: string) {\n super(`CAPTCHA detected. ${instructions}`)\n }\n}\n\nexport class ExtractionError extends Error {\n readonly name = 'ExtractionError'\n constructor(message: string, public readonly cause?: unknown) {\n super(message)\n }\n}\n\nexport class RequestAbortedError extends Error {\n readonly name = 'RequestAbortedError'\n constructor(message = 'Request aborted before harvest completed') {\n super(message)\n }\n}\n"],"mappings":";AAAO,IAAM,yBAAyB;AAE/B,SAAS,mBAAmB,SAAyB;AAC1D,SAAO,QACJ,QAAQ,4BAA4B,UAAU,EAC9C,QAAQ,0BAA0B,cAAc,EAChD,QAAQ,gBAAgB,aAAa,EACrC,QAAQ,wBAAwB,UAAU,EAC1C,QAAQ,sBAAsB,cAAc,EAC5C,QAAQ,gBAAgB,aAAa,EACrC,QAAQ,QAAQ,GAAG,EACnB,KAAK;AACV;AAEO,IAAM,eAAN,cAA2B,MAAM;AAAA,EAEtC,YAA4B,cAAsB;AAChD,UAAM,qBAAqB,YAAY,EAAE;AADf;AAAA,EAE5B;AAAA,EAF4B;AAAA,EADnB,OAAO;AAIlB;AAEO,IAAM,kBAAN,cAA8B,MAAM;AAAA,EAEzC,YAAY,SAAiC,OAAiB;AAC5D,UAAM,OAAO;AAD8B;AAAA,EAE7C;AAAA,EAF6C;AAAA,EADpC,OAAO;AAIlB;AAEO,IAAM,sBAAN,cAAkC,MAAM;AAAA,EACpC,OAAO;AAAA,EAChB,YAAY,UAAU,4CAA4C;AAChE,UAAM,OAAO;AAAA,EACf;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/errors.ts"],"sourcesContent":["export const RECAPTCHA_INSTRUCTIONS = 'Google returned a CAPTCHA. Run with --headless=false to re-warm the browser profile, then retry.'\n\nexport function sanitizeVendorName(message: string): string {\n return message\n .replace(/kernel\\.sh\\s+sessions?/gi, 'sessions')\n .replace(/kernel\\.sh\\s+session/gi, 'this session')\n .replace(/kernel\\.sh/gi, 'the service')\n .replace(/kernel\\s+sessions?/gi, 'sessions')\n .replace(/kernel\\s+session/gi, 'this session')\n .replace(/\\bkernel\\b/gi, 'the service')\n .replace(/ +/g, ' ')\n .trim()\n}\n\nexport class CaptchaError extends Error {\n readonly name = 'CaptchaError'\n constructor(public readonly instructions: string) {\n super(`CAPTCHA detected. ${instructions}`)\n }\n}\n\nexport class ExtractionError extends Error {\n readonly name = 'ExtractionError'\n constructor(message: string, public readonly cause?: unknown) {\n super(message)\n }\n}\n\nexport class RequestAbortedError extends Error {\n readonly name = 'RequestAbortedError'\n constructor(message = 'Request aborted before harvest completed') {\n super(message)\n }\n}\n\nexport class LocationMismatchError extends Error {\n readonly name = 'LocationMismatchError'\n constructor(message = 'Google returned results for a different location than requested') {\n super(message)\n 
}\n}\n"],"mappings":";AAAO,IAAM,yBAAyB;AAE/B,SAAS,mBAAmB,SAAyB;AAC1D,SAAO,QACJ,QAAQ,4BAA4B,UAAU,EAC9C,QAAQ,0BAA0B,cAAc,EAChD,QAAQ,gBAAgB,aAAa,EACrC,QAAQ,wBAAwB,UAAU,EAC1C,QAAQ,sBAAsB,cAAc,EAC5C,QAAQ,gBAAgB,aAAa,EACrC,QAAQ,QAAQ,GAAG,EACnB,KAAK;AACV;AAEO,IAAM,eAAN,cAA2B,MAAM;AAAA,EAEtC,YAA4B,cAAsB;AAChD,UAAM,qBAAqB,YAAY,EAAE;AADf;AAAA,EAE5B;AAAA,EAF4B;AAAA,EADnB,OAAO;AAIlB;AAEO,IAAM,kBAAN,cAA8B,MAAM;AAAA,EAEzC,YAAY,SAAiC,OAAiB;AAC5D,UAAM,OAAO;AAD8B;AAAA,EAE7C;AAAA,EAF6C;AAAA,EADpC,OAAO;AAIlB;AAEO,IAAM,sBAAN,cAAkC,MAAM;AAAA,EACpC,OAAO;AAAA,EAChB,YAAY,UAAU,4CAA4C;AAChE,UAAM,OAAO;AAAA,EACf;AACF;AAEO,IAAM,wBAAN,cAAoC,MAAM;AAAA,EACtC,OAAO;AAAA,EAChB,YAAY,UAAU,mEAAmE;AACvF,UAAM,OAAO;AAAA,EACf;AACF;","names":[]}
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
2
|
CaptchaError,
|
|
3
3
|
ExtractionError,
|
|
4
|
+
LocationMismatchError,
|
|
4
5
|
RECAPTCHA_INSTRUCTIONS,
|
|
5
6
|
RequestAbortedError,
|
|
6
7
|
sanitizeVendorName
|
|
7
|
-
} from "./chunk-ZMOWIBMK.js";
|
|
8
|
+
} from "./chunk-M2S27J6Z.js";
|
|
8
9
|
|
|
9
10
|
// src/lib/browser-service-env.ts
|
|
10
11
|
function browserServiceApiKey() {
|
|
@@ -57,8 +58,12 @@ var MapsSearchOptionsSchema = z.object({
|
|
|
57
58
|
gl: z.string().length(2).default("us"),
|
|
58
59
|
hl: z.string().length(2).default("en"),
|
|
59
60
|
maxResults: z.number().int().min(1).max(50).default(10),
|
|
61
|
+
proxyMode: z.enum(["location", "configured", "none"]).default("location"),
|
|
62
|
+
proxyZip: z.string().regex(/^\d{5}$/).optional(),
|
|
63
|
+
debug: z.boolean().default(false),
|
|
60
64
|
kernelApiKey: z.string().optional(),
|
|
61
65
|
kernelProxyId: z.string().optional(),
|
|
66
|
+
kernelProxyResolution: z.unknown().optional(),
|
|
62
67
|
headless: z.boolean().default(true)
|
|
63
68
|
});
|
|
64
69
|
var RawPAAItemSchema = z.object({
|
|
@@ -2232,16 +2237,18 @@ var US_CITY_CENTER_ZIPS = {
|
|
|
2232
2237
|
function proxyIdSuffix2(proxyId) {
|
|
2233
2238
|
return proxyId ? proxyId.slice(-6) : null;
|
|
2234
2239
|
}
|
|
2235
|
-
function resolution(source, proxyMode, proxyId, target, error) {
|
|
2240
|
+
function resolution(source, proxyMode, proxyId, target, error, disposable = false) {
|
|
2236
2241
|
return {
|
|
2237
2242
|
kernelProxyId: proxyId,
|
|
2243
|
+
...disposable && proxyId ? { disposableProxyId: proxyId } : {},
|
|
2238
2244
|
resolution: {
|
|
2239
2245
|
source,
|
|
2240
2246
|
proxyMode,
|
|
2241
2247
|
proxyIdPresent: Boolean(proxyId),
|
|
2242
2248
|
proxyIdSuffix: proxyIdSuffix2(proxyId),
|
|
2243
2249
|
target,
|
|
2244
|
-
error
|
|
2250
|
+
error,
|
|
2251
|
+
disposable
|
|
2245
2252
|
}
|
|
2246
2253
|
};
|
|
2247
2254
|
}
|
|
@@ -2271,6 +2278,10 @@ function kernelCityIdentifierCandidates(city) {
|
|
|
2271
2278
|
function proxyName(country, state, city) {
|
|
2272
2279
|
return city ? `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}-${city}` : `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}`;
|
|
2273
2280
|
}
|
|
2281
|
+
function freshProxyName(baseName, attemptIndex) {
|
|
2282
|
+
const stamp = `${Date.now()}-${attemptIndex ?? 0}-${Math.random().toString(36).slice(2, 8)}`;
|
|
2283
|
+
return `${baseName}-fresh-${stamp}`;
|
|
2284
|
+
}
|
|
2274
2285
|
function zipProxyName(zip) {
|
|
2275
2286
|
return `mcp-serp-residential-us-zip-${zip}`;
|
|
2276
2287
|
}
|
|
@@ -2340,6 +2351,12 @@ function zipTarget(target, zip) {
|
|
|
2340
2351
|
}
|
|
2341
2352
|
};
|
|
2342
2353
|
}
|
|
2354
|
+
function withProxyName(target, name) {
|
|
2355
|
+
return {
|
|
2356
|
+
...target,
|
|
2357
|
+
proxyName: name
|
|
2358
|
+
};
|
|
2359
|
+
}
|
|
2343
2360
|
function configMatches(config, target, city) {
|
|
2344
2361
|
if (target.level === "zip") {
|
|
2345
2362
|
return config?.country?.toUpperCase() === target.country && config?.zip === target.zip;
|
|
@@ -2378,6 +2395,55 @@ function escalatedTargetLevel(target, attemptIndex) {
|
|
|
2378
2395
|
function errorText2(err) {
|
|
2379
2396
|
return err instanceof Error ? err.message : String(err);
|
|
2380
2397
|
}
|
|
2398
|
+
function freshTargetCandidates(target, explicitZip, attemptIndex) {
|
|
2399
|
+
const out = [];
|
|
2400
|
+
const zip = knownZipFor(target, explicitZip);
|
|
2401
|
+
if (zip) {
|
|
2402
|
+
const targetZip = zipTarget(target, zip);
|
|
2403
|
+
out.push(withProxyName(targetZip, freshProxyName(targetZip.proxyName, attemptIndex)));
|
|
2404
|
+
}
|
|
2405
|
+
for (const city of target.cityCandidates) {
|
|
2406
|
+
const cityTarget = {
|
|
2407
|
+
...target,
|
|
2408
|
+
level: "city",
|
|
2409
|
+
city,
|
|
2410
|
+
proxyName: proxyName(target.country, target.state, city),
|
|
2411
|
+
config: {
|
|
2412
|
+
country: target.country,
|
|
2413
|
+
state: target.state,
|
|
2414
|
+
city
|
|
2415
|
+
}
|
|
2416
|
+
};
|
|
2417
|
+
out.push(withProxyName(cityTarget, freshProxyName(cityTarget.proxyName, attemptIndex)));
|
|
2418
|
+
}
|
|
2419
|
+
const fallbackTarget = stateTarget(target);
|
|
2420
|
+
out.push(withProxyName(fallbackTarget, freshProxyName(fallbackTarget.proxyName, attemptIndex)));
|
|
2421
|
+
return out;
|
|
2422
|
+
}
|
|
2423
|
+
/**
 * Creates a brand-new residential proxy through the Kernel client for the requested location.
 *
 * Candidates come from freshTargetCandidates(target, options.proxyZip, attemptIndex) and are
 * tried in order; the first create call that returns an id wins. Per-candidate failures
 * (a thrown error, or a response without an id) are collected instead of being rethrown.
 *
 * @param kernel  Kernel client; only `kernel.proxies.create` is used here.
 * @param options Reads `proxyZip`, `attemptIndex`, `proxyMode` and `configuredKernelProxyId`.
 * @param target  Location target the candidates are derived from.
 * @returns The value of `resolution("location_created", ...)` for the first proxy created, or
 *          `resolution("configured_fallback", ...)` carrying `options.configuredKernelProxyId`
 *          and the collected errors joined with " | " when every candidate failed.
 */
async function createFreshLocationProxy(kernel, options, target) {
  const createErrors = [];
  // Default the attempt index the same way resolveKernelProxyId does, so an omitted
  // value is never forwarded as `undefined` into the generated proxy names.
  const attemptIndex = options.attemptIndex ?? 0;
  for (const candidate of freshTargetCandidates(target, options.proxyZip, attemptIndex)) {
    try {
      const created = await kernel.proxies.create({
        type: "residential",
        name: candidate.proxyName,
        // Zip-level candidates send only country + zip; other levels send their own config.
        config: candidate.level === "zip" ? { country: candidate.country, zip: candidate.zip } : candidate.config
      });
      if (created.id) {
        // NOTE(review): the trailing `true` presumably flags the proxy as disposable — confirm against resolution().
        return resolution("location_created", options.proxyMode, created.id, candidate, null, true);
      }
      createErrors.push(`${candidate.proxyName}: Kernel did not return a proxy id`);
    } catch (err) {
      createErrors.push(`${candidate.proxyName}: ${errorText2(err)}`);
    }
  }
  return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, createErrors.join(" | "));
}
|
|
2442
|
+
/**
 * Deletes one proxy by id using a Kernel client built from `kernelApiKey`.
 * Does nothing when either argument is falsy. Errors from the delete call propagate.
 *
 * @param kernelApiKey API key passed to the Kernel2 constructor.
 * @param proxyId      Id handed to `proxies.delete`.
 */
async function deleteKernelProxyId(kernelApiKey, proxyId) {
  if (kernelApiKey && proxyId) {
    const client = new Kernel2({ apiKey: kernelApiKey });
    await client.proxies.delete(proxyId);
  }
}
|
|
2381
2447
|
async function resolveKernelProxyId(options) {
|
|
2382
2448
|
if (options.proxyMode === "none") {
|
|
2383
2449
|
return resolution("disabled", options.proxyMode, void 0, null, null);
|
|
@@ -2392,6 +2458,9 @@ async function resolveKernelProxyId(options) {
|
|
|
2392
2458
|
const kernel = new Kernel2({ apiKey: options.kernelApiKey });
|
|
2393
2459
|
try {
|
|
2394
2460
|
const attemptIndex = options.attemptIndex ?? 0;
|
|
2461
|
+
if (options.fresh) {
|
|
2462
|
+
return await createFreshLocationProxy(kernel, options, target);
|
|
2463
|
+
}
|
|
2395
2464
|
if (attemptIndex >= 1) {
|
|
2396
2465
|
const escalatedTarget = escalatedTargetLevel(target, attemptIndex);
|
|
2397
2466
|
const createErrors2 = [];
|
|
@@ -2495,6 +2564,7 @@ async function resolveKernelProxyId(options) {
|
|
|
2495
2564
|
|
|
2496
2565
|
// src/harvest.ts
|
|
2497
2566
|
// Attempt budget returned by maxAttemptsForProxyMode for every proxy mode other than "location".
var MAX_ATTEMPTS = 3;
|
|
2567
|
+
// Attempt budget returned by maxAttemptsForProxyMode when the proxy mode is "location".
var LOCATION_PROXY_MAX_ATTEMPTS = 5;
|
|
2498
2568
|
function abortReason(signal) {
|
|
2499
2569
|
if (signal.reason instanceof DOMException && signal.reason.name === "TimeoutError") return signal.reason;
|
|
2500
2570
|
return new RequestAbortedError();
|
|
@@ -2524,9 +2594,12 @@ async function emitAttemptEvent(sink, event) {
|
|
|
2524
2594
|
}
|
|
2525
2595
|
/**
 * Maps a thrown value to an attempt outcome label.
 *
 * Checks run in this order: known error classes first, then DOMException
 * timeout/abort names, then pattern matches on the message text.
 *
 * @param err Any thrown value.
 * @returns One of "captcha", "location_mismatch", "request_aborted", "timeout",
 *          "proxy_tunnel_failed", "proxy_unavailable" or "error".
 */
function classifyAttemptError(err) {
  if (err instanceof CaptchaError) {
    return "captcha";
  }
  if (err instanceof LocationMismatchError) {
    return "location_mismatch";
  }
  if (err instanceof RequestAbortedError) {
    return "request_aborted";
  }
  if (err instanceof DOMException) {
    const abortLike = err.name === "TimeoutError" || err.name === "AbortError";
    if (abortLike) {
      return "timeout";
    }
  }

  let text;
  if (err instanceof Error) {
    text = err.message;
  } else {
    text = String(err);
  }

  // Proxy-specific patterns are checked before the generic timeout pattern.
  if (looksLikeProxyTunnelFailure(text)) {
    return "proxy_tunnel_failed";
  }
  if (looksLikeProxyUnavailable(text)) {
    return "proxy_unavailable";
  }
  if (/timeout|timed out|Timeout \d+ms exceeded|deadline/i.test(text)) {
    return "timeout";
  }
  return "error";
}
|
|
2532
2605
|
function classifyAttemptResult(result) {
|
|
@@ -2535,6 +2608,49 @@ function classifyAttemptResult(result) {
|
|
|
2535
2608
|
/**
 * Returns the text for a caught value: `err.message` when `err` is an Error,
 * otherwise the result of `String(err)`.
 */
function errorMessage(err) {
  const isError = err instanceof Error;
  if (!isError) {
    return String(err);
  }
  return err.message;
}
|
|
2611
|
+
/**
 * Picks the attempt budget for a proxy mode: LOCATION_PROXY_MAX_ATTEMPTS for
 * "location", MAX_ATTEMPTS for any other value.
 */
function maxAttemptsForProxyMode(proxyMode) {
  if (proxyMode === "location") {
    return LOCATION_PROXY_MAX_ATTEMPTS;
  }
  return MAX_ATTEMPTS;
}
|
|
2614
|
+
/**
 * Reports whether `message` contains one of the known proxy/tunnel connection
 * failure markers (matched case-insensitively).
 */
function looksLikeProxyTunnelFailure(message) {
  const tunnelFailurePattern = /ERR_TUNNEL_CONNECTION_FAILED|ERR_PROXY_CONNECTION_FAILED|ERR_SOCKS_CONNECTION_FAILED|tunnel connection failed|proxy connection failed|transport error: proxy/i;
  return tunnelFailurePattern.test(message);
}
|
|
2617
|
+
/**
 * Reports whether `message` contains one of the known "proxy could not be
 * obtained" markers (matched case-insensitively).
 */
function looksLikeProxyUnavailable(message) {
  const unavailablePattern = /proxy unavailable|proxy_unavailable|connection_test_failed|did not return a proxy id|configured fallback/i;
  return unavailablePattern.test(message);
}
|
|
2620
|
+
/**
 * Tells whether an attempt outcome is one of the three that this helper treats
 * as retryable: "captcha", "proxy_tunnel_failed" or "proxy_unavailable".
 */
function retryableLocationProxyError(outcome) {
  switch (outcome) {
    case "captcha":
    case "proxy_tunnel_failed":
    case "proxy_unavailable":
      return true;
    default:
      return false;
  }
}
|
|
2623
|
+
function locationMismatchMessage(result) {
|
|
2624
|
+
const evidence = result.diagnostics.debug?.locationEvidence;
|
|
2625
|
+
const expected = evidence?.expected?.canonicalLocation ?? result.location ?? "requested location";
|
|
2626
|
+
const candidates = evidence?.candidates.slice(0, 3).map((candidate) => `${candidate.city}, ${candidate.regionCode}`).join("; ");
|
|
2627
|
+
return candidates ? `Google returned results for ${candidates}, not ${expected}` : `Google returned results for a different location than ${expected}`;
|
|
2628
|
+
}
|
|
2629
|
+
/**
 * True only when the proxy mode is "location" and the result's location
 * evidence (under `diagnostics.debug`) has status "mismatch".
 * The result is not inspected for any other proxy mode.
 */
function shouldRetryLocationMismatch(result, proxyMode) {
  if (proxyMode !== "location") {
    return false;
  }
  const evidenceStatus = result.diagnostics.debug?.locationEvidence?.status;
  return evidenceStatus === "mismatch";
}
|
|
2632
|
+
/**
 * Returns `result` without `diagnostics.debug` unless the caller asked to keep it.
 *
 * When `keepDebug` is truthy, or there is no debug payload, the same object is
 * returned. Otherwise a shallow copy is returned whose `diagnostics` is a shallow
 * copy lacking the `debug` key; the input is not mutated.
 */
function stripInternalDebug(result, keepDebug) {
  if (keepDebug || !result.diagnostics.debug) {
    return result;
  }
  const { debug: _dropped, ...diagnostics } = result.diagnostics;
  return { ...result, diagnostics };
}
|
|
2638
|
+
/**
 * Best-effort removal of a proxy via deleteKernelProxyId.
 *
 * Skipped entirely when either argument is falsy. Success and failure are each
 * reported as one JSON log line containing only the last six characters of the
 * proxy id; a failed delete is logged as a warning and not rethrown.
 *
 * @param kernelApiKey API key forwarded to deleteKernelProxyId.
 * @param proxyId      Id of the proxy to delete.
 */
async function cleanupDisposableProxy(kernelApiKey, proxyId) {
  if (kernelApiKey && proxyId) {
    try {
      await deleteKernelProxyId(kernelApiKey, proxyId);
      const deletedEvent = {
        event: "kernel_proxy_deleted",
        proxy_id_suffix: proxyId.slice(-6)
      };
      console.info(JSON.stringify(deletedEvent));
    } catch (deleteError) {
      const failedEvent = {
        event: "kernel_proxy_delete_failed",
        proxy_id_suffix: proxyId.slice(-6),
        message: errorMessage(deleteError)
      };
      console.warn(JSON.stringify(failedEvent));
    }
  }
}
|
|
2538
2654
|
async function extractOnce(options, signal) {
|
|
2539
2655
|
const driver = new BrowserDriver();
|
|
2540
2656
|
const reporter = new ProgressReporter();
|
|
@@ -2602,26 +2718,35 @@ async function harvest(rawOptions) {
|
|
|
2602
2718
|
proxyZip: typeof raw.proxyZip === "string" ? raw.proxyZip : void 0,
|
|
2603
2719
|
gl: typeof raw.gl === "string" ? raw.gl : "us"
|
|
2604
2720
|
};
|
|
2721
|
+
const requestedDebug = typeof raw.debug === "boolean" ? raw.debug : false;
|
|
2722
|
+
const needsLocationEvidence = proxyMode === "location" && Boolean(proxyOpts.location);
|
|
2723
|
+
const maxAttempts = maxAttemptsForProxyMode(proxyMode);
|
|
2605
2724
|
const serializer = new OutputSerializer();
|
|
2606
|
-
|
|
2725
|
+
let lastError = null;
|
|
2726
|
+
for (let i = 0; i < maxAttempts; i++) {
|
|
2607
2727
|
const attemptNumber = i + 1;
|
|
2608
2728
|
const startedAtMs = Date.now();
|
|
2609
2729
|
try {
|
|
2610
2730
|
if (signal?.aborted) throw abortReason(signal);
|
|
2611
|
-
const resolution2 = await resolveKernelProxyId({
|
|
2731
|
+
const resolution2 = await resolveKernelProxyId({
|
|
2732
|
+
...proxyOpts,
|
|
2733
|
+
attemptIndex: i,
|
|
2734
|
+
fresh: proxyMode === "location"
|
|
2735
|
+
});
|
|
2612
2736
|
const mergedAttempt = {
|
|
2613
2737
|
...raw,
|
|
2614
2738
|
kernelApiKey,
|
|
2615
2739
|
kernelProxyId: resolution2.kernelProxyId,
|
|
2616
2740
|
kernelProxyResolution: resolution2.resolution,
|
|
2617
|
-
proxyMode
|
|
2741
|
+
proxyMode,
|
|
2742
|
+
debug: requestedDebug || needsLocationEvidence
|
|
2618
2743
|
};
|
|
2619
2744
|
if (proxyMode === "none") mergedAttempt.kernelProxyId = void 0;
|
|
2620
2745
|
const attemptOptions = HarvestOptionsSchema.parse(mergedAttempt);
|
|
2621
2746
|
await emitAttemptEvent(onAttemptEvent, {
|
|
2622
2747
|
type: "started",
|
|
2623
2748
|
attemptNumber,
|
|
2624
|
-
maxAttempts
|
|
2749
|
+
maxAttempts,
|
|
2625
2750
|
query: attemptOptions.query,
|
|
2626
2751
|
location: attemptOptions.location ?? null,
|
|
2627
2752
|
maxQuestions: attemptOptions.maxQuestions,
|
|
@@ -2630,7 +2755,7 @@ async function harvest(rawOptions) {
|
|
|
2630
2755
|
console.info(JSON.stringify({
|
|
2631
2756
|
event: "harvest_attempt_started",
|
|
2632
2757
|
attempt_number: attemptNumber,
|
|
2633
|
-
max_attempts:
|
|
2758
|
+
max_attempts: maxAttempts,
|
|
2634
2759
|
query: attemptOptions.query,
|
|
2635
2760
|
location: attemptOptions.location ?? null,
|
|
2636
2761
|
max_questions: attemptOptions.maxQuestions
|
|
@@ -2638,57 +2763,84 @@ async function harvest(rawOptions) {
|
|
|
2638
2763
|
const attempt = await extractOnce(attemptOptions, signal);
|
|
2639
2764
|
if (attempt.error) {
|
|
2640
2765
|
const err = attempt.error;
|
|
2641
|
-
|
|
2642
|
-
|
|
2766
|
+
const outcome = classifyAttemptError(err);
|
|
2767
|
+
const willRetry = i < maxAttempts - 1 && (outcome === "captcha" || proxyMode === "location" && retryableLocationProxyError(outcome));
|
|
2768
|
+
if (outcome === "captcha") {
|
|
2643
2769
|
console.warn(JSON.stringify({
|
|
2644
2770
|
event: "harvest_attempt_captcha",
|
|
2645
2771
|
attempt_number: attemptNumber,
|
|
2646
|
-
max_attempts:
|
|
2647
|
-
message: err
|
|
2772
|
+
max_attempts: maxAttempts,
|
|
2773
|
+
message: errorMessage(err),
|
|
2774
|
+
will_retry: willRetry
|
|
2775
|
+
}));
|
|
2776
|
+
} else if (willRetry) {
|
|
2777
|
+
console.warn(JSON.stringify({
|
|
2778
|
+
event: "harvest_attempt_proxy_retry",
|
|
2779
|
+
attempt_number: attemptNumber,
|
|
2780
|
+
max_attempts: maxAttempts,
|
|
2781
|
+
outcome,
|
|
2782
|
+
message: errorMessage(err),
|
|
2648
2783
|
will_retry: willRetry
|
|
2649
2784
|
}));
|
|
2650
|
-
await emitAttemptEvent(onAttemptEvent, {
|
|
2651
|
-
type: "finished",
|
|
2652
|
-
attemptNumber,
|
|
2653
|
-
maxAttempts: MAX_ATTEMPTS,
|
|
2654
|
-
outcome: "captcha",
|
|
2655
|
-
kernelSessionId: attempt.cleanup.kernelSessionId,
|
|
2656
|
-
questionCount: 0,
|
|
2657
|
-
durationMs: Date.now() - startedAtMs,
|
|
2658
|
-
error: err.message,
|
|
2659
|
-
willRetry,
|
|
2660
|
-
cleanup: attempt.cleanup,
|
|
2661
|
-
debug: attempt.debug,
|
|
2662
|
-
completedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
2663
|
-
});
|
|
2664
|
-
if (willRetry) continue;
|
|
2665
|
-
break;
|
|
2666
2785
|
}
|
|
2667
2786
|
await emitAttemptEvent(onAttemptEvent, {
|
|
2668
2787
|
type: "finished",
|
|
2669
2788
|
attemptNumber,
|
|
2670
|
-
maxAttempts
|
|
2671
|
-
outcome
|
|
2789
|
+
maxAttempts,
|
|
2790
|
+
outcome,
|
|
2672
2791
|
kernelSessionId: attempt.cleanup.kernelSessionId,
|
|
2673
2792
|
questionCount: 0,
|
|
2674
2793
|
durationMs: Date.now() - startedAtMs,
|
|
2675
2794
|
error: errorMessage(err),
|
|
2676
|
-
willRetry
|
|
2795
|
+
willRetry,
|
|
2677
2796
|
cleanup: attempt.cleanup,
|
|
2678
2797
|
debug: attempt.debug,
|
|
2679
2798
|
completedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
2680
2799
|
});
|
|
2681
|
-
|
|
2800
|
+
await cleanupDisposableProxy(kernelApiKey, resolution2.disposableProxyId);
|
|
2801
|
+
lastError = err;
|
|
2802
|
+
if (willRetry) continue;
|
|
2803
|
+
break;
|
|
2682
2804
|
}
|
|
2683
2805
|
const result = attempt.result;
|
|
2684
2806
|
if (!result) throw new Error("Harvest attempt completed without a result");
|
|
2807
|
+
if (shouldRetryLocationMismatch(result, proxyMode)) {
|
|
2808
|
+
const err = new LocationMismatchError(locationMismatchMessage(result));
|
|
2809
|
+
const willRetry = i < maxAttempts - 1;
|
|
2810
|
+
console.warn(JSON.stringify({
|
|
2811
|
+
event: "harvest_attempt_location_mismatch",
|
|
2812
|
+
attempt_number: attemptNumber,
|
|
2813
|
+
max_attempts: maxAttempts,
|
|
2814
|
+
message: err.message,
|
|
2815
|
+
will_retry: willRetry
|
|
2816
|
+
}));
|
|
2817
|
+
await emitAttemptEvent(onAttemptEvent, {
|
|
2818
|
+
type: "finished",
|
|
2819
|
+
attemptNumber,
|
|
2820
|
+
maxAttempts,
|
|
2821
|
+
outcome: "location_mismatch",
|
|
2822
|
+
kernelSessionId: attempt.cleanup.kernelSessionId,
|
|
2823
|
+
questionCount: result.totalQuestions,
|
|
2824
|
+
durationMs: Date.now() - startedAtMs,
|
|
2825
|
+
error: err.message,
|
|
2826
|
+
willRetry,
|
|
2827
|
+
cleanup: attempt.cleanup,
|
|
2828
|
+
debug: attempt.debug,
|
|
2829
|
+
completedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
2830
|
+
});
|
|
2831
|
+
await cleanupDisposableProxy(kernelApiKey, resolution2.disposableProxyId);
|
|
2832
|
+
lastError = err;
|
|
2833
|
+
if (willRetry) continue;
|
|
2834
|
+
break;
|
|
2835
|
+
}
|
|
2836
|
+
const finalResult = stripInternalDebug(result, requestedDebug);
|
|
2685
2837
|
await emitAttemptEvent(onAttemptEvent, {
|
|
2686
2838
|
type: "finished",
|
|
2687
2839
|
attemptNumber,
|
|
2688
|
-
maxAttempts
|
|
2689
|
-
outcome: classifyAttemptResult(
|
|
2840
|
+
maxAttempts,
|
|
2841
|
+
outcome: classifyAttemptResult(finalResult),
|
|
2690
2842
|
kernelSessionId: attempt.cleanup.kernelSessionId,
|
|
2691
|
-
questionCount:
|
|
2843
|
+
questionCount: finalResult.totalQuestions,
|
|
2692
2844
|
durationMs: Date.now() - startedAtMs,
|
|
2693
2845
|
error: null,
|
|
2694
2846
|
willRetry: false,
|
|
@@ -2696,64 +2848,52 @@ async function harvest(rawOptions) {
|
|
|
2696
2848
|
debug: attempt.debug,
|
|
2697
2849
|
completedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
2698
2850
|
});
|
|
2851
|
+
await cleanupDisposableProxy(kernelApiKey, resolution2.disposableProxyId);
|
|
2699
2852
|
if (attemptOptions.format === "json" || attemptOptions.format === "both") {
|
|
2700
|
-
await serializer.writeJSON(
|
|
2853
|
+
await serializer.writeJSON(finalResult, attemptOptions.outputDir);
|
|
2701
2854
|
}
|
|
2702
2855
|
if (attemptOptions.format === "csv" || attemptOptions.format === "both") {
|
|
2703
2856
|
await Promise.all([
|
|
2704
|
-
serializer.writeCSV(
|
|
2705
|
-
|
|
2706
|
-
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
|
|
2857
|
+
serializer.writeCSV(finalResult.flat, attemptOptions.outputDir),
|
|
2858
|
+
finalResult.videos.length > 0 ? serializer.writeVideoCSV(finalResult.videos, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
|
|
2859
|
+
finalResult.forums.length > 0 ? serializer.writeForumCSV(finalResult.forums, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
|
|
2860
|
+
finalResult.aiOverview.detected ? serializer.writeAIOverviewCSV(finalResult.aiOverview.citations, finalResult.aiOverview.text, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
|
|
2861
|
+
finalResult.aiMode.detected ? serializer.writeAIModeCSV(finalResult.aiMode.citations, finalResult.aiMode.text, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
|
|
2862
|
+
finalResult.whatPeopleSaying.length > 0 ? serializer.writeWhatPeopleSayingCSV(finalResult.whatPeopleSaying, finalResult.seed, attemptOptions.outputDir) : Promise.resolve("")
|
|
2710
2863
|
]);
|
|
2711
2864
|
}
|
|
2712
|
-
return
|
|
2865
|
+
return finalResult;
|
|
2713
2866
|
} catch (err) {
|
|
2714
|
-
|
|
2715
|
-
|
|
2867
|
+
const outcome = classifyAttemptError(err);
|
|
2868
|
+
const willRetry = i < maxAttempts - 1 && (outcome === "captcha" || proxyMode === "location" && retryableLocationProxyError(outcome));
|
|
2869
|
+
if (outcome === "captcha") {
|
|
2716
2870
|
console.warn(JSON.stringify({
|
|
2717
2871
|
event: "harvest_attempt_captcha",
|
|
2718
2872
|
attempt_number: attemptNumber,
|
|
2719
|
-
max_attempts:
|
|
2720
|
-
message: err
|
|
2873
|
+
max_attempts: maxAttempts,
|
|
2874
|
+
message: errorMessage(err),
|
|
2875
|
+
will_retry: willRetry
|
|
2876
|
+
}));
|
|
2877
|
+
} else if (willRetry) {
|
|
2878
|
+
console.warn(JSON.stringify({
|
|
2879
|
+
event: "harvest_attempt_proxy_retry",
|
|
2880
|
+
attempt_number: attemptNumber,
|
|
2881
|
+
max_attempts: maxAttempts,
|
|
2882
|
+
outcome,
|
|
2883
|
+
message: errorMessage(err),
|
|
2721
2884
|
will_retry: willRetry
|
|
2722
2885
|
}));
|
|
2723
|
-
await emitAttemptEvent(onAttemptEvent, {
|
|
2724
|
-
type: "finished",
|
|
2725
|
-
attemptNumber,
|
|
2726
|
-
maxAttempts: MAX_ATTEMPTS,
|
|
2727
|
-
outcome: "captcha",
|
|
2728
|
-
kernelSessionId: null,
|
|
2729
|
-
questionCount: 0,
|
|
2730
|
-
durationMs: Date.now() - startedAtMs,
|
|
2731
|
-
error: err.message,
|
|
2732
|
-
willRetry,
|
|
2733
|
-
cleanup: {
|
|
2734
|
-
kernelSessionId: null,
|
|
2735
|
-
kernelDeleteStarted: false,
|
|
2736
|
-
kernelDeleteSucceeded: null,
|
|
2737
|
-
kernelDeleteError: null,
|
|
2738
|
-
browserCloseSucceeded: null,
|
|
2739
|
-
browserCloseError: null
|
|
2740
|
-
},
|
|
2741
|
-
debug: null,
|
|
2742
|
-
completedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
2743
|
-
});
|
|
2744
|
-
if (willRetry) continue;
|
|
2745
|
-
break;
|
|
2746
2886
|
}
|
|
2747
2887
|
await emitAttemptEvent(onAttemptEvent, {
|
|
2748
2888
|
type: "finished",
|
|
2749
2889
|
attemptNumber,
|
|
2750
|
-
maxAttempts
|
|
2751
|
-
outcome
|
|
2890
|
+
maxAttempts,
|
|
2891
|
+
outcome,
|
|
2752
2892
|
kernelSessionId: null,
|
|
2753
2893
|
questionCount: 0,
|
|
2754
2894
|
durationMs: Date.now() - startedAtMs,
|
|
2755
2895
|
error: errorMessage(err),
|
|
2756
|
-
willRetry
|
|
2896
|
+
willRetry,
|
|
2757
2897
|
cleanup: {
|
|
2758
2898
|
kernelSessionId: null,
|
|
2759
2899
|
kernelDeleteStarted: false,
|
|
@@ -2765,15 +2905,19 @@ async function harvest(rawOptions) {
|
|
|
2765
2905
|
debug: null,
|
|
2766
2906
|
completedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
2767
2907
|
});
|
|
2908
|
+
lastError = err;
|
|
2909
|
+
if (willRetry) continue;
|
|
2910
|
+
if (outcome === "captcha") break;
|
|
2768
2911
|
throw err;
|
|
2769
2912
|
}
|
|
2770
2913
|
}
|
|
2914
|
+
if (lastError && !(lastError instanceof CaptchaError)) throw lastError;
|
|
2771
2915
|
console.warn(JSON.stringify({
|
|
2772
2916
|
event: "harvest_captcha_exhausted",
|
|
2773
|
-
max_attempts:
|
|
2917
|
+
max_attempts: maxAttempts,
|
|
2774
2918
|
session_kind: kernelApiKey ? "kernel" : "local"
|
|
2775
2919
|
}));
|
|
2776
|
-
throw new CaptchaError(sanitizeVendorName(`CAPTCHA on all ${
|
|
2920
|
+
throw new CaptchaError(sanitizeVendorName(`CAPTCHA on all ${maxAttempts} fresh sessions. Try again in a few minutes.`));
|
|
2777
2921
|
}
|
|
2778
2922
|
|
|
2779
2923
|
export {
|
|
@@ -2788,7 +2932,8 @@ export {
|
|
|
2788
2932
|
MapsSelectors,
|
|
2789
2933
|
buildYouTubeChannelVideosUrl,
|
|
2790
2934
|
BrowserDriver,
|
|
2935
|
+
deleteKernelProxyId,
|
|
2791
2936
|
resolveKernelProxyId,
|
|
2792
2937
|
harvest
|
|
2793
2938
|
};
|
|
2794
|
-
//# sourceMappingURL=chunk-
|
|
2939
|
+
//# sourceMappingURL=chunk-MY3S7EX7.js.map
|