mcp-scraper 0.1.9 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/README.md +74 -8
  2. package/dist/bin/api-server.cjs +5615 -3733
  3. package/dist/bin/api-server.cjs.map +1 -1
  4. package/dist/bin/api-server.js +2 -2
  5. package/dist/bin/browser-agent-stdio-server.cjs +391 -0
  6. package/dist/bin/browser-agent-stdio-server.cjs.map +1 -0
  7. package/dist/bin/browser-agent-stdio-server.d.cts +1 -0
  8. package/dist/bin/browser-agent-stdio-server.d.ts +1 -0
  9. package/dist/bin/browser-agent-stdio-server.js +390 -0
  10. package/dist/bin/browser-agent-stdio-server.js.map +1 -0
  11. package/dist/bin/mcp-stdio-server.cjs +170 -12
  12. package/dist/bin/mcp-stdio-server.cjs.map +1 -1
  13. package/dist/bin/mcp-stdio-server.js +3 -2
  14. package/dist/bin/mcp-stdio-server.js.map +1 -1
  15. package/dist/bin/paa-harvest.cjs +223 -74
  16. package/dist/bin/paa-harvest.cjs.map +1 -1
  17. package/dist/bin/paa-harvest.js +2 -2
  18. package/dist/{chunk-ZK456YXN.js → chunk-IQOCZGJJ.js} +58 -4
  19. package/dist/chunk-IQOCZGJJ.js.map +1 -0
  20. package/dist/{chunk-ZMOWIBMK.js → chunk-M2S27J6Z.js} +9 -2
  21. package/dist/{chunk-ZMOWIBMK.js.map → chunk-M2S27J6Z.js.map} +1 -1
  22. package/dist/{chunk-TM22BLWP.js → chunk-MY3S7EX7.js} +221 -76
  23. package/dist/chunk-MY3S7EX7.js.map +1 -0
  24. package/dist/{chunk-JNC32DMS.js → chunk-OR7DLLH2.js} +175 -16
  25. package/dist/chunk-OR7DLLH2.js.map +1 -0
  26. package/dist/chunk-XR65SANX.js +7 -0
  27. package/dist/chunk-XR65SANX.js.map +1 -0
  28. package/dist/index.cjs +223 -74
  29. package/dist/index.cjs.map +1 -1
  30. package/dist/index.d.cts +1 -0
  31. package/dist/index.d.ts +1 -0
  32. package/dist/index.js +2 -2
  33. package/dist/{server-MTXAJG5J.js → server-CJMX2QUM.js} +1655 -194
  34. package/dist/server-CJMX2QUM.js.map +1 -0
  35. package/dist/{worker-AUCXFHEL.js → worker-NAKGTIF5.js} +4 -4
  36. package/docs/specs/api-forge-spec.md +234 -0
  37. package/docs/specs/deferred-work-spec.md +74 -0
  38. package/docs/specs/oauth-mcp-spec.md +213 -0
  39. package/package.json +3 -2
  40. package/dist/chunk-JNC32DMS.js.map +0 -1
  41. package/dist/chunk-TM22BLWP.js.map +0 -1
  42. package/dist/chunk-ZK456YXN.js.map +0 -1
  43. package/dist/server-MTXAJG5J.js.map +0 -1
  44. package/dist/{worker-AUCXFHEL.js.map → worker-NAKGTIF5.js.map} +0 -0
@@ -1,10 +1,11 @@
1
1
  import {
2
2
  CaptchaError,
3
3
  ExtractionError,
4
+ LocationMismatchError,
4
5
  RECAPTCHA_INSTRUCTIONS,
5
6
  RequestAbortedError,
6
7
  sanitizeVendorName
7
- } from "./chunk-ZMOWIBMK.js";
8
+ } from "./chunk-M2S27J6Z.js";
8
9
 
9
10
  // src/lib/browser-service-env.ts
10
11
  function browserServiceApiKey() {
@@ -57,8 +58,12 @@ var MapsSearchOptionsSchema = z.object({
57
58
  gl: z.string().length(2).default("us"),
58
59
  hl: z.string().length(2).default("en"),
59
60
  maxResults: z.number().int().min(1).max(50).default(10),
61
+ proxyMode: z.enum(["location", "configured", "none"]).default("location"),
62
+ proxyZip: z.string().regex(/^\d{5}$/).optional(),
63
+ debug: z.boolean().default(false),
60
64
  kernelApiKey: z.string().optional(),
61
65
  kernelProxyId: z.string().optional(),
66
+ kernelProxyResolution: z.unknown().optional(),
62
67
  headless: z.boolean().default(true)
63
68
  });
64
69
  var RawPAAItemSchema = z.object({
@@ -2232,16 +2237,18 @@ var US_CITY_CENTER_ZIPS = {
2232
2237
  function proxyIdSuffix2(proxyId) {
2233
2238
  return proxyId ? proxyId.slice(-6) : null;
2234
2239
  }
2235
- function resolution(source, proxyMode, proxyId, target, error) {
2240
+ function resolution(source, proxyMode, proxyId, target, error, disposable = false) {
2236
2241
  return {
2237
2242
  kernelProxyId: proxyId,
2243
+ ...disposable && proxyId ? { disposableProxyId: proxyId } : {},
2238
2244
  resolution: {
2239
2245
  source,
2240
2246
  proxyMode,
2241
2247
  proxyIdPresent: Boolean(proxyId),
2242
2248
  proxyIdSuffix: proxyIdSuffix2(proxyId),
2243
2249
  target,
2244
- error
2250
+ error,
2251
+ disposable
2245
2252
  }
2246
2253
  };
2247
2254
  }
@@ -2271,6 +2278,10 @@ function kernelCityIdentifierCandidates(city) {
2271
2278
  function proxyName(country, state, city) {
2272
2279
  return city ? `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}-${city}` : `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}`;
2273
2280
  }
2281
+ function freshProxyName(baseName, attemptIndex) {
2282
+ const stamp = `${Date.now()}-${attemptIndex ?? 0}-${Math.random().toString(36).slice(2, 8)}`;
2283
+ return `${baseName}-fresh-${stamp}`;
2284
+ }
2274
2285
  function zipProxyName(zip) {
2275
2286
  return `mcp-serp-residential-us-zip-${zip}`;
2276
2287
  }
@@ -2340,6 +2351,12 @@ function zipTarget(target, zip) {
2340
2351
  }
2341
2352
  };
2342
2353
  }
2354
+ function withProxyName(target, name) {
2355
+ return {
2356
+ ...target,
2357
+ proxyName: name
2358
+ };
2359
+ }
2343
2360
  function configMatches(config, target, city) {
2344
2361
  if (target.level === "zip") {
2345
2362
  return config?.country?.toUpperCase() === target.country && config?.zip === target.zip;
@@ -2378,6 +2395,55 @@ function escalatedTargetLevel(target, attemptIndex) {
2378
2395
  function errorText2(err) {
2379
2396
  return err instanceof Error ? err.message : String(err);
2380
2397
  }
2398
+ function freshTargetCandidates(target, explicitZip, attemptIndex) {
2399
+ const out = [];
2400
+ const zip = knownZipFor(target, explicitZip);
2401
+ if (zip) {
2402
+ const targetZip = zipTarget(target, zip);
2403
+ out.push(withProxyName(targetZip, freshProxyName(targetZip.proxyName, attemptIndex)));
2404
+ }
2405
+ for (const city of target.cityCandidates) {
2406
+ const cityTarget = {
2407
+ ...target,
2408
+ level: "city",
2409
+ city,
2410
+ proxyName: proxyName(target.country, target.state, city),
2411
+ config: {
2412
+ country: target.country,
2413
+ state: target.state,
2414
+ city
2415
+ }
2416
+ };
2417
+ out.push(withProxyName(cityTarget, freshProxyName(cityTarget.proxyName, attemptIndex)));
2418
+ }
2419
+ const fallbackTarget = stateTarget(target);
2420
+ out.push(withProxyName(fallbackTarget, freshProxyName(fallbackTarget.proxyName, attemptIndex)));
2421
+ return out;
2422
+ }
2423
+ async function createFreshLocationProxy(kernel, options, target) {
2424
+ const createErrors = [];
2425
+ for (const candidate of freshTargetCandidates(target, options.proxyZip, options.attemptIndex)) {
2426
+ try {
2427
+ const created = await kernel.proxies.create({
2428
+ type: "residential",
2429
+ name: candidate.proxyName,
2430
+ config: candidate.level === "zip" ? { country: candidate.country, zip: candidate.zip } : candidate.config
2431
+ });
2432
+ if (created.id) {
2433
+ return resolution("location_created", options.proxyMode, created.id, candidate, null, true);
2434
+ }
2435
+ createErrors.push(`${candidate.proxyName}: Kernel did not return a proxy id`);
2436
+ } catch (err) {
2437
+ createErrors.push(`${candidate.proxyName}: ${errorText2(err)}`);
2438
+ }
2439
+ }
2440
+ return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, createErrors.join(" | "));
2441
+ }
2442
+ async function deleteKernelProxyId(kernelApiKey, proxyId) {
2443
+ if (!kernelApiKey || !proxyId) return;
2444
+ const kernel = new Kernel2({ apiKey: kernelApiKey });
2445
+ await kernel.proxies.delete(proxyId);
2446
+ }
2381
2447
  async function resolveKernelProxyId(options) {
2382
2448
  if (options.proxyMode === "none") {
2383
2449
  return resolution("disabled", options.proxyMode, void 0, null, null);
@@ -2392,6 +2458,9 @@ async function resolveKernelProxyId(options) {
2392
2458
  const kernel = new Kernel2({ apiKey: options.kernelApiKey });
2393
2459
  try {
2394
2460
  const attemptIndex = options.attemptIndex ?? 0;
2461
+ if (options.fresh) {
2462
+ return await createFreshLocationProxy(kernel, options, target);
2463
+ }
2395
2464
  if (attemptIndex >= 1) {
2396
2465
  const escalatedTarget = escalatedTargetLevel(target, attemptIndex);
2397
2466
  const createErrors2 = [];
@@ -2495,6 +2564,7 @@ async function resolveKernelProxyId(options) {
2495
2564
 
2496
2565
  // src/harvest.ts
2497
2566
  var MAX_ATTEMPTS = 3;
2567
+ var LOCATION_PROXY_MAX_ATTEMPTS = 5;
2498
2568
  function abortReason(signal) {
2499
2569
  if (signal.reason instanceof DOMException && signal.reason.name === "TimeoutError") return signal.reason;
2500
2570
  return new RequestAbortedError();
@@ -2524,9 +2594,12 @@ async function emitAttemptEvent(sink, event) {
2524
2594
  }
2525
2595
  function classifyAttemptError(err) {
2526
2596
  if (err instanceof CaptchaError) return "captcha";
2597
+ if (err instanceof LocationMismatchError) return "location_mismatch";
2527
2598
  if (err instanceof RequestAbortedError) return "request_aborted";
2528
2599
  if (err instanceof DOMException && (err.name === "TimeoutError" || err.name === "AbortError")) return "timeout";
2529
2600
  const message = err instanceof Error ? err.message : String(err);
2601
+ if (looksLikeProxyTunnelFailure(message)) return "proxy_tunnel_failed";
2602
+ if (looksLikeProxyUnavailable(message)) return "proxy_unavailable";
2530
2603
  return /timeout|timed out|Timeout \d+ms exceeded|deadline/i.test(message) ? "timeout" : "error";
2531
2604
  }
2532
2605
  function classifyAttemptResult(result) {
@@ -2535,6 +2608,49 @@ function classifyAttemptResult(result) {
2535
2608
  function errorMessage(err) {
2536
2609
  return err instanceof Error ? err.message : String(err);
2537
2610
  }
2611
+ function maxAttemptsForProxyMode(proxyMode) {
2612
+ return proxyMode === "location" ? LOCATION_PROXY_MAX_ATTEMPTS : MAX_ATTEMPTS;
2613
+ }
2614
+ function looksLikeProxyTunnelFailure(message) {
2615
+ return /ERR_TUNNEL_CONNECTION_FAILED|ERR_PROXY_CONNECTION_FAILED|ERR_SOCKS_CONNECTION_FAILED|tunnel connection failed|proxy connection failed|transport error: proxy/i.test(message);
2616
+ }
2617
+ function looksLikeProxyUnavailable(message) {
2618
+ return /proxy unavailable|proxy_unavailable|connection_test_failed|did not return a proxy id|configured fallback/i.test(message);
2619
+ }
2620
+ function retryableLocationProxyError(outcome) {
2621
+ return outcome === "captcha" || outcome === "proxy_tunnel_failed" || outcome === "proxy_unavailable";
2622
+ }
2623
+ function locationMismatchMessage(result) {
2624
+ const evidence = result.diagnostics.debug?.locationEvidence;
2625
+ const expected = evidence?.expected?.canonicalLocation ?? result.location ?? "requested location";
2626
+ const candidates = evidence?.candidates.slice(0, 3).map((candidate) => `${candidate.city}, ${candidate.regionCode}`).join("; ");
2627
+ return candidates ? `Google returned results for ${candidates}, not ${expected}` : `Google returned results for a different location than ${expected}`;
2628
+ }
2629
+ function shouldRetryLocationMismatch(result, proxyMode) {
2630
+ return proxyMode === "location" && result.diagnostics.debug?.locationEvidence?.status === "mismatch";
2631
+ }
2632
+ function stripInternalDebug(result, keepDebug) {
2633
+ if (keepDebug || !result.diagnostics.debug) return result;
2634
+ const diagnostics = { ...result.diagnostics };
2635
+ delete diagnostics.debug;
2636
+ return { ...result, diagnostics };
2637
+ }
2638
+ async function cleanupDisposableProxy(kernelApiKey, proxyId) {
2639
+ if (!kernelApiKey || !proxyId) return;
2640
+ try {
2641
+ await deleteKernelProxyId(kernelApiKey, proxyId);
2642
+ console.info(JSON.stringify({
2643
+ event: "kernel_proxy_deleted",
2644
+ proxy_id_suffix: proxyId.slice(-6)
2645
+ }));
2646
+ } catch (err) {
2647
+ console.warn(JSON.stringify({
2648
+ event: "kernel_proxy_delete_failed",
2649
+ proxy_id_suffix: proxyId.slice(-6),
2650
+ message: errorMessage(err)
2651
+ }));
2652
+ }
2653
+ }
2538
2654
  async function extractOnce(options, signal) {
2539
2655
  const driver = new BrowserDriver();
2540
2656
  const reporter = new ProgressReporter();
@@ -2602,26 +2718,35 @@ async function harvest(rawOptions) {
2602
2718
  proxyZip: typeof raw.proxyZip === "string" ? raw.proxyZip : void 0,
2603
2719
  gl: typeof raw.gl === "string" ? raw.gl : "us"
2604
2720
  };
2721
+ const requestedDebug = typeof raw.debug === "boolean" ? raw.debug : false;
2722
+ const needsLocationEvidence = proxyMode === "location" && Boolean(proxyOpts.location);
2723
+ const maxAttempts = maxAttemptsForProxyMode(proxyMode);
2605
2724
  const serializer = new OutputSerializer();
2606
- for (let i = 0; i < MAX_ATTEMPTS; i++) {
2725
+ let lastError = null;
2726
+ for (let i = 0; i < maxAttempts; i++) {
2607
2727
  const attemptNumber = i + 1;
2608
2728
  const startedAtMs = Date.now();
2609
2729
  try {
2610
2730
  if (signal?.aborted) throw abortReason(signal);
2611
- const resolution2 = await resolveKernelProxyId({ ...proxyOpts, attemptIndex: i });
2731
+ const resolution2 = await resolveKernelProxyId({
2732
+ ...proxyOpts,
2733
+ attemptIndex: i,
2734
+ fresh: proxyMode === "location"
2735
+ });
2612
2736
  const mergedAttempt = {
2613
2737
  ...raw,
2614
2738
  kernelApiKey,
2615
2739
  kernelProxyId: resolution2.kernelProxyId,
2616
2740
  kernelProxyResolution: resolution2.resolution,
2617
- proxyMode
2741
+ proxyMode,
2742
+ debug: requestedDebug || needsLocationEvidence
2618
2743
  };
2619
2744
  if (proxyMode === "none") mergedAttempt.kernelProxyId = void 0;
2620
2745
  const attemptOptions = HarvestOptionsSchema.parse(mergedAttempt);
2621
2746
  await emitAttemptEvent(onAttemptEvent, {
2622
2747
  type: "started",
2623
2748
  attemptNumber,
2624
- maxAttempts: MAX_ATTEMPTS,
2749
+ maxAttempts,
2625
2750
  query: attemptOptions.query,
2626
2751
  location: attemptOptions.location ?? null,
2627
2752
  maxQuestions: attemptOptions.maxQuestions,
@@ -2630,7 +2755,7 @@ async function harvest(rawOptions) {
2630
2755
  console.info(JSON.stringify({
2631
2756
  event: "harvest_attempt_started",
2632
2757
  attempt_number: attemptNumber,
2633
- max_attempts: MAX_ATTEMPTS,
2758
+ max_attempts: maxAttempts,
2634
2759
  query: attemptOptions.query,
2635
2760
  location: attemptOptions.location ?? null,
2636
2761
  max_questions: attemptOptions.maxQuestions
@@ -2638,57 +2763,84 @@ async function harvest(rawOptions) {
2638
2763
  const attempt = await extractOnce(attemptOptions, signal);
2639
2764
  if (attempt.error) {
2640
2765
  const err = attempt.error;
2641
- if (err instanceof CaptchaError) {
2642
- const willRetry = i < MAX_ATTEMPTS - 1;
2766
+ const outcome = classifyAttemptError(err);
2767
+ const willRetry = i < maxAttempts - 1 && (outcome === "captcha" || proxyMode === "location" && retryableLocationProxyError(outcome));
2768
+ if (outcome === "captcha") {
2643
2769
  console.warn(JSON.stringify({
2644
2770
  event: "harvest_attempt_captcha",
2645
2771
  attempt_number: attemptNumber,
2646
- max_attempts: MAX_ATTEMPTS,
2647
- message: err.message,
2772
+ max_attempts: maxAttempts,
2773
+ message: errorMessage(err),
2774
+ will_retry: willRetry
2775
+ }));
2776
+ } else if (willRetry) {
2777
+ console.warn(JSON.stringify({
2778
+ event: "harvest_attempt_proxy_retry",
2779
+ attempt_number: attemptNumber,
2780
+ max_attempts: maxAttempts,
2781
+ outcome,
2782
+ message: errorMessage(err),
2648
2783
  will_retry: willRetry
2649
2784
  }));
2650
- await emitAttemptEvent(onAttemptEvent, {
2651
- type: "finished",
2652
- attemptNumber,
2653
- maxAttempts: MAX_ATTEMPTS,
2654
- outcome: "captcha",
2655
- kernelSessionId: attempt.cleanup.kernelSessionId,
2656
- questionCount: 0,
2657
- durationMs: Date.now() - startedAtMs,
2658
- error: err.message,
2659
- willRetry,
2660
- cleanup: attempt.cleanup,
2661
- debug: attempt.debug,
2662
- completedAt: (/* @__PURE__ */ new Date()).toISOString()
2663
- });
2664
- if (willRetry) continue;
2665
- break;
2666
2785
  }
2667
2786
  await emitAttemptEvent(onAttemptEvent, {
2668
2787
  type: "finished",
2669
2788
  attemptNumber,
2670
- maxAttempts: MAX_ATTEMPTS,
2671
- outcome: classifyAttemptError(err),
2789
+ maxAttempts,
2790
+ outcome,
2672
2791
  kernelSessionId: attempt.cleanup.kernelSessionId,
2673
2792
  questionCount: 0,
2674
2793
  durationMs: Date.now() - startedAtMs,
2675
2794
  error: errorMessage(err),
2676
- willRetry: false,
2795
+ willRetry,
2677
2796
  cleanup: attempt.cleanup,
2678
2797
  debug: attempt.debug,
2679
2798
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
2680
2799
  });
2681
- throw err;
2800
+ await cleanupDisposableProxy(kernelApiKey, resolution2.disposableProxyId);
2801
+ lastError = err;
2802
+ if (willRetry) continue;
2803
+ break;
2682
2804
  }
2683
2805
  const result = attempt.result;
2684
2806
  if (!result) throw new Error("Harvest attempt completed without a result");
2807
+ if (shouldRetryLocationMismatch(result, proxyMode)) {
2808
+ const err = new LocationMismatchError(locationMismatchMessage(result));
2809
+ const willRetry = i < maxAttempts - 1;
2810
+ console.warn(JSON.stringify({
2811
+ event: "harvest_attempt_location_mismatch",
2812
+ attempt_number: attemptNumber,
2813
+ max_attempts: maxAttempts,
2814
+ message: err.message,
2815
+ will_retry: willRetry
2816
+ }));
2817
+ await emitAttemptEvent(onAttemptEvent, {
2818
+ type: "finished",
2819
+ attemptNumber,
2820
+ maxAttempts,
2821
+ outcome: "location_mismatch",
2822
+ kernelSessionId: attempt.cleanup.kernelSessionId,
2823
+ questionCount: result.totalQuestions,
2824
+ durationMs: Date.now() - startedAtMs,
2825
+ error: err.message,
2826
+ willRetry,
2827
+ cleanup: attempt.cleanup,
2828
+ debug: attempt.debug,
2829
+ completedAt: (/* @__PURE__ */ new Date()).toISOString()
2830
+ });
2831
+ await cleanupDisposableProxy(kernelApiKey, resolution2.disposableProxyId);
2832
+ lastError = err;
2833
+ if (willRetry) continue;
2834
+ break;
2835
+ }
2836
+ const finalResult = stripInternalDebug(result, requestedDebug);
2685
2837
  await emitAttemptEvent(onAttemptEvent, {
2686
2838
  type: "finished",
2687
2839
  attemptNumber,
2688
- maxAttempts: MAX_ATTEMPTS,
2689
- outcome: classifyAttemptResult(result),
2840
+ maxAttempts,
2841
+ outcome: classifyAttemptResult(finalResult),
2690
2842
  kernelSessionId: attempt.cleanup.kernelSessionId,
2691
- questionCount: result.totalQuestions,
2843
+ questionCount: finalResult.totalQuestions,
2692
2844
  durationMs: Date.now() - startedAtMs,
2693
2845
  error: null,
2694
2846
  willRetry: false,
@@ -2696,64 +2848,52 @@ async function harvest(rawOptions) {
2696
2848
  debug: attempt.debug,
2697
2849
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
2698
2850
  });
2851
+ await cleanupDisposableProxy(kernelApiKey, resolution2.disposableProxyId);
2699
2852
  if (attemptOptions.format === "json" || attemptOptions.format === "both") {
2700
- await serializer.writeJSON(result, attemptOptions.outputDir);
2853
+ await serializer.writeJSON(finalResult, attemptOptions.outputDir);
2701
2854
  }
2702
2855
  if (attemptOptions.format === "csv" || attemptOptions.format === "both") {
2703
2856
  await Promise.all([
2704
- serializer.writeCSV(result.flat, attemptOptions.outputDir),
2705
- result.videos.length > 0 ? serializer.writeVideoCSV(result.videos, result.seed, attemptOptions.outputDir) : Promise.resolve(""),
2706
- result.forums.length > 0 ? serializer.writeForumCSV(result.forums, result.seed, attemptOptions.outputDir) : Promise.resolve(""),
2707
- result.aiOverview.detected ? serializer.writeAIOverviewCSV(result.aiOverview.citations, result.aiOverview.text, result.seed, attemptOptions.outputDir) : Promise.resolve(""),
2708
- result.aiMode.detected ? serializer.writeAIModeCSV(result.aiMode.citations, result.aiMode.text, result.seed, attemptOptions.outputDir) : Promise.resolve(""),
2709
- result.whatPeopleSaying.length > 0 ? serializer.writeWhatPeopleSayingCSV(result.whatPeopleSaying, result.seed, attemptOptions.outputDir) : Promise.resolve("")
2857
+ serializer.writeCSV(finalResult.flat, attemptOptions.outputDir),
2858
+ finalResult.videos.length > 0 ? serializer.writeVideoCSV(finalResult.videos, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
2859
+ finalResult.forums.length > 0 ? serializer.writeForumCSV(finalResult.forums, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
2860
+ finalResult.aiOverview.detected ? serializer.writeAIOverviewCSV(finalResult.aiOverview.citations, finalResult.aiOverview.text, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
2861
+ finalResult.aiMode.detected ? serializer.writeAIModeCSV(finalResult.aiMode.citations, finalResult.aiMode.text, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
2862
+ finalResult.whatPeopleSaying.length > 0 ? serializer.writeWhatPeopleSayingCSV(finalResult.whatPeopleSaying, finalResult.seed, attemptOptions.outputDir) : Promise.resolve("")
2710
2863
  ]);
2711
2864
  }
2712
- return result;
2865
+ return finalResult;
2713
2866
  } catch (err) {
2714
- if (err instanceof CaptchaError) {
2715
- const willRetry = i < MAX_ATTEMPTS - 1;
2867
+ const outcome = classifyAttemptError(err);
2868
+ const willRetry = i < maxAttempts - 1 && (outcome === "captcha" || proxyMode === "location" && retryableLocationProxyError(outcome));
2869
+ if (outcome === "captcha") {
2716
2870
  console.warn(JSON.stringify({
2717
2871
  event: "harvest_attempt_captcha",
2718
2872
  attempt_number: attemptNumber,
2719
- max_attempts: MAX_ATTEMPTS,
2720
- message: err.message,
2873
+ max_attempts: maxAttempts,
2874
+ message: errorMessage(err),
2875
+ will_retry: willRetry
2876
+ }));
2877
+ } else if (willRetry) {
2878
+ console.warn(JSON.stringify({
2879
+ event: "harvest_attempt_proxy_retry",
2880
+ attempt_number: attemptNumber,
2881
+ max_attempts: maxAttempts,
2882
+ outcome,
2883
+ message: errorMessage(err),
2721
2884
  will_retry: willRetry
2722
2885
  }));
2723
- await emitAttemptEvent(onAttemptEvent, {
2724
- type: "finished",
2725
- attemptNumber,
2726
- maxAttempts: MAX_ATTEMPTS,
2727
- outcome: "captcha",
2728
- kernelSessionId: null,
2729
- questionCount: 0,
2730
- durationMs: Date.now() - startedAtMs,
2731
- error: err.message,
2732
- willRetry,
2733
- cleanup: {
2734
- kernelSessionId: null,
2735
- kernelDeleteStarted: false,
2736
- kernelDeleteSucceeded: null,
2737
- kernelDeleteError: null,
2738
- browserCloseSucceeded: null,
2739
- browserCloseError: null
2740
- },
2741
- debug: null,
2742
- completedAt: (/* @__PURE__ */ new Date()).toISOString()
2743
- });
2744
- if (willRetry) continue;
2745
- break;
2746
2886
  }
2747
2887
  await emitAttemptEvent(onAttemptEvent, {
2748
2888
  type: "finished",
2749
2889
  attemptNumber,
2750
- maxAttempts: MAX_ATTEMPTS,
2751
- outcome: classifyAttemptError(err),
2890
+ maxAttempts,
2891
+ outcome,
2752
2892
  kernelSessionId: null,
2753
2893
  questionCount: 0,
2754
2894
  durationMs: Date.now() - startedAtMs,
2755
2895
  error: errorMessage(err),
2756
- willRetry: false,
2896
+ willRetry,
2757
2897
  cleanup: {
2758
2898
  kernelSessionId: null,
2759
2899
  kernelDeleteStarted: false,
@@ -2765,15 +2905,19 @@ async function harvest(rawOptions) {
2765
2905
  debug: null,
2766
2906
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
2767
2907
  });
2908
+ lastError = err;
2909
+ if (willRetry) continue;
2910
+ if (outcome === "captcha") break;
2768
2911
  throw err;
2769
2912
  }
2770
2913
  }
2914
+ if (lastError && !(lastError instanceof CaptchaError)) throw lastError;
2771
2915
  console.warn(JSON.stringify({
2772
2916
  event: "harvest_captcha_exhausted",
2773
- max_attempts: MAX_ATTEMPTS,
2917
+ max_attempts: maxAttempts,
2774
2918
  session_kind: kernelApiKey ? "kernel" : "local"
2775
2919
  }));
2776
- throw new CaptchaError(sanitizeVendorName(`CAPTCHA on all ${MAX_ATTEMPTS} fresh sessions. Try again in a few minutes.`));
2920
+ throw new CaptchaError(sanitizeVendorName(`CAPTCHA on all ${maxAttempts} fresh sessions. Try again in a few minutes.`));
2777
2921
  }
2778
2922
 
2779
2923
  export {
@@ -2788,7 +2932,8 @@ export {
2788
2932
  MapsSelectors,
2789
2933
  buildYouTubeChannelVideosUrl,
2790
2934
  BrowserDriver,
2935
+ deleteKernelProxyId,
2791
2936
  resolveKernelProxyId,
2792
2937
  harvest
2793
2938
  };
2794
- //# sourceMappingURL=chunk-TM22BLWP.js.map
2939
+ //# sourceMappingURL=chunk-MY3S7EX7.js.map