mcp-scraper 0.1.9 → 0.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (44)
  1. package/README.md +74 -8
  2. package/dist/bin/api-server.cjs +5615 -3733
  3. package/dist/bin/api-server.cjs.map +1 -1
  4. package/dist/bin/api-server.js +2 -2
  5. package/dist/bin/browser-agent-stdio-server.cjs +391 -0
  6. package/dist/bin/browser-agent-stdio-server.cjs.map +1 -0
  7. package/dist/bin/browser-agent-stdio-server.d.cts +1 -0
  8. package/dist/bin/browser-agent-stdio-server.d.ts +1 -0
  9. package/dist/bin/browser-agent-stdio-server.js +390 -0
  10. package/dist/bin/browser-agent-stdio-server.js.map +1 -0
  11. package/dist/bin/mcp-stdio-server.cjs +170 -12
  12. package/dist/bin/mcp-stdio-server.cjs.map +1 -1
  13. package/dist/bin/mcp-stdio-server.js +3 -2
  14. package/dist/bin/mcp-stdio-server.js.map +1 -1
  15. package/dist/bin/paa-harvest.cjs +223 -74
  16. package/dist/bin/paa-harvest.cjs.map +1 -1
  17. package/dist/bin/paa-harvest.js +2 -2
  18. package/dist/{chunk-ZK456YXN.js → chunk-IQOCZGJJ.js} +58 -4
  19. package/dist/chunk-IQOCZGJJ.js.map +1 -0
  20. package/dist/{chunk-ZMOWIBMK.js → chunk-M2S27J6Z.js} +9 -2
  21. package/dist/{chunk-ZMOWIBMK.js.map → chunk-M2S27J6Z.js.map} +1 -1
  22. package/dist/{chunk-TM22BLWP.js → chunk-MY3S7EX7.js} +221 -76
  23. package/dist/chunk-MY3S7EX7.js.map +1 -0
  24. package/dist/{chunk-JNC32DMS.js → chunk-OR7DLLH2.js} +175 -16
  25. package/dist/chunk-OR7DLLH2.js.map +1 -0
  26. package/dist/chunk-XR65SANX.js +7 -0
  27. package/dist/chunk-XR65SANX.js.map +1 -0
  28. package/dist/index.cjs +223 -74
  29. package/dist/index.cjs.map +1 -1
  30. package/dist/index.d.cts +1 -0
  31. package/dist/index.d.ts +1 -0
  32. package/dist/index.js +2 -2
  33. package/dist/{server-MTXAJG5J.js → server-CJMX2QUM.js} +1655 -194
  34. package/dist/server-CJMX2QUM.js.map +1 -0
  35. package/dist/{worker-AUCXFHEL.js → worker-NAKGTIF5.js} +4 -4
  36. package/docs/specs/api-forge-spec.md +234 -0
  37. package/docs/specs/deferred-work-spec.md +74 -0
  38. package/docs/specs/oauth-mcp-spec.md +213 -0
  39. package/package.json +3 -2
  40. package/dist/chunk-JNC32DMS.js.map +0 -1
  41. package/dist/chunk-TM22BLWP.js.map +0 -1
  42. package/dist/chunk-ZK456YXN.js.map +0 -1
  43. package/dist/server-MTXAJG5J.js.map +0 -1
  44. package/dist/{worker-AUCXFHEL.js.map → worker-NAKGTIF5.js.map} +0 -0
package/dist/index.cjs CHANGED
@@ -77,8 +77,12 @@ var MapsSearchOptionsSchema = import_zod.z.object({
77
77
  gl: import_zod.z.string().length(2).default("us"),
78
78
  hl: import_zod.z.string().length(2).default("en"),
79
79
  maxResults: import_zod.z.number().int().min(1).max(50).default(10),
80
+ proxyMode: import_zod.z.enum(["location", "configured", "none"]).default("location"),
81
+ proxyZip: import_zod.z.string().regex(/^\d{5}$/).optional(),
82
+ debug: import_zod.z.boolean().default(false),
80
83
  kernelApiKey: import_zod.z.string().optional(),
81
84
  kernelProxyId: import_zod.z.string().optional(),
85
+ kernelProxyResolution: import_zod.z.unknown().optional(),
82
86
  headless: import_zod.z.boolean().default(true)
83
87
  });
84
88
  var RawPAAItemSchema = import_zod.z.object({
@@ -254,6 +258,12 @@ var RequestAbortedError = class extends Error {
254
258
  super(message);
255
259
  }
256
260
  };
261
+ var LocationMismatchError = class extends Error {
262
+ name = "LocationMismatchError";
263
+ constructor(message = "Google returned results for a different location than requested") {
264
+ super(message);
265
+ }
266
+ };
257
267
 
258
268
  // src/driver/BrowserDriver.ts
259
269
  import_playwright_extra.chromium.use((0, import_puppeteer_extra_plugin_stealth.default)());
@@ -2274,16 +2284,18 @@ var US_CITY_CENTER_ZIPS = {
2274
2284
  function proxyIdSuffix2(proxyId) {
2275
2285
  return proxyId ? proxyId.slice(-6) : null;
2276
2286
  }
2277
- function resolution(source, proxyMode, proxyId, target, error) {
2287
+ function resolution(source, proxyMode, proxyId, target, error, disposable = false) {
2278
2288
  return {
2279
2289
  kernelProxyId: proxyId,
2290
+ ...disposable && proxyId ? { disposableProxyId: proxyId } : {},
2280
2291
  resolution: {
2281
2292
  source,
2282
2293
  proxyMode,
2283
2294
  proxyIdPresent: Boolean(proxyId),
2284
2295
  proxyIdSuffix: proxyIdSuffix2(proxyId),
2285
2296
  target,
2286
- error
2297
+ error,
2298
+ disposable
2287
2299
  }
2288
2300
  };
2289
2301
  }
@@ -2313,6 +2325,10 @@ function kernelCityIdentifierCandidates(city) {
2313
2325
  function proxyName(country, state, city) {
2314
2326
  return city ? `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}-${city}` : `mcp-serp-residential-${country.toLowerCase()}-${state.toLowerCase()}`;
2315
2327
  }
2328
+ function freshProxyName(baseName, attemptIndex) {
2329
+ const stamp = `${Date.now()}-${attemptIndex ?? 0}-${Math.random().toString(36).slice(2, 8)}`;
2330
+ return `${baseName}-fresh-${stamp}`;
2331
+ }
2316
2332
  function zipProxyName(zip) {
2317
2333
  return `mcp-serp-residential-us-zip-${zip}`;
2318
2334
  }
@@ -2382,6 +2398,12 @@ function zipTarget(target, zip) {
2382
2398
  }
2383
2399
  };
2384
2400
  }
2401
+ function withProxyName(target, name) {
2402
+ return {
2403
+ ...target,
2404
+ proxyName: name
2405
+ };
2406
+ }
2385
2407
  function configMatches(config, target, city) {
2386
2408
  if (target.level === "zip") {
2387
2409
  return config?.country?.toUpperCase() === target.country && config?.zip === target.zip;
@@ -2420,6 +2442,55 @@ function escalatedTargetLevel(target, attemptIndex) {
2420
2442
  function errorText2(err) {
2421
2443
  return err instanceof Error ? err.message : String(err);
2422
2444
  }
2445
+ function freshTargetCandidates(target, explicitZip, attemptIndex) {
2446
+ const out = [];
2447
+ const zip = knownZipFor(target, explicitZip);
2448
+ if (zip) {
2449
+ const targetZip = zipTarget(target, zip);
2450
+ out.push(withProxyName(targetZip, freshProxyName(targetZip.proxyName, attemptIndex)));
2451
+ }
2452
+ for (const city of target.cityCandidates) {
2453
+ const cityTarget = {
2454
+ ...target,
2455
+ level: "city",
2456
+ city,
2457
+ proxyName: proxyName(target.country, target.state, city),
2458
+ config: {
2459
+ country: target.country,
2460
+ state: target.state,
2461
+ city
2462
+ }
2463
+ };
2464
+ out.push(withProxyName(cityTarget, freshProxyName(cityTarget.proxyName, attemptIndex)));
2465
+ }
2466
+ const fallbackTarget = stateTarget(target);
2467
+ out.push(withProxyName(fallbackTarget, freshProxyName(fallbackTarget.proxyName, attemptIndex)));
2468
+ return out;
2469
+ }
2470
+ async function createFreshLocationProxy(kernel, options, target) {
2471
+ const createErrors = [];
2472
+ for (const candidate of freshTargetCandidates(target, options.proxyZip, options.attemptIndex)) {
2473
+ try {
2474
+ const created = await kernel.proxies.create({
2475
+ type: "residential",
2476
+ name: candidate.proxyName,
2477
+ config: candidate.level === "zip" ? { country: candidate.country, zip: candidate.zip } : candidate.config
2478
+ });
2479
+ if (created.id) {
2480
+ return resolution("location_created", options.proxyMode, created.id, candidate, null, true);
2481
+ }
2482
+ createErrors.push(`${candidate.proxyName}: Kernel did not return a proxy id`);
2483
+ } catch (err) {
2484
+ createErrors.push(`${candidate.proxyName}: ${errorText2(err)}`);
2485
+ }
2486
+ }
2487
+ return resolution("configured_fallback", options.proxyMode, options.configuredKernelProxyId, target, createErrors.join(" | "));
2488
+ }
2489
+ async function deleteKernelProxyId(kernelApiKey, proxyId) {
2490
+ if (!kernelApiKey || !proxyId) return;
2491
+ const kernel = new import_sdk2.default({ apiKey: kernelApiKey });
2492
+ await kernel.proxies.delete(proxyId);
2493
+ }
2423
2494
  async function resolveKernelProxyId(options) {
2424
2495
  if (options.proxyMode === "none") {
2425
2496
  return resolution("disabled", options.proxyMode, void 0, null, null);
@@ -2434,6 +2505,9 @@ async function resolveKernelProxyId(options) {
2434
2505
  const kernel = new import_sdk2.default({ apiKey: options.kernelApiKey });
2435
2506
  try {
2436
2507
  const attemptIndex = options.attemptIndex ?? 0;
2508
+ if (options.fresh) {
2509
+ return await createFreshLocationProxy(kernel, options, target);
2510
+ }
2437
2511
  if (attemptIndex >= 1) {
2438
2512
  const escalatedTarget = escalatedTargetLevel(target, attemptIndex);
2439
2513
  const createErrors2 = [];
@@ -2537,6 +2611,7 @@ async function resolveKernelProxyId(options) {
2537
2611
 
2538
2612
  // src/harvest.ts
2539
2613
  var MAX_ATTEMPTS = 3;
2614
+ var LOCATION_PROXY_MAX_ATTEMPTS = 5;
2540
2615
  function abortReason(signal) {
2541
2616
  if (signal.reason instanceof DOMException && signal.reason.name === "TimeoutError") return signal.reason;
2542
2617
  return new RequestAbortedError();
@@ -2566,9 +2641,12 @@ async function emitAttemptEvent(sink, event) {
2566
2641
  }
2567
2642
  function classifyAttemptError(err) {
2568
2643
  if (err instanceof CaptchaError) return "captcha";
2644
+ if (err instanceof LocationMismatchError) return "location_mismatch";
2569
2645
  if (err instanceof RequestAbortedError) return "request_aborted";
2570
2646
  if (err instanceof DOMException && (err.name === "TimeoutError" || err.name === "AbortError")) return "timeout";
2571
2647
  const message = err instanceof Error ? err.message : String(err);
2648
+ if (looksLikeProxyTunnelFailure(message)) return "proxy_tunnel_failed";
2649
+ if (looksLikeProxyUnavailable(message)) return "proxy_unavailable";
2572
2650
  return /timeout|timed out|Timeout \d+ms exceeded|deadline/i.test(message) ? "timeout" : "error";
2573
2651
  }
2574
2652
  function classifyAttemptResult(result) {
@@ -2577,6 +2655,49 @@ function classifyAttemptResult(result) {
2577
2655
  function errorMessage(err) {
2578
2656
  return err instanceof Error ? err.message : String(err);
2579
2657
  }
2658
+ function maxAttemptsForProxyMode(proxyMode) {
2659
+ return proxyMode === "location" ? LOCATION_PROXY_MAX_ATTEMPTS : MAX_ATTEMPTS;
2660
+ }
2661
+ function looksLikeProxyTunnelFailure(message) {
2662
+ return /ERR_TUNNEL_CONNECTION_FAILED|ERR_PROXY_CONNECTION_FAILED|ERR_SOCKS_CONNECTION_FAILED|tunnel connection failed|proxy connection failed|transport error: proxy/i.test(message);
2663
+ }
2664
+ function looksLikeProxyUnavailable(message) {
2665
+ return /proxy unavailable|proxy_unavailable|connection_test_failed|did not return a proxy id|configured fallback/i.test(message);
2666
+ }
2667
+ function retryableLocationProxyError(outcome) {
2668
+ return outcome === "captcha" || outcome === "proxy_tunnel_failed" || outcome === "proxy_unavailable";
2669
+ }
2670
+ function locationMismatchMessage(result) {
2671
+ const evidence = result.diagnostics.debug?.locationEvidence;
2672
+ const expected = evidence?.expected?.canonicalLocation ?? result.location ?? "requested location";
2673
+ const candidates = evidence?.candidates.slice(0, 3).map((candidate) => `${candidate.city}, ${candidate.regionCode}`).join("; ");
2674
+ return candidates ? `Google returned results for ${candidates}, not ${expected}` : `Google returned results for a different location than ${expected}`;
2675
+ }
2676
+ function shouldRetryLocationMismatch(result, proxyMode) {
2677
+ return proxyMode === "location" && result.diagnostics.debug?.locationEvidence?.status === "mismatch";
2678
+ }
2679
+ function stripInternalDebug(result, keepDebug) {
2680
+ if (keepDebug || !result.diagnostics.debug) return result;
2681
+ const diagnostics = { ...result.diagnostics };
2682
+ delete diagnostics.debug;
2683
+ return { ...result, diagnostics };
2684
+ }
2685
+ async function cleanupDisposableProxy(kernelApiKey, proxyId) {
2686
+ if (!kernelApiKey || !proxyId) return;
2687
+ try {
2688
+ await deleteKernelProxyId(kernelApiKey, proxyId);
2689
+ console.info(JSON.stringify({
2690
+ event: "kernel_proxy_deleted",
2691
+ proxy_id_suffix: proxyId.slice(-6)
2692
+ }));
2693
+ } catch (err) {
2694
+ console.warn(JSON.stringify({
2695
+ event: "kernel_proxy_delete_failed",
2696
+ proxy_id_suffix: proxyId.slice(-6),
2697
+ message: errorMessage(err)
2698
+ }));
2699
+ }
2700
+ }
2580
2701
  async function extractOnce(options, signal) {
2581
2702
  const driver = new BrowserDriver();
2582
2703
  const reporter = new ProgressReporter();
@@ -2644,26 +2765,35 @@ async function harvest(rawOptions) {
2644
2765
  proxyZip: typeof raw.proxyZip === "string" ? raw.proxyZip : void 0,
2645
2766
  gl: typeof raw.gl === "string" ? raw.gl : "us"
2646
2767
  };
2768
+ const requestedDebug = typeof raw.debug === "boolean" ? raw.debug : false;
2769
+ const needsLocationEvidence = proxyMode === "location" && Boolean(proxyOpts.location);
2770
+ const maxAttempts = maxAttemptsForProxyMode(proxyMode);
2647
2771
  const serializer = new OutputSerializer();
2648
- for (let i = 0; i < MAX_ATTEMPTS; i++) {
2772
+ let lastError = null;
2773
+ for (let i = 0; i < maxAttempts; i++) {
2649
2774
  const attemptNumber = i + 1;
2650
2775
  const startedAtMs = Date.now();
2651
2776
  try {
2652
2777
  if (signal?.aborted) throw abortReason(signal);
2653
- const resolution2 = await resolveKernelProxyId({ ...proxyOpts, attemptIndex: i });
2778
+ const resolution2 = await resolveKernelProxyId({
2779
+ ...proxyOpts,
2780
+ attemptIndex: i,
2781
+ fresh: proxyMode === "location"
2782
+ });
2654
2783
  const mergedAttempt = {
2655
2784
  ...raw,
2656
2785
  kernelApiKey,
2657
2786
  kernelProxyId: resolution2.kernelProxyId,
2658
2787
  kernelProxyResolution: resolution2.resolution,
2659
- proxyMode
2788
+ proxyMode,
2789
+ debug: requestedDebug || needsLocationEvidence
2660
2790
  };
2661
2791
  if (proxyMode === "none") mergedAttempt.kernelProxyId = void 0;
2662
2792
  const attemptOptions = HarvestOptionsSchema.parse(mergedAttempt);
2663
2793
  await emitAttemptEvent(onAttemptEvent, {
2664
2794
  type: "started",
2665
2795
  attemptNumber,
2666
- maxAttempts: MAX_ATTEMPTS,
2796
+ maxAttempts,
2667
2797
  query: attemptOptions.query,
2668
2798
  location: attemptOptions.location ?? null,
2669
2799
  maxQuestions: attemptOptions.maxQuestions,
@@ -2672,7 +2802,7 @@ async function harvest(rawOptions) {
2672
2802
  console.info(JSON.stringify({
2673
2803
  event: "harvest_attempt_started",
2674
2804
  attempt_number: attemptNumber,
2675
- max_attempts: MAX_ATTEMPTS,
2805
+ max_attempts: maxAttempts,
2676
2806
  query: attemptOptions.query,
2677
2807
  location: attemptOptions.location ?? null,
2678
2808
  max_questions: attemptOptions.maxQuestions
@@ -2680,57 +2810,84 @@ async function harvest(rawOptions) {
2680
2810
  const attempt = await extractOnce(attemptOptions, signal);
2681
2811
  if (attempt.error) {
2682
2812
  const err = attempt.error;
2683
- if (err instanceof CaptchaError) {
2684
- const willRetry = i < MAX_ATTEMPTS - 1;
2813
+ const outcome = classifyAttemptError(err);
2814
+ const willRetry = i < maxAttempts - 1 && (outcome === "captcha" || proxyMode === "location" && retryableLocationProxyError(outcome));
2815
+ if (outcome === "captcha") {
2685
2816
  console.warn(JSON.stringify({
2686
2817
  event: "harvest_attempt_captcha",
2687
2818
  attempt_number: attemptNumber,
2688
- max_attempts: MAX_ATTEMPTS,
2689
- message: err.message,
2819
+ max_attempts: maxAttempts,
2820
+ message: errorMessage(err),
2821
+ will_retry: willRetry
2822
+ }));
2823
+ } else if (willRetry) {
2824
+ console.warn(JSON.stringify({
2825
+ event: "harvest_attempt_proxy_retry",
2826
+ attempt_number: attemptNumber,
2827
+ max_attempts: maxAttempts,
2828
+ outcome,
2829
+ message: errorMessage(err),
2690
2830
  will_retry: willRetry
2691
2831
  }));
2692
- await emitAttemptEvent(onAttemptEvent, {
2693
- type: "finished",
2694
- attemptNumber,
2695
- maxAttempts: MAX_ATTEMPTS,
2696
- outcome: "captcha",
2697
- kernelSessionId: attempt.cleanup.kernelSessionId,
2698
- questionCount: 0,
2699
- durationMs: Date.now() - startedAtMs,
2700
- error: err.message,
2701
- willRetry,
2702
- cleanup: attempt.cleanup,
2703
- debug: attempt.debug,
2704
- completedAt: (/* @__PURE__ */ new Date()).toISOString()
2705
- });
2706
- if (willRetry) continue;
2707
- break;
2708
2832
  }
2709
2833
  await emitAttemptEvent(onAttemptEvent, {
2710
2834
  type: "finished",
2711
2835
  attemptNumber,
2712
- maxAttempts: MAX_ATTEMPTS,
2713
- outcome: classifyAttemptError(err),
2836
+ maxAttempts,
2837
+ outcome,
2714
2838
  kernelSessionId: attempt.cleanup.kernelSessionId,
2715
2839
  questionCount: 0,
2716
2840
  durationMs: Date.now() - startedAtMs,
2717
2841
  error: errorMessage(err),
2718
- willRetry: false,
2842
+ willRetry,
2719
2843
  cleanup: attempt.cleanup,
2720
2844
  debug: attempt.debug,
2721
2845
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
2722
2846
  });
2723
- throw err;
2847
+ await cleanupDisposableProxy(kernelApiKey, resolution2.disposableProxyId);
2848
+ lastError = err;
2849
+ if (willRetry) continue;
2850
+ break;
2724
2851
  }
2725
2852
  const result = attempt.result;
2726
2853
  if (!result) throw new Error("Harvest attempt completed without a result");
2854
+ if (shouldRetryLocationMismatch(result, proxyMode)) {
2855
+ const err = new LocationMismatchError(locationMismatchMessage(result));
2856
+ const willRetry = i < maxAttempts - 1;
2857
+ console.warn(JSON.stringify({
2858
+ event: "harvest_attempt_location_mismatch",
2859
+ attempt_number: attemptNumber,
2860
+ max_attempts: maxAttempts,
2861
+ message: err.message,
2862
+ will_retry: willRetry
2863
+ }));
2864
+ await emitAttemptEvent(onAttemptEvent, {
2865
+ type: "finished",
2866
+ attemptNumber,
2867
+ maxAttempts,
2868
+ outcome: "location_mismatch",
2869
+ kernelSessionId: attempt.cleanup.kernelSessionId,
2870
+ questionCount: result.totalQuestions,
2871
+ durationMs: Date.now() - startedAtMs,
2872
+ error: err.message,
2873
+ willRetry,
2874
+ cleanup: attempt.cleanup,
2875
+ debug: attempt.debug,
2876
+ completedAt: (/* @__PURE__ */ new Date()).toISOString()
2877
+ });
2878
+ await cleanupDisposableProxy(kernelApiKey, resolution2.disposableProxyId);
2879
+ lastError = err;
2880
+ if (willRetry) continue;
2881
+ break;
2882
+ }
2883
+ const finalResult = stripInternalDebug(result, requestedDebug);
2727
2884
  await emitAttemptEvent(onAttemptEvent, {
2728
2885
  type: "finished",
2729
2886
  attemptNumber,
2730
- maxAttempts: MAX_ATTEMPTS,
2731
- outcome: classifyAttemptResult(result),
2887
+ maxAttempts,
2888
+ outcome: classifyAttemptResult(finalResult),
2732
2889
  kernelSessionId: attempt.cleanup.kernelSessionId,
2733
- questionCount: result.totalQuestions,
2890
+ questionCount: finalResult.totalQuestions,
2734
2891
  durationMs: Date.now() - startedAtMs,
2735
2892
  error: null,
2736
2893
  willRetry: false,
@@ -2738,64 +2895,52 @@ async function harvest(rawOptions) {
2738
2895
  debug: attempt.debug,
2739
2896
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
2740
2897
  });
2898
+ await cleanupDisposableProxy(kernelApiKey, resolution2.disposableProxyId);
2741
2899
  if (attemptOptions.format === "json" || attemptOptions.format === "both") {
2742
- await serializer.writeJSON(result, attemptOptions.outputDir);
2900
+ await serializer.writeJSON(finalResult, attemptOptions.outputDir);
2743
2901
  }
2744
2902
  if (attemptOptions.format === "csv" || attemptOptions.format === "both") {
2745
2903
  await Promise.all([
2746
- serializer.writeCSV(result.flat, attemptOptions.outputDir),
2747
- result.videos.length > 0 ? serializer.writeVideoCSV(result.videos, result.seed, attemptOptions.outputDir) : Promise.resolve(""),
2748
- result.forums.length > 0 ? serializer.writeForumCSV(result.forums, result.seed, attemptOptions.outputDir) : Promise.resolve(""),
2749
- result.aiOverview.detected ? serializer.writeAIOverviewCSV(result.aiOverview.citations, result.aiOverview.text, result.seed, attemptOptions.outputDir) : Promise.resolve(""),
2750
- result.aiMode.detected ? serializer.writeAIModeCSV(result.aiMode.citations, result.aiMode.text, result.seed, attemptOptions.outputDir) : Promise.resolve(""),
2751
- result.whatPeopleSaying.length > 0 ? serializer.writeWhatPeopleSayingCSV(result.whatPeopleSaying, result.seed, attemptOptions.outputDir) : Promise.resolve("")
2904
+ serializer.writeCSV(finalResult.flat, attemptOptions.outputDir),
2905
+ finalResult.videos.length > 0 ? serializer.writeVideoCSV(finalResult.videos, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
2906
+ finalResult.forums.length > 0 ? serializer.writeForumCSV(finalResult.forums, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
2907
+ finalResult.aiOverview.detected ? serializer.writeAIOverviewCSV(finalResult.aiOverview.citations, finalResult.aiOverview.text, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
2908
+ finalResult.aiMode.detected ? serializer.writeAIModeCSV(finalResult.aiMode.citations, finalResult.aiMode.text, finalResult.seed, attemptOptions.outputDir) : Promise.resolve(""),
2909
+ finalResult.whatPeopleSaying.length > 0 ? serializer.writeWhatPeopleSayingCSV(finalResult.whatPeopleSaying, finalResult.seed, attemptOptions.outputDir) : Promise.resolve("")
2752
2910
  ]);
2753
2911
  }
2754
- return result;
2912
+ return finalResult;
2755
2913
  } catch (err) {
2756
- if (err instanceof CaptchaError) {
2757
- const willRetry = i < MAX_ATTEMPTS - 1;
2914
+ const outcome = classifyAttemptError(err);
2915
+ const willRetry = i < maxAttempts - 1 && (outcome === "captcha" || proxyMode === "location" && retryableLocationProxyError(outcome));
2916
+ if (outcome === "captcha") {
2758
2917
  console.warn(JSON.stringify({
2759
2918
  event: "harvest_attempt_captcha",
2760
2919
  attempt_number: attemptNumber,
2761
- max_attempts: MAX_ATTEMPTS,
2762
- message: err.message,
2920
+ max_attempts: maxAttempts,
2921
+ message: errorMessage(err),
2922
+ will_retry: willRetry
2923
+ }));
2924
+ } else if (willRetry) {
2925
+ console.warn(JSON.stringify({
2926
+ event: "harvest_attempt_proxy_retry",
2927
+ attempt_number: attemptNumber,
2928
+ max_attempts: maxAttempts,
2929
+ outcome,
2930
+ message: errorMessage(err),
2763
2931
  will_retry: willRetry
2764
2932
  }));
2765
- await emitAttemptEvent(onAttemptEvent, {
2766
- type: "finished",
2767
- attemptNumber,
2768
- maxAttempts: MAX_ATTEMPTS,
2769
- outcome: "captcha",
2770
- kernelSessionId: null,
2771
- questionCount: 0,
2772
- durationMs: Date.now() - startedAtMs,
2773
- error: err.message,
2774
- willRetry,
2775
- cleanup: {
2776
- kernelSessionId: null,
2777
- kernelDeleteStarted: false,
2778
- kernelDeleteSucceeded: null,
2779
- kernelDeleteError: null,
2780
- browserCloseSucceeded: null,
2781
- browserCloseError: null
2782
- },
2783
- debug: null,
2784
- completedAt: (/* @__PURE__ */ new Date()).toISOString()
2785
- });
2786
- if (willRetry) continue;
2787
- break;
2788
2933
  }
2789
2934
  await emitAttemptEvent(onAttemptEvent, {
2790
2935
  type: "finished",
2791
2936
  attemptNumber,
2792
- maxAttempts: MAX_ATTEMPTS,
2793
- outcome: classifyAttemptError(err),
2937
+ maxAttempts,
2938
+ outcome,
2794
2939
  kernelSessionId: null,
2795
2940
  questionCount: 0,
2796
2941
  durationMs: Date.now() - startedAtMs,
2797
2942
  error: errorMessage(err),
2798
- willRetry: false,
2943
+ willRetry,
2799
2944
  cleanup: {
2800
2945
  kernelSessionId: null,
2801
2946
  kernelDeleteStarted: false,
@@ -2807,15 +2952,19 @@ async function harvest(rawOptions) {
2807
2952
  debug: null,
2808
2953
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
2809
2954
  });
2955
+ lastError = err;
2956
+ if (willRetry) continue;
2957
+ if (outcome === "captcha") break;
2810
2958
  throw err;
2811
2959
  }
2812
2960
  }
2961
+ if (lastError && !(lastError instanceof CaptchaError)) throw lastError;
2813
2962
  console.warn(JSON.stringify({
2814
2963
  event: "harvest_captcha_exhausted",
2815
- max_attempts: MAX_ATTEMPTS,
2964
+ max_attempts: maxAttempts,
2816
2965
  session_kind: kernelApiKey ? "kernel" : "local"
2817
2966
  }));
2818
- throw new CaptchaError(sanitizeVendorName(`CAPTCHA on all ${MAX_ATTEMPTS} fresh sessions. Try again in a few minutes.`));
2967
+ throw new CaptchaError(sanitizeVendorName(`CAPTCHA on all ${maxAttempts} fresh sessions. Try again in a few minutes.`));
2819
2968
  }
2820
2969
 
2821
2970
  // src/video/VideoGenerator.ts