webpeel 0.21.29 → 0.21.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,18 +44,21 @@ declare class ProviderStatsTracker {
44
44
  private readonly windowSize;
45
45
  private readonly failThreshold;
46
46
  private readonly minSamples;
47
- constructor(windowSize?: number, failThreshold?: number, minSamples?: number);
47
+ private readonly decayMs;
48
+ constructor(windowSize?: number, failThreshold?: number, minSamples?: number, decayMs?: number);
48
49
  /** Record the outcome of a single attempt for the given source. */
49
50
  record(sourceId: string, success: boolean): void;
50
51
  /**
51
52
  * Returns the failure rate (0–1) for the given source based on
52
53
  * the sliding window of recorded attempts. Returns 0 if fewer
53
- * than minSamples have been recorded.
54
+ * than minSamples have been recorded within the last decayMs
55
+ * milliseconds (old samples expire so cold-start blips don't permanently
56
+ * lock out a provider).
54
57
  */
55
58
  getFailureRate(sourceId: string): number;
56
59
  /**
57
60
  * Returns true when the source should be skipped (failure rate >=
58
- * failThreshold with at least minSamples recorded).
61
+ * failThreshold with at least minSamples recent samples recorded).
59
62
  */
60
63
  shouldSkip(sourceId: string): boolean;
61
64
  /** Debug snapshot for a source. */
@@ -114,15 +114,17 @@ class ProviderStatsTracker {
114
114
  windowSize;
115
115
  failThreshold;
116
116
  minSamples;
117
- constructor(windowSize = 10, failThreshold = 0.8, minSamples = 3) {
117
+ decayMs; // failures older than this are ignored
118
+ constructor(windowSize = 10, failThreshold = 0.8, minSamples = 5, decayMs = 5 * 60 * 1000) {
118
119
  this.windowSize = windowSize;
119
120
  this.failThreshold = failThreshold;
120
121
  this.minSamples = minSamples;
122
+ this.decayMs = decayMs; // default 5 minutes: old failures don't permanently lock a provider
121
123
  }
122
124
  /** Record the outcome of a single attempt for the given source. */
123
125
  record(sourceId, success) {
124
126
  const arr = this.history.get(sourceId) ?? [];
125
- arr.push({ success });
127
+ arr.push({ success, ts: Date.now() });
126
128
  if (arr.length > this.windowSize)
127
129
  arr.splice(0, arr.length - this.windowSize);
128
130
  this.history.set(sourceId, arr);
@@ -130,18 +132,24 @@ class ProviderStatsTracker {
130
132
  /**
131
133
  * Returns the failure rate (0–1) for the given source based on
132
134
  * the sliding window of recorded attempts. Returns 0 if fewer
133
- * than minSamples have been recorded.
135
+ * than minSamples have been recorded within the last decayMs
136
+ * milliseconds (old samples expire so cold-start blips don't permanently
137
+ * lock out a provider).
134
138
  */
135
139
  getFailureRate(sourceId) {
136
140
  const arr = this.history.get(sourceId);
137
141
  if (!arr || arr.length < this.minSamples)
138
142
  return 0;
139
- const failures = arr.filter(a => !a.success).length;
140
- return failures / arr.length;
143
+ const cutoff = Date.now() - this.decayMs;
144
+ const recent = arr.filter(a => a.ts >= cutoff);
145
+ if (recent.length < this.minSamples)
146
+ return 0; // not enough recent samples
147
+ const failures = recent.filter(a => !a.success).length;
148
+ return failures / recent.length;
141
149
  }
142
150
  /**
143
151
  * Returns true when the source should be skipped (failure rate >=
144
- * failThreshold with at least minSamples recorded).
152
+ * failThreshold with at least minSamples recent samples recorded).
145
153
  */
146
154
  shouldSkip(sourceId) {
147
155
  return this.getFailureRate(sourceId) >= this.failThreshold;
@@ -676,25 +684,37 @@ export class DuckDuckGoProvider {
676
684
  'Upgrade-Insecure-Requests': '1',
677
685
  'Referer': 'https://duckduckgo.com/',
678
686
  };
679
- // Try with proxy first (bypasses datacenter IP blocks), fall back to direct
687
+ // Try direct first, then proxy as fallback.
688
+ // Webshare backbone IPs are blocked by DDG (returns empty results).
689
+ // Render datacenter IPs work intermittently — direct has better odds.
680
690
  let response;
681
- if (proxyUrl) {
691
+ let html;
692
+ // let usedProxy = false;
693
+ // Attempt 1: Direct fetch (no proxy)
694
+ try {
695
+ response = await undiciFetch(searchUrl, { headers: baseHeaders, signal });
696
+ html = response.ok ? await response.text() : '';
697
+ }
698
+ catch (directErr) {
699
+ log.debug('DDG direct fetch failed:', directErr instanceof Error ? directErr.message : directErr);
700
+ html = '';
701
+ }
702
+ // Check if direct returned actual results (not empty/CAPTCHA)
703
+ const hasResults = html.includes('class="result"') || html.includes('class="result ');
704
+ if (!hasResults && proxyUrl) {
705
+ // Attempt 2: Proxy fallback
706
+ log.debug('DDG direct returned no results, trying proxy...');
682
707
  try {
708
+ // usedProxy = true;
683
709
  const dispatcher = new ProxyAgent(proxyUrl);
684
710
  response = await undiciFetch(searchUrl, { headers: baseHeaders, signal, dispatcher });
711
+ if (response.ok)
712
+ html = await response.text();
685
713
  }
686
714
  catch (proxyErr) {
687
- log.debug('DDG proxy fetch failed, falling back to direct:', proxyErr instanceof Error ? proxyErr.message : proxyErr);
688
- response = await undiciFetch(searchUrl, { headers: baseHeaders, signal });
715
+ log.debug('DDG proxy also failed:', proxyErr instanceof Error ? proxyErr.message : proxyErr);
689
716
  }
690
717
  }
691
- else {
692
- response = await undiciFetch(searchUrl, { headers: baseHeaders, signal });
693
- }
694
- if (!response.ok) {
695
- throw new Error(`Search failed: HTTP ${response.status}`);
696
- }
697
- const html = await response.text();
698
718
  const $ = load(html);
699
719
  const results = [];
700
720
  const seen = new Set();
@@ -758,22 +778,25 @@ export class DuckDuckGoProvider {
758
778
  'Referer': 'https://lite.duckduckgo.com/',
759
779
  };
760
780
  const liteUrl = `https://lite.duckduckgo.com/lite/?${params.toString()}`;
761
- let response;
762
- if (liteProxyUrl) {
781
+ // Direct first, proxy fallback (same reasoning as searchOnce — Webshare IPs blocked by DDG)
782
+ let html = '';
783
+ try {
784
+ const resp = await undiciFetch(liteUrl, { headers: liteHeaders, signal });
785
+ if (resp.ok)
786
+ html = await resp.text();
787
+ }
788
+ catch { /* direct failed */ }
789
+ if (!html.includes('result-link') && liteProxyUrl) {
763
790
  try {
764
791
  const dispatcher = new ProxyAgent(liteProxyUrl);
765
- response = await undiciFetch(liteUrl, { headers: liteHeaders, signal, dispatcher });
792
+ const resp = await undiciFetch(liteUrl, { headers: liteHeaders, signal, dispatcher });
793
+ if (resp.ok)
794
+ html = await resp.text();
766
795
  }
767
- catch {
768
- response = await undiciFetch(liteUrl, { headers: liteHeaders, signal });
769
- }
770
- }
771
- else {
772
- response = await undiciFetch(liteUrl, { headers: liteHeaders, signal });
796
+ catch { /* proxy also failed */ }
773
797
  }
774
- if (!response.ok)
798
+ if (!html)
775
799
  return [];
776
- const html = await response.text();
777
800
  const $ = load(html);
778
801
  const results = [];
779
802
  const seen = new Set();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.29",
3
+ "version": "0.21.31",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",