webpeel 0.21.30 → 0.21.31

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -684,25 +684,37 @@ export class DuckDuckGoProvider {
684
684
  'Upgrade-Insecure-Requests': '1',
685
685
  'Referer': 'https://duckduckgo.com/',
686
686
  };
687
- // Try with proxy first (bypasses datacenter IP blocks), fall back to direct
687
+ // Try direct first, then proxy as fallback.
688
+ // Webshare backbone IPs are blocked by DDG (returns empty results).
689
+ // Render datacenter IPs work intermittently — direct has better odds.
688
690
  let response;
689
- if (proxyUrl) {
691
+ let html;
692
+ // let usedProxy = false;
693
+ // Attempt 1: Direct fetch (no proxy)
694
+ try {
695
+ response = await undiciFetch(searchUrl, { headers: baseHeaders, signal });
696
+ html = response.ok ? await response.text() : '';
697
+ }
698
+ catch (directErr) {
699
+ log.debug('DDG direct fetch failed:', directErr instanceof Error ? directErr.message : directErr);
700
+ html = '';
701
+ }
702
+ // Check if direct returned actual results (not empty/CAPTCHA)
703
+ const hasResults = html.includes('class="result"') || html.includes('class="result ');
704
+ if (!hasResults && proxyUrl) {
705
+ // Attempt 2: Proxy fallback
706
+ log.debug('DDG direct returned no results, trying proxy...');
690
707
  try {
708
+ // usedProxy = true;
691
709
  const dispatcher = new ProxyAgent(proxyUrl);
692
710
  response = await undiciFetch(searchUrl, { headers: baseHeaders, signal, dispatcher });
711
+ if (response.ok)
712
+ html = await response.text();
693
713
  }
694
714
  catch (proxyErr) {
695
- log.debug('DDG proxy fetch failed, falling back to direct:', proxyErr instanceof Error ? proxyErr.message : proxyErr);
696
- response = await undiciFetch(searchUrl, { headers: baseHeaders, signal });
715
+ log.debug('DDG proxy also failed:', proxyErr instanceof Error ? proxyErr.message : proxyErr);
697
716
  }
698
717
  }
699
- else {
700
- response = await undiciFetch(searchUrl, { headers: baseHeaders, signal });
701
- }
702
- if (!response.ok) {
703
- throw new Error(`Search failed: HTTP ${response.status}`);
704
- }
705
- const html = await response.text();
706
718
  const $ = load(html);
707
719
  const results = [];
708
720
  const seen = new Set();
@@ -766,22 +778,25 @@ export class DuckDuckGoProvider {
766
778
  'Referer': 'https://lite.duckduckgo.com/',
767
779
  };
768
780
  const liteUrl = `https://lite.duckduckgo.com/lite/?${params.toString()}`;
769
- let response;
770
- if (liteProxyUrl) {
781
+ // Direct first, proxy fallback (same reasoning as searchOnce — Webshare IPs blocked by DDG)
782
+ let html = '';
783
+ try {
784
+ const resp = await undiciFetch(liteUrl, { headers: liteHeaders, signal });
785
+ if (resp.ok)
786
+ html = await resp.text();
787
+ }
788
+ catch { /* direct failed */ }
789
+ if (!html.includes('result-link') && liteProxyUrl) {
771
790
  try {
772
791
  const dispatcher = new ProxyAgent(liteProxyUrl);
773
- response = await undiciFetch(liteUrl, { headers: liteHeaders, signal, dispatcher });
792
+ const resp = await undiciFetch(liteUrl, { headers: liteHeaders, signal, dispatcher });
793
+ if (resp.ok)
794
+ html = await resp.text();
774
795
  }
775
- catch {
776
- response = await undiciFetch(liteUrl, { headers: liteHeaders, signal });
777
- }
778
- }
779
- else {
780
- response = await undiciFetch(liteUrl, { headers: liteHeaders, signal });
796
+ catch { /* proxy also failed */ }
781
797
  }
782
- if (!response.ok)
798
+ if (!html)
783
799
  return [];
784
- const html = await response.text();
785
800
  const $ = load(html);
786
801
  const results = [];
787
802
  const seen = new Set();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.30",
3
+ "version": "0.21.31",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",