webpeel 0.21.30 → 0.21.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/search-provider.js +37 -22
- package/package.json +1 -1
|
@@ -684,25 +684,37 @@ export class DuckDuckGoProvider {
|
|
|
684
684
|
'Upgrade-Insecure-Requests': '1',
|
|
685
685
|
'Referer': 'https://duckduckgo.com/',
|
|
686
686
|
};
|
|
687
|
-
// Try
|
|
687
|
+
// Try direct first, then proxy as fallback.
|
|
688
|
+
// Webshare backbone IPs are blocked by DDG (returns empty results).
|
|
689
|
+
// Render datacenter IPs work intermittently — direct has better odds.
|
|
688
690
|
let response;
|
|
689
|
-
|
|
691
|
+
let html;
|
|
692
|
+
// let usedProxy = false;
|
|
693
|
+
// Attempt 1: Direct fetch (no proxy)
|
|
694
|
+
try {
|
|
695
|
+
response = await undiciFetch(searchUrl, { headers: baseHeaders, signal });
|
|
696
|
+
html = response.ok ? await response.text() : '';
|
|
697
|
+
}
|
|
698
|
+
catch (directErr) {
|
|
699
|
+
log.debug('DDG direct fetch failed:', directErr instanceof Error ? directErr.message : directErr);
|
|
700
|
+
html = '';
|
|
701
|
+
}
|
|
702
|
+
// Check if direct returned actual results (not empty/CAPTCHA)
|
|
703
|
+
const hasResults = html.includes('class="result"') || html.includes('class="result ');
|
|
704
|
+
if (!hasResults && proxyUrl) {
|
|
705
|
+
// Attempt 2: Proxy fallback
|
|
706
|
+
log.debug('DDG direct returned no results, trying proxy...');
|
|
690
707
|
try {
|
|
708
|
+
// usedProxy = true;
|
|
691
709
|
const dispatcher = new ProxyAgent(proxyUrl);
|
|
692
710
|
response = await undiciFetch(searchUrl, { headers: baseHeaders, signal, dispatcher });
|
|
711
|
+
if (response.ok)
|
|
712
|
+
html = await response.text();
|
|
693
713
|
}
|
|
694
714
|
catch (proxyErr) {
|
|
695
|
-
log.debug('DDG proxy
|
|
696
|
-
response = await undiciFetch(searchUrl, { headers: baseHeaders, signal });
|
|
715
|
+
log.debug('DDG proxy also failed:', proxyErr instanceof Error ? proxyErr.message : proxyErr);
|
|
697
716
|
}
|
|
698
717
|
}
|
|
699
|
-
else {
|
|
700
|
-
response = await undiciFetch(searchUrl, { headers: baseHeaders, signal });
|
|
701
|
-
}
|
|
702
|
-
if (!response.ok) {
|
|
703
|
-
throw new Error(`Search failed: HTTP ${response.status}`);
|
|
704
|
-
}
|
|
705
|
-
const html = await response.text();
|
|
706
718
|
const $ = load(html);
|
|
707
719
|
const results = [];
|
|
708
720
|
const seen = new Set();
|
|
@@ -766,22 +778,25 @@ export class DuckDuckGoProvider {
|
|
|
766
778
|
'Referer': 'https://lite.duckduckgo.com/',
|
|
767
779
|
};
|
|
768
780
|
const liteUrl = `https://lite.duckduckgo.com/lite/?${params.toString()}`;
|
|
769
|
-
|
|
770
|
-
|
|
781
|
+
// Direct first, proxy fallback (same reasoning as searchOnce — Webshare IPs blocked by DDG)
|
|
782
|
+
let html = '';
|
|
783
|
+
try {
|
|
784
|
+
const resp = await undiciFetch(liteUrl, { headers: liteHeaders, signal });
|
|
785
|
+
if (resp.ok)
|
|
786
|
+
html = await resp.text();
|
|
787
|
+
}
|
|
788
|
+
catch { /* direct failed */ }
|
|
789
|
+
if (!html.includes('result-link') && liteProxyUrl) {
|
|
771
790
|
try {
|
|
772
791
|
const dispatcher = new ProxyAgent(liteProxyUrl);
|
|
773
|
-
|
|
792
|
+
const resp = await undiciFetch(liteUrl, { headers: liteHeaders, signal, dispatcher });
|
|
793
|
+
if (resp.ok)
|
|
794
|
+
html = await resp.text();
|
|
774
795
|
}
|
|
775
|
-
catch {
|
|
776
|
-
response = await undiciFetch(liteUrl, { headers: liteHeaders, signal });
|
|
777
|
-
}
|
|
778
|
-
}
|
|
779
|
-
else {
|
|
780
|
-
response = await undiciFetch(liteUrl, { headers: liteHeaders, signal });
|
|
796
|
+
catch { /* proxy also failed */ }
|
|
781
797
|
}
|
|
782
|
-
if (!
|
|
798
|
+
if (!html)
|
|
783
799
|
return [];
|
|
784
|
-
const html = await response.text();
|
|
785
800
|
const $ = load(html);
|
|
786
801
|
const results = [];
|
|
787
802
|
const seen = new Set();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.30",
|
|
3
|
+
"version": "0.21.31",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|