webpeel 0.21.27 → 0.21.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/search-provider.js +42 -27
- package/package.json +1 -1
|
@@ -628,26 +628,34 @@ export class DuckDuckGoProvider {
|
|
|
628
628
|
// Use realistic browser headers to avoid DDG bot detection on datacenter IPs
|
|
629
629
|
// Route through residential proxy when available (datacenter IPs are blocked)
|
|
630
630
|
const proxyUrl = getWebshareProxyUrl();
|
|
631
|
-
const fetchOpts = {
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
'Referer': 'https://duckduckgo.com/',
|
|
644
|
-
},
|
|
645
|
-
signal,
|
|
631
|
+
const baseHeaders = {
|
|
632
|
+
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
|
633
|
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
|
|
634
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
635
|
+
'Accept-Encoding': 'gzip, deflate, br',
|
|
636
|
+
'Cache-Control': 'no-cache',
|
|
637
|
+
'Sec-Fetch-Dest': 'document',
|
|
638
|
+
'Sec-Fetch-Mode': 'navigate',
|
|
639
|
+
'Sec-Fetch-Site': 'none',
|
|
640
|
+
'Sec-Fetch-User': '?1',
|
|
641
|
+
'Upgrade-Insecure-Requests': '1',
|
|
642
|
+
'Referer': 'https://duckduckgo.com/',
|
|
646
643
|
};
|
|
644
|
+
// Try with proxy first (bypasses datacenter IP blocks), fall back to direct
|
|
645
|
+
let response;
|
|
647
646
|
if (proxyUrl) {
|
|
648
|
-
|
|
647
|
+
try {
|
|
648
|
+
const dispatcher = new ProxyAgent(proxyUrl);
|
|
649
|
+
response = await undiciFetch(searchUrl, { headers: baseHeaders, signal, dispatcher });
|
|
650
|
+
}
|
|
651
|
+
catch (proxyErr) {
|
|
652
|
+
log.debug('DDG proxy fetch failed, falling back to direct:', proxyErr instanceof Error ? proxyErr.message : proxyErr);
|
|
653
|
+
response = await undiciFetch(searchUrl, { headers: baseHeaders, signal });
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
else {
|
|
657
|
+
response = await undiciFetch(searchUrl, { headers: baseHeaders, signal });
|
|
649
658
|
}
|
|
650
|
-
const response = await undiciFetch(searchUrl, fetchOpts);
|
|
651
659
|
if (!response.ok) {
|
|
652
660
|
throw new Error(`Search failed: HTTP ${response.status}`);
|
|
653
661
|
}
|
|
@@ -708,19 +716,26 @@ export class DuckDuckGoProvider {
|
|
|
708
716
|
const params = new URLSearchParams();
|
|
709
717
|
params.set('q', query);
|
|
710
718
|
const liteProxyUrl = getWebshareProxyUrl();
|
|
711
|
-
const liteFetchOpts = {
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
'Referer': 'https://lite.duckduckgo.com/',
|
|
717
|
-
},
|
|
718
|
-
signal,
|
|
719
|
+
const liteHeaders = {
|
|
720
|
+
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
|
721
|
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
|
722
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
723
|
+
'Referer': 'https://lite.duckduckgo.com/',
|
|
719
724
|
};
|
|
725
|
+
const liteUrl = `https://lite.duckduckgo.com/lite/?${params.toString()}`;
|
|
726
|
+
let response;
|
|
720
727
|
if (liteProxyUrl) {
|
|
721
|
-
|
|
728
|
+
try {
|
|
729
|
+
const dispatcher = new ProxyAgent(liteProxyUrl);
|
|
730
|
+
response = await undiciFetch(liteUrl, { headers: liteHeaders, signal, dispatcher });
|
|
731
|
+
}
|
|
732
|
+
catch {
|
|
733
|
+
response = await undiciFetch(liteUrl, { headers: liteHeaders, signal });
|
|
734
|
+
}
|
|
735
|
+
}
|
|
736
|
+
else {
|
|
737
|
+
response = await undiciFetch(liteUrl, { headers: liteHeaders, signal });
|
|
722
738
|
}
|
|
723
|
-
const response = await undiciFetch(`https://lite.duckduckgo.com/lite/?${params.toString()}`, liteFetchOpts);
|
|
724
739
|
if (!response.ok)
|
|
725
740
|
return [];
|
|
726
741
|
const html = await response.text();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.27",
|
|
3
|
+
"version": "0.21.28",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|