webpeel 0.21.25 → 0.21.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/search-provider.js +17 -6
- package/package.json +1 -1
|
@@ -12,10 +12,10 @@
|
|
|
12
12
|
* In production with no API keys configured, getBestSearchProvider() returns
|
|
13
13
|
* StealthSearchProvider since DDG HTTP is often blocked on datacenter IPs.
|
|
14
14
|
*/
|
|
15
|
-
import { fetch as undiciFetch } from 'undici';
|
|
15
|
+
import { fetch as undiciFetch, ProxyAgent } from 'undici';
|
|
16
16
|
import { load } from 'cheerio';
|
|
17
17
|
import { getStealthBrowser, getRandomUserAgent, applyStealthScripts } from './browser-pool.js';
|
|
18
|
-
import { getWebshareProxy } from './proxy-config.js';
|
|
18
|
+
import { getWebshareProxy, getWebshareProxyUrl } from './proxy-config.js';
|
|
19
19
|
import { createLogger } from './logger.js';
|
|
20
20
|
const log = createLogger('search');
|
|
21
21
|
function decodeHtmlEntities(input) {
|
|
@@ -626,7 +626,9 @@ export class DuckDuckGoProvider {
|
|
|
626
626
|
const { count, signal } = options;
|
|
627
627
|
const searchUrl = this.buildSearchUrl(query, options);
|
|
628
628
|
// Use realistic browser headers to avoid DDG bot detection on datacenter IPs
|
|
629
|
-
const response = await undiciFetch(searchUrl, {
|
|
629
|
+
// Route through residential proxy when available (datacenter IPs are blocked)
|
|
630
|
+
const proxyUrl = getWebshareProxyUrl();
|
|
631
|
+
const fetchOpts = {
|
|
630
632
|
headers: {
|
|
631
633
|
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
|
632
634
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
|
|
@@ -641,7 +643,11 @@ export class DuckDuckGoProvider {
|
|
|
641
643
|
'Referer': 'https://duckduckgo.com/',
|
|
642
644
|
},
|
|
643
645
|
signal,
|
|
644
|
-
});
|
|
646
|
+
};
|
|
647
|
+
if (proxyUrl) {
|
|
648
|
+
fetchOpts.dispatcher = new ProxyAgent(proxyUrl);
|
|
649
|
+
}
|
|
650
|
+
const response = await undiciFetch(searchUrl, fetchOpts);
|
|
645
651
|
if (!response.ok) {
|
|
646
652
|
throw new Error(`Search failed: HTTP ${response.status}`);
|
|
647
653
|
}
|
|
@@ -701,7 +707,8 @@ export class DuckDuckGoProvider {
|
|
|
701
707
|
const { count, signal } = options;
|
|
702
708
|
const params = new URLSearchParams();
|
|
703
709
|
params.set('q', query);
|
|
704
|
-
const response = await undiciFetch(`https://lite.duckduckgo.com/lite/?${params.toString()}`, {
|
|
710
|
+
const liteProxyUrl = getWebshareProxyUrl();
|
|
711
|
+
const liteFetchOpts = {
|
|
705
712
|
headers: {
|
|
706
713
|
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
|
707
714
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
|
@@ -709,7 +716,11 @@ export class DuckDuckGoProvider {
|
|
|
709
716
|
'Referer': 'https://lite.duckduckgo.com/',
|
|
710
717
|
},
|
|
711
718
|
signal,
|
|
712
|
-
});
|
|
719
|
+
};
|
|
720
|
+
if (liteProxyUrl) {
|
|
721
|
+
liteFetchOpts.dispatcher = new ProxyAgent(liteProxyUrl);
|
|
722
|
+
}
|
|
723
|
+
const response = await undiciFetch(`https://lite.duckduckgo.com/lite/?${params.toString()}`, liteFetchOpts);
|
|
713
724
|
if (!response.ok)
|
|
714
725
|
return [];
|
|
715
726
|
const html = await response.text();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.25",
|
|
3
|
+
"version": "0.21.26",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|