webpeel 0.21.38 → 0.21.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/core/pipeline.js
CHANGED
|
@@ -307,7 +307,9 @@ export async function fetchContent(ctx) {
|
|
|
307
307
|
}
|
|
308
308
|
catch (e) {
|
|
309
309
|
// Domain API failed — fall through to normal fetch
|
|
310
|
-
|
|
310
|
+
const errMsg = e instanceof Error ? e.message : String(e);
|
|
311
|
+
log.warn('domain API first-pass failed, falling back to fetch:', errMsg);
|
|
312
|
+
ctx.warnings.push(`Domain API extraction failed: ${errMsg}`);
|
|
311
313
|
}
|
|
312
314
|
}
|
|
313
315
|
ctx.timer.mark('fetch');
|
|
@@ -959,7 +961,9 @@ export async function postProcess(ctx) {
|
|
|
959
961
|
}
|
|
960
962
|
catch (e) {
|
|
961
963
|
// Domain extraction failure is non-fatal; continue with normal content
|
|
962
|
-
|
|
964
|
+
const errMsg2 = e instanceof Error ? e.message : String(e);
|
|
965
|
+
log.warn('domain extraction (second pass) failed:', errMsg2);
|
|
966
|
+
ctx.warnings.push(`Domain extraction (second pass) failed: ${errMsg2}`);
|
|
963
967
|
}
|
|
964
968
|
}
|
|
965
969
|
// === Challenge / bot-protection page detection ===
|
|
@@ -1061,7 +1061,7 @@ export class DuckDuckGoProvider {
|
|
|
1061
1061
|
const searxResults = await searchViaSearXNG(query, {
|
|
1062
1062
|
count: options.count ?? 10,
|
|
1063
1063
|
signal: options.signal,
|
|
1064
|
-
timeoutMs:
|
|
1064
|
+
timeoutMs: 12000,
|
|
1065
1065
|
});
|
|
1066
1066
|
if (searxResults.length > 0) {
|
|
1067
1067
|
providerStats.record('searxng', true);
|
|
@@ -21,7 +21,7 @@ export async function searchViaSearXNG(query, options = {}) {
|
|
|
21
21
|
const baseUrl = process.env.SEARXNG_URL;
|
|
22
22
|
if (!baseUrl)
|
|
23
23
|
return [];
|
|
24
|
-
const { count = 10, signal, timeoutMs =
|
|
24
|
+
const { count = 10, signal, timeoutMs = 15000, engines = '', language = 'en', } = options;
|
|
25
25
|
const controller = new AbortController();
|
|
26
26
|
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
27
27
|
if (signal)
|
|
@@ -95,7 +95,7 @@ export async function searchViaSearXNG(query, options = {}) {
|
|
|
95
95
|
*/
|
|
96
96
|
export async function isSearXNGHealthy() {
|
|
97
97
|
try {
|
|
98
|
-
const results = await searchViaSearXNG('test', { count: 1, timeoutMs:
|
|
98
|
+
const results = await searchViaSearXNG('test', { count: 1, timeoutMs: 10000 });
|
|
99
99
|
return results.length > 0;
|
|
100
100
|
}
|
|
101
101
|
catch {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.38",
|
|
3
|
+
"version": "0.21.40",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|