webpeel 0.21.39 → 0.21.41

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -1061,7 +1061,7 @@ export class DuckDuckGoProvider {
1061
1061
  const searxResults = await searchViaSearXNG(query, {
1062
1062
  count: options.count ?? 10,
1063
1063
  signal: options.signal,
1064
- timeoutMs: 6000,
1064
+ timeoutMs: 12000,
1065
1065
  });
1066
1066
  if (searxResults.length > 0) {
1067
1067
  providerStats.record('searxng', true);
@@ -21,7 +21,7 @@ export async function searchViaSearXNG(query, options = {}) {
21
21
  const baseUrl = process.env.SEARXNG_URL;
22
22
  if (!baseUrl)
23
23
  return [];
24
- const { count = 10, signal, timeoutMs = 8000, engines = '', language = 'en', } = options;
24
+ const { count = 10, signal, timeoutMs = 15000, engines = '', language = 'en', } = options;
25
25
  const controller = new AbortController();
26
26
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
27
27
  if (signal)
@@ -95,7 +95,7 @@ export async function searchViaSearXNG(query, options = {}) {
95
95
  */
96
96
  export async function isSearXNGHealthy() {
97
97
  try {
98
- const results = await searchViaSearXNG('test', { count: 1, timeoutMs: 5000 });
98
+ const results = await searchViaSearXNG('test', { count: 1, timeoutMs: 10000 });
99
99
  return results.length > 0;
100
100
  }
101
101
  catch {
@@ -9,7 +9,8 @@
9
9
  * Body: ResearchRequest
10
10
  */
11
11
  import { Router } from 'express';
12
- import { peel } from '../../index.js';
12
+ import { simpleFetch } from '../../core/fetcher.js';
13
+ import { load as cheerioLoad } from 'cheerio';
13
14
  import { getSearchProvider } from '../../core/search-provider.js';
14
15
  import { callLLM, } from '../../core/llm-provider.js';
15
16
  import { sanitizeForLLM, hardenSystemPrompt, validateOutput } from '../../core/prompt-guard.js';
@@ -295,21 +296,21 @@ export function createResearchRouter() {
295
296
  break;
296
297
  const fetchStart = Date.now();
297
298
  try {
298
- const result = await Promise.race([
299
- peel(url, {
300
- format: 'markdown',
301
- noEscalate: true, // NEVER launch browser — 512MB container
302
- timeout: urlTimeout,
303
- readable: true,
304
- budget: 3000,
305
- }),
299
+ // Use simpleFetch + cheerio (no peel/pipeline) — keeps memory under 512MB
300
+ const fetchResult = await Promise.race([
301
+ simpleFetch(url, undefined, urlTimeout),
306
302
  new Promise((_, reject) => setTimeout(() => reject(new Error('per-url timeout')), urlTimeout)),
307
303
  ]);
308
304
  const fetchTime = Date.now() - fetchStart;
309
- const content = result.content || '';
305
+ // Extract clean text via cheerio (no Readability.js, no markdown pipeline)
306
+ const $ = cheerioLoad(fetchResult.html || '');
307
+ $('script,style,nav,footer,header,aside,noscript,[aria-hidden]').remove();
308
+ const pageTitle = ($('title').text() || $('h1').first().text() || title).trim().slice(0, 200);
309
+ const rawText = $('main, article, [role=main], body').first().text()
310
+ .replace(/\s+/g, ' ').trim();
311
+ const content = rawText.slice(0, 4000); // ~3000 words max
310
312
  const wordCount = content.split(/\s+/).filter(Boolean).length;
311
- const pageTitle = result.title || title;
312
- // Build snippet: prefer LLM-extracted summary, else first 500 chars of content
313
+ // Build snippet: first 500 chars of content
313
314
  const sourceSnippet = content.slice(0, 500).replace(/\s+/g, ' ').trim();
314
315
  sources.push({
315
316
  url,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.39",
3
+ "version": "0.21.41",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",