webpeel 0.21.35 → 0.21.37

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -153,7 +153,12 @@ async function fetchJson(url, customHeaders) {
153
153
  Accept: 'application/json',
154
154
  ...customHeaders,
155
155
  });
156
- return tryParseJson(result.html);
156
+ const parsed = tryParseJson(result.html);
157
+ if (parsed === null && result.html.length > 0) {
158
+ // Log when we get non-JSON back (likely an HTML error page)
159
+ console.warn(`[webpeel:fetchJson] Non-JSON response from ${url} (${result.html.length} bytes, status: ${result.statusCode}): ${result.html.slice(0, 120)}`);
160
+ }
161
+ return parsed;
157
162
  }
158
163
  /** Fetch JSON with exponential backoff retry on 429 / rate-limit errors. */
159
164
  async function fetchJsonWithRetry(url, headers, retries = 2, baseDelayMs = 1000) {
@@ -910,11 +915,17 @@ ${commentsMd || '*No comments.*'}`;
910
915
  if (pathParts.length >= 2) {
911
916
  // Sequential fetches to avoid secondary rate limits on popular repos
912
917
  const repoData = await fetchJsonWithRetry(`https://api.github.com/repos/${owner}/${repo}`, ghHeaders, 2, 1000);
913
- if (!repoData || repoData.message === 'Not Found')
914
- return null;
915
- // Secondary rate limit check
916
- if (repoData.message?.includes('secondary rate limit') || repoData.message?.includes('abuse'))
918
+ if (!repoData) {
919
+ console.warn(`[webpeel:github] repo API returned null for ${owner}/${repo}`);
917
920
  return null;
921
+ }
922
+ if (repoData.message) {
923
+ console.warn(`[webpeel:github] repo API error for ${owner}/${repo}: ${repoData.message}`);
924
+ if (repoData.message === 'Not Found')
925
+ return null;
926
+ if (repoData.message.includes('secondary rate limit') || repoData.message.includes('abuse'))
927
+ return null;
928
+ }
918
929
  const readmeData = await fetchJsonWithRetry(`https://api.github.com/repos/${owner}/${repo}/readme`, ghHeaders, 1, 500).catch(() => null);
919
930
  // README content is base64 encoded
920
931
  let readmeText = '';
@@ -391,13 +391,52 @@ async function callOllama(config, options) {
391
391
  const endpoint = (config.endpoint || process.env.OLLAMA_URL || 'http://localhost:11434').replace(/\/$/, '');
392
392
  const model = config.model || process.env.OLLAMA_MODEL || defaultModel('ollama');
393
393
  const { messages, stream, onChunk, signal, maxTokens = 4096, temperature = 0.2 } = options;
394
- const url = `${endpoint}/v1/chat/completions`;
395
394
  // Support bearer token auth (for nginx reverse proxy on Hetzner)
396
395
  const headers = { 'Content-Type': 'application/json' };
397
396
  const secret = config.apiKey || process.env.OLLAMA_SECRET;
398
397
  if (secret)
399
398
  headers['Authorization'] = `Bearer ${secret}`;
400
- const resp = await fetch(url, {
399
+ // ── Non-streaming: use /api/generate with think:false for speed ──────
400
+ // Qwen3 thinking mode wastes 300-400 tokens on CoT and takes 25s+.
401
+ // With think:false via /api/generate, response comes in ~8s.
402
+ if (!stream) {
403
+ // Build a single prompt from messages (system + user)
404
+ const systemMsg = messages.find((m) => m.role === 'system')?.content || '';
405
+ const userMsg = messages.filter((m) => m.role === 'user').map((m) => m.content).join('\n\n');
406
+ const prompt = systemMsg ? `${systemMsg}\n\n${userMsg}` : userMsg;
407
+ const resp = await fetch(`${endpoint}/api/generate`, {
408
+ method: 'POST',
409
+ headers,
410
+ body: JSON.stringify({
411
+ model,
412
+ prompt,
413
+ stream: false,
414
+ think: false, // Critical: disables Qwen3 CoT thinking (8s vs 25s+)
415
+ options: {
416
+ temperature,
417
+ num_predict: maxTokens,
418
+ },
419
+ }),
420
+ signal,
421
+ });
422
+ if (!resp.ok) {
423
+ const text = await resp.text().catch(() => '');
424
+ throw new Error(`Ollama API error: HTTP ${resp.status}${text ? ` - ${text}` : ''}`);
425
+ }
426
+ const json = await resp.json();
427
+ let text = String(json?.response || '').trim();
428
+ // Strip any residual <think> tags
429
+ text = text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
430
+ return {
431
+ text,
432
+ usage: {
433
+ input: Number(json?.prompt_eval_count || 0),
434
+ output: Number(json?.eval_count || 0),
435
+ },
436
+ };
437
+ }
438
+ // ── Streaming: use OpenAI-compatible /v1/chat/completions ────────────
439
+ const resp = await fetch(`${endpoint}/v1/chat/completions`, {
401
440
  method: 'POST',
402
441
  headers,
403
442
  body: JSON.stringify({
@@ -405,7 +444,7 @@ async function callOllama(config, options) {
405
444
  messages,
406
445
  temperature,
407
446
  max_tokens: maxTokens,
408
- stream: stream ?? false,
447
+ stream: true,
409
448
  }),
410
449
  signal,
411
450
  });
@@ -413,23 +452,6 @@ async function callOllama(config, options) {
413
452
  const text = await resp.text().catch(() => '');
414
453
  throw new Error(`Ollama API error: HTTP ${resp.status}${text ? ` - ${text}` : ''}`);
415
454
  }
416
- if (!stream) {
417
- const json = await resp.json();
418
- const msg = json?.choices?.[0]?.message;
419
- // Ollama Qwen3 thinking: content may be empty, CoT goes to `reasoning` field
420
- let text = String(msg?.content || '').trim();
421
- if (!text && msg?.reasoning)
422
- text = String(msg.reasoning).trim();
423
- // Strip <think> tags from Qwen3 models
424
- text = text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
425
- return {
426
- text,
427
- usage: {
428
- input: Number(json?.usage?.prompt_tokens || 0),
429
- output: Number(json?.usage?.completion_tokens || 0),
430
- },
431
- };
432
- }
433
455
  if (!resp.body)
434
456
  throw new Error('Ollama stream: missing body');
435
457
  let out = '';
@@ -449,7 +471,9 @@ async function callOllama(config, options) {
449
471
  onChunk?.(delta);
450
472
  }
451
473
  }, signal);
452
- return { text: out.trim(), usage: { input: 0, output: 0 } };
474
+ // Strip thinking from streamed output
475
+ out = out.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
476
+ return { text: out, usage: { input: 0, output: 0 } };
453
477
  }
454
478
  // ---------------------------------------------------------------------------
455
479
  // Cerebras (OpenAI-compatible)
@@ -307,7 +307,7 @@ export async function fetchContent(ctx) {
307
307
  }
308
308
  catch (e) {
309
309
  // Domain API failed — fall through to normal fetch
310
- log.debug('domain API first-pass failed, falling back to fetch:', e instanceof Error ? e.message : e);
310
+ log.warn('domain API first-pass failed, falling back to fetch:', e instanceof Error ? e.message : e);
311
311
  }
312
312
  }
313
313
  ctx.timer.mark('fetch');
@@ -959,7 +959,7 @@ export async function postProcess(ctx) {
959
959
  }
960
960
  catch (e) {
961
961
  // Domain extraction failure is non-fatal; continue with normal content
962
- log.debug('domain extraction failed:', e instanceof Error ? e.message : e);
962
+ log.warn('domain extraction (second pass) failed:', e instanceof Error ? e.message : e);
963
963
  }
964
964
  }
965
965
  // === Challenge / bot-protection page detection ===
@@ -153,7 +153,7 @@ const VALID_LLM_PROVIDERS = [
153
153
  'cloudflare',
154
154
  ];
155
155
  const MAX_SOURCES_HARD_LIMIT = 8;
156
- const PER_URL_TIMEOUT_MS = 15_000;
156
+ const PER_URL_TIMEOUT_MS = 8_000;
157
157
  const TOTAL_TIMEOUT_MS = 60_000;
158
158
  export function createResearchRouter() {
159
159
  const router = Router();
@@ -318,9 +318,14 @@ export function createResearchRouter() {
318
318
  wordCount,
319
319
  fetchTime,
320
320
  });
321
- if (content.length > 0) {
321
+ if (wordCount >= 50) {
322
322
  fetchedContents.push({ url, content });
323
323
  }
324
+ else if (snippet.length > 20) {
325
+ // Content too thin — use search snippet + title as surrogate
326
+ const surrogateContent = `${pageTitle}\n\n${snippet}`;
327
+ fetchedContents.push({ url, content: surrogateContent });
328
+ }
324
329
  }
325
330
  catch {
326
331
  // Skip failed URLs, continue to next
@@ -349,10 +354,15 @@ export function createResearchRouter() {
349
354
  const effectiveLLMConfig = llmConfig ?? (process.env.OLLAMA_URL
350
355
  ? { provider: 'ollama', apiKey: process.env.OLLAMA_SECRET || '' }
351
356
  : undefined);
352
- if (effectiveLLMConfig && fetchedContents.length > 0 && Date.now() < overallDeadline - 3_000) {
357
+ if (effectiveLLMConfig && fetchedContents.length > 0 && Date.now() < overallDeadline - 1_000) {
353
358
  try {
359
+ // Filter to sources with 30+ words; fall back to all if none pass the threshold
360
+ const contentsForLLM = (() => {
361
+ const filtered = fetchedContents.filter(fc => fc.content.split(/\s+/).filter(Boolean).length >= 30);
362
+ return filtered.length > 0 ? filtered : fetchedContents;
363
+ })();
354
364
  // Sanitize web content before sending to LLM (prompt injection defense layer 1)
355
- const sourcesText = fetchedContents
365
+ const sourcesText = contentsForLLM
356
366
  .map((fc, i) => {
357
367
  const sanitized = sanitizeForLLM(fc.content.slice(0, 1200));
358
368
  if (sanitized.injectionDetected) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.35",
3
+ "version": "0.21.37",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",