webpeel 0.21.9 → 0.21.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -308,6 +308,7 @@ async function twitterExtractor(html, url) {
308
308
  if (fxData && fxData.code === 200 && fxData.user) {
309
309
  const u = fxData.user;
310
310
  const structured = {
311
+ title: `${u.name || ''} (@${u.screen_name || ''}) on X/Twitter`,
311
312
  name: u.name || '',
312
313
  handle: '@' + (u.screen_name || ''),
313
314
  bio: u.description || '',
@@ -1500,6 +1501,7 @@ async function npmExtractor(_html, url) {
1500
1501
  }
1501
1502
  catch { /* optional */ }
1502
1503
  const structured = {
1504
+ title: `${data.name}@${latest || 'unknown'}`,
1503
1505
  name: data.name,
1504
1506
  description: data.description || '',
1505
1507
  version: latest || 'unknown',
@@ -296,7 +296,13 @@ function heuristicExtractNumber(fieldName, content) {
296
296
  }
297
297
  // Year
298
298
  if (/year/.test(lf)) {
299
- const m = content.match(/\b(20\d{2})\b/);
299
+ // Match 4-digit years (1900-2099), prefer explicit "Year: YYYY" pattern first
300
+ const explicit = content.match(/\bYear[:\s]+(\d{4})\b/i);
301
+ if (explicit?.[1]) {
302
+ const n = parseInt(explicit[1]);
303
+ return isNaN(n) ? null : n;
304
+ }
305
+ const m = content.match(/\b((?:19|20)\d{2})\b/);
300
306
  if (m?.[1]) {
301
307
  const n = parseInt(m[1]);
302
308
  return isNaN(n) ? null : n;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.9",
3
+ "version": "0.21.10",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",