webpeel 0.21.9 → 0.21.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -308,6 +308,7 @@ async function twitterExtractor(html, url) {
|
|
|
308
308
|
if (fxData && fxData.code === 200 && fxData.user) {
|
|
309
309
|
const u = fxData.user;
|
|
310
310
|
const structured = {
|
|
311
|
+
title: `${u.name || ''} (@${u.screen_name || ''}) on X/Twitter`,
|
|
311
312
|
name: u.name || '',
|
|
312
313
|
handle: '@' + (u.screen_name || ''),
|
|
313
314
|
bio: u.description || '',
|
|
@@ -1500,6 +1501,7 @@ async function npmExtractor(_html, url) {
|
|
|
1500
1501
|
}
|
|
1501
1502
|
catch { /* optional */ }
|
|
1502
1503
|
const structured = {
|
|
1504
|
+
title: `${data.name}@${latest || 'unknown'}`,
|
|
1503
1505
|
name: data.name,
|
|
1504
1506
|
description: data.description || '',
|
|
1505
1507
|
version: latest || 'unknown',
|
|
@@ -296,7 +296,13 @@ function heuristicExtractNumber(fieldName, content) {
|
|
|
296
296
|
}
|
|
297
297
|
// Year
|
|
298
298
|
if (/year/.test(lf)) {
|
|
299
|
-
|
|
299
|
+
// Match 4-digit years (1900-2099), prefer explicit "Year: YYYY" pattern first
|
|
300
|
+
const explicit = content.match(/\bYear[:\s]+(\d{4})\b/i);
|
|
301
|
+
if (explicit?.[1]) {
|
|
302
|
+
const n = parseInt(explicit[1]);
|
|
303
|
+
return isNaN(n) ? null : n;
|
|
304
|
+
}
|
|
305
|
+
const m = content.match(/\b((?:19|20)\d{2})\b/);
|
|
300
306
|
if (m?.[1]) {
|
|
301
307
|
const n = parseInt(m[1]);
|
|
302
308
|
return isNaN(n) ? null : n;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.9",
|
|
3
|
+
"version": "0.21.10",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|