webpeel 0.21.64 → 0.21.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/domain-extractors.js +1459 -60
- package/package.json +1 -1
|
@@ -93,6 +93,25 @@ const REGISTRY = [
|
|
|
93
93
|
{ match: (h) => h === 'www.producthunt.com' || h === 'producthunt.com', extractor: productHuntExtractor },
|
|
94
94
|
{ match: (h) => h === 'substack.com' || h === 'www.substack.com', extractor: substackRootExtractor },
|
|
95
95
|
{ match: (_h, url = '') => /\.pdf(\?|$|#)/i.test(url) || /\/pdf\//i.test(url), extractor: pdfExtractor },
|
|
96
|
+
// ── Prediction markets & trading ─────────────────────────────────────────
|
|
97
|
+
{ match: (h) => h === 'polymarket.com' || h === 'www.polymarket.com', extractor: polymarketExtractor },
|
|
98
|
+
{ match: (h) => h === 'kalshi.com' || h === 'www.kalshi.com', extractor: kalshiExtractor },
|
|
99
|
+
{ match: (h) => h === 'tradingview.com' || h === 'www.tradingview.com', extractor: tradingViewExtractor },
|
|
100
|
+
// ── Sports ───────────────────────────────────────────────────────────────
|
|
101
|
+
{ match: (h) => h === 'espn.com' || h === 'www.espn.com', extractor: espnExtractor },
|
|
102
|
+
{ match: (h) => h === 'draftkings.com' || h === 'www.draftkings.com' || h === 'sportsbook.draftkings.com', extractor: sportsBettingExtractor },
|
|
103
|
+
{ match: (h) => h === 'fanduel.com' || h === 'www.fanduel.com' || h === 'sportsbook.fanduel.com', extractor: sportsBettingExtractor },
|
|
104
|
+
{ match: (h) => h === 'betmgm.com' || h === 'www.betmgm.com', extractor: sportsBettingExtractor },
|
|
105
|
+
// ── Academic papers ───────────────────────────────────────────────────────
|
|
106
|
+
{ match: (h) => h === 'semanticscholar.org' || h === 'www.semanticscholar.org', extractor: semanticScholarExtractor },
|
|
107
|
+
{ match: (h) => h === 'pubmed.ncbi.nlm.nih.gov', extractor: pubmedExtractor },
|
|
108
|
+
// ── Crypto ───────────────────────────────────────────────────────────────
|
|
109
|
+
{ match: (h) => h === 'coingecko.com' || h === 'www.coingecko.com', extractor: coinGeckoExtractor },
|
|
110
|
+
{ match: (h) => h === 'coinmarketcap.com' || h === 'www.coinmarketcap.com', extractor: coinGeckoExtractor },
|
|
111
|
+
// ── Weather ──────────────────────────────────────────────────────────────
|
|
112
|
+
{ match: (h) => h === 'open-meteo.com' || h === 'api.open-meteo.com' || h === 'www.open-meteo.com', extractor: weatherExtractor },
|
|
113
|
+
{ match: (h) => h === 'weather.com' || h === 'www.weather.com', extractor: weatherExtractor },
|
|
114
|
+
{ match: (h) => h === 'accuweather.com' || h === 'www.accuweather.com', extractor: weatherExtractor },
|
|
96
115
|
];
|
|
97
116
|
/**
|
|
98
117
|
* Returns the domain extractor for a URL, or null if none matches.
|
|
@@ -943,21 +962,13 @@ ${commentsMd || '*No comments.*'}`;
|
|
|
943
962
|
if (repoData.message.includes('secondary rate limit') || repoData.message.includes('abuse'))
|
|
944
963
|
return null;
|
|
945
964
|
}
|
|
946
|
-
const readmeData = await fetchJsonWithRetry(`https://api.github.com/repos/${owner}/${repo}/readme`, ghHeaders, 1, 500).catch(() => null);
|
|
947
|
-
// README content is base64 encoded
|
|
948
|
-
let readmeText = '';
|
|
949
|
-
if (readmeData?.content) {
|
|
950
|
-
try {
|
|
951
|
-
readmeText = Buffer.from(readmeData.content, 'base64').toString('utf-8').slice(0, 5000);
|
|
952
|
-
}
|
|
953
|
-
catch { /* ignore */ }
|
|
954
|
-
}
|
|
955
965
|
const structured = {
|
|
956
966
|
title: `${owner}/${repo}`,
|
|
957
967
|
name: `${owner}/${repo}`,
|
|
958
968
|
description: repoData.description || '',
|
|
959
969
|
stars: repoData.stargazers_count ?? 0,
|
|
960
970
|
forks: repoData.forks_count ?? 0,
|
|
971
|
+
watchers: repoData.watchers_count ?? 0,
|
|
961
972
|
language: repoData.language || null,
|
|
962
973
|
topics: repoData.topics || [],
|
|
963
974
|
license: repoData.license?.spdx_id || null,
|
|
@@ -968,18 +979,27 @@ ${commentsMd || '*No comments.*'}`;
|
|
|
968
979
|
homepage: repoData.homepage || null,
|
|
969
980
|
archived: repoData.archived || false,
|
|
970
981
|
fork: repoData.fork || false,
|
|
971
|
-
|
|
982
|
+
url: repoData.html_url || `https://github.com/${owner}/${repo}`,
|
|
972
983
|
};
|
|
973
|
-
const topicsStr = structured.topics.length ? structured.topics.join(', ') : '
|
|
974
|
-
const
|
|
975
|
-
|
|
976
|
-
${structured.
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
${structured.
|
|
984
|
+
const topicsStr = structured.topics.length ? structured.topics.slice(0, 8).join(', ') : '';
|
|
985
|
+
const updatedDate = structured.lastPush ? structured.lastPush.slice(0, 10) : 'N/A';
|
|
986
|
+
const lines = [
|
|
987
|
+
`# 💻 ${structured.name}`,
|
|
988
|
+
'',
|
|
989
|
+
structured.description ? `**${structured.description}**` : '*No description.*',
|
|
990
|
+
'',
|
|
991
|
+
`- ⭐ Stars: ${structured.stars.toLocaleString()} | 🍴 Forks: ${structured.forks.toLocaleString()} | 📝 Language: ${structured.language || 'N/A'}`,
|
|
992
|
+
`- 📦 License: ${structured.license || 'None'} | 🔄 Updated: ${updatedDate}`,
|
|
993
|
+
`- 📊 Open Issues: ${structured.openIssues}${structured.archived ? ' | ⚠️ ARCHIVED' : ''}`,
|
|
994
|
+
];
|
|
995
|
+
if (topicsStr)
|
|
996
|
+
lines.push(`- 🏷️ Topics: ${topicsStr}`);
|
|
997
|
+
lines.push('');
|
|
998
|
+
const links = [`[Repository](${structured.url})`];
|
|
999
|
+
if (structured.homepage)
|
|
1000
|
+
links.push(`[Homepage](${structured.homepage})`);
|
|
1001
|
+
lines.push(`**Links:** ${links.join(' · ')}`);
|
|
1002
|
+
const cleanContent = lines.join('\n');
|
|
983
1003
|
return { domain, type: 'repository', structured, cleanContent };
|
|
984
1004
|
}
|
|
985
1005
|
return null;
|
|
@@ -1095,11 +1115,18 @@ ${commentsMd || '*No comments found.*'}`;
|
|
|
1095
1115
|
commentCount: s.descendants ?? 0,
|
|
1096
1116
|
url: s.url || `https://news.ycombinator.com/item?id=${s.id}`,
|
|
1097
1117
|
hnUrl: `https://news.ycombinator.com/item?id=${s.id}`,
|
|
1118
|
+
domain: s.url ? (() => { try {
|
|
1119
|
+
return new URL(s.url).hostname.replace(/^www\./, '');
|
|
1120
|
+
}
|
|
1121
|
+
catch {
|
|
1122
|
+
return '';
|
|
1123
|
+
} })() : '',
|
|
1098
1124
|
}));
|
|
1099
1125
|
const structured = { title: 'Hacker News — Front Page', stories };
|
|
1126
|
+
// Compact format: title (domain) | score pts | N comments
|
|
1100
1127
|
const cleanContent = `## 🟠 Hacker News — Front Page
|
|
1101
1128
|
|
|
1102
|
-
${stories.map((s, i) => `${i + 1}. **${s.title}
|
|
1129
|
+
${stories.map((s, i) => `${i + 1}. **${s.title}**${s.domain ? ` (${s.domain})` : ''} — ↑${s.score} · 💬${s.commentCount}`).join('\n')}`;
|
|
1103
1130
|
return { domain, type: 'frontpage', structured, cleanContent };
|
|
1104
1131
|
}
|
|
1105
1132
|
// User page: ?id=username
|
|
@@ -1141,7 +1168,7 @@ function cleanWikipediaContent(content) {
|
|
|
1141
1168
|
.replace(/\n{3,}/g, '\n\n')
|
|
1142
1169
|
.trim();
|
|
1143
1170
|
}
|
|
1144
|
-
async function wikipediaExtractor(_html, url) {
|
|
1171
|
+
async function wikipediaExtractor(_html, url, options) {
|
|
1145
1172
|
const urlObj = new URL(url);
|
|
1146
1173
|
const pathParts = urlObj.pathname.split('/').filter(Boolean);
|
|
1147
1174
|
// Only handle article pages: /wiki/Article_Title
|
|
@@ -1152,56 +1179,70 @@ async function wikipediaExtractor(_html, url) {
|
|
|
1152
1179
|
if (articleTitle.includes(':'))
|
|
1153
1180
|
return null;
|
|
1154
1181
|
const lang = urlObj.hostname.split('.')[0] || 'en';
|
|
1155
|
-
const
|
|
1182
|
+
const summaryUrl = `https://${lang}.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(articleTitle)}`;
|
|
1156
1183
|
// Wikipedia REST API requires a descriptive User-Agent (https://meta.wikimedia.org/wiki/User-Agent_policy)
|
|
1157
1184
|
const wikiHeaders = { 'User-Agent': 'WebPeel/0.17.1 (https://webpeel.dev; jake@jakeliu.me) Node.js', 'Api-User-Agent': 'WebPeel/0.17.1 (https://webpeel.dev; jake@jakeliu.me)' };
|
|
1158
1185
|
try {
|
|
1159
|
-
const data = await fetchJson(
|
|
1186
|
+
const data = await fetchJson(summaryUrl, wikiHeaders);
|
|
1160
1187
|
if (!data || data.type === 'https://mediawiki.org/wiki/HyperSwitch/errors/not_found')
|
|
1161
1188
|
return null;
|
|
1162
|
-
// For full article content, use the mobile-html endpoint (mobile-sections is deprecated)
|
|
1163
|
-
let fullContent = '';
|
|
1164
|
-
let mobileHtmlSize;
|
|
1165
|
-
try {
|
|
1166
|
-
const fullUrl = `https://${lang}.wikipedia.org/api/rest_v1/page/mobile-html/${encodeURIComponent(articleTitle)}`;
|
|
1167
|
-
const fullResult = await simpleFetch(fullUrl, undefined, 15000, {
|
|
1168
|
-
...wikiHeaders,
|
|
1169
|
-
'Accept': 'text/html',
|
|
1170
|
-
});
|
|
1171
|
-
if (fullResult?.html) {
|
|
1172
|
-
mobileHtmlSize = fullResult.html.length;
|
|
1173
|
-
// Parse sections from the mobile HTML
|
|
1174
|
-
const sectionMatches = fullResult.html.match(/<section[^>]*>([\s\S]*?)<\/section>/gi) || [];
|
|
1175
|
-
for (const section of sectionMatches) {
|
|
1176
|
-
// Extract section heading
|
|
1177
|
-
const headingMatch = section.match(/<h[2-6][^>]*id="([^"]*)"[^>]*class="[^"]*pcs-edit-section-title[^"]*"[^>]*>([\s\S]*?)<\/h[2-6]>/i);
|
|
1178
|
-
const heading = headingMatch ? stripHtml(headingMatch[2]).trim() : '';
|
|
1179
|
-
// Extract paragraphs
|
|
1180
|
-
const paragraphs = section.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || [];
|
|
1181
|
-
const sectionText = paragraphs.map((p) => stripHtml(p).trim()).filter((t) => t.length > 0).join('\n\n');
|
|
1182
|
-
if (sectionText) {
|
|
1183
|
-
const prefix = heading ? `## ${heading}\n\n` : '';
|
|
1184
|
-
fullContent += `\n\n${prefix}${sectionText}`;
|
|
1185
|
-
}
|
|
1186
|
-
}
|
|
1187
|
-
}
|
|
1188
|
-
}
|
|
1189
|
-
catch (e) {
|
|
1190
|
-
// mobile-html failed — use summary extract as fallback
|
|
1191
|
-
if (process.env.DEBUG)
|
|
1192
|
-
console.debug('[webpeel]', 'Wikipedia mobile-html failed, using summary:', e instanceof Error ? e.message : e);
|
|
1193
|
-
}
|
|
1194
|
-
// Clean Wikipedia-specific noise
|
|
1195
|
-
fullContent = cleanWikipediaContent(fullContent);
|
|
1196
1189
|
const structured = {
|
|
1197
1190
|
title: data.title || articleTitle.replace(/_/g, ' '),
|
|
1198
1191
|
description: data.description || '',
|
|
1199
1192
|
extract: data.extract || '',
|
|
1193
|
+
extractHtml: data.extract_html || '',
|
|
1200
1194
|
thumbnail: data.thumbnail?.source || null,
|
|
1201
1195
|
url: data.content_urls?.desktop?.page || url,
|
|
1202
1196
|
lastModified: data.timestamp || null,
|
|
1197
|
+
coordinates: data.coordinates || null,
|
|
1203
1198
|
};
|
|
1204
|
-
|
|
1199
|
+
// Default: use summary API (200-400 tokens). Only fetch full article if budget > 5000.
|
|
1200
|
+
const budget = options?.budget ?? 0;
|
|
1201
|
+
const useFull = budget > 5000;
|
|
1202
|
+
let bodyContent = structured.extract;
|
|
1203
|
+
let mobileHtmlSize;
|
|
1204
|
+
if (useFull) {
|
|
1205
|
+
try {
|
|
1206
|
+
const fullUrl = `https://${lang}.wikipedia.org/api/rest_v1/page/mobile-html/${encodeURIComponent(articleTitle)}`;
|
|
1207
|
+
const fullResult = await simpleFetch(fullUrl, undefined, 15000, {
|
|
1208
|
+
...wikiHeaders,
|
|
1209
|
+
'Accept': 'text/html',
|
|
1210
|
+
});
|
|
1211
|
+
if (fullResult?.html) {
|
|
1212
|
+
mobileHtmlSize = fullResult.html.length;
|
|
1213
|
+
let fullContent = '';
|
|
1214
|
+
const sectionMatches = fullResult.html.match(/<section[^>]*>([\s\S]*?)<\/section>/gi) || [];
|
|
1215
|
+
for (const section of sectionMatches) {
|
|
1216
|
+
const headingMatch = section.match(/<h[2-6][^>]*id="([^"]*)"[^>]*class="[^"]*pcs-edit-section-title[^"]*"[^>]*>([\s\S]*?)<\/h[2-6]>/i);
|
|
1217
|
+
const heading = headingMatch ? stripHtml(headingMatch[2]).trim() : '';
|
|
1218
|
+
const paragraphs = section.match(/<p[^>]*>([\s\S]*?)<\/p>/gi) || [];
|
|
1219
|
+
const sectionText = paragraphs.map((p) => stripHtml(p).trim()).filter((t) => t.length > 0).join('\n\n');
|
|
1220
|
+
if (sectionText) {
|
|
1221
|
+
const prefix = heading ? `## ${heading}\n\n` : '';
|
|
1222
|
+
fullContent += `\n\n${prefix}${sectionText}`;
|
|
1223
|
+
}
|
|
1224
|
+
}
|
|
1225
|
+
bodyContent = cleanWikipediaContent(fullContent) || structured.extract;
|
|
1226
|
+
}
|
|
1227
|
+
}
|
|
1228
|
+
catch (e) {
|
|
1229
|
+
if (process.env.DEBUG)
|
|
1230
|
+
console.debug('[webpeel]', 'Wikipedia mobile-html failed, using summary:', e instanceof Error ? e.message : e);
|
|
1231
|
+
}
|
|
1232
|
+
}
|
|
1233
|
+
const articleUrl = structured.url;
|
|
1234
|
+
const lines = [
|
|
1235
|
+
`# ${structured.title}`,
|
|
1236
|
+
'',
|
|
1237
|
+
];
|
|
1238
|
+
if (structured.description)
|
|
1239
|
+
lines.push(`*${structured.description}*`, '');
|
|
1240
|
+
lines.push(bodyContent);
|
|
1241
|
+
if (structured.coordinates) {
|
|
1242
|
+
lines.push('', `📍 Coordinates: ${structured.coordinates.lat}, ${structured.coordinates.lon}`);
|
|
1243
|
+
}
|
|
1244
|
+
lines.push('', `📖 [Read full article on Wikipedia](${articleUrl})`);
|
|
1245
|
+
const cleanContent = lines.join('\n');
|
|
1205
1246
|
return { domain: 'wikipedia.org', type: 'article', structured, cleanContent, rawHtmlSize: mobileHtmlSize };
|
|
1206
1247
|
}
|
|
1207
1248
|
catch (e) {
|
|
@@ -1403,6 +1444,67 @@ async function youtubeExtractor(_html, url) {
|
|
|
1403
1444
|
async function arxivExtractor(_html, url) {
|
|
1404
1445
|
const urlObj = new URL(url);
|
|
1405
1446
|
const path = urlObj.pathname;
|
|
1447
|
+
// --- Search page: /search/?query=... or /search/?searchtype=all&query=... ---
|
|
1448
|
+
if (path.startsWith('/search')) {
|
|
1449
|
+
const rawQuery = urlObj.searchParams.get('query') || '';
|
|
1450
|
+
if (!rawQuery)
|
|
1451
|
+
return null;
|
|
1452
|
+
try {
|
|
1453
|
+
const searchQuery = encodeURIComponent(`all:${rawQuery}`);
|
|
1454
|
+
const apiUrl = `https://export.arxiv.org/api/query?search_query=${searchQuery}&max_results=10&sortBy=relevance`;
|
|
1455
|
+
const result = await simpleFetch(apiUrl, 'WebPeel/0.21', 20000, { Accept: 'application/xml' });
|
|
1456
|
+
if (!result?.html)
|
|
1457
|
+
return null;
|
|
1458
|
+
const xml = result.html;
|
|
1459
|
+
// Parse total results count from opensearch:totalResults
|
|
1460
|
+
const totalMatch = xml.match(/<opensearch:totalResults[^>]*>(\d+)<\/opensearch:totalResults>/);
|
|
1461
|
+
const total = totalMatch ? parseInt(totalMatch[1], 10) : 0;
|
|
1462
|
+
// Parse all entries
|
|
1463
|
+
const entries = [...xml.matchAll(/<entry[\s\S]*?<\/entry>/g)].map(m => m[0]);
|
|
1464
|
+
const papers = entries.map(entryXml => {
|
|
1465
|
+
const getTag = (tag) => {
|
|
1466
|
+
const match = entryXml.match(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`));
|
|
1467
|
+
return match ? stripHtml(match[1]).trim() : '';
|
|
1468
|
+
};
|
|
1469
|
+
const getAllTags = (tag) => {
|
|
1470
|
+
const matches = [...entryXml.matchAll(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`, 'g'))];
|
|
1471
|
+
return matches.map(m => stripHtml(m[1]).trim()).filter(Boolean);
|
|
1472
|
+
};
|
|
1473
|
+
const title = getTag('title');
|
|
1474
|
+
const published = getTag('published');
|
|
1475
|
+
const authors = getAllTags('name');
|
|
1476
|
+
const summary = getTag('summary');
|
|
1477
|
+
// Extract arXiv ID from <id> tag
|
|
1478
|
+
const idTag = getTag('id');
|
|
1479
|
+
const idMatch2 = idTag.match(/abs\/(\d{4}\.\d{4,5}(?:v\d+)?)/);
|
|
1480
|
+
const paperId2 = idMatch2 ? idMatch2[1] : '';
|
|
1481
|
+
// Categories
|
|
1482
|
+
const cats = [...entryXml.matchAll(/category[^>]*term="([^"]+)"/g)].map(m => m[1]);
|
|
1483
|
+
return { title, published: published?.split('T')[0], authors, summary, paperId: paperId2, categories: cats };
|
|
1484
|
+
}).filter(p => p.title);
|
|
1485
|
+
if (papers.length === 0)
|
|
1486
|
+
return null;
|
|
1487
|
+
const rows = papers.map((p, i) => {
|
|
1488
|
+
const authorLine = p.authors.length === 0 ? '—'
|
|
1489
|
+
: p.authors.length === 1 ? p.authors[0]
|
|
1490
|
+
: `${p.authors[0]} et al.`;
|
|
1491
|
+
const pdfLink = p.paperId ? ` [[PDF](https://arxiv.org/pdf/${p.paperId})]` : '';
|
|
1492
|
+
return `| ${i + 1} | [${p.title}](https://arxiv.org/abs/${p.paperId}) | ${p.published || '?'} | ${authorLine} |${pdfLink}`;
|
|
1493
|
+
}).join('\n');
|
|
1494
|
+
const cleanContent = `# 🔍 arXiv Search — "${rawQuery}"\n\n| # | Paper | Published | Authors |\n|---|-------|-----------|--------|\n${rows}\n\n*Source: arXiv API · Total results: ${total.toLocaleString()}*`;
|
|
1495
|
+
return {
|
|
1496
|
+
domain: 'arxiv.org',
|
|
1497
|
+
type: 'search',
|
|
1498
|
+
structured: { query: rawQuery, total, papers },
|
|
1499
|
+
cleanContent,
|
|
1500
|
+
};
|
|
1501
|
+
}
|
|
1502
|
+
catch (e) {
|
|
1503
|
+
if (process.env.DEBUG)
|
|
1504
|
+
console.debug('[webpeel]', 'ArXiv search failed:', e instanceof Error ? e.message : e);
|
|
1505
|
+
return null;
|
|
1506
|
+
}
|
|
1507
|
+
}
|
|
1406
1508
|
// Extract paper ID from URL patterns:
|
|
1407
1509
|
// /abs/2501.12948, /pdf/2501.12948, /abs/2501.12948v2
|
|
1408
1510
|
const idMatch = path.match(/\/(abs|pdf|html)\/(\d{4}\.\d{4,5}(?:v\d+)?)/);
|
|
@@ -1462,7 +1564,7 @@ async function arxivExtractor(_html, url) {
|
|
|
1462
1564
|
const authorLine = authors.length <= 5
|
|
1463
1565
|
? authors.join(', ')
|
|
1464
1566
|
: `${authors.slice(0, 5).join(', ')} et al. (${authors.length} authors)`;
|
|
1465
|
-
const cleanContent = `# ${title}\n\n**Authors:** ${authorLine}\n**
|
|
1567
|
+
const cleanContent = `# 📄 arXiv: ${title} (${paperId})\n\n**Authors:** ${authorLine}\n**Submitted:** ${published?.split('T')[0] || 'N/A'}${categories.length ? `\n**Categories:** ${categories.join(', ')}` : ''}${doi ? `\n**DOI:** ${doi}` : ''}${journalRef ? `\n**Journal:** ${journalRef}` : ''}\n\n## Abstract\n\n${summary}\n\n**PDF:** [Download](${structured.pdfUrl}) | **HTML:** [View](https://arxiv.org/html/${paperId})`;
|
|
1466
1568
|
return { domain: 'arxiv.org', type: 'paper', structured, cleanContent };
|
|
1467
1569
|
}
|
|
1468
1570
|
catch (e) {
|
|
@@ -3385,3 +3487,1300 @@ Browse newsletters at:
|
|
|
3385
3487
|
*WebPeel works best with individual Substack post URLs, not the root homepage.*`;
|
|
3386
3488
|
return { domain: 'substack.com', type: 'homepage', structured, cleanContent };
|
|
3387
3489
|
}
|
|
3490
|
+
// ---------------------------------------------------------------------------
|
|
3491
|
+
// 33. Polymarket extractor — prediction market data via Gamma API
|
|
3492
|
+
// ---------------------------------------------------------------------------
|
|
3493
|
+
/**
 * Domain extractor for polymarket.com pages, backed by the Polymarket Gamma API.
 *
 * Routing:
 *   - `/event/<slug>`: looks the event up by slug and renders each nested market
 *     with its outcome prices. If no event is returned, a keyword search over open
 *     markets is tried and rendered through `buildPolymarketMarketList`.
 *   - anything else (or an event lookup that failed): the top open markets ordered
 *     by 24h volume, rendered through `buildPolymarketMarketList`.
 *
 * @param {string} _html - Page HTML; unused, all data comes from the API.
 * @param {string} url - Absolute URL of the Polymarket page.
 * @returns {Promise<{domain: string, type: string, structured: object, cleanContent: string} | null>}
 *   The extraction result, or null when no API call produced usable data.
 */
async function polymarketExtractor(_html, url) {
    const urlObj = new URL(url);
    const path = urlObj.pathname;
    const domain = 'polymarket.com';
    const toNumber = (v) => (typeof v === 'string' ? parseFloat(v) : v);
    // Helper: format a 0..1 price as a percentage; anything non-numeric becomes '?%'.
    const fmtPct = (p) => {
        const n = toNumber(p);
        if (typeof n !== 'number' || Number.isNaN(n))
            return '?%';
        return (n * 100).toFixed(1) + '%';
    };
    // Helper: format a large dollar amount with K/M suffixes.
    const fmtVol = (v) => {
        const n = toNumber(v);
        if (typeof n !== 'number' || Number.isNaN(n) || n === 0)
            return '$0';
        if (n >= 1_000_000)
            return `$${(n / 1_000_000).toFixed(1)}M`;
        if (n >= 1_000)
            return `$${(n / 1_000).toFixed(1)}K`;
        return `$${n.toFixed(0)}`;
    };
    // Helper: keep only the YYYY-MM-DD prefix of a date string.
    const fmtDate = (d) => (typeof d === 'string' && d ? d.slice(0, 10) : '?');
    // Helper: `outcomes` / `outcomePrices` are handled as JSON-encoded arrays by this
    // file. Accept an already-decoded array too, and never throw: a single malformed
    // market must not abort rendering of the whole event.
    const parseList = (raw) => {
        if (Array.isArray(raw))
            return raw;
        if (typeof raw !== 'string' || raw === '')
            return [];
        try {
            const parsed = JSON.parse(raw);
            return Array.isArray(parsed) ? parsed : [];
        }
        catch {
            return [];
        }
    };
    // --- Specific event page: /event/<slug> ---
    const eventMatch = path.match(/^\/event\/([^/?#]+)/);
    if (eventMatch) {
        const slug = eventMatch[1];
        try {
            // Fetch event by slug from gamma API
            const events = await fetchJson(`https://gamma-api.polymarket.com/events?slug=${encodeURIComponent(slug)}&limit=1`);
            if (Array.isArray(events) && events.length > 0) {
                const event = events[0];
                const markets = (Array.isArray(event.markets) ? event.markets : [])
                    .filter((m) => m && typeof m === 'object');
                const structured = {
                    title: event.title || slug,
                    slug: event.slug,
                    volume: event.volume,
                    volume24hr: event.volume24hr,
                    endDate: event.endDate,
                    markets: markets.map((m) => ({
                        question: m.question,
                        outcomes: m.outcomes,
                        outcomePrices: m.outcomePrices,
                        volume: m.volume,
                        volume24hr: m.volume24hr,
                        endDate: m.endDate,
                        bestBid: m.bestBid,
                        bestAsk: m.bestAsk,
                        lastTradePrice: m.lastTradePrice,
                    })),
                };
                const marketsMd = markets.map((m) => {
                    const outcomes = parseList(m.outcomes);
                    const prices = parseList(m.outcomePrices);
                    // A missing price is reported as '?%' instead of a misleading 0.0%.
                    const priceStr = outcomes.map((o, i) => `${o}: **${fmtPct(prices[i])}**`).join(' | ');
                    const details = [];
                    if (priceStr)
                        details.push(priceStr);
                    if (m.volume24hr)
                        details.push(`Vol 24h: ${fmtVol(m.volume24hr)}`);
                    if (m.endDate)
                        details.push(`Ends: ${fmtDate(m.endDate)}`);
                    // Joining only the parts that exist avoids a dangling ' | ' separator.
                    return details.length > 0
                        ? `- **${m.question}**\n ${details.join(' | ')}`
                        : `- **${m.question}**`;
                }).join('\n\n');
                const totalVol24 = fmtVol(event.volume24hr || 0);
                const totalVol = fmtVol(event.volume || 0);
                const cleanContent = [
                    `# 📊 Polymarket: ${event.title || slug}`,
                    '',
                    `**Volume (24h):** ${totalVol24} | **Total Volume:** ${totalVol} | **Ends:** ${fmtDate(event.endDate)}`,
                    '',
                    '## Markets',
                    '',
                    marketsMd || '*No active markets found.*',
                    '',
                    '---',
                    `*Source: [Polymarket](https://polymarket.com/event/${slug}) · Data via Polymarket Gamma API*`,
                ].join('\n');
                return { domain, type: 'event', structured, cleanContent };
            }
            // If event not found by slug, try a keyword search in markets
            const markets = await fetchJson(`https://gamma-api.polymarket.com/markets?closed=false&limit=10&order=volume24hr&ascending=false&q=${encodeURIComponent(slug.replace(/-/g, ' '))}`);
            if (Array.isArray(markets) && markets.length > 0) {
                return buildPolymarketMarketList(markets, domain, `Search: ${slug}`);
            }
        }
        catch (e) {
            // Best effort: on failure fall through to the generic top-markets listing.
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'Polymarket event fetch failed:', e instanceof Error ? e.message : e);
        }
    }
    // --- Main page or /markets: show top markets by 24h volume ---
    try {
        const markets = await fetchJson('https://gamma-api.polymarket.com/markets?closed=false&limit=20&order=volume24hr&ascending=false');
        if (Array.isArray(markets)) {
            return buildPolymarketMarketList(markets, domain, 'Top Markets');
        }
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'Polymarket markets fetch failed:', e instanceof Error ? e.message : e);
    }
    return null;
}
|
|
3595
|
+
/**
 * Renders a list of Polymarket markets as a Markdown table plus a structured summary.
 *
 * Only the first 15 usable entries of `markets` are rendered. The "Yes Price" column
 * shows the price paired with the first listed outcome.
 *
 * @param {Array<object>} markets - Market records; each may carry `question`, `slug`,
 *   `outcomes`, `outcomePrices`, `volume24hr` and `endDate`.
 * @param {string} domain - Domain string copied into the result.
 * @param {string} title - Heading suffix, e.g. "Top Markets".
 * @returns {{domain: string, type: 'markets', structured: object, cleanContent: string}}
 */
function buildPolymarketMarketList(markets, domain, title) {
    const toNumber = (v) => (typeof v === 'string' ? parseFloat(v) : v);
    const fmtPct = (p) => {
        const n = toNumber(p);
        if (typeof n !== 'number' || Number.isNaN(n))
            return '?%';
        return (n * 100).toFixed(1) + '%';
    };
    const fmtVol = (v) => {
        const n = toNumber(v);
        if (typeof n !== 'number' || Number.isNaN(n) || n === 0)
            return '$0';
        if (n >= 1_000_000)
            return `$${(n / 1_000_000).toFixed(1)}M`;
        if (n >= 1_000)
            return `$${(n / 1_000).toFixed(1)}K`;
        return `$${n.toFixed(0)}`;
    };
    // Accepts a JSON-encoded array or an already-decoded array; anything else
    // (malformed JSON, or JSON that decodes to a non-array such as null) yields [].
    const parseList = (raw) => {
        if (Array.isArray(raw))
            return raw;
        if (typeof raw !== 'string' || raw === '')
            return [];
        try {
            const parsed = JSON.parse(raw);
            return Array.isArray(parsed) ? parsed : [];
        }
        catch {
            return [];
        }
    };
    // A literal '|' or a line break inside a cell would break the Markdown table row.
    const cell = (v) => String(v ?? '').replace(/\s*[\r\n]+\s*/g, ' ').replace(/\|/g, '\\|');
    const shown = (Array.isArray(markets) ? markets : [])
        .filter((m) => m && typeof m === 'object')
        .slice(0, 15);
    const rows = shown.map((m) => {
        const outcomes = parseList(m.outcomes);
        const prices = parseList(m.outcomePrices);
        // A missing price is reported as '?%' rather than a misleading 0.0%.
        const yesPrice = outcomes[0] ? fmtPct(prices[0]) : '?%';
        const vol24 = fmtVol(m.volume24hr || 0);
        const end = typeof m.endDate === 'string' && m.endDate ? m.endDate.slice(0, 10) : '?';
        return `| ${cell(m.question)} | ${yesPrice} | ${vol24} | ${end} |`;
    }).join('\n');
    const structured = {
        markets: shown.map((m) => ({
            question: m.question,
            slug: m.slug,
            outcomePrices: m.outcomePrices,
            outcomes: m.outcomes,
            volume24hr: m.volume24hr,
            endDate: m.endDate,
        })),
        fetchedAt: new Date().toISOString(),
    };
    const cleanContent = [
        `# 📊 Polymarket — ${title}`,
        '',
        '| Question | Yes Price | Vol 24h | End Date |',
        '|----------|-----------|---------|----------|',
        rows,
        '',
        '---',
        '*Source: [Polymarket](https://polymarket.com) · Data via Polymarket Gamma API*',
    ].join('\n');
    return { domain, type: 'markets', structured, cleanContent };
}
|
|
3651
|
+
// ---------------------------------------------------------------------------
|
|
3652
|
+
// 34. Kalshi extractor — prediction market data via Kalshi Elections API
|
|
3653
|
+
// ---------------------------------------------------------------------------
|
|
3654
|
+
/**
 * Domain extractor for kalshi.com pages, backed by the Kalshi trade API
 * (api.elections.kalshi.com).
 *
 * Routing:
 *   - `/markets/<ticker>` or `/events/<ticker>`: fetches that event with its nested
 *     markets and renders one bullet per market (yes bid–ask, volumes, expiry).
 *   - anything else (or an event lookup that failed): a table of up to 15 open
 *     events, using each event's first market for the price/volume columns.
 *
 * @param {string} _html - Page HTML; unused, all data comes from the API.
 * @param {string} url - Absolute URL of the Kalshi page.
 * @returns {Promise<{domain: string, type: string, structured: object, cleanContent: string} | null>}
 *   The extraction result, or null when no API call produced usable data.
 */
async function kalshiExtractor(_html, url) {
    const urlObj = new URL(url);
    const path = urlObj.pathname;
    const domain = 'kalshi.com';
    const toNumber = (v) => (typeof v === 'string' ? parseFloat(v) : v);
    // Helper: format Kalshi dollar price (they use dollars like 0.78 = 78¢ = 78%)
    const fmtPct = (v) => {
        const n = toNumber(v);
        if (typeof n !== 'number' || Number.isNaN(n))
            return '?%';
        return (n * 100).toFixed(0) + '%';
    };
    const fmtVol = (v) => {
        const n = toNumber(v);
        if (typeof n !== 'number' || Number.isNaN(n) || n === 0)
            return '$0';
        if (n >= 1_000_000)
            return `$${(n / 1_000_000).toFixed(1)}M`;
        if (n >= 1_000)
            return `$${(n / 1_000).toFixed(1)}K`;
        return `$${n.toFixed(0)}`;
    };
    // Only string timestamps can be sliced to YYYY-MM-DD; anything else is unknown.
    const fmtDay = (t) => (typeof t === 'string' && t ? t.slice(0, 10) : '?');
    // Tolerate a missing or non-array `markets` / `events` payload.
    const asList = (v) => (Array.isArray(v) ? v.filter((x) => x && typeof x === 'object') : []);
    // A literal '|' or a line break inside a cell would break the Markdown table row.
    const cell = (v) => String(v ?? '').replace(/\s*[\r\n]+\s*/g, ' ').replace(/\|/g, '\\|');
    // --- Specific market/event page: /markets/<ticker> or /events/<ticker> ---
    // NOTE(review): only the first path segment is used as the event ticker; confirm
    // this matches the URL shapes kalshi.com actually serves.
    const tickerMatch = path.match(/^\/(?:markets|events)\/([^/?#]+)/);
    if (tickerMatch) {
        const ticker = tickerMatch[1].toUpperCase();
        try {
            // Try fetching the specific event by ticker
            const data = await fetchJson(`https://api.elections.kalshi.com/trade-api/v2/events/${ticker}?with_nested_markets=true`);
            const event = data?.event;
            if (event) {
                const markets = asList(event.markets);
                const structured = {
                    title: event.title,
                    ticker: event.event_ticker,
                    category: event.category,
                    markets: markets.map((m) => ({
                        title: m.title,
                        ticker: m.ticker,
                        yes_bid: m.yes_bid_dollars,
                        yes_ask: m.yes_ask_dollars,
                        volume: m.volume_fp,
                        volume_24h: m.volume_24h_fp,
                        last_price: m.last_price_dollars,
                        expiration: m.expiration_time,
                    })),
                };
                const marketsMd = markets.map((m) => {
                    const yesBid = fmtPct(m.yes_bid_dollars);
                    const yesAsk = fmtPct(m.yes_ask_dollars);
                    const vol = fmtVol(m.volume_fp);
                    const vol24 = fmtVol(m.volume_24h_fp);
                    const expiry = fmtDay(m.expiration_time);
                    return `- **${m.title}**\n Yes: ${yesBid}–${yesAsk} | Vol: ${vol} | Vol 24h: ${vol24} | Expires: ${expiry}`;
                }).join('\n\n');
                const cleanContent = [
                    `# 🎯 Kalshi: ${event.title}`,
                    '',
                    `**Category:** ${event.category || 'General'} | **Ticker:** ${event.event_ticker}`,
                    '',
                    '## Markets',
                    '',
                    marketsMd || '*No active markets found.*',
                    '',
                    '---',
                    `*Source: [Kalshi](https://kalshi.com/markets/${ticker.toLowerCase()}) · Data via Kalshi Trade API*`,
                ].join('\n');
                return { domain, type: 'event', structured, cleanContent };
            }
        }
        catch (e) {
            // Best effort: on failure fall through to the open-events listing.
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'Kalshi event fetch failed:', e instanceof Error ? e.message : e);
        }
    }
    // --- Main page or /markets: show top open events ---
    try {
        const data = await fetchJson('https://api.elections.kalshi.com/trade-api/v2/events?limit=20&status=open&with_nested_markets=true');
        const events = asList(data?.events);
        if (events.length > 0) {
            const shown = events.slice(0, 15);
            const rows = shown.map((e) => {
                const markets = asList(e.markets);
                const firstMkt = markets[0];
                const yesBid = firstMkt ? fmtPct(firstMkt.yes_bid_dollars) : '?%';
                const vol24 = firstMkt ? fmtVol(firstMkt.volume_24h_fp) : '$0';
                const mktCount = markets.length > 1 ? ` (+${markets.length - 1} more)` : '';
                return `| ${cell(e.title)} | ${yesBid}${mktCount} | ${vol24} | ${cell(e.category || '?')} |`;
            }).join('\n');
            const structured = {
                events: shown.map((e) => ({
                    title: e.title,
                    ticker: e.event_ticker,
                    category: e.category,
                    markets: asList(e.markets).length,
                })),
                fetchedAt: new Date().toISOString(),
            };
            const cleanContent = [
                '# 🎯 Kalshi — Top Open Events',
                '',
                '| Event | Yes Price | Vol 24h | Category |',
                '|-------|-----------|---------|----------|',
                rows,
                '',
                '---',
                '*Source: [Kalshi](https://kalshi.com/markets) · Data via Kalshi Trade API*',
            ].join('\n');
            return { domain, type: 'markets', structured, cleanContent };
        }
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'Kalshi markets fetch failed:', e instanceof Error ? e.message : e);
    }
    return null;
}
|
|
3765
|
+
// ---------------------------------------------------------------------------
|
|
3766
|
+
// 35. TradingView extractor — stock/index data via TradingView Scanner API
|
|
3767
|
+
// ---------------------------------------------------------------------------
|
|
3768
|
+
/**
 * TradingView extractor — quote data for one symbol, or a market overview.
 *
 * A symbol is taken from `/symbols/<SYMBOL>/` or from the `symbol` query
 * parameter. It is resolved through the symbol-search endpoint and quoted
 * through the scanner endpoint. Any other URL, and any symbol lookup that
 * fails or throws, falls back to an overview table of major indices and
 * large-cap stocks.
 *
 * @param {string} _html - Unused; all data is fetched over HTTP.
 * @param {string} url - Page URL; must be parseable by `new URL` (throws otherwise).
 * @returns {Promise<{domain: string, type: string, structured: object, cleanContent: string} | null>}
 *   `type` is `'symbol'` or `'markets'`; `null` when neither lookup yielded rows.
 */
async function tradingViewExtractor(_html, url) {
    const urlObj = new URL(url);
    const path = urlObj.pathname;
    const domain = 'tradingview.com';
    const userAgent = 'webpeel/0.21 (https://webpeel.dev)';
    const scannerHeaders = {
        'Origin': 'https://www.tradingview.com',
        'Referer': 'https://www.tradingview.com/',
        'Content-Type': 'application/json',
    };
    // Helper: abbreviate large numbers (K / M / B / T), two decimals otherwise.
    const fmtPrice = (v) => {
        if (v == null)
            return '?';
        if (v >= 1_000_000_000_000)
            return `${(v / 1_000_000_000_000).toFixed(2)}T`;
        if (v >= 1_000_000_000)
            return `${(v / 1_000_000_000).toFixed(2)}B`;
        if (v >= 1_000_000)
            return `${(v / 1_000_000).toFixed(2)}M`;
        if (v >= 1_000)
            return `${(v / 1_000).toFixed(2)}K`;
        return v.toFixed(2);
    };
    // Helper: signed percentage, empty string when the value is missing.
    const fmtChange = (c) => {
        if (c == null)
            return '';
        const sign = c >= 0 ? '+' : '';
        return `${sign}${c.toFixed(2)}%`;
    };
    // Search results wrap the matched text in <em> tags; strip them before comparing.
    const stripEm = (value) => (typeof value === 'string' ? value.replace(/<\/?em>/g, '') : '');
    // Query the symbol-search endpoint; always yields an array so callers can
    // use array methods even when the endpoint answers with an error object.
    const searchSymbols = async (text) => {
        const resp = await fetch(`https://symbol-search.tradingview.com/symbol_search/?text=${encodeURIComponent(text)}&hl=0&lang=en&type=stock,fund,crypto,futures,forex&limit=5`, {
            headers: {
                'User-Agent': userAgent,
                'Origin': 'https://www.tradingview.com',
                'Referer': 'https://www.tradingview.com/',
            },
            signal: AbortSignal.timeout(10000),
        });
        const payload = await resp.json().catch(() => []);
        return Array.isArray(payload) ? payload : [];
    };
    // --- Symbol page: /symbols/<TICKER>/ or /chart?symbol=<TICKER> ---
    const symbolMatch = path.match(/^\/symbols\/([^/?#]+)\/?/);
    const requested = symbolMatch?.[1] || urlObj.searchParams.get('symbol') || null;
    if (requested) {
        const upper = requested.toUpperCase();
        // Legacy normalisation: drop hyphens (e.g. "BTC-USD" -> "BTCUSD").
        let ticker = upper.replace(/-/g, '');
        // Symbol URLs commonly carry an exchange prefix ("NASDAQ-AAPL",
        // "NASDAQ:AAPL"). Keep both halves so the lookup can be retried with
        // the bare ticker when the flattened form does not resolve.
        const sepIndex = upper.search(/[-:]/);
        const exchangeHint = sepIndex > 0 ? upper.slice(0, sepIndex) : '';
        const bareTicker = sepIndex > 0 ? upper.slice(sepIndex + 1) : '';
        try {
            // Try symbol search to resolve exchange
            const primary = await searchSymbols(ticker);
            let symbolInfo = primary.find((s) => stripEm(s?.symbol) === ticker);
            if (!symbolInfo && bareTicker) {
                const secondary = await searchSymbols(bareTicker);
                const sameSymbol = secondary.filter((s) => stripEm(s?.symbol) === bareTicker);
                const onExchange = sameSymbol.find((s) => [s.source_id, s.exchange]
                    .some((x) => typeof x === 'string' && x.toUpperCase() === exchangeHint));
                symbolInfo = onExchange || sameSymbol[0];
                if (symbolInfo)
                    ticker = bareTicker;
            }
            // No exact hit anywhere: fall back to the first fuzzy result, as before.
            symbolInfo = symbolInfo || primary[0];
            if (symbolInfo) {
                const exchange = symbolInfo.source_id || symbolInfo.exchange || 'NASDAQ';
                // Fetch quote data via scanner
                const scannerUrl = exchange === 'CRYPTO' || exchange === 'COINBASE' || exchange === 'BINANCE'
                    ? 'https://scanner.tradingview.com/crypto/scan'
                    : 'https://scanner.tradingview.com/america/scan';
                const scanBody = {
                    filter: [{ left: 'name', operation: 'equal', right: stripEm(symbolInfo.symbol) || ticker }],
                    columns: ['name', 'description', 'close', 'open', 'high', 'low', 'volume', 'change', 'change_abs', 'market_cap_basic', 'sector', 'industry', 'country', 'currency'],
                    range: [0, 1],
                };
                const scanResp = await fetch(scannerUrl, {
                    method: 'POST',
                    headers: { ...scannerHeaders, 'User-Agent': userAgent },
                    body: JSON.stringify(scanBody),
                    signal: AbortSignal.timeout(10000),
                });
                const scanData = await scanResp.json().catch(() => null);
                const row = scanData?.data?.[0]?.d;
                if (row) {
                    // Values arrive positionally, in the order of `columns` above.
                    const [name, desc, close, open, high, low, volume, changePct, changeAbs, mktCap, sector, industry, country, currency] = row;
                    const currStr = currency || 'USD';
                    const mktCapStr = mktCap ? fmtPrice(mktCap) : null;
                    const structured = {
                        symbol: name,
                        description: desc,
                        price: close,
                        open,
                        high,
                        low,
                        volume,
                        change_pct: changePct,
                        change_abs: changeAbs,
                        market_cap: mktCap,
                        sector,
                        industry,
                        country,
                        currency: currStr,
                        exchange,
                        fetchedAt: new Date().toISOString(),
                    };
                    const changeStr = fmtChange(changePct);
                    const changeIcon = (changePct ?? 0) >= 0 ? '📈' : '📉';
                    const cleanContent = `# ${changeIcon} TradingView: ${desc || name} (${name})

## Quote
- **Price:** ${close?.toFixed(2) ?? '?'} ${currStr}
- **Change:** ${changeStr} (${changeAbs?.toFixed(2) ?? '?'} ${currStr})
- **Open:** ${open?.toFixed(2) ?? '?'} | **High:** ${high?.toFixed(2) ?? '?'} | **Low:** ${low?.toFixed(2) ?? '?'}
- **Volume:** ${fmtPrice(volume ?? 0)}
${mktCapStr ? `- **Market Cap:** ${mktCapStr} ${currStr}` : ''}

## Details
${sector ? `- **Sector:** ${sector}` : ''}
${industry ? `- **Industry:** ${industry}` : ''}
${country ? `- **Country:** ${country}` : ''}
- **Exchange:** ${exchange}

---
*Source: [TradingView](https://www.tradingview.com/symbols/${name}/) · Data via TradingView Scanner API*`;
                    return { domain, type: 'symbol', structured, cleanContent };
                }
            }
        }
        catch (e) {
            // Best effort: a failed symbol lookup degrades to the overview below.
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'TradingView symbol fetch failed:', e instanceof Error ? e.message : e);
        }
    }
    // --- Markets overview page or fallback: show major indices ---
    try {
        // Fetch major indices + top stocks
        const scanBody = {
            filter: [
                { left: 'name', operation: 'in_range', right: ['SPX', 'NDX', 'DJI', 'RUT', 'VIX', 'AAPL', 'MSFT', 'NVDA', 'AMZN', 'GOOGL', 'META', 'TSLA'] },
            ],
            columns: ['name', 'description', 'close', 'change', 'volume', 'market_cap_basic'],
            sort: { sortBy: 'market_cap_basic', sortOrder: 'desc' },
            range: [0, 20],
        };
        const resp = await fetch('https://scanner.tradingview.com/global/scan', {
            method: 'POST',
            headers: { ...scannerHeaders, 'User-Agent': userAgent },
            body: JSON.stringify(scanBody),
            signal: AbortSignal.timeout(10000),
        });
        const data = await resp.json().catch(() => null);
        const rows = data?.data || [];
        if (rows.length > 0) {
            const tableRows = rows.map((row) => {
                const [name, desc, close, changePct] = row.d;
                const changeStr = changePct != null ? `${changePct >= 0 ? '+' : ''}${changePct.toFixed(2)}%` : '?%';
                const icon = (changePct ?? 0) >= 0 ? '🟢' : '🔴';
                return `| ${name} | ${desc} | ${close?.toFixed(2) ?? '?'} | ${icon} ${changeStr} |`;
            }).join('\n');
            const structured = {
                symbols: rows.map((r) => ({
                    symbol: r.d[0],
                    description: r.d[1],
                    price: r.d[2],
                    change_pct: r.d[3],
                })),
                fetchedAt: new Date().toISOString(),
            };
            const now = new Date().toLocaleString('en-US', { timeZone: 'America/New_York', hour12: false });
            const cleanContent = `# 📈 TradingView — Market Overview

*As of ${now} ET*

| Symbol | Name | Price | Change |
|--------|------|-------|--------|
${tableRows}

---
*Source: [TradingView](https://www.tradingview.com/markets/) · Data via TradingView Scanner API*`;
            return { domain, type: 'markets', structured, cleanContent };
        }
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'TradingView markets fetch failed:', e instanceof Error ? e.message : e);
    }
    return null;
}
|
|
3941
|
+
// ---------------------------------------------------------------------------
|
|
3942
|
+
// 36. ESPN extractor — live scores, standings, schedules via ESPN public API
|
|
3943
|
+
// ---------------------------------------------------------------------------
|
|
3944
|
+
/**
 * Map an ESPN page URL to the sport/league identifiers and page type used by
 * the ESPN API helpers.
 *
 * @param {string} url - Candidate ESPN URL.
 * @returns {{sport: string, league: string, type: 'standings' | 'team' | 'scoreboard', param?: string} | null}
 *   `null` when `url` is unparseable or its host is not espn.com (or a
 *   subdomain of it). `param` is only present for `type: 'team'` and holds the
 *   path segment after `/name/` (may be `undefined`). Unknown paths default to
 *   the NBA scoreboard.
 */
function matchESPN(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return null;
    }
    // Exact host or a real subdomain only — a substring test would also accept
    // hosts such as "notespn.com" or "espn.com.example.org".
    const host = parsed.hostname;
    if (host !== 'espn.com' && !host.endsWith('.espn.com'))
        return null;
    const path = parsed.pathname.toLowerCase();
    // Map URL path prefixes to [sport, league]
    const sportMap = {
        '/nba': ['basketball', 'nba'],
        '/wnba': ['basketball', 'wnba'],
        '/nfl': ['football', 'nfl'],
        '/mlb': ['baseball', 'mlb'],
        '/nhl': ['hockey', 'nhl'],
        '/college-football': ['football', 'college-football'],
        '/mens-college-basketball': ['basketball', 'mens-college-basketball'],
        '/womens-college-basketball': ['basketball', 'womens-college-basketball'],
        '/soccer': ['soccer', 'eng.1'],
        '/mma': ['mma', 'ufc'],
    };
    for (const [prefix, [sport, defaultLeague]] of Object.entries(sportMap)) {
        if (!path.startsWith(prefix))
            continue;
        // Override soccer league if explicitly in URL path (e.g. /soccer/scoreboard/_/league/usa.1)
        let league = defaultLeague;
        if (sport === 'soccer') {
            const leagueMatch = path.match(/\/league\/([^/?#]+)/);
            if (leagueMatch)
                league = leagueMatch[1];
        }
        if (path.includes('standings'))
            return { sport, league, type: 'standings' };
        if (path.includes('/team/') || path.includes('/teams/')) {
            const param = path.split('/name/')[1]?.split('/')[0];
            return { sport, league, type: 'team', param };
        }
        // Scores, scoreboard and every other page under the prefix.
        return { sport, league, type: 'scoreboard' };
    }
    // Fallback: espn.com root or unknown path → NBA scoreboard
    return { sport: 'basketball', league: 'nba', type: 'scoreboard' };
}
|
|
3992
|
+
/**
 * Pick the emoji shown in ESPN headings.
 *
 * The NBA/WNBA league check wins over the sport; otherwise the sport decides.
 * Any basketball league (including the college ones) gets the basketball.
 *
 * @param {string} sport - ESPN sport identifier, e.g. `'basketball'`.
 * @param {string} league - ESPN league identifier, e.g. `'nba'`.
 * @returns {string} A single emoji; the trophy when nothing matches.
 */
function espnSportEmoji(sport, league) {
    if (league === 'nba' || league === 'wnba')
        return '🏀';
    switch (sport) {
        case 'basketball':
            return '🏀';
        case 'football':
            return '🏈';
        case 'baseball':
            return '⚾';
        case 'hockey':
            return '🏒';
        case 'soccer':
            return '⚽';
        case 'mma':
            return '🥊';
        default:
            return league === 'ufc' ? '🥊' : '🏆';
    }
}
|
|
4008
|
+
/**
 * Format an ISO date string as a US Eastern clock time, e.g. "7:30 PM ET".
 *
 * @param {string} isoDate - Date string accepted by the `Date` constructor.
 * @returns {string} The formatted time, or `isoDate` unchanged when it cannot
 *   be parsed or formatted.
 */
function fmtEspnTime(isoDate) {
    const d = new Date(isoDate);
    // An unparseable input yields an Invalid Date, which formats as the text
    // "Invalid Date" instead of throwing — so it has to be checked explicitly.
    if (Number.isNaN(d.getTime()))
        return isoDate;
    try {
        return d.toLocaleTimeString('en-US', {
            timeZone: 'America/New_York',
            hour: 'numeric',
            minute: '2-digit',
            hour12: true,
        }) + ' ET';
    }
    catch {
        return isoDate;
    }
}
|
|
4023
|
+
/**
 * Today's date in US Eastern time, spelled out as "March 18, 2026".
 *
 * @returns {string} Long-month, numeric day and year in the `en-US` locale.
 */
function fmtTodayESPN() {
    const easternLongDate = new Intl.DateTimeFormat('en-US', {
        timeZone: 'America/New_York',
        month: 'long',
        day: 'numeric',
        year: 'numeric',
    });
    return easternLongDate.format(new Date());
}
|
|
4032
|
+
/**
 * Fetch a league scoreboard from the ESPN site API and render it as a
 * Markdown table (one row per event: matchup, score, status).
 *
 * Status column:
 * - not started: start time via `fmtEspnTime`;
 * - in progress: `Q<period> <clock>` for basketball/football; for other sports
 *   the status text supplied in the payload (`shortDetail` / `detail`), since a
 *   "Q" prefix is wrong for periods, innings or halves; `Live` as last resort;
 * - anything else: the payload's status description, or `Final`.
 *
 * @param {string} sport - ESPN sport identifier (URL path segment).
 * @param {string} league - ESPN league identifier (URL path segment).
 * @returns {Promise<string | null>} Markdown, or `null` if fetching/formatting threw.
 */
async function fetchEspnScoreboard(sport, league) {
    try {
        const apiUrl = `https://site.api.espn.com/apis/site/v2/sports/${sport}/${league}/scoreboard`;
        const data = await fetchJson(apiUrl);
        const events = data?.events || [];
        const emoji = espnSportEmoji(sport, league);
        const leagueName = data?.leagues?.[0]?.name || league.toUpperCase();
        const today = fmtTodayESPN();
        if (events.length === 0) {
            return `# ${emoji} ${leagueName} Scoreboard — ${today}\n\n*No games scheduled today.*`;
        }
        const usesQuarters = sport === 'basketball' || sport === 'football';
        // "<ABBR> <score>" for one competitor, with placeholders for missing data.
        const scoreOf = (c) => `${c?.team?.abbreviation || '?'} ${c?.score ?? '0'}`;
        const rows = events.map((e) => {
            const comp = e.competitions?.[0] || {};
            const status = comp.status?.type || {};
            const competitors = comp.competitors || [];
            // Away team first, home team second (standard display)
            const away = competitors.find((c) => c.homeAway === 'away') || competitors[0];
            const home = competitors.find((c) => c.homeAway === 'home') || competitors[1];
            const awayName = away?.team?.displayName || away?.team?.name || '?';
            const homeName = home?.team?.displayName || home?.team?.name || '?';
            const gameLabel = `${awayName} at ${homeName}`;
            const state = status.state || 'pre';
            if (state === 'pre') {
                return `| ${gameLabel} | - | ${fmtEspnTime(comp.startDate || e.date || '')} |`;
            }
            const scoreStr = `${scoreOf(away)}, ${scoreOf(home)}`;
            let statusStr;
            if (state === 'in') {
                if (usesQuarters) {
                    const period = comp.status?.period ?? '';
                    const clock = comp.status?.displayClock ?? '';
                    statusStr = period && clock ? `Q${period} ${clock}` : 'Live';
                }
                else {
                    statusStr = status.shortDetail || status.detail || 'Live';
                }
            }
            else {
                statusStr = status.description || 'Final';
            }
            return `| ${gameLabel} | ${scoreStr} | ${statusStr} |`;
        }).join('\n');
        return `# ${emoji} ${leagueName} Scoreboard — ${today}\n\n| Game | Score | Status |\n|------|-------|--------|\n${rows}`;
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'ESPN scoreboard fetch failed:', e instanceof Error ? e.message : e);
        return null;
    }
}
|
|
4089
|
+
/**
 * Fetch league standings from the ESPN web API and render one Markdown table
 * per group in the payload's `children` list, rows ordered by the
 * `playoffSeed` stat (entries without one sort last).
 *
 * @param {string} sport - ESPN sport identifier (URL path segment).
 * @param {string} league - ESPN league identifier (URL path segment).
 * @returns {Promise<string | null>} Markdown, or `null` when the payload has no
 *   groups or when fetching/formatting threw.
 */
async function fetchEspnStandings(sport, league) {
    // Numeric seed used for ordering; 99 pushes unseeded entries to the bottom.
    const seedOf = (entry) => entry.stats?.find((s) => s.name === 'playoffSeed')?.value ?? 99;
    try {
        const endpoint = `https://site.web.api.espn.com/apis/v2/sports/${sport}/${league}/standings?sort=winpercent:desc`;
        const payload = await fetchJson(endpoint);
        const groups = payload?.children || [];
        const emoji = espnSportEmoji(sport, league);
        const leagueName = payload?.name || league.toUpperCase();
        const today = fmtTodayESPN();
        if (groups.length === 0)
            return null;
        const sections = groups.map((group) => {
            const heading = group.name || group.abbreviation || 'Conference';
            const ranked = [...(group.standings?.entries || [])].sort((x, y) => seedOf(x) - seedOf(y));
            const tableBody = ranked.map((entry) => {
                const statList = entry.stats || [];
                const lookup = (statName) => statList.find((s) => s.name === statName);
                const shown = (statName) => lookup(statName)?.displayValue || '?';
                const cells = [
                    lookup('playoffSeed')?.value ?? '?',
                    entry.team?.displayName || '?',
                    shown('wins'),
                    shown('losses'),
                    shown('winPercent'),
                    shown('streak'),
                ];
                return `| ${cells.join(' | ')} |\n`;
            }).join('');
            return `## ${heading}\n\n| # | Team | W | L | PCT | Streak |\n|---|------|---|---|-----|--------|\n${tableBody}\n`;
        });
        return `# ${emoji} ${leagueName} Standings — ${today}\n\n${sections.join('')}`.trim();
    }
    catch (e) {
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'ESPN standings fetch failed:', e instanceof Error ? e.message : e);
        return null;
    }
}
|
|
4134
|
+
/**
 * ESPN extractor — routes an ESPN URL to standings, a team card, or a
 * scoreboard, based on what `matchESPN` reports for the URL.
 *
 * Team pages fall back to the scoreboard when the team cannot be resolved or
 * the teams request throws.
 *
 * @param {string} _html - Unused; content comes from the ESPN API helpers.
 * @param {string} url - The ESPN page URL.
 * @returns {Promise<{domain: string, type: string, structured: object, cleanContent: string} | null>}
 *   `null` when the URL is not recognised or the selected helper returned nothing.
 */
async function espnExtractor(_html, url) {
    const match = matchESPN(url);
    if (!match)
        return null;
    const { sport, league, type } = match;
    const domain = 'espn.com';
    if (type === 'standings') {
        const content = await fetchEspnStandings(sport, league);
        if (!content)
            return null;
        return {
            domain,
            type: 'standings',
            structured: { sport, league, dataType: 'standings' },
            cleanContent: content,
        };
    }
    if (type === 'team') {
        // Try to get team info from the teams API
        try {
            const teamsUrl = `https://site.api.espn.com/apis/site/v2/sports/${sport}/${league}/teams`;
            const teamsData = await fetchJson(teamsUrl);
            const teams = teamsData?.sports?.[0]?.leagues?.[0]?.teams || [];
            const param = match.param?.toLowerCase();
            // Entries may wrap the team record in a `team` property.
            const unwrap = (t) => t.team || t;
            let teamEntry;
            if (param) {
                // Exact abbreviation/slug hits must win over name substrings:
                // a short abbreviation such as "sa" is also a substring of
                // other teams' names that can appear earlier in the list.
                teamEntry = teams.find((t) => {
                    const td = unwrap(t);
                    return td.abbreviation?.toLowerCase() === param ||
                        td.slug?.toLowerCase() === param;
                }) || teams.find((t) => unwrap(t).displayName?.toLowerCase().includes(param));
            }
            else {
                teamEntry = teams[0];
            }
            if (teamEntry) {
                const td = unwrap(teamEntry);
                const emoji = espnSportEmoji(sport, league);
                const content = `# ${emoji} ${td.displayName}\n\n**League:** ${league.toUpperCase()}\n\n*For live scores and standings, use:*\n- \`webpeel "https://espn.com/${league}/scoreboard"\`\n- \`webpeel "https://espn.com/${league}/standings"\``;
                return {
                    domain,
                    type: 'team',
                    structured: { sport, league, teamName: td.displayName, abbreviation: td.abbreviation },
                    cleanContent: content,
                };
            }
        }
        catch (e) {
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'ESPN team fetch failed:', e instanceof Error ? e.message : e);
        }
        // Fallback to scoreboard
    }
    // Default: scoreboard
    const content = await fetchEspnScoreboard(sport, league);
    if (!content)
        return null;
    return {
        domain,
        type: 'scoreboard',
        structured: { sport, league, dataType: 'scoreboard' },
        cleanContent: content,
    };
}
|
|
4195
|
+
// ---------------------------------------------------------------------------
|
|
4196
|
+
// 37. Sports betting sites — helpful redirect message
|
|
4197
|
+
// ---------------------------------------------------------------------------
|
|
4198
|
+
/**
 * Sportsbook extractor — never scrapes; returns a fixed "blocked" notice that
 * names the sportsbook and points to alternative sources.
 *
 * @param {string} _html - Unused.
 * @param {string} url - Sportsbook URL; an unparseable value keeps the generic
 *   brand name and the `'sportsbook'` domain.
 * @returns {Promise<{domain: string, type: 'blocked', structured: {site: string, reason: string}, cleanContent: string}>}
 */
async function sportsBettingExtractor(_html, url) {
    // Hostname fragment → display name, checked in this order.
    const knownBrands = [
        ['draftkings', 'DraftKings Sportsbook'],
        ['fanduel', 'FanDuel Sportsbook'],
        ['betmgm', 'BetMGM Sportsbook'],
    ];
    let domain = 'sportsbook';
    let brandName = 'Sports Betting Site';
    try {
        const { hostname } = new URL(url);
        domain = hostname.replace('www.', '').replace('sportsbook.', '');
        const recognised = knownBrands.find(([fragment]) => domain.includes(fragment));
        if (recognised)
            brandName = recognised[1];
    }
    catch { /* ignore */ }
    const cleanContent = [
        `# ⚠️ ${brandName}`,
        '',
        `${brandName} requires authentication and geo-verification. WebPeel cannot scrape live odds directly.`,
        '',
        '**For live sports odds, use these alternatives:**',
        '- `webpeel "https://espn.com/nba/scoreboard"` — Live scores and schedules',
        '- `webpeel "https://polymarket.com"` — Prediction market prices',
        '- The Odds API (theOddsApi.com) — Aggregated odds from all sportsbooks (requires API key)',
        '',
        '**For team schedules and standings:**',
        '- `webpeel "https://espn.com/nba/standings"` — NBA standings',
        '- `webpeel "https://espn.com/nfl/scoreboard"` — NFL scores',
        '- `webpeel "https://espn.com/mlb/scoreboard"` — MLB scores',
    ].join('\n');
    return {
        domain,
        type: 'blocked',
        structured: { site: brandName, reason: 'authentication and geo-verification required' },
        cleanContent,
    };
}
|
|
4232
|
+
// ---------------------------------------------------------------------------
|
|
4233
|
+
// Semantic Scholar extractor (Semantic Scholar API — free, no key needed)
|
|
4234
|
+
// ---------------------------------------------------------------------------
|
|
4235
|
+
/**
 * Semantic Scholar extractor — paper pages and search pages, via the Graph API.
 *
 * - `/paper/[<slug>/]<40-hex id>` → paper details (`type: 'paper'`).
 * - `/search?q=…` (or `?query=…`) → top 10 results as a table (`type: 'search'`).
 *
 * Returns `null` for every other path, for rate-limited or empty responses and
 * when the API call throws.
 *
 * @param {string} _html - Unused; data comes from the API.
 * @param {string} url - Page URL; must be parseable by `new URL` (throws otherwise).
 * @returns {Promise<{domain: string, type: string, structured: object, cleanContent: string} | null>}
 */
async function semanticScholarExtractor(_html, url) {
    const domain = 'semanticscholar.org';
    const { pathname, searchParams } = new URL(url);
    // The API reports throttling in the JSON body rather than by throwing.
    const isThrottled = (payload) => payload.code === '429' || (payload.message && String(payload.message).includes('Too Many Requests'));
    const logFailure = (label, err) => {
        if (process.env.DEBUG)
            console.debug('[webpeel]', label, err instanceof Error ? err.message : err);
    };
    // --- Paper page: /paper/<title-slug>/<paperId> ---
    const idMatch = pathname.match(/^\/paper\/(?:[^/]+\/)?([a-f0-9]{40})/i);
    if (idMatch) {
        const [, paperId] = idMatch;
        try {
            const fields = 'title,abstract,authors,year,citationCount,referenceCount,url,openAccessPdf,venue,publicationDate,tldr';
            const paper = await fetchJson(`https://api.semanticscholar.org/graph/v1/paper/${paperId}?fields=${fields}`);
            // Missing, rate-limited or title-less payloads all mean "no result".
            if (!paper || isThrottled(paper) || !paper.title)
                return null;
            const authorNames = (paper.authors || []).map((a) => a.name);
            const shownAuthors = authorNames.slice(0, 5).join(', ');
            const authorLine = authorNames.length > 5
                ? `${shownAuthors} (+${authorNames.length - 5} more)`
                : authorNames.join(', ');
            const pdfUrl = paper.openAccessPdf?.url || null;
            const tldrText = paper.tldr?.text || null;
            const citStr = paper.citationCount != null ? paper.citationCount.toLocaleString() : '?';
            const linkTarget = paper.url || `https://www.semanticscholar.org/paper/${paperId}`;
            const lines = [
                `# 📄 ${paper.title}`,
                '',
                `**Authors:** ${authorLine}`,
                `**Year:** ${paper.year || '?'} | **Venue:** ${paper.venue || 'N/A'} | **Citations:** ${citStr}`,
                ...(paper.referenceCount != null ? [`**References:** ${paper.referenceCount.toLocaleString()}`] : []),
                ...(tldrText ? ['', '## TL;DR', '', tldrText] : []),
                ...(paper.abstract ? ['', '## Abstract', '', paper.abstract] : []),
                '',
                ...(pdfUrl ? [`**PDF:** [Open Access](${pdfUrl})`] : []),
                `**Link:** [Semantic Scholar](${linkTarget})`,
            ];
            return {
                domain,
                type: 'paper',
                structured: {
                    paperId,
                    title: paper.title,
                    authors: authorNames,
                    year: paper.year,
                    venue: paper.venue,
                    citationCount: paper.citationCount,
                    referenceCount: paper.referenceCount,
                    abstract: paper.abstract,
                    tldr: tldrText,
                    pdfUrl,
                    url: paper.url,
                    publicationDate: paper.publicationDate,
                },
                cleanContent: lines.join('\n'),
            };
        }
        catch (e) {
            logFailure('Semantic Scholar paper API failed:', e);
            return null;
        }
    }
    // --- Search page: /search?q=... ---
    const onSearchPage = pathname === '/search' || pathname.startsWith('/search/');
    if (!onSearchPage)
        return null;
    const query = searchParams.get('q') || searchParams.get('query');
    if (!query)
        return null;
    try {
        const fields = 'title,authors,year,citationCount,url,openAccessPdf';
        const response = await fetchJson(`https://api.semanticscholar.org/graph/v1/paper/search?query=${encodeURIComponent(query)}&limit=10&fields=${fields}`);
        // Rate limited or no data — return null so pipeline falls back to browser rendering
        if (!response || isThrottled(response) || !Array.isArray(response.data))
            return null;
        const papers = response.data;
        const total = response.total || 0;
        const tableRows = [];
        papers.forEach((p, index) => {
            const authors = p.authors || [];
            let authorCell = '—';
            if (authors.length === 1)
                authorCell = authors[0].name;
            else if (authors.length > 1)
                authorCell = `${authors[0].name} et al.`;
            const paperUrl = p.url || `https://www.semanticscholar.org/paper/${p.paperId}`;
            const cits = p.citationCount != null ? p.citationCount.toLocaleString() : '?';
            tableRows.push(`| ${index + 1} | [${p.title}](${paperUrl}) | ${p.year || '?'} | ${cits} | ${authorCell} |`);
        });
        const cleanContent = [
            `# 🔍 Semantic Scholar — "${query}"`,
            '',
            '| # | Paper | Year | Citations | Authors |',
            '|---|-------|------|-----------|---------|',
            tableRows.join('\n'),
            '',
            `*Source: Semantic Scholar API · Total results: ${total.toLocaleString()}*`,
        ].join('\n');
        return {
            domain,
            type: 'search',
            structured: { query, total, papers },
            cleanContent,
        };
    }
    catch (e) {
        logFailure('Semantic Scholar search API failed:', e);
        return null;
    }
}
|
|
4362
|
+
// ---------------------------------------------------------------------------
|
|
4363
|
+
// PubMed extractor (NCBI E-utilities API — free, no key needed)
|
|
4364
|
+
// ---------------------------------------------------------------------------
|
|
4365
|
+
async function pubmedExtractor(_html, url) {
|
|
4366
|
+
const urlObj = new URL(url);
|
|
4367
|
+
const path = urlObj.pathname;
|
|
4368
|
+
const domain = 'pubmed.ncbi.nlm.nih.gov';
|
|
4369
|
+
// --- Article page: /XXXXXX/ or /XXXXXX ---
|
|
4370
|
+
const pmidMatch = path.match(/^\/(\d+)\/?$/);
|
|
4371
|
+
if (pmidMatch) {
|
|
4372
|
+
const pmid = pmidMatch[1];
|
|
4373
|
+
try {
|
|
4374
|
+
// Fetch summary
|
|
4375
|
+
const summaryUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id=${pmid}&retmode=json`;
|
|
4376
|
+
const summaryData = await fetchJson(summaryUrl);
|
|
4377
|
+
if (!summaryData?.result)
|
|
4378
|
+
return null;
|
|
4379
|
+
const result = summaryData.result;
|
|
4380
|
+
const article = result[pmid];
|
|
4381
|
+
if (!article)
|
|
4382
|
+
return null;
|
|
4383
|
+
// Fetch abstract via efetch
|
|
4384
|
+
let abstract = '';
|
|
4385
|
+
try {
|
|
4386
|
+
const efetchUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=${pmid}&retmode=xml&rettype=abstract`;
|
|
4387
|
+
const efetchResult = await simpleFetch(efetchUrl, 'WebPeel/0.21', 15000, { Accept: 'application/xml' });
|
|
4388
|
+
if (efetchResult?.html) {
|
|
4389
|
+
const abstractMatch = efetchResult.html.match(/<AbstractText[^>]*>([\s\S]*?)<\/AbstractText>/g);
|
|
4390
|
+
if (abstractMatch) {
|
|
4391
|
+
abstract = abstractMatch.map((m) => {
|
|
4392
|
+
const labelMatch = m.match(/Label="([^"]+)"/);
|
|
4393
|
+
const textMatch = m.match(/<AbstractText[^>]*>([\s\S]*?)<\/AbstractText>/);
|
|
4394
|
+
const text = textMatch ? stripHtml(textMatch[1]).trim() : '';
|
|
4395
|
+
return labelMatch ? `**${labelMatch[1]}:** ${text}` : text;
|
|
4396
|
+
}).join('\n\n');
|
|
4397
|
+
}
|
|
4398
|
+
}
|
|
4399
|
+
}
|
|
4400
|
+
catch { /* abstract is optional */ }
|
|
4401
|
+
const authors = article.authors || [];
|
|
4402
|
+
const authorNames = authors.filter(a => a.authtype !== 'CollectiveName').map(a => a.name);
|
|
4403
|
+
const authorLine = authorNames.length <= 6
|
|
4404
|
+
? authorNames.join(', ')
|
|
4405
|
+
: `${authorNames.slice(0, 6).join(', ')} et al.`;
|
|
4406
|
+
const doi = article.elocationid?.replace(/^doi:\s*/i, '') || null;
|
|
4407
|
+
const pubDate = article.pubdate || '?';
|
|
4408
|
+
const journal = article.source || '?';
|
|
4409
|
+
const volume = article.volume ? ` ${article.volume}` : '';
|
|
4410
|
+
const issue = article.issue ? `(${article.issue})` : '';
|
|
4411
|
+
const pages = article.pages ? `:${article.pages}` : '';
|
|
4412
|
+
const structured = {
|
|
4413
|
+
pmid,
|
|
4414
|
+
title: article.title,
|
|
4415
|
+
authors: authorNames,
|
|
4416
|
+
journal,
|
|
4417
|
+
pubDate,
|
|
4418
|
+
volume: article.volume,
|
|
4419
|
+
issue: article.issue,
|
|
4420
|
+
pages: article.pages,
|
|
4421
|
+
doi,
|
|
4422
|
+
abstract: abstract || undefined,
|
|
4423
|
+
url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`,
|
|
4424
|
+
};
|
|
4425
|
+
const lines = [
|
|
4426
|
+
`# 🧬 ${article.title}`,
|
|
4427
|
+
'',
|
|
4428
|
+
`**Authors:** ${authorLine}`,
|
|
4429
|
+
`**Journal:** *${journal}*${volume}${issue}${pages} (${pubDate})`,
|
|
4430
|
+
`**PMID:** ${pmid}`,
|
|
4431
|
+
];
|
|
4432
|
+
if (doi)
|
|
4433
|
+
lines.push(`**DOI:** [${doi}](https://doi.org/${doi})`);
|
|
4434
|
+
if (abstract) {
|
|
4435
|
+
lines.push('', '## Abstract', '', abstract);
|
|
4436
|
+
}
|
|
4437
|
+
lines.push('', `**Link:** [PubMed](https://pubmed.ncbi.nlm.nih.gov/${pmid}/)`);
|
|
4438
|
+
return {
|
|
4439
|
+
domain,
|
|
4440
|
+
type: 'article',
|
|
4441
|
+
structured,
|
|
4442
|
+
cleanContent: lines.join('\n'),
|
|
4443
|
+
};
|
|
4444
|
+
}
|
|
4445
|
+
catch (e) {
|
|
4446
|
+
if (process.env.DEBUG)
|
|
4447
|
+
console.debug('[webpeel]', 'PubMed article API failed:', e instanceof Error ? e.message : e);
|
|
4448
|
+
return null;
|
|
4449
|
+
}
|
|
4450
|
+
}
|
|
4451
|
+
// --- Search page: /?term=... or /?query=... ---
|
|
4452
|
+
const term = urlObj.searchParams.get('term') || urlObj.searchParams.get('query');
|
|
4453
|
+
if (term) {
|
|
4454
|
+
try {
|
|
4455
|
+
// Step 1: search for IDs
|
|
4456
|
+
const searchUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=${encodeURIComponent(term)}&retmode=json&retmax=10`;
|
|
4457
|
+
const searchData = await fetchJson(searchUrl);
|
|
4458
|
+
if (!searchData?.esearchresult)
|
|
4459
|
+
return null;
|
|
4460
|
+
const esearch = searchData.esearchresult;
|
|
4461
|
+
const ids = esearch.idlist || [];
|
|
4462
|
+
const total = parseInt(esearch.count || '0', 10);
|
|
4463
|
+
if (ids.length === 0) {
|
|
4464
|
+
return {
|
|
4465
|
+
domain,
|
|
4466
|
+
type: 'search',
|
|
4467
|
+
structured: { query: term, total: 0, articles: [] },
|
|
4468
|
+
cleanContent: `# 🔍 PubMed — "${term}"\n\n*No results found.*`,
|
|
4469
|
+
};
|
|
4470
|
+
}
|
|
4471
|
+
// Step 2: fetch summaries
|
|
4472
|
+
const summaryUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id=${ids.join(',')}&retmode=json`;
|
|
4473
|
+
const summaryData = await fetchJson(summaryUrl);
|
|
4474
|
+
if (!summaryData?.result)
|
|
4475
|
+
return null;
|
|
4476
|
+
const result = summaryData.result;
|
|
4477
|
+
const articles = (result.uids || ids).map((id) => {
|
|
4478
|
+
const a = result[id];
|
|
4479
|
+
if (!a)
|
|
4480
|
+
return null;
|
|
4481
|
+
const authors = a.authors || [];
|
|
4482
|
+
return {
|
|
4483
|
+
pmid: id,
|
|
4484
|
+
title: a.title,
|
|
4485
|
+
journal: a.source,
|
|
4486
|
+
pubDate: a.pubdate,
|
|
4487
|
+
authors: authors.map(x => x.name),
|
|
4488
|
+
doi: a.elocationid?.replace(/^doi:\s*/i, '') || null,
|
|
4489
|
+
};
|
|
4490
|
+
}).filter(Boolean);
|
|
4491
|
+
const rows = articles.map((a, i) => {
|
|
4492
|
+
const authorLine = a.authors.length === 0 ? '—'
|
|
4493
|
+
: a.authors.length === 1 ? a.authors[0]
|
|
4494
|
+
: `${a.authors[0]} et al.`;
|
|
4495
|
+
const link = `https://pubmed.ncbi.nlm.nih.gov/${a.pmid}/`;
|
|
4496
|
+
return `| ${i + 1} | [${a.title}](${link}) | *${a.journal}* | ${a.pubDate} | ${authorLine} |`;
|
|
4497
|
+
}).join('\n');
|
|
4498
|
+
const cleanContent = [
|
|
4499
|
+
`# 🔍 PubMed — "${term}"`,
|
|
4500
|
+
'',
|
|
4501
|
+
'| # | Article | Journal | Date | Authors |',
|
|
4502
|
+
'|---|---------|---------|------|---------|',
|
|
4503
|
+
rows,
|
|
4504
|
+
'',
|
|
4505
|
+
`*Source: NCBI PubMed E-utilities · Total results: ${total.toLocaleString()}*`,
|
|
4506
|
+
].join('\n');
|
|
4507
|
+
return {
|
|
4508
|
+
domain,
|
|
4509
|
+
type: 'search',
|
|
4510
|
+
structured: { query: term, total, articles },
|
|
4511
|
+
cleanContent,
|
|
4512
|
+
};
|
|
4513
|
+
}
|
|
4514
|
+
catch (e) {
|
|
4515
|
+
if (process.env.DEBUG)
|
|
4516
|
+
console.debug('[webpeel]', 'PubMed search API failed:', e instanceof Error ? e.message : e);
|
|
4517
|
+
return null;
|
|
4518
|
+
}
|
|
4519
|
+
}
|
|
4520
|
+
return null;
|
|
4521
|
+
}
|
|
4522
|
+
// ---------------------------------------------------------------------------
|
|
4523
|
+
// 38. CoinGecko extractor — crypto prices via free CoinGecko API
|
|
4524
|
+
// ---------------------------------------------------------------------------
|
|
4525
|
+
/**
 * Builds a markdown + structured summary for a CoinGecko page from the public
 * CoinGecko REST API (the page HTML is not used).
 *
 * - Coin detail pages (`/coins/<id>`, optionally behind a locale prefix such as
 *   `/en/` or `/zh-tw/`) are resolved through `/api/v3/coins/<id>`.
 * - Every other path falls back to the top-15-by-market-cap overview.
 *
 * @param {string} _html - Page HTML (unused).
 * @param {string} url - Absolute URL of the CoinGecko page.
 * @returns {Promise<object|null>} `{ domain, type, structured, cleanContent }`,
 *   or `null` when the API call throws or returns no usable data.
 */
async function coinGeckoExtractor(_html, url) {
    const urlObj = new URL(url);
    const path = urlObj.pathname;
    const domain = 'coingecko.com';
    const cgHeaders = {
        'Accept': 'application/json',
        'User-Agent': 'webpeel/0.21 (https://webpeel.dev)',
    };
    const TRILLION = 1_000_000_000_000;
    const BILLION = 1_000_000_000;
    const MILLION = 1_000_000;
    // Only real, finite numbers are formatted; anything else renders as '?'.
    const isUsableNumber = (v) => typeof v === 'number' && Number.isFinite(v);
    // Helper: compact USD formatting ($1.23T / $4.56B / $7.89M / $1,234.00).
    const fmtMoney = (v) => {
        if (!isUsableNumber(v)) {
            return '?';
        }
        if (v >= TRILLION) {
            return `$${(v / TRILLION).toFixed(2)}T`;
        }
        if (v >= BILLION) {
            return `$${(v / BILLION).toFixed(2)}B`;
        }
        if (v >= MILLION) {
            return `$${(v / MILLION).toFixed(2)}M`;
        }
        return `$${v.toLocaleString('en-US', { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
    };
    // Helper: unit price, with more decimals the smaller the price gets.
    const fmtPrice = (v) => {
        if (!isUsableNumber(v)) {
            return '?';
        }
        if (v >= 1000) {
            return `$${v.toLocaleString('en-US', { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
        }
        if (v >= 1) {
            return `$${v.toFixed(4)}`;
        }
        return `$${v.toFixed(8)}`;
    };
    // Helper: signed percentage with one decimal (+1.2% / -3.4%).
    const fmtChange = (c) => {
        if (!isUsableNumber(c)) {
            return '?';
        }
        const sign = c >= 0 ? '+' : '';
        return `${sign}${c.toFixed(1)}%`;
    };
    // Helper: compact token count; includes a trillion tier so very large
    // supplies do not render as hundreds of thousands of "B".
    const fmtSupply = (n) => {
        if (n >= TRILLION) {
            return `${(n / TRILLION).toFixed(2)}T`;
        }
        if (n >= BILLION) {
            return `${(n / BILLION).toFixed(2)}B`;
        }
        if (n >= MILLION) {
            return `${(n / MILLION).toFixed(2)}M`;
        }
        return n.toLocaleString();
    };
    // Coin detail page: /coins/<coin-id>, with an optional locale segment in front
    // (e.g. /en/coins/bitcoin, /zh-tw/coins/bitcoin).
    const coinMatch = path.match(/^\/(?:[a-z]{2}(?:[-_][a-z]{2,4})?\/)?coins\/([^/?#]+)/i);
    if (coinMatch) {
        const coinId = coinMatch[1].toLowerCase();
        try {
            const apiUrl = `https://api.coingecko.com/api/v3/coins/${encodeURIComponent(coinId)}?localization=false&tickers=false&community_data=false&developer_data=false`;
            const data = await fetchJson(apiUrl, cgHeaders);
            if (!data || data.error) {
                return null;
            }
            const md = data.market_data || {};
            const price = md.current_price?.usd;
            const change24h = md.price_change_percentage_24h;
            const change7d = md.price_change_percentage_7d;
            const marketCap = md.market_cap?.usd;
            const volume = md.total_volume?.usd;
            const ath = md.ath?.usd;
            const circulatingSupply = md.circulating_supply;
            const maxSupply = md.max_supply;
            const name = data.name || coinId;
            const symbol = (data.symbol || '').toUpperCase();
            // Strip HTML tags, keep only the first line (CRLF or LF), cap at 500 chars.
            const description = data.description?.en?.replace(/<[^>]+>/g, '').split(/\r?\n/)[0]?.slice(0, 500) || '';
            const updatedAt = data.last_updated || new Date().toISOString();
            const structuredData = {
                id: coinId,
                name,
                symbol,
                price_usd: price,
                change_24h: change24h,
                change_7d: change7d,
                market_cap_usd: marketCap,
                volume_24h_usd: volume,
                ath_usd: ath,
                circulating_supply: circulatingSupply,
                max_supply: maxSupply,
                last_updated: updatedAt,
            };
            let cleanContent = `# 🪙 ${name} (${symbol})\n\n`;
            cleanContent += `## Quote\n`;
            cleanContent += `- **Price:** ${fmtPrice(price)}\n`;
            cleanContent += `- **24h Change:** ${fmtChange(change24h)}\n`;
            if (change7d != null) {
                cleanContent += `- **7d Change:** ${fmtChange(change7d)}\n`;
            }
            cleanContent += `- **Market Cap:** ${fmtMoney(marketCap)}\n`;
            cleanContent += `- **24h Volume:** ${fmtMoney(volume)}\n`;
            if (ath != null) {
                cleanContent += `- **All-Time High:** ${fmtPrice(ath)}\n`;
            }
            // A zero or missing supply is omitted rather than printed.
            if (isUsableNumber(circulatingSupply) && circulatingSupply !== 0) {
                cleanContent += `- **Circulating Supply:** ${fmtSupply(circulatingSupply)} ${symbol}\n`;
            }
            if (description) {
                cleanContent += `\n## Description\n${description}\n`;
            }
            cleanContent += `\n---\n*Source: CoinGecko API · Updated: ${updatedAt}*`;
            return { domain, type: 'coin', structured: structuredData, cleanContent };
        }
        catch (e) {
            if (process.env.DEBUG) {
                console.debug('[webpeel]', 'CoinGecko coin API failed:', e instanceof Error ? e.message : e);
            }
            return null;
        }
    }
    // Main page / markets overview: coingecko.com, coingecko.com/en, or any non-coin path.
    try {
        const apiUrl = `https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&order=market_cap_desc&per_page=15&page=1`;
        const coins = await fetchJson(apiUrl, cgHeaders);
        if (!Array.isArray(coins) || coins.length === 0) {
            return null;
        }
        const topCoins = coins.slice(0, 15);
        const rows = topCoins.map((c, i) => {
            const ticker = (c.symbol || '').toUpperCase();
            return `| ${i + 1} | ${c.name} (${ticker}) | ${fmtPrice(c.current_price)} | ${fmtChange(c.price_change_percentage_24h)} | ${fmtMoney(c.market_cap)} |`;
        });
        const cleanContent = `# 🪙 CoinGecko — Top Cryptocurrencies\n\n` +
            `| # | Coin | Price | 24h | Market Cap |\n` +
            `|---|------|-------|-----|------------|\n` +
            rows.join('\n') +
            `\n\n---\n*Source: CoinGecko API · Updated: ${new Date().toISOString()}*`;
        return {
            domain,
            type: 'markets',
            structured: { coins: topCoins },
            cleanContent,
        };
    }
    catch (e) {
        if (process.env.DEBUG) {
            console.debug('[webpeel]', 'CoinGecko markets API failed:', e instanceof Error ? e.message : e);
        }
        return null;
    }
}
|
|
4655
|
+
// ---------------------------------------------------------------------------
|
|
4656
|
+
// 39. Weather extractor — Open-Meteo free API (no key required)
|
|
4657
|
+
// ---------------------------------------------------------------------------
|
|
4658
|
+
// WMO weather interpretation codes → short English description.
// Covers the code set documented by Open-Meteo, including the freezing
// drizzle/rain (56, 57, 66, 67) and snow-grain (77) codes.
const WMO_CODES = {
    0: 'Clear sky', 1: 'Mainly clear', 2: 'Partly cloudy', 3: 'Overcast',
    45: 'Foggy', 48: 'Icy fog',
    51: 'Light drizzle', 53: 'Moderate drizzle', 55: 'Dense drizzle',
    56: 'Light freezing drizzle', 57: 'Dense freezing drizzle',
    61: 'Slight rain', 63: 'Moderate rain', 65: 'Heavy rain',
    66: 'Light freezing rain', 67: 'Heavy freezing rain',
    71: 'Slight snow', 73: 'Moderate snow', 75: 'Heavy snow',
    77: 'Snow grains',
    80: 'Slight showers', 81: 'Moderate showers', 82: 'Violent showers',
    85: 'Slight snow showers', 86: 'Heavy snow showers',
    95: 'Thunderstorm', 96: 'Thunderstorm w/ hail', 99: 'Thunderstorm w/ heavy hail',
};
|
|
4669
|
+
// WMO weather interpretation code → emoji shown next to the condition text.
// Includes the freezing drizzle/rain (56, 57, 66, 67) and snow-grain (77)
// codes so those conditions do not render without an icon.
const WEATHER_ICONS = {
    0: '☀️', 1: '🌤️', 2: '⛅', 3: '☁️',
    45: '🌫️', 48: '🌫️',
    51: '🌦️', 53: '🌦️', 55: '🌧️',
    56: '🌧️', 57: '🌧️',
    61: '🌦️', 63: '🌧️', 65: '🌧️',
    66: '🌧️', 67: '🌧️',
    71: '🌨️', 73: '❄️', 75: '❄️',
    77: '🌨️',
    80: '🌦️', 81: '🌧️', 82: '⛈️',
    85: '🌨️', 86: '❄️',
    95: '⛈️', 96: '⛈️', 99: '⛈️',
};
|
|
4679
|
+
// Fallback location (New York City): display name, coordinates and IANA timezone.
const DEFAULT_CITY = {
    name: 'New York City',
    lat: 40.7128,
    lon: -74.0060,
    tz: 'America/New_York',
};
|
|
4681
|
+
/**
 * Produces a current-conditions + 7-day forecast summary from the Open-Meteo
 * forecast API (the page HTML is not used).
 *
 * Coordinates are taken from the URL when available:
 * - open-meteo.com: `latitude` / `longitude` (and optional `timezone`) query params;
 * - weather.com / accuweather.com: an `/l/<lat>,<lon>` path segment.
 * Missing, unparsable or out-of-range coordinates fall back to DEFAULT_CITY.
 *
 * @param {string} _html - Page HTML (unused).
 * @param {string} url - Absolute URL of the weather page.
 * @returns {Promise<object|null>} `{ domain, type, structured, cleanContent }`,
 *   or `null` when the API call throws or returns an error payload.
 */
async function weatherExtractor(_html, url) {
    const urlObj = new URL(url);
    const hostname = urlObj.hostname;
    // parseFloat yields NaN for junk input; reject that and impossible coordinates
    // so they never reach the API.
    const isValidCoordinate = (latitude, longitude) => Number.isFinite(latitude) && Number.isFinite(longitude)
        && Math.abs(latitude) <= 90 && Math.abs(longitude) <= 180;
    // Determine lat/lon from URL params (for open-meteo.com direct API links)
    let lat = null;
    let lon = null;
    let cityName = DEFAULT_CITY.name;
    let timezone = DEFAULT_CITY.tz;
    if (hostname.includes('open-meteo.com')) {
        const latParam = urlObj.searchParams.get('latitude');
        const lonParam = urlObj.searchParams.get('longitude');
        const tzParam = urlObj.searchParams.get('timezone');
        if (latParam && lonParam) {
            const parsedLat = parseFloat(latParam);
            const parsedLon = parseFloat(lonParam);
            if (isValidCoordinate(parsedLat, parsedLon)) {
                lat = parsedLat;
                lon = parsedLon;
                cityName = `${lat.toFixed(2)}°N, ${lon.toFixed(2)}°E`;
                // Without an explicit timezone, let Open-Meteo resolve the local
                // one instead of bucketing days by New York time.
                timezone = tzParam || 'auto';
            }
        }
    }
    // For weather.com / accuweather: try to extract coordinates from the URL path
    if (hostname.includes('weather.com') || hostname.includes('accuweather.com')) {
        const path = urlObj.pathname;
        // weather.com: /weather/today/l/40.71,-74.01:4:US or similar
        const coordMatch = path.match(/\/l\/(-?\d+\.?\d*),(-?\d+\.?\d*)/);
        if (coordMatch) {
            const parsedLat = parseFloat(coordMatch[1]);
            const parsedLon = parseFloat(coordMatch[2]);
            if (isValidCoordinate(parsedLat, parsedLon)) {
                lat = parsedLat;
                lon = parsedLon;
                cityName = `${lat.toFixed(2)}, ${lon.toFixed(2)}`;
                timezone = 'auto';
            }
        }
    }
    // Default to NYC if no usable coords were found
    if (lat == null || lon == null) {
        lat = DEFAULT_CITY.lat;
        lon = DEFAULT_CITY.lon;
        cityName = DEFAULT_CITY.name;
        timezone = DEFAULT_CITY.tz;
    }
    try {
        const apiUrl = `https://api.open-meteo.com/v1/forecast?latitude=${lat}&longitude=${lon}&current=temperature_2m,relative_humidity_2m,wind_speed_10m,weather_code&daily=temperature_2m_max,temperature_2m_min,precipitation_sum,weather_code&timezone=${encodeURIComponent(timezone)}&forecast_days=7`;
        const data = await fetchJson(apiUrl);
        if (!data || data.error) {
            return null;
        }
        // Prefer the timezone the API reports (resolves 'auto' to a concrete zone).
        const resolvedTimezone = data.timezone || timezone;
        const current = data.current || {};
        const daily = data.daily || {};
        const tempC = current.temperature_2m;
        const tempF = tempC != null ? Math.round(tempC * 9 / 5 + 32) : null;
        const humidity = current.relative_humidity_2m;
        const wind = current.wind_speed_10m;
        const wCode = current.weather_code;
        const condition = WMO_CODES[wCode] || 'Unknown';
        const icon = WEATHER_ICONS[wCode] || '🌡️';
        let cleanContent = `# ${icon} Weather Forecast — ${cityName}\n\n`;
        if (tempC != null) {
            cleanContent += `**Current:** ${tempC}°C (${tempF}°F)`;
            if (wind != null) {
                cleanContent += `, Wind: ${wind} km/h`;
            }
            if (humidity != null) {
                cleanContent += `, Humidity: ${humidity}%`;
            }
            cleanContent += `, ${condition}\n\n`;
        }
        if (daily.time?.length) {
            cleanContent += `| Date | Low | High | Precip | Condition |\n`;
            cleanContent += `|------|-----|------|--------|----------|\n`;
            const dayCount = Math.min(daily.time.length, 7);
            for (let i = 0; i < dayCount; i++) {
                const date = daily.time[i];
                const low = daily.temperature_2m_min?.[i];
                const high = daily.temperature_2m_max?.[i];
                const precip = daily.precipitation_sum?.[i];
                const dayCode = daily.weather_code?.[i];
                const dayIcon = WEATHER_ICONS[dayCode] || '';
                const dayCondition = WMO_CODES[dayCode] || '';
                const lowStr = low != null ? `${low}°C` : '?';
                const highStr = high != null ? `${high}°C` : '?';
                const precipStr = precip != null ? `${precip}mm` : '0mm';
                cleanContent += `| ${date} | ${lowStr} | ${highStr} | ${precipStr} | ${dayIcon} ${dayCondition} |\n`;
            }
        }
        cleanContent += `\n---\n*Source: Open-Meteo API · Coordinates: ${lat}, ${lon} · Updated: ${data.current?.time || new Date().toISOString()}*`;
        return {
            domain: 'open-meteo.com',
            type: 'forecast',
            structured: {
                city: cityName,
                lat,
                lon,
                timezone: resolvedTimezone,
                current: {
                    temperature_c: tempC,
                    temperature_f: tempF,
                    humidity,
                    wind_speed_kmh: wind,
                    condition,
                    weather_code: wCode,
                },
                daily: daily,
            },
            cleanContent,
        };
    }
    catch (e) {
        if (process.env.DEBUG) {
            console.debug('[webpeel]', 'Weather API failed:', e instanceof Error ? e.message : e);
        }
        return null;
    }
}
|