@apitap/core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +60 -0
- package/README.md +362 -0
- package/SKILL.md +270 -0
- package/dist/auth/crypto.d.ts +31 -0
- package/dist/auth/crypto.js +66 -0
- package/dist/auth/crypto.js.map +1 -0
- package/dist/auth/handoff.d.ts +29 -0
- package/dist/auth/handoff.js +180 -0
- package/dist/auth/handoff.js.map +1 -0
- package/dist/auth/manager.d.ts +46 -0
- package/dist/auth/manager.js +127 -0
- package/dist/auth/manager.js.map +1 -0
- package/dist/auth/oauth-refresh.d.ts +16 -0
- package/dist/auth/oauth-refresh.js +91 -0
- package/dist/auth/oauth-refresh.js.map +1 -0
- package/dist/auth/refresh.d.ts +43 -0
- package/dist/auth/refresh.js +217 -0
- package/dist/auth/refresh.js.map +1 -0
- package/dist/capture/anti-bot.d.ts +15 -0
- package/dist/capture/anti-bot.js +43 -0
- package/dist/capture/anti-bot.js.map +1 -0
- package/dist/capture/blocklist.d.ts +6 -0
- package/dist/capture/blocklist.js +70 -0
- package/dist/capture/blocklist.js.map +1 -0
- package/dist/capture/body-diff.d.ts +8 -0
- package/dist/capture/body-diff.js +102 -0
- package/dist/capture/body-diff.js.map +1 -0
- package/dist/capture/body-variables.d.ts +13 -0
- package/dist/capture/body-variables.js +142 -0
- package/dist/capture/body-variables.js.map +1 -0
- package/dist/capture/domain.d.ts +8 -0
- package/dist/capture/domain.js +34 -0
- package/dist/capture/domain.js.map +1 -0
- package/dist/capture/entropy.d.ts +33 -0
- package/dist/capture/entropy.js +100 -0
- package/dist/capture/entropy.js.map +1 -0
- package/dist/capture/filter.d.ts +11 -0
- package/dist/capture/filter.js +49 -0
- package/dist/capture/filter.js.map +1 -0
- package/dist/capture/graphql.d.ts +21 -0
- package/dist/capture/graphql.js +99 -0
- package/dist/capture/graphql.js.map +1 -0
- package/dist/capture/idle.d.ts +23 -0
- package/dist/capture/idle.js +44 -0
- package/dist/capture/idle.js.map +1 -0
- package/dist/capture/monitor.d.ts +26 -0
- package/dist/capture/monitor.js +183 -0
- package/dist/capture/monitor.js.map +1 -0
- package/dist/capture/oauth-detector.d.ts +18 -0
- package/dist/capture/oauth-detector.js +96 -0
- package/dist/capture/oauth-detector.js.map +1 -0
- package/dist/capture/pagination.d.ts +9 -0
- package/dist/capture/pagination.js +40 -0
- package/dist/capture/pagination.js.map +1 -0
- package/dist/capture/parameterize.d.ts +17 -0
- package/dist/capture/parameterize.js +63 -0
- package/dist/capture/parameterize.js.map +1 -0
- package/dist/capture/scrubber.d.ts +5 -0
- package/dist/capture/scrubber.js +38 -0
- package/dist/capture/scrubber.js.map +1 -0
- package/dist/capture/session.d.ts +46 -0
- package/dist/capture/session.js +445 -0
- package/dist/capture/session.js.map +1 -0
- package/dist/capture/token-detector.d.ts +16 -0
- package/dist/capture/token-detector.js +62 -0
- package/dist/capture/token-detector.js.map +1 -0
- package/dist/capture/verifier.d.ts +17 -0
- package/dist/capture/verifier.js +147 -0
- package/dist/capture/verifier.js.map +1 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +930 -0
- package/dist/cli.js.map +1 -0
- package/dist/discovery/auth.d.ts +17 -0
- package/dist/discovery/auth.js +81 -0
- package/dist/discovery/auth.js.map +1 -0
- package/dist/discovery/fetch.d.ts +17 -0
- package/dist/discovery/fetch.js +59 -0
- package/dist/discovery/fetch.js.map +1 -0
- package/dist/discovery/frameworks.d.ts +11 -0
- package/dist/discovery/frameworks.js +249 -0
- package/dist/discovery/frameworks.js.map +1 -0
- package/dist/discovery/index.d.ts +21 -0
- package/dist/discovery/index.js +219 -0
- package/dist/discovery/index.js.map +1 -0
- package/dist/discovery/openapi.d.ts +13 -0
- package/dist/discovery/openapi.js +175 -0
- package/dist/discovery/openapi.js.map +1 -0
- package/dist/discovery/probes.d.ts +9 -0
- package/dist/discovery/probes.js +70 -0
- package/dist/discovery/probes.js.map +1 -0
- package/dist/index.d.ts +25 -0
- package/dist/index.js +25 -0
- package/dist/index.js.map +1 -0
- package/dist/inspect/report.d.ts +52 -0
- package/dist/inspect/report.js +191 -0
- package/dist/inspect/report.js.map +1 -0
- package/dist/mcp.d.ts +8 -0
- package/dist/mcp.js +526 -0
- package/dist/mcp.js.map +1 -0
- package/dist/orchestration/browse.d.ts +38 -0
- package/dist/orchestration/browse.js +198 -0
- package/dist/orchestration/browse.js.map +1 -0
- package/dist/orchestration/cache.d.ts +15 -0
- package/dist/orchestration/cache.js +24 -0
- package/dist/orchestration/cache.js.map +1 -0
- package/dist/plugin.d.ts +17 -0
- package/dist/plugin.js +158 -0
- package/dist/plugin.js.map +1 -0
- package/dist/read/decoders/deepwiki.d.ts +2 -0
- package/dist/read/decoders/deepwiki.js +148 -0
- package/dist/read/decoders/deepwiki.js.map +1 -0
- package/dist/read/decoders/grokipedia.d.ts +2 -0
- package/dist/read/decoders/grokipedia.js +210 -0
- package/dist/read/decoders/grokipedia.js.map +1 -0
- package/dist/read/decoders/hackernews.d.ts +2 -0
- package/dist/read/decoders/hackernews.js +168 -0
- package/dist/read/decoders/hackernews.js.map +1 -0
- package/dist/read/decoders/index.d.ts +2 -0
- package/dist/read/decoders/index.js +12 -0
- package/dist/read/decoders/index.js.map +1 -0
- package/dist/read/decoders/reddit.d.ts +2 -0
- package/dist/read/decoders/reddit.js +142 -0
- package/dist/read/decoders/reddit.js.map +1 -0
- package/dist/read/decoders/twitter.d.ts +12 -0
- package/dist/read/decoders/twitter.js +187 -0
- package/dist/read/decoders/twitter.js.map +1 -0
- package/dist/read/decoders/wikipedia.d.ts +2 -0
- package/dist/read/decoders/wikipedia.js +66 -0
- package/dist/read/decoders/wikipedia.js.map +1 -0
- package/dist/read/decoders/youtube.d.ts +2 -0
- package/dist/read/decoders/youtube.js +69 -0
- package/dist/read/decoders/youtube.js.map +1 -0
- package/dist/read/extract.d.ts +25 -0
- package/dist/read/extract.js +320 -0
- package/dist/read/extract.js.map +1 -0
- package/dist/read/index.d.ts +14 -0
- package/dist/read/index.js +66 -0
- package/dist/read/index.js.map +1 -0
- package/dist/read/peek.d.ts +9 -0
- package/dist/read/peek.js +137 -0
- package/dist/read/peek.js.map +1 -0
- package/dist/read/types.d.ts +44 -0
- package/dist/read/types.js +3 -0
- package/dist/read/types.js.map +1 -0
- package/dist/replay/engine.d.ts +53 -0
- package/dist/replay/engine.js +441 -0
- package/dist/replay/engine.js.map +1 -0
- package/dist/replay/truncate.d.ts +16 -0
- package/dist/replay/truncate.js +92 -0
- package/dist/replay/truncate.js.map +1 -0
- package/dist/serve.d.ts +31 -0
- package/dist/serve.js +149 -0
- package/dist/serve.js.map +1 -0
- package/dist/skill/generator.d.ts +44 -0
- package/dist/skill/generator.js +419 -0
- package/dist/skill/generator.js.map +1 -0
- package/dist/skill/importer.d.ts +26 -0
- package/dist/skill/importer.js +80 -0
- package/dist/skill/importer.js.map +1 -0
- package/dist/skill/search.d.ts +19 -0
- package/dist/skill/search.js +51 -0
- package/dist/skill/search.js.map +1 -0
- package/dist/skill/signing.d.ts +16 -0
- package/dist/skill/signing.js +34 -0
- package/dist/skill/signing.js.map +1 -0
- package/dist/skill/ssrf.d.ts +27 -0
- package/dist/skill/ssrf.js +210 -0
- package/dist/skill/ssrf.js.map +1 -0
- package/dist/skill/store.d.ts +7 -0
- package/dist/skill/store.js +93 -0
- package/dist/skill/store.js.map +1 -0
- package/dist/stats/report.d.ts +26 -0
- package/dist/stats/report.js +157 -0
- package/dist/stats/report.js.map +1 -0
- package/dist/types.d.ts +214 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +58 -0
- package/src/auth/crypto.ts +92 -0
- package/src/auth/handoff.ts +229 -0
- package/src/auth/manager.ts +140 -0
- package/src/auth/oauth-refresh.ts +120 -0
- package/src/auth/refresh.ts +300 -0
- package/src/capture/anti-bot.ts +63 -0
- package/src/capture/blocklist.ts +75 -0
- package/src/capture/body-diff.ts +109 -0
- package/src/capture/body-variables.ts +156 -0
- package/src/capture/domain.ts +34 -0
- package/src/capture/entropy.ts +121 -0
- package/src/capture/filter.ts +56 -0
- package/src/capture/graphql.ts +124 -0
- package/src/capture/idle.ts +45 -0
- package/src/capture/monitor.ts +224 -0
- package/src/capture/oauth-detector.ts +106 -0
- package/src/capture/pagination.ts +49 -0
- package/src/capture/parameterize.ts +68 -0
- package/src/capture/scrubber.ts +49 -0
- package/src/capture/session.ts +502 -0
- package/src/capture/token-detector.ts +76 -0
- package/src/capture/verifier.ts +171 -0
- package/src/cli.ts +1031 -0
- package/src/discovery/auth.ts +99 -0
- package/src/discovery/fetch.ts +85 -0
- package/src/discovery/frameworks.ts +231 -0
- package/src/discovery/index.ts +256 -0
- package/src/discovery/openapi.ts +230 -0
- package/src/discovery/probes.ts +76 -0
- package/src/index.ts +26 -0
- package/src/inspect/report.ts +247 -0
- package/src/mcp.ts +618 -0
- package/src/orchestration/browse.ts +250 -0
- package/src/orchestration/cache.ts +37 -0
- package/src/plugin.ts +188 -0
- package/src/read/decoders/deepwiki.ts +180 -0
- package/src/read/decoders/grokipedia.ts +246 -0
- package/src/read/decoders/hackernews.ts +198 -0
- package/src/read/decoders/index.ts +15 -0
- package/src/read/decoders/reddit.ts +158 -0
- package/src/read/decoders/twitter.ts +211 -0
- package/src/read/decoders/wikipedia.ts +75 -0
- package/src/read/decoders/youtube.ts +75 -0
- package/src/read/extract.ts +396 -0
- package/src/read/index.ts +78 -0
- package/src/read/peek.ts +175 -0
- package/src/read/types.ts +37 -0
- package/src/replay/engine.ts +559 -0
- package/src/replay/truncate.ts +116 -0
- package/src/serve.ts +189 -0
- package/src/skill/generator.ts +473 -0
- package/src/skill/importer.ts +107 -0
- package/src/skill/search.ts +76 -0
- package/src/skill/signing.ts +36 -0
- package/src/skill/ssrf.ts +238 -0
- package/src/skill/store.ts +107 -0
- package/src/stats/report.ts +208 -0
- package/src/types.ts +233 -0
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
// src/read/decoders/twitter.ts
|
|
2
|
+
import type { Decoder, ReadResult } from '../types.js';
|
|
3
|
+
import { safeFetch } from '../../discovery/fetch.js';
|
|
4
|
+
|
|
5
|
+
// Base URL of the fxtwitter API; decode() uses it unless options._apiBaseUrl is provided.
const DEFAULT_API_BASE = 'https://api.fxtwitter.com';
|
|
6
|
+
|
|
7
|
+
/**
 * Rough token estimate for a piece of text: one token per four
 * characters (UTF-16 code units), rounded up.
 */
function estimateTokens(text: string): number {
  const CHARS_PER_TOKEN = 4;
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN);
}
|
|
10
|
+
|
|
11
|
+
/**
 * Twitter/X decoder — uses fxtwitter.com public API as side channel.
 *
 * Handles:
 * - Individual tweets/posts (with text, media, quotes, articles)
 * - Profile URLs (basic profile info)
 *
 * fxtwitter API returns full tweet JSON including embedded articles,
 * media URLs, quote tweets, and engagement metrics — all without auth.
 *
 * The host part of every pattern is anchored to the start of the string,
 * a `//` scheme separator, or a `.` subdomain separator. Without that
 * anchor `x\.com` also matches unrelated hosts that merely end in
 * "x.com" (e.g. netflix.com, dropbox.com).
 */
export const twitterDecoder: Decoder = {
  name: 'twitter',
  patterns: [
    /(?:^|\/\/|\.)(?:twitter\.com|x\.com)\/\w+\/status\/\d+/,
    /(?:^|\/\/|\.)(?:twitter\.com|x\.com)\/(\w+)\/?$/,
  ],

  /**
   * Decodes a tweet or profile URL.
   *
   * @param url - The twitter.com / x.com URL to decode.
   * @param options - `skipSsrf` is forwarded to the fetch helper;
   *   `_apiBaseUrl` overrides the default fxtwitter API base.
   * @returns The decoded result, or null when the URL is not a tweet/profile
   *   URL or when anything fails while decoding (best-effort: errors are
   *   swallowed on purpose).
   */
  async decode(url: string, options: { skipSsrf?: boolean; [key: string]: any } = {}): Promise<ReadResult | null> {
    try {
      const apiBase = options._apiBaseUrl || DEFAULT_API_BASE;

      // Tweet/status URL
      const statusMatch = url.match(/(?:^|\/\/|\.)(?:twitter\.com|x\.com)\/(\w+)\/status\/(\d+)/);
      if (statusMatch) {
        // `await` is required here: returning the bare promise would let a
        // rejection escape this try/catch instead of resolving to null.
        return await decodeTweet(apiBase, statusMatch[1], statusMatch[2], url, options);
      }

      // Profile URL
      const profileMatch = url.match(/(?:^|\/\/|\.)(?:twitter\.com|x\.com)\/(\w+)\/?$/);
      if (profileMatch && !isReservedPath(profileMatch[1])) {
        return await decodeProfile(apiBase, profileMatch[1], url, options);
      }

      return null;
    } catch {
      return null;
    }
  },
};
|
|
50
|
+
|
|
51
|
+
/**
 * Reports whether a single path segment is one of a fixed list of
 * route names (compared case-insensitively) rather than a username.
 */
function isReservedPath(path: string): boolean {
  const segment = path.toLowerCase();
  const routeNames = [
    'home', 'explore', 'search', 'notifications', 'messages',
    'settings', 'i', 'compose', 'hashtag', 'login', 'signup',
  ];
  return routeNames.includes(segment);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Truncates `text` to at most `limit` Unicode code points, appending
 * `suffix` only when something was actually cut. Counting code points
 * (not UTF-16 code units) avoids splitting a surrogate pair such as an
 * emoji in half.
 */
function clipCodePoints(text: string, limit: number, suffix = ''): string {
  const codePoints = Array.from(text);
  if (codePoints.length <= limit) return text;
  return codePoints.slice(0, limit).join('') + suffix;
}

/**
 * Fetches `${apiBase}/${username}/status/${tweetId}` and converts the
 * JSON response into a ReadResult.
 *
 * @param apiBase - Base URL of the tweet API.
 * @param username - Screen name taken from the tweet URL; also used as the
 *   author fallback when the response has no author object.
 * @param tweetId - Numeric status id taken from the tweet URL.
 * @param url - The original URL; echoed back as `url` and `metadata.canonical`.
 * @param options - Only `skipSsrf` is read; it is forwarded to the fetch helper.
 * @returns The decoded tweet, or null when the fetch yields nothing or a
 *   non-200 status, the body is not valid JSON, or it has no `tweet` field.
 */
async function decodeTweet(
  apiBase: string,
  username: string,
  tweetId: string,
  url: string,
  options: { skipSsrf?: boolean; [key: string]: any },
): Promise<ReadResult | null> {
  const apiUrl = `${apiBase}/${username}/status/${tweetId}`;
  const result = await safeFetch(apiUrl, { skipSsrf: options.skipSsrf });
  if (!result || result.status !== 200) return null;

  let data: any;
  try {
    data = JSON.parse(result.body);
  } catch {
    return null;
  }

  const tweet = data?.tweet;
  if (!tweet) return null;

  const author = tweet.author;
  const authorName = author ? `${author.name} (@${author.screen_name})` : username;

  // Build content
  const parts: string[] = [];

  // Author info
  if (author) {
    parts.push(`**${author.name}** (@${author.screen_name})`);
    if (author.description) parts.push(author.description);
    if (author.followers) parts.push(`Followers: ${Number(author.followers).toLocaleString()}`);
  }

  // Tweet text
  const tweetText = tweet.text || tweet.raw_text?.text || '';
  if (tweetText) parts.push(`\n${tweetText}`);

  // Engagement (zero / missing counters are omitted)
  const engagement: string[] = [];
  if (tweet.likes) engagement.push(`${Number(tweet.likes).toLocaleString()} likes`);
  if (tweet.retweets) engagement.push(`${Number(tweet.retweets).toLocaleString()} RTs`);
  if (tweet.views) engagement.push(`${Number(tweet.views).toLocaleString()} views`);
  if (tweet.bookmarks) engagement.push(`${Number(tweet.bookmarks).toLocaleString()} bookmarks`);
  if (engagement.length > 0) parts.push(engagement.join(' · '));

  // Embedded article (X Articles / long-form posts)
  if (tweet.article) {
    const article = tweet.article;
    parts.push(`\n## ${article.title || 'Article'}`);

    if (article.content?.blocks) {
      const articleText = extractArticleBlocks(article.content.blocks);
      parts.push(articleText);
    } else if (article.preview_text) {
      parts.push(article.preview_text);
    }
  }

  // Quote tweet
  if (tweet.quote) {
    const q = tweet.quote;
    const qAuthor = q.author ? `${q.author.name} (@${q.author.screen_name})` : 'Unknown';
    // Prefix every line so a multi-line quote stays inside the blockquote.
    const qText = String(q.text || '')
      .split('\n')
      .map((line) => `> ${line}`)
      .join('\n');
    parts.push(`\n> Quoting ${qAuthor}:\n${qText}`);
  }

  const content = parts.join('\n');

  // Links
  const links: Array<{ text: string; href: string }> = [];
  if (author?.website?.url) {
    links.push({ text: author.website.display_url || 'Website', href: author.website.url });
  }

  // Images (at most four photos, plus the article cover when present)
  const images: Array<{ alt: string; src: string }> = [];
  const photos: any[] = Array.isArray(tweet.media?.photos) ? tweet.media.photos : [];
  for (const photo of photos.slice(0, 4)) {
    // Skip entries without a URL rather than emitting `src: undefined`.
    if (photo?.url) images.push({ alt: 'Tweet image', src: photo.url });
  }
  if (tweet.article?.cover_media?.media_info?.original_img_url) {
    images.push({
      alt: tweet.article.title || 'Article cover',
      src: tweet.article.cover_media.media_info.original_img_url,
    });
  }

  return {
    url,
    title: tweet.article?.title || (tweetText ? `${authorName}: ${clipCodePoints(tweetText, 80, '…')}` : `Tweet by ${authorName}`),
    author: author?.name || username,
    description: clipCodePoints(tweetText, 200) || null,
    content,
    links,
    images,
    metadata: {
      type: tweet.article ? 'article' : 'social',
      publishedAt: tweet.created_at || null,
      source: 'twitter-fxtwitter',
      canonical: url,
      siteName: 'X (Twitter)',
    },
    cost: { tokens: estimateTokens(content) },
  };
}
|
|
166
|
+
|
|
167
|
+
/**
 * Renders a list of article blocks as Markdown-style text, one block per line.
 *
 * Block `type` selects the prefix: `header-one|two|three` become `#`/`##`/`###`
 * headings (preceded by a blank line), `unordered-list-item` gets a bullet,
 * `ordered-list-item` gets a running number, `blockquote` gets `> `; any other
 * type is emitted as plain text. Blocks without text are skipped.
 *
 * @param blocks - Article blocks; each is expected to carry `type` and `text`.
 *   A non-array value yields an empty string and null/undefined entries are skipped.
 * @returns The rendered lines joined with `\n`.
 */
function extractArticleBlocks(blocks: any[]): string {
  if (!Array.isArray(blocks)) return '';

  const parts: string[] = [];
  // Position within the current run of consecutive ordered-list items.
  let orderedIndex = 0;

  for (const block of blocks) {
    const type = block?.type;
    // Any block that is not an ordered-list item ends the current numbered run.
    if (type !== 'ordered-list-item') orderedIndex = 0;

    const text = block?.text || '';
    if (!text) continue;

    switch (type) {
      case 'header-one':
        parts.push(`\n# ${text}`);
        break;
      case 'header-two':
        parts.push(`\n## ${text}`);
        break;
      case 'header-three':
        parts.push(`\n### ${text}`);
        break;
      case 'unordered-list-item':
        parts.push(`• ${text}`);
        break;
      case 'ordered-list-item':
        orderedIndex += 1;
        parts.push(`${orderedIndex}. ${text}`);
        break;
      case 'blockquote':
        parts.push(`> ${text}`);
        break;
      default:
        parts.push(text);
        break;
    }
  }
  return parts.join('\n');
}
|
|
199
|
+
|
|
200
|
+
/**
 * Placeholder for profile decoding: currently performs no request and
 * always resolves to null so that another decoder can handle the URL.
 *
 * Original author's note: fxtwitter has no dedicated profile endpoint,
 * and profile data is already included in tweet responses.
 *
 * @param apiBase - Unused for now.
 * @param username - Unused for now.
 * @param url - Unused for now.
 * @param options - Unused for now.
 * @returns Always null.
 */
async function decodeProfile(
  apiBase: string,
  username: string,
  url: string,
  options: { skipSsrf?: boolean; [key: string]: any },
): Promise<ReadResult | null> {
  const noProfileResult: ReadResult | null = null;
  return noProfileResult;
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// src/read/decoders/wikipedia.ts
|
|
2
|
+
import type { Decoder, ReadResult } from '../types.js';
|
|
3
|
+
import { safeFetch } from '../../discovery/fetch.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Approximates the token cost of `text` as a quarter of its length
 * (in UTF-16 code units), rounded up to a whole number.
 */
function estimateTokens(text: string): number {
  const quarterLength = text.length / 4;
  return Math.ceil(quarterLength);
}
|
|
8
|
+
|
|
9
|
+
/**
 * Wikipedia decoder — reads the REST `page/summary` endpoint for a
 * `<lang>.wikipedia.org/wiki/<title>` URL and returns the summary extract.
 */
export const wikipediaDecoder: Decoder = {
  name: 'wikipedia',
  patterns: [
    /([a-z]{2,3})\.wikipedia\.org\/wiki\/([^#?]+)/,
  ],

  /**
   * Decodes a Wikipedia article URL.
   *
   * @param url - The article URL; language and title are taken from it.
   * @param options - `skipSsrf` is forwarded to the fetch helper;
   *   `_apiBaseUrl` overrides the per-language API base.
   * @returns The decoded summary, or null when the URL does not match, the
   *   fetch yields nothing or a non-200 status, the body is not valid JSON,
   *   or anything throws while decoding (best-effort: errors are swallowed).
   */
  async decode(url: string, options: { skipSsrf?: boolean; [key: string]: any } = {}): Promise<ReadResult | null> {
    try {
      const match = url.match(/([a-z]{2,3})\.wikipedia\.org\/wiki\/([^#?]+)/);
      if (!match) return null;

      const lang = match[1];
      const rawTitle = match[2];

      // Normalise the title to a single path segment: decode whatever the URL
      // already escaped, then re-encode everything. This turns `/` inside a
      // title (e.g. "AC/DC") into %2F so it is not read as a path separator.
      let decodedTitle = rawTitle;
      try {
        decodedTitle = decodeURIComponent(rawTitle);
      } catch {
        // Malformed percent sequence — treat the raw text as the literal title.
      }
      const encodedTitle = encodeURIComponent(decodedTitle);

      const apiBase = options._apiBaseUrl || `https://${lang}.wikipedia.org`;
      const apiUrl = `${apiBase}/api/rest_v1/page/summary/${encodedTitle}`;

      const result = await safeFetch(apiUrl, { skipSsrf: options.skipSsrf });
      if (!result || result.status !== 200) return null;

      let data: any;
      try {
        data = JSON.parse(result.body);
      } catch {
        return null;
      }

      if (!data) return null;

      // Fall back to the (safely decoded) URL title when the response has none.
      const articleTitle = data.title || data.displaytitle || decodedTitle;
      const extract = data.extract || '';
      const description = data.description || null;

      const content = extract;

      const links: Array<{ text: string; href: string }> = [];
      if (data.content_urls?.desktop?.page) {
        links.push({ text: 'Full article', href: data.content_urls.desktop.page });
      }

      const images: Array<{ alt: string; src: string }> = [];
      if (data.thumbnail?.source) {
        images.push({ alt: articleTitle, src: data.thumbnail.source });
      }

      return {
        url,
        title: articleTitle,
        author: null,
        description,
        content,
        links,
        images,
        metadata: {
          type: 'article',
          publishedAt: data.timestamp || null,
          source: 'wikipedia-rest',
          canonical: data.content_urls?.desktop?.page || null,
          siteName: 'Wikipedia',
        },
        cost: { tokens: estimateTokens(content) },
      };
    } catch {
      return null;
    }
  },
};
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// src/read/decoders/youtube.ts
|
|
2
|
+
import type { Decoder, ReadResult } from '../types.js';
|
|
3
|
+
import { safeFetch } from '../../discovery/fetch.js';
|
|
4
|
+
|
|
5
|
+
// Base URL of the oEmbed proxy; decode() uses it unless options._oembedBaseUrl is provided.
const DEFAULT_OEMBED_BASE = 'https://noembed.com';
|
|
6
|
+
|
|
7
|
+
/**
 * Cheap token-count heuristic: the text length (UTF-16 code units)
 * divided by four, rounded up.
 */
function estimateTokens(text: string): number {
  const { length } = text;
  return Math.ceil(length / 4);
}
|
|
10
|
+
|
|
11
|
+
/**
 * YouTube decoder — asks an oEmbed endpoint (`<base>/embed?url=…`) for the
 * video's title, author, channel URL and thumbnail.
 */
export const youtubeDecoder: Decoder = {
  name: 'youtube',
  patterns: [
    /youtube\.com\/watch\?v=/,
    /youtu\.be\//,
  ],

  /**
   * Decodes a YouTube video URL.
   *
   * @param url - The video URL; sent URL-encoded as the `url` query parameter.
   * @param options - `skipSsrf` is forwarded to the fetch helper;
   *   `_oembedBaseUrl` overrides the default oEmbed base.
   * @returns The decoded video info, or null when the fetch yields nothing or
   *   a non-200 status, the body is not valid JSON or has no title, or
   *   anything throws while decoding.
   */
  async decode(url: string, options: { skipSsrf?: boolean; [key: string]: any } = {}): Promise<ReadResult | null> {
    try {
      const oembedBase = options._oembedBaseUrl || DEFAULT_OEMBED_BASE;
      const response = await safeFetch(
        `${oembedBase}/embed?url=${encodeURIComponent(url)}`,
        { skipSsrf: options.skipSsrf },
      );
      if (!response || response.status !== 200) return null;

      let payload: any;
      try {
        payload = JSON.parse(response.body);
      } catch {
        return null;
      }

      // A response without a title is treated as "nothing found".
      if (!payload || !payload.title) return null;

      const title = payload.title || null;
      const author = payload.author_name || null;
      const channelUrl = payload.author_url;
      const thumbnailUrl = payload.thumbnail_url;

      const content = [
        title ? `Title: ${title}` : null,
        author ? `Author: ${author}` : null,
        channelUrl ? `Channel: ${channelUrl}` : null,
      ]
        .filter((line): line is string => line !== null)
        .join('\n');

      const links: Array<{ text: string; href: string }> = channelUrl
        ? [{ text: author || 'Channel', href: channelUrl }]
        : [];

      const images: Array<{ alt: string; src: string }> = thumbnailUrl
        ? [{ alt: title || 'Thumbnail', src: thumbnailUrl }]
        : [];

      return {
        url,
        title,
        author,
        description: author ? `Video by ${author}` : 'YouTube video',
        content,
        links,
        images,
        metadata: {
          type: 'video',
          publishedAt: null,
          source: 'youtube-oembed',
          canonical: url,
          siteName: 'YouTube',
        },
        cost: { tokens: estimateTokens(content) },
      };
    } catch {
      return null;
    }
  },
};
|