webpeel 0.20.21 → 0.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/fetch.js +51 -0
- package/dist/core/dns-cache.js +15 -0
- package/dist/core/http-fetch.js +4 -2
- package/dist/core/pipeline.d.ts +2 -0
- package/dist/core/pipeline.js +14 -2
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/server/app.js +32 -4
- package/dist/server/auth-store.d.ts +1 -0
- package/dist/server/middleware/auth.d.ts +7 -0
- package/dist/server/middleware/auth.js +4 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/pg-auth-store.d.ts +7 -0
- package/dist/server/pg-auth-store.js +32 -0
- package/dist/server/routes/activity.js +5 -0
- package/dist/server/routes/fetch.js +22 -0
- package/dist/server/routes/reader.js +5 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/users.js +10 -5
- package/package.json +1 -1
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transcript export endpoint
|
|
3
|
+
*
|
|
4
|
+
* GET /v1/transcript/export?url=<youtube_url>&format=srt|txt|md|json
|
|
5
|
+
*
|
|
6
|
+
* Downloads a YouTube transcript in the requested format with appropriate
|
|
7
|
+
* Content-Type and Content-Disposition headers.
|
|
8
|
+
*/
|
|
9
|
+
import { Router } from 'express';
|
|
10
|
+
import crypto from 'crypto';
|
|
11
|
+
import { getYouTubeTranscript, parseYouTubeUrl } from '../../core/youtube.js';
|
|
12
|
+
import { toSRT, toTXT, toMarkdownDoc, toJSON } from '../../core/transcript-export.js';
|
|
13
|
+
// Content-Type and file extension per supported export format.
// Frozen so this shared lookup table cannot be mutated at runtime.
const FORMAT_META = Object.freeze({
    srt: Object.freeze({ contentType: 'text/plain; charset=utf-8', ext: 'srt' }),
    txt: Object.freeze({ contentType: 'text/plain; charset=utf-8', ext: 'txt' }),
    md: Object.freeze({ contentType: 'text/markdown; charset=utf-8', ext: 'md' }),
    json: Object.freeze({ contentType: 'application/json; charset=utf-8', ext: 'json' }),
});
// Valid export format values. Derived from FORMAT_META (insertion order:
// srt, txt, md, json) so the list and the metadata table cannot drift apart.
const VALID_FORMATS = Object.freeze(Object.keys(FORMAT_META));
|
|
22
|
+
/**
 * Sanitise a video title so it is safe to use as a filename.
 *
 * Strips every character that is not an ASCII word character, whitespace,
 * hyphen, dot or underscore; turns whitespace runs into single underscores;
 * truncates to 80 characters; and trims leading/trailing underscores and dots
 * (so the result is never a hidden file and never yields "name..ext").
 *
 * @param {*} title - Preferred source for the filename. Non-string values are
 *   coerced with String() instead of throwing.
 * @param {*} fallback - Used when `title` is falsy, and returned as-is when
 *   sanitising leaves nothing usable.
 * @returns {*} The sanitised name, or `fallback` (unmodified) if the sanitised
 *   name is empty.
 */
function safeFilename(title, fallback) {
    // Coerce before calling string methods: a numeric/object title, or a
    // missing title *and* fallback, previously raised a TypeError.
    const source = String((title || fallback) ?? '');
    const base = source
        .replace(/[^\w\s\-._]/g, '')
        .replace(/\s+/g, '_')
        .replace(/_+/g, '_')
        .slice(0, 80)
        // Trim after truncation so a cut that lands on a separator is cleaned up too.
        .replace(/^[._]+|[._]+$/g, '');
    return base || fallback;
}
|
|
35
|
+
export function createTranscriptExportRouter() {
    const router = Router();

    // Writes the JSON error envelope shared by every failure path:
    // { success: false, error, ...extra, requestId }.
    // NOTE(review): req.requestId is presumably set by upstream middleware;
    // a random UUID is generated when it is absent.
    const fail = (req, res, status, error, extra) => {
        res.status(status).json({
            success: false,
            error,
            ...extra,
            requestId: req.requestId || crypto.randomUUID(),
        });
    };

    // One converter per export format; keys mirror VALID_FORMATS.
    const converters = {
        srt: (t) => toSRT(t.segments),
        txt: (t) => toTXT(t.segments),
        md: (t) => toMarkdownDoc(t.title, t.channel, t.segments),
        json: (t) => toJSON(t),
    };

    /**
     * GET /v1/transcript/export
     *
     * Query params:
     *   url      - YouTube video URL (required)
     *   format   - srt | txt | md | json, case-insensitive (default: txt)
     *   language - preferred transcript language code (default: "en")
     *
     * Responses:
     *   200 - transcript body with Content-Type / Content-Disposition set
     *   400 - missing/invalid url, non-YouTube url, or unsupported format
     *   401 - neither req.auth.keyInfo.accountId nor req.user.userId is set
     *   404 - extraction error mentions "No captions available"
     *   500 - any other extraction failure
     */
    router.get('/v1/transcript/export', async (req, res) => {
        // Auth: an API-key account id or a logged-in user id must be present.
        const callerId = req.auth?.keyInfo?.accountId || req.user?.userId;
        if (!callerId) {
            fail(req, res, 401, {
                type: 'authentication_required',
                message: 'API key required. Get one at https://app.webpeel.dev/keys',
                hint: 'Pass your API key in the Authorization header: Bearer <key>',
                docs: 'https://webpeel.dev/docs/errors#authentication-required',
            });
            return;
        }

        const { url, format, language } = req.query;

        // URL validation: must be a non-empty string...
        if (!url || typeof url !== 'string') {
            fail(req, res, 400, {
                type: 'invalid_request',
                message: 'Missing or invalid "url" parameter. Pass a YouTube URL: GET /v1/transcript/export?url=https://youtu.be/VIDEO_ID&format=srt',
                docs: 'https://webpeel.dev/docs/errors#invalid-request',
            });
            return;
        }

        // ...that parseYouTubeUrl can resolve to a video id.
        const videoId = parseYouTubeUrl(url);
        if (!videoId) {
            fail(req, res, 400, {
                type: 'invalid_youtube_url',
                message: 'The provided URL is not a valid YouTube video URL.',
                hint: 'Supported formats: https://www.youtube.com/watch?v=VIDEO_ID, https://youtu.be/VIDEO_ID',
                docs: 'https://webpeel.dev/docs/errors#invalid-youtube-url',
            });
            return;
        }

        // Format validation: non-string values fall back to the default "txt".
        const fmt = typeof format === 'string' ? format.toLowerCase() : 'txt';
        if (!VALID_FORMATS.includes(fmt)) {
            fail(req, res, 400, {
                type: 'invalid_format',
                // Echo the value as the caller sent it, not the lower-cased form.
                message: `Invalid format "${format}". Supported formats: ${VALID_FORMATS.join(', ')}`,
                docs: 'https://webpeel.dev/docs/errors#invalid-format',
            });
            return;
        }

        try {
            // Extract the transcript, then render it in the requested format.
            const lang = typeof language === 'string' ? language : 'en';
            const transcript = await getYouTubeTranscript(url, { language: lang });
            const content = converters[fmt](transcript);

            const { contentType, ext } = FORMAT_META[fmt];
            // The video id is the filename fallback when the title is unusable.
            const filename = safeFilename(transcript.title, videoId);

            res.setHeader('Content-Type', contentType);
            res.setHeader('Content-Disposition', `attachment; filename="${filename}.${ext}"`);
            res.send(content);
        }
        catch (err) {
            // Failures are classified by substring match on the error message.
            const reason = err?.message ?? 'Failed to extract YouTube transcript';

            if (reason.includes('No captions available')) {
                fail(req, res, 404, {
                    type: 'no_captions',
                    message: 'No captions are available for this video. The video may not have subtitles enabled.',
                    hint: 'Try a different video or check if captions are enabled on YouTube.',
                    docs: 'https://webpeel.dev/docs/errors#no-captions',
                }, { videoId });
            }
            else if (reason.includes('Not a valid YouTube URL')) {
                fail(req, res, 400, {
                    type: 'invalid_youtube_url',
                    message: reason,
                    docs: 'https://webpeel.dev/docs/errors#invalid-youtube-url',
                });
            }
            else {
                fail(req, res, 500, {
                    type: 'extraction_failed',
                    message: 'Failed to extract YouTube transcript. The video page may have changed or the video is unavailable.',
                    // The underlying reason is only exposed outside production.
                    hint: process.env.NODE_ENV !== 'production' ? reason : undefined,
                    docs: 'https://webpeel.dev/docs/errors#extraction-failed',
                });
            }
        }
    });

    return router;
}
|
|
@@ -671,7 +671,10 @@ export function createUserRouter() {
|
|
|
671
671
|
router.post('/v1/keys', jwtAuth, async (req, res) => {
|
|
672
672
|
try {
|
|
673
673
|
const { userId } = req.user;
|
|
674
|
-
const { name, expiresIn } = req.body;
|
|
674
|
+
const { name, expiresIn, scope } = req.body;
|
|
675
|
+
// Validate scope — only allow known values; default to 'full'
|
|
676
|
+
const validScopes = ['full', 'read', 'restricted'];
|
|
677
|
+
const keyScope = validScopes.includes(scope) ? scope : 'full';
|
|
675
678
|
// Parse optional expiration
|
|
676
679
|
const expiresAt = parseExpiresIn(expiresIn);
|
|
677
680
|
// Generate API key
|
|
@@ -679,15 +682,16 @@ export function createUserRouter() {
|
|
|
679
682
|
const keyHash = crypto.createHash('sha256').update(apiKey).digest('hex');
|
|
680
683
|
const keyPrefix = PostgresAuthStore.getKeyPrefix(apiKey);
|
|
681
684
|
// Store API key
|
|
682
|
-
const result = await pool.query(`INSERT INTO api_keys (user_id, key_hash, key_prefix, name, expires_at)
|
|
683
|
-
VALUES ($1, $2, $3, $4, $5)
|
|
684
|
-
RETURNING id, key_prefix, name, created_at, expires_at`, [userId, keyHash, keyPrefix, name || 'Unnamed Key', expiresAt]);
|
|
685
|
+
const result = await pool.query(`INSERT INTO api_keys (user_id, key_hash, key_prefix, name, expires_at, scope)
|
|
686
|
+
VALUES ($1, $2, $3, $4, $5, $6)
|
|
687
|
+
RETURNING id, key_prefix, name, created_at, expires_at, scope`, [userId, keyHash, keyPrefix, name || 'Unnamed Key', expiresAt, keyScope]);
|
|
685
688
|
const key = result.rows[0];
|
|
686
689
|
res.status(201).json({
|
|
687
690
|
id: key.id,
|
|
688
691
|
key: apiKey, // SECURITY: Only returned once
|
|
689
692
|
prefix: key.key_prefix,
|
|
690
693
|
name: key.name,
|
|
694
|
+
scope: key.scope,
|
|
691
695
|
createdAt: key.created_at,
|
|
692
696
|
expiresAt: key.expires_at,
|
|
693
697
|
});
|
|
@@ -731,7 +735,7 @@ export function createUserRouter() {
|
|
|
731
735
|
router.get('/v1/keys', jwtAuth, async (req, res) => {
|
|
732
736
|
try {
|
|
733
737
|
const { userId } = req.user;
|
|
734
|
-
const result = await pool.query(`SELECT id, key_prefix, name, is_active, created_at, last_used_at, expires_at
|
|
738
|
+
const result = await pool.query(`SELECT id, key_prefix, name, is_active, created_at, last_used_at, expires_at, scope
|
|
735
739
|
FROM api_keys
|
|
736
740
|
WHERE user_id = $1
|
|
737
741
|
ORDER BY created_at DESC`, [userId]);
|
|
@@ -745,6 +749,7 @@ export function createUserRouter() {
|
|
|
745
749
|
prefix: key.key_prefix,
|
|
746
750
|
name: key.name,
|
|
747
751
|
isActive: key.is_active,
|
|
752
|
+
scope: key.scope || 'full',
|
|
748
753
|
createdAt: key.created_at,
|
|
749
754
|
lastUsedAt: key.last_used_at,
|
|
750
755
|
expiresAt: key.expires_at,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.20.21",
|
|
3
|
+
"version": "0.21.1",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|