webpeel 0.20.21 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,178 @@
1
+ /**
2
+ * Transcript export endpoint
3
+ *
4
+ * GET /v1/transcript/export?url=<youtube_url>&format=srt|txt|md|json
5
+ *
6
+ * Downloads a YouTube transcript in the requested format with appropriate
7
+ * Content-Type and Content-Disposition headers.
8
+ */
9
+ import { Router } from 'express';
10
+ import crypto from 'crypto';
11
+ import { getYouTubeTranscript, parseYouTubeUrl } from '../../core/youtube.js';
12
+ import { toSRT, toTXT, toMarkdownDoc, toJSON } from '../../core/transcript-export.js';
13
// Content-Type and file extension per export format.
// Frozen so a request handler cannot accidentally mutate shared module state.
const FORMAT_META = Object.freeze({
    srt: Object.freeze({ contentType: 'text/plain; charset=utf-8', ext: 'srt' }),
    txt: Object.freeze({ contentType: 'text/plain; charset=utf-8', ext: 'txt' }),
    md: Object.freeze({ contentType: 'text/markdown; charset=utf-8', ext: 'md' }),
    json: Object.freeze({ contentType: 'application/json; charset=utf-8', ext: 'json' }),
});
// Valid export format values. Derived from FORMAT_META (insertion order:
// srt, txt, md, json) so the two can never drift apart when a format is added.
const VALID_FORMATS = Object.freeze(Object.keys(FORMAT_META));
22
/**
 * Sanitise a video title so it is safe to use as a filename stem.
 *
 * The title has every character other than ASCII word characters, whitespace,
 * "-", "." and "_" removed; whitespace runs become a single underscore; the
 * result is truncated to 80 characters and leading/trailing underscores are
 * trimmed.
 *
 * The fallback is returned verbatim (never sanitised) whenever the title is
 * missing, is not a string, or sanitises to an empty string, so an identifier
 * passed as the fallback is never altered.
 *
 * @param {unknown} title - Human-readable title; non-strings are treated as absent.
 * @param {unknown} fallback - Value used when no usable title remains.
 * @returns {string} The sanitised title, or the fallback coerced to a string.
 */
function safeFilename(title, fallback) {
    const fallbackName = String(fallback ?? '');
    // A non-string title would throw on .replace(); treat it as "no title".
    const rawTitle = typeof title === 'string' ? title : '';
    const cleaned = rawTitle
        .replace(/[^\w\s\-._]/g, '')
        .replace(/\s+/g, '_')
        .replace(/_+/g, '_')
        // Truncate before trimming so a cut that lands on "_" is cleaned up too.
        .slice(0, 80)
        .replace(/^_+|_+$/g, '');
    return cleaned || fallbackName;
}
35
/**
 * Send a JSON error response in the API's standard envelope:
 * `{ success: false, error, ...extra, requestId }`.
 *
 * @param {object} req - Request; `req.requestId` is echoed when present, otherwise a UUID is generated.
 * @param {object} res - Response used to send the status and JSON body.
 * @param {number} status - HTTP status code.
 * @param {object} error - Error descriptor (type, message, optional hint/docs).
 * @param {object} [extra] - Additional top-level fields placed before `requestId`.
 */
function sendTranscriptError(req, res, status, error, extra = {}) {
    res.status(status).json({
        success: false,
        error,
        ...extra,
        requestId: req.requestId || crypto.randomUUID(),
    });
}
/**
 * Create the router that serves the transcript export endpoint.
 *
 * @returns {object} A router with `GET /v1/transcript/export` registered.
 */
export function createTranscriptExportRouter() {
    const router = Router();
    /**
     * GET /v1/transcript/export
     *
     * Query params:
     *   url      - YouTube video URL (required)
     *   format   - Output format: srt | txt | md | json (default: txt)
     *   language - Preferred transcript language code, e.g. "en" (default: "en")
     *
     * Response:
     *   - 200 file download with appropriate Content-Type / Content-Disposition
     *   - 400 invalid URL or format
     *   - 401 missing API key
     *   - 404 video has no captions
     *   - 500 extraction failure
     */
    router.get('/v1/transcript/export', async (req, res) => {
        // ── Auth ───────────────────────────────────────────────────────────────
        const authId = req.auth?.keyInfo?.accountId || req.user?.userId;
        if (!authId) {
            sendTranscriptError(req, res, 401, {
                type: 'authentication_required',
                message: 'API key required. Get one at https://app.webpeel.dev/keys',
                hint: 'Pass your API key in the Authorization header: Bearer <key>',
                docs: 'https://webpeel.dev/docs/errors#authentication-required',
            });
            return;
        }
        const { url, format, language } = req.query;
        // ── URL validation ─────────────────────────────────────────────────────
        if (!url || typeof url !== 'string') {
            sendTranscriptError(req, res, 400, {
                type: 'invalid_request',
                message: 'Missing or invalid "url" parameter. Pass a YouTube URL: GET /v1/transcript/export?url=https://youtu.be/VIDEO_ID&format=srt',
                docs: 'https://webpeel.dev/docs/errors#invalid-request',
            });
            return;
        }
        const videoId = parseYouTubeUrl(url);
        if (!videoId) {
            sendTranscriptError(req, res, 400, {
                type: 'invalid_youtube_url',
                message: 'The provided URL is not a valid YouTube video URL.',
                hint: 'Supported formats: https://www.youtube.com/watch?v=VIDEO_ID, https://youtu.be/VIDEO_ID',
                docs: 'https://webpeel.dev/docs/errors#invalid-youtube-url',
            });
            return;
        }
        // ── Format validation ──────────────────────────────────────────────────
        // A missing or non-string format (e.g. a repeated query param) falls back to txt.
        const fmt = (typeof format === 'string' ? format : 'txt').toLowerCase();
        if (!VALID_FORMATS.includes(fmt)) {
            sendTranscriptError(req, res, 400, {
                type: 'invalid_format',
                message: `Invalid format "${format}". Supported formats: ${VALID_FORMATS.join(', ')}`,
                docs: 'https://webpeel.dev/docs/errors#invalid-format',
            });
            return;
        }
        // ── Extract transcript ─────────────────────────────────────────────────
        try {
            const lang = typeof language === 'string' ? language : 'en';
            const transcript = await getYouTubeTranscript(url, { language: lang });
            // ── Convert to requested format ──────────────────────────────────────
            let content;
            switch (fmt) {
                case 'srt':
                    content = toSRT(transcript.segments);
                    break;
                case 'txt':
                    content = toTXT(transcript.segments);
                    break;
                case 'md':
                    content = toMarkdownDoc(transcript.title, transcript.channel, transcript.segments);
                    break;
                case 'json':
                    content = toJSON(transcript);
                    break;
            }
            const { contentType, ext } = FORMAT_META[fmt];
            const filename = safeFilename(transcript.title, videoId);
            res.setHeader('Content-Type', contentType);
            res.setHeader('Content-Disposition', `attachment; filename="${filename}.${ext}"`);
            res.send(content);
        }
        catch (error) {
            // Thrown values are not guaranteed to be Errors with a string message.
            // Calling .includes() on a non-string here would throw inside the catch
            // block and reject this async handler without any response being sent.
            const message = typeof error?.message === 'string'
                ? error.message
                : 'Failed to extract YouTube transcript';
            if (message.includes('No captions available')) {
                sendTranscriptError(req, res, 404, {
                    type: 'no_captions',
                    message: 'No captions are available for this video. The video may not have subtitles enabled.',
                    hint: 'Try a different video or check if captions are enabled on YouTube.',
                    docs: 'https://webpeel.dev/docs/errors#no-captions',
                }, { videoId });
                return;
            }
            if (message.includes('Not a valid YouTube URL')) {
                sendTranscriptError(req, res, 400, {
                    type: 'invalid_youtube_url',
                    message,
                    docs: 'https://webpeel.dev/docs/errors#invalid-youtube-url',
                });
                return;
            }
            sendTranscriptError(req, res, 500, {
                type: 'extraction_failed',
                message: 'Failed to extract YouTube transcript. The video page may have changed or the video is unavailable.',
                // Only expose the underlying error text outside production.
                hint: process.env.NODE_ENV !== 'production' ? message : undefined,
                docs: 'https://webpeel.dev/docs/errors#extraction-failed',
            });
        }
    });
    return router;
}
@@ -671,7 +671,10 @@ export function createUserRouter() {
671
671
  router.post('/v1/keys', jwtAuth, async (req, res) => {
672
672
  try {
673
673
  const { userId } = req.user;
674
- const { name, expiresIn } = req.body;
674
+ const { name, expiresIn, scope } = req.body;
675
+ // Validate scope — only allow known values; default to 'full'
676
+ const validScopes = ['full', 'read', 'restricted'];
677
+ const keyScope = validScopes.includes(scope) ? scope : 'full';
675
678
  // Parse optional expiration
676
679
  const expiresAt = parseExpiresIn(expiresIn);
677
680
  // Generate API key
@@ -679,15 +682,16 @@ export function createUserRouter() {
679
682
  const keyHash = crypto.createHash('sha256').update(apiKey).digest('hex');
680
683
  const keyPrefix = PostgresAuthStore.getKeyPrefix(apiKey);
681
684
  // Store API key
682
- const result = await pool.query(`INSERT INTO api_keys (user_id, key_hash, key_prefix, name, expires_at)
683
- VALUES ($1, $2, $3, $4, $5)
684
- RETURNING id, key_prefix, name, created_at, expires_at`, [userId, keyHash, keyPrefix, name || 'Unnamed Key', expiresAt]);
685
+ const result = await pool.query(`INSERT INTO api_keys (user_id, key_hash, key_prefix, name, expires_at, scope)
686
+ VALUES ($1, $2, $3, $4, $5, $6)
687
+ RETURNING id, key_prefix, name, created_at, expires_at, scope`, [userId, keyHash, keyPrefix, name || 'Unnamed Key', expiresAt, keyScope]);
685
688
  const key = result.rows[0];
686
689
  res.status(201).json({
687
690
  id: key.id,
688
691
  key: apiKey, // SECURITY: Only returned once
689
692
  prefix: key.key_prefix,
690
693
  name: key.name,
694
+ scope: key.scope,
691
695
  createdAt: key.created_at,
692
696
  expiresAt: key.expires_at,
693
697
  });
@@ -731,7 +735,7 @@ export function createUserRouter() {
731
735
  router.get('/v1/keys', jwtAuth, async (req, res) => {
732
736
  try {
733
737
  const { userId } = req.user;
734
- const result = await pool.query(`SELECT id, key_prefix, name, is_active, created_at, last_used_at, expires_at
738
+ const result = await pool.query(`SELECT id, key_prefix, name, is_active, created_at, last_used_at, expires_at, scope
735
739
  FROM api_keys
736
740
  WHERE user_id = $1
737
741
  ORDER BY created_at DESC`, [userId]);
@@ -745,6 +749,7 @@ export function createUserRouter() {
745
749
  prefix: key.key_prefix,
746
750
  name: key.name,
747
751
  isActive: key.is_active,
752
+ scope: key.scope || 'full',
748
753
  createdAt: key.created_at,
749
754
  lastUsedAt: key.last_used_at,
750
755
  expiresAt: key.expires_at,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.20.21",
3
+ "version": "0.21.1",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",