webpeel 0.20.21 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -205,6 +205,55 @@ export async function runFetch(url, options) {
205
205
  console.error(usageCheck.message);
206
206
  process.exit(1);
207
207
  }
208
+ // ── --export: YouTube transcript download (early exit) ────────────────
209
+ if (options.export) {
210
+ const exportFmt = options.export.toLowerCase();
211
+ const validExportFmts = ['srt', 'txt', 'md', 'json'];
212
+ if (!validExportFmts.includes(exportFmt)) {
213
+ console.error(`Error: --export format must be one of: ${validExportFmts.join(', ')}`);
214
+ process.exit(1);
215
+ }
216
+ const exportCfg = loadConfig();
217
+ const exportApiKey = exportCfg.apiKey || process.env.WEBPEEL_API_KEY;
218
+ const exportApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
219
+ if (!exportApiKey) {
220
+ console.error('No API key configured. Run: webpeel auth <your-key>');
221
+ console.error('Get a free key at: https://app.webpeel.dev/keys');
222
+ process.exit(2);
223
+ }
224
+ const lang = options.language || 'en';
225
+ const exportUrl = `${exportApiUrl}/v1/transcript/export?url=${encodeURIComponent(url)}&format=${exportFmt}&language=${lang}`;
226
+ const exportRes = await fetch(exportUrl, {
227
+ headers: { 'Authorization': `Bearer ${exportApiKey}` },
228
+ signal: AbortSignal.timeout(options.timeout ?? 90000),
229
+ });
230
+ if (!exportRes.ok) {
231
+ const errBody = await exportRes.text().catch(() => '');
232
+ try {
233
+ const errJson = JSON.parse(errBody);
234
+ const msg = errJson?.error?.message || errJson?.message || exportRes.statusText;
235
+ console.error(`Export failed (${exportRes.status}): ${msg}`);
236
+ }
237
+ catch {
238
+ console.error(`Export failed (${exportRes.status}): ${exportRes.statusText}`);
239
+ }
240
+ process.exit(1);
241
+ }
242
+ const exportContent = await exportRes.text();
243
+ if (options.output) {
244
+ writeFileSync(options.output, exportContent, 'utf-8');
245
+ if (!options.silent) {
246
+ console.error(`Transcript saved to: ${options.output}`);
247
+ }
248
+ }
249
+ else {
250
+ process.stdout.write(exportContent);
251
+ if (!exportContent.endsWith('\n'))
252
+ process.stdout.write('\n');
253
+ }
254
+ await cleanup();
255
+ process.exit(0);
256
+ }
208
257
  // Check cache first (before spinner/network)
209
258
  // Default: 5m TTL for all CLI fetches unless --no-cache is set
210
259
  let cacheTtlMs;
@@ -1077,6 +1126,8 @@ export function registerFetchCommands(program) {
1077
1126
  .option('--content-only', 'Output only the raw content field (no metadata, no JSON wrapper) — ideal for piping to LLMs')
1078
1127
  .option('--progress', 'Show engine escalation steps (simple → browser → stealth) with timing')
1079
1128
  .option('--stdin', 'Read HTML from stdin instead of fetching a URL — converts to markdown')
1129
+ .option('--export <format>', 'Export YouTube transcript in the given format: srt, txt, md, json')
1130
+ .option('--output <file>', 'Write output to a file instead of stdout')
1080
1131
  .action(async (url, options) => {
1081
1132
  if (options.stdin) {
1082
1133
  await runStdin(options);
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Transcript export format converters.
3
+ *
4
+ * Converts YouTube transcript data into SRT, plain text, Markdown, or JSON
5
+ * so users can download transcripts in their preferred format.
6
+ */
7
+ import type { TranscriptSegment, YouTubeTranscript } from './youtube.js';
8
+ export type { TranscriptSegment, YouTubeTranscript as TranscriptResult };
9
+ /**
10
+ * Format seconds as an SRT timestamp: HH:MM:SS,mmm
11
+ *
12
+ * @example formatSRTTimestamp(3661.5) → "01:01:01,500"
13
+ */
14
+ export declare function formatSRTTimestamp(seconds: number): string;
15
+ /**
16
+ * Convert transcript segments to SRT subtitle format.
17
+ *
18
+ * SRT structure:
19
+ * ```
20
+ * 1
21
+ * 00:00:01,000 --> 00:00:04,500
22
+ * We're no strangers to love
23
+ *
24
+ * 2
25
+ * 00:00:04,500 --> 00:00:08,000
26
+ * You know the rules and so do I
27
+ * ```
28
+ */
29
+ export declare function toSRT(segments: TranscriptSegment[]): string;
30
+ /**
31
+ * Convert transcript segments to plain text.
32
+ * One line per segment, no timestamps.
33
+ */
34
+ export declare function toTXT(segments: TranscriptSegment[]): string;
35
+ /**
36
+ * Convert transcript to a clean Markdown document.
37
+ * Includes title, channel header, and timestamped transcript lines.
38
+ *
39
+ * @param title - Video title
40
+ * @param channel - Channel name
41
+ * @param segments - Transcript segments
42
+ */
43
+ export declare function toMarkdownDoc(title: string, channel: string, segments: TranscriptSegment[]): string;
44
+ /**
45
+ * Convert full transcript result to pretty-printed JSON.
46
+ */
47
+ export declare function toJSON(result: YouTubeTranscript): string;
@@ -0,0 +1,107 @@
1
+ /**
2
+ * Transcript export format converters.
3
+ *
4
+ * Converts YouTube transcript data into SRT, plain text, Markdown, or JSON
5
+ * so users can download transcripts in their preferred format.
6
+ */
7
+ // ---------------------------------------------------------------------------
8
+ // Timestamp helpers
9
+ // ---------------------------------------------------------------------------
10
/**
 * Format a number of seconds as an SRT timestamp: HH:MM:SS,mmm
 *
 * The value is rounded to the nearest millisecond. Negative and non-finite
 * inputs (NaN, ±Infinity, undefined) are clamped to zero so the result is
 * always a well-formed timestamp rather than "NaN:NaN:NaN,NaN".
 *
 * @param {number} seconds - Offset from the start of the video, in seconds
 * @returns {string} Timestamp with zero-padded fields; hours may exceed two digits
 *
 * @example formatSRTTimestamp(3661.5) → "01:01:01,500"
 */
export function formatSRTTimestamp(seconds) {
    const numeric = Number(seconds);
    // Guard against NaN/Infinity before doing any arithmetic on the value.
    const clamped = Number.isFinite(numeric) ? Math.max(0, numeric) : 0;
    const totalMs = Math.round(clamped * 1000);
    const ms = totalMs % 1000;
    const totalSec = Math.floor(totalMs / 1000);
    const s = totalSec % 60;
    const totalMin = Math.floor(totalSec / 60);
    const m = totalMin % 60;
    const h = Math.floor(totalMin / 60);
    const pad = (value, width) => String(value).padStart(width, '0');
    return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)},${pad(ms, 3)}`;
}
28
/**
 * Format a number of seconds as a human-readable timestamp: M:SS or H:MM:SS
 *
 * Fractions of a second are truncated. Negative and non-finite inputs
 * (NaN, ±Infinity, undefined) are clamped to zero so the result is never
 * "NaN:NaN".
 *
 * @param {number} seconds - Offset from the start of the video, in seconds
 * @returns {string} "M:SS" below one hour, "H:MM:SS" otherwise
 *
 * @example formatReadableTimestamp(125.3) → "2:05"
 */
function formatReadableTimestamp(seconds) {
    const numeric = Number(seconds);
    // Guard against NaN/Infinity before doing any arithmetic on the value.
    const totalSec = Number.isFinite(numeric) ? Math.floor(Math.max(0, numeric)) : 0;
    const h = Math.floor(totalSec / 3600);
    const m = Math.floor((totalSec % 3600) / 60);
    const s = totalSec % 60;
    const twoDigits = (value) => String(value).padStart(2, '0');
    if (h > 0) {
        return `${h}:${twoDigits(m)}:${twoDigits(s)}`;
    }
    return `${m}:${twoDigits(s)}`;
}
43
+ // ---------------------------------------------------------------------------
44
+ // Export functions
45
+ // ---------------------------------------------------------------------------
46
/**
 * Render transcript segments as an SRT subtitle document.
 *
 * Each segment becomes one cue: a 1-based sequence number, a
 * "start --> end" timing line (end = start + duration, with negative
 * durations treated as zero), and the segment text. Cues are separated by a
 * blank line; an empty segment list yields an empty string.
 *
 * ```
 * 1
 * 00:00:01,000 --> 00:00:04,500
 * We're no strangers to love
 *
 * 2
 * 00:00:04,500 --> 00:00:08,000
 * You know the rules and so do I
 * ```
 */
export function toSRT(segments) {
    const cues = [];
    segments.forEach((segment, index) => {
        const begin = formatSRTTimestamp(segment.start);
        const finish = formatSRTTimestamp(segment.start + Math.max(0, segment.duration));
        cues.push(`${index + 1}\n${begin} --> ${finish}\n${segment.text}`);
    });
    return cues.join('\n\n');
}
71
/**
 * Render transcript segments as plain text.
 * Emits the text of each segment on its own line, without timestamps.
 */
export function toTXT(segments) {
    const lines = [];
    for (const segment of segments) {
        lines.push(segment.text);
    }
    return lines.join('\n');
}
78
/**
 * Render a transcript as a Markdown document.
 *
 * Layout: an H1 with the title (or "Transcript" when the title is empty),
 * an optional bold "Channel" line, a "## Transcript" heading, then one line
 * per segment prefixed with its bold, bracketed readable timestamp.
 *
 * @param title - Video title
 * @param channel - Channel name (the channel line is omitted when falsy)
 * @param segments - Transcript segments
 */
export function toMarkdownDoc(title, channel, segments) {
    const heading = [`# ${title || 'Transcript'}`, ''];
    const channelPart = channel ? [`**Channel:** ${channel}`, ''] : [];
    const transcriptLines = Array.from(segments, (segment) => `**[${formatReadableTimestamp(segment.start)}]** ${segment.text}`);
    return [...heading, ...channelPart, '## Transcript', '', ...transcriptLines].join('\n');
}
102
/**
 * Serialize the full transcript result as JSON, pretty-printed with a
 * two-space indent.
 */
export function toJSON(result) {
    const indentWidth = 2;
    return JSON.stringify(result, undefined, indentWidth);
}
@@ -31,6 +31,7 @@ import { createAskRouter } from './routes/ask.js';
31
31
  import { createMcpRouter } from './routes/mcp.js';
32
32
  import { createDoRouter } from './routes/do.js';
33
33
  import { createYouTubeRouter } from './routes/youtube.js';
34
+ import { createTranscriptExportRouter } from './routes/transcript-export.js';
34
35
  import { createDeepFetchRouter } from './routes/deep-fetch.js';
35
36
  import { createWatchRouter } from './routes/watch.js';
36
37
  import pg from 'pg';
@@ -38,6 +39,7 @@ import { createScreenshotRouter } from './routes/screenshot.js';
38
39
  import { createDemoRouter } from './routes/demo.js';
39
40
  import { createPlaygroundRouter } from './routes/playground.js';
40
41
  import { createReaderRouter } from './routes/reader.js';
42
+ import { createSharePublicRouter, createShareRouter } from './routes/share.js';
41
43
  import { createJobQueue } from './job-queue.js';
42
44
  import { createCompatRouter } from './routes/compat.js';
43
45
  import { createCrawlRouter } from './routes/crawl.js';
@@ -46,6 +48,7 @@ import { createExtractRouter } from './routes/extract.js';
46
48
  import { createAgentRouter } from './routes/agent.js';
47
49
  import { createSessionRouter } from './routes/session.js';
48
50
  import { createSentryHooks } from './sentry.js';
51
+ import { requireScope } from './middleware/scope-guard.js';
49
52
  import { warmup, cleanup as cleanupFetcher } from '../core/fetcher.js';
50
53
  import { registerPremiumHooks } from './premium/index.js';
51
54
  import { readFileSync } from 'fs';
@@ -239,6 +242,9 @@ export function createApp(config = {}) {
239
242
  app.use(createDemoRouter());
240
243
  // Playground endpoint — unauthenticated, CORS-locked to webpeel.dev/localhost
241
244
  app.use('/v1/playground', createPlaygroundRouter());
245
+ // Public share endpoint — GET /s/:id (no auth required, must be before reader router)
246
+ // Registered first so valid share IDs are served before falling through to reader's /s/* search
247
+ app.use(createSharePublicRouter(pool));
242
248
  // Zero-auth reader API — Jina-style URL prefix (/r/URL) and search (/s/query)
243
249
  // Must be BEFORE auth middleware so no API key is required
244
250
  app.use(createReaderRouter());
@@ -246,17 +252,33 @@ export function createApp(config = {}) {
246
252
  app.use(createAuthMiddleware(authStore));
247
253
  // Apply rate limiting middleware globally
248
254
  app.use(createRateLimitMiddleware(rateLimiter));
255
+ // Share links — POST /v1/share (auth required, after auth middleware)
256
+ app.use(createShareRouter(pool));
249
257
  // First-class native routes (registered before compat so they take precedence)
250
- app.use('/v1/crawl', createCrawlRouter(jobQueue));
251
- app.use('/v1/map', createMapRouter());
258
+ //
259
+ // Scope guards enforce API key permission scopes; JWT sessions bypass them.
260
+ // For routers with relative paths: app.use(path, guard, router) ← prefix stripped, relative paths match
261
+ // For routers with absolute paths: app.use(path, guard) then app.use(router) ← guard at path, router sees full path
262
+ // /v1/crawl — full or read only (router uses relative paths)
263
+ app.use('/v1/crawl', requireScope('full', 'read'), createCrawlRouter(jobQueue));
264
+ // /v1/map — full or read only (router uses relative paths)
265
+ app.use('/v1/map', requireScope('full', 'read'), createMapRouter());
266
+ // Compat routes (/v1/scrape, /v1/search) — all scopes allowed, no guard needed
252
267
  app.use(createCompatRouter(jobQueue));
253
268
  app.use(createSessionRouter());
254
269
  app.use(createExtractRouter());
270
+ // /v1/deep-fetch — full or read only (router uses absolute paths, guard before router)
271
+ app.use('/v1/deep-fetch', requireScope('full', 'read'));
255
272
  app.use(createDeepFetchRouter());
273
+ // /v1/watch — full or read only (router uses absolute paths, guard before router)
256
274
  if (pool) {
275
+ app.use('/v1/watch', requireScope('full', 'read'));
257
276
  app.use(createWatchRouter(pool));
258
277
  }
278
+ // /v1/fetch, /v1/search — all scopes allowed, no guard needed
259
279
  app.use(createFetchRouter(authStore));
280
+ // /v1/screenshot — full or read only (router uses absolute paths, guard before router)
281
+ app.use('/v1/screenshot', requireScope('full', 'read'));
260
282
  app.use(createScreenshotRouter(authStore));
261
283
  app.use(createSearchRouter(authStore));
262
284
  app.use(createBillingPortalRouter(pool));
@@ -266,6 +288,8 @@ export function createApp(config = {}) {
266
288
  app.use(createActivityRouter(authStore));
267
289
  app.use(createCLIUsageRouter());
268
290
  app.use(createJobsRouter(jobQueue, authStore));
291
+ // /v1/batch — full or read only (router uses absolute paths, guard before router)
292
+ app.use('/v1/batch', requireScope('full', 'read'));
269
293
  app.use(createBatchRouter(jobQueue));
270
294
  // Deprecation headers for declining endpoints
271
295
  app.use('/v1/answer', (_req, res, next) => {
@@ -274,11 +298,15 @@ export function createApp(config = {}) {
274
298
  res.set('Link', '</v1/ask>; rel="successor-version"');
275
299
  next();
276
300
  });
301
+ // /v1/answer, /v1/ask — all scopes allowed, no guard needed
277
302
  app.use(createAnswerRouter());
278
303
  app.use(createAskRouter());
279
- app.use('/v1/agent', createAgentRouter());
280
- app.use('/v1/do', createDoRouter());
304
+ // /v1/agent — full or read only (router uses relative paths)
305
+ app.use('/v1/agent', requireScope('full', 'read'), createAgentRouter());
306
+ // /v1/do — full only (router uses relative paths; admin-level operation)
307
+ app.use('/v1/do', requireScope('full'), createDoRouter());
281
308
  app.use(createYouTubeRouter());
309
+ app.use(createTranscriptExportRouter());
282
310
  app.use(createMcpRouter(authStore, pool));
283
311
  // 404 handler
284
312
  app.use((req, res) => {
@@ -8,6 +8,7 @@ export interface ApiKeyInfo {
8
8
  rateLimit: number;
9
9
  accountId?: string;
10
10
  createdAt: Date;
11
+ scope?: 'full' | 'read' | 'restricted';
11
12
  }
12
13
  export interface AuthStore {
13
14
  validateKey(key: string): Promise<ApiKeyInfo | null>;
@@ -11,6 +11,7 @@
11
11
  */
12
12
  import { Request, Response, NextFunction } from 'express';
13
13
  import { AuthStore, ApiKeyInfo } from '../auth-store.js';
14
+ import { KeyScope } from '../pg-auth-store.js';
14
15
  import '../types.js';
15
16
  declare global {
16
17
  namespace Express {
@@ -22,6 +23,12 @@ declare global {
22
23
  softLimited: boolean;
23
24
  extraUsageAvailable: boolean;
24
25
  };
26
+ /**
27
+ * Permission scope of the authenticated API key.
28
+ * Undefined when authenticated via JWT (dashboard session) — JWT users bypass scope enforcement.
29
+ * Set to 'full' | 'read' | 'restricted' for API key requests.
30
+ */
31
+ keyScope?: KeyScope;
25
32
  }
26
33
  }
27
34
  }
@@ -204,6 +204,10 @@ export function createAuthMiddleware(authStore) {
204
204
  softLimited,
205
205
  extraUsageAvailable,
206
206
  };
207
+ // Attach API key scope (only for API key auth; JWT users get undefined = bypass scope checks)
208
+ if (keyInfo) {
209
+ req.keyScope = keyInfo.scope || 'full';
210
+ }
207
211
  next();
208
212
  }
209
213
  catch (_error) {
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Scope enforcement middleware for API key permission scoping.
3
+ *
4
+ * Keys have one of three scopes:
5
+ * 'full' — all endpoints (default)
6
+ * 'read' — read/fetch operations only
7
+ * 'restricted' — /v1/scrape only (for limited sharing)
8
+ *
9
+ * JWT-authenticated requests (dashboard sessions) bypass scope enforcement:
10
+ * req.keyScope is undefined for JWT requests, which are always allowed through.
11
+ */
12
+ import { Request, Response, NextFunction } from 'express';
13
+ import { KeyScope } from '../pg-auth-store.js';
14
+ /**
15
+ * Middleware factory that enforces API key scope.
16
+ * Pass the set of scopes that are permitted to access the guarded route.
17
+ *
18
+ * @example
19
+ * // Only full-access keys may manage billing:
20
+ * router.post('/v1/billing', requireScope('full'), handler);
21
+ *
22
+ * // Every scope (full, read, restricted) may scrape:
23
+ * app.use('/v1/scrape', requireScope('full', 'read', 'restricted'), scrapeRouter);
24
+ */
25
+ export declare function requireScope(...allowedScopes: KeyScope[]): (req: Request, res: Response, next: NextFunction) => void;
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Scope enforcement middleware for API key permission scoping.
3
+ *
4
+ * Keys have one of three scopes:
5
+ * 'full' — all endpoints (default)
6
+ * 'read' — read/fetch operations only
7
+ * 'restricted' — /v1/scrape only (for limited sharing)
8
+ *
9
+ * JWT-authenticated requests (dashboard sessions) bypass scope enforcement:
10
+ * req.keyScope is undefined for JWT requests, which are always allowed through.
11
+ */
12
+ /**
13
+ * Middleware factory that enforces API key scope.
14
+ * Pass the set of scopes that are permitted to access the guarded route.
15
+ *
16
+ * @example
17
+ * // Only full-access keys may manage billing:
18
+ * router.post('/v1/billing', requireScope('full'), handler);
19
+ *
20
+ * // Read and full keys may scrape:
21
+ * app.use('/v1/scrape', requireScope('full', 'read', 'restricted'), scrapeRouter);
22
+ */
23
+ export function requireScope(...allowedScopes) {
24
+ return (req, res, next) => {
25
+ // JWT sessions (req.keyScope === undefined) always pass through.
26
+ // Scope enforcement only applies to API key requests.
27
+ if (req.keyScope === undefined) {
28
+ return next();
29
+ }
30
+ if (!allowedScopes.includes(req.keyScope)) {
31
+ res.status(403).json({
32
+ success: false,
33
+ error: {
34
+ type: 'insufficient_scope',
35
+ message: `This API key has '${req.keyScope}' scope. This endpoint requires: ${allowedScopes.join(' or ')}.`,
36
+ docs: 'https://webpeel.dev/docs/authentication#scopes',
37
+ hint: 'Create a new API key with the required scope in your dashboard.',
38
+ },
39
+ requestId: req.requestId,
40
+ });
41
+ return;
42
+ }
43
+ next();
44
+ };
45
+ }
@@ -3,6 +3,8 @@
3
3
  * Uses SHA-256 hashing for API keys and tracks WEEKLY usage with burst limits
4
4
  */
5
5
  import { AuthStore, ApiKeyInfo } from './auth-store.js';
6
+ /** Permission scope for an API key */
7
+ export type KeyScope = 'full' | 'read' | 'restricted';
6
8
  export interface WeeklyUsageInfo {
7
9
  week: string;
8
10
  basicCount: number;
@@ -39,6 +41,11 @@ export interface ExtraUsageInfo {
39
41
  export declare class PostgresAuthStore implements AuthStore {
40
42
  private pool;
41
43
  constructor(connectionString?: string);
44
+ /**
45
+ * Run idempotent schema migrations.
46
+ * Safe to call on every startup — all statements use IF NOT EXISTS / IF EXISTS.
47
+ */
48
+ private ensureSchema;
42
49
  /**
43
50
  * Hash API key with SHA-256
44
51
  * SECURITY: Never store raw API keys
@@ -34,6 +34,36 @@ export class PostgresAuthStore {
34
34
  idleTimeoutMillis: 30000,
35
35
  connectionTimeoutMillis: 10000,
36
36
  });
37
+ // Run idempotent schema migrations on startup
38
+ this.ensureSchema().catch(err => console.error('[pg-auth-store] Schema migration failed:', err));
39
+ }
40
+ /**
41
+ * Run idempotent schema migrations.
42
+ * Safe to call on every startup — all statements use IF NOT EXISTS / IF EXISTS.
43
+ */
44
+ async ensureSchema() {
45
+ await this.pool.query(`
46
+ ALTER TABLE api_keys ADD COLUMN IF NOT EXISTS scope VARCHAR(20) NOT NULL DEFAULT 'full';
47
+ `);
48
+ await this.pool.query(`
49
+ CREATE TABLE IF NOT EXISTS shared_reads (
50
+ id VARCHAR(12) PRIMARY KEY,
51
+ url TEXT NOT NULL,
52
+ title TEXT,
53
+ content TEXT NOT NULL,
54
+ tokens INTEGER,
55
+ created_by TEXT REFERENCES users(id),
56
+ created_at TIMESTAMPTZ DEFAULT NOW(),
57
+ expires_at TIMESTAMPTZ DEFAULT NOW() + INTERVAL '30 days',
58
+ view_count INTEGER DEFAULT 0
59
+ );
60
+ `);
61
+ await this.pool.query(`
62
+ CREATE INDEX IF NOT EXISTS idx_shared_reads_url ON shared_reads(url);
63
+ `);
64
+ await this.pool.query(`
65
+ CREATE INDEX IF NOT EXISTS idx_shared_reads_created_by ON shared_reads(created_by);
66
+ `);
37
67
  }
38
68
  /**
39
69
  * Hash API key with SHA-256
@@ -108,6 +138,7 @@ export class PostgresAuthStore {
108
138
  ak.user_id,
109
139
  ak.key_prefix,
110
140
  ak.name,
141
+ ak.scope,
111
142
  u.tier,
112
143
  u.rate_limit,
113
144
  u.weekly_limit,
@@ -129,6 +160,7 @@ export class PostgresAuthStore {
129
160
  rateLimit: row.rate_limit,
130
161
  accountId: row.user_id,
131
162
  createdAt: new Date(),
163
+ scope: row.scope || 'full',
132
164
  };
133
165
  }
134
166
  catch (error) {
@@ -34,10 +34,12 @@ export function createActivityRouter(authStore) {
34
34
  SELECT
35
35
  id,
36
36
  url,
37
+ endpoint,
37
38
  method,
38
39
  status_code,
39
40
  processing_time_ms,
40
41
  tokens_used,
42
+ ip_address,
41
43
  created_at
42
44
  FROM usage_logs
43
45
  WHERE user_id = $1
@@ -49,11 +51,14 @@ export function createActivityRouter(authStore) {
49
51
  const requests = result.rows.map((row) => ({
50
52
  id: row.id,
51
53
  url: row.url || 'N/A',
54
+ endpoint: row.endpoint || null,
52
55
  status: (row.status_code >= 200 && row.status_code < 300) ? 'success' : 'error',
53
56
  responseTime: row.processing_time_ms || 0,
54
57
  mode: row.method || 'basic',
55
58
  timestamp: row.created_at,
56
59
  tokensUsed: row.tokens_used || null,
60
+ ipAddress: row.ip_address || null,
61
+ statusCode: row.status_code || null,
57
62
  }));
58
63
  res.json({ requests });
59
64
  }
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Shareable read links — short public URLs for fetched content
3
+ *
4
+ * POST /v1/share — create a short link (auth required, 50/day limit)
5
+ * GET /s/:id — serve shared content (public, no auth)
6
+ *
7
+ * IDs are 9-char base64url strings (crypto.randomBytes(7).toString('base64url').slice(0, 9))
8
+ * Shares expire after 30 days. view_count is incremented on every public read.
9
+ */
10
+ import { Router } from 'express';
11
+ import pg from 'pg';
12
+ /** Generate a cryptographically secure 9-char base64url ID.
13
+ * randomBytes(7) → base64url gives 10 chars (7*4/3=9.33→10), slice to 9.
14
+ * Note: randomBytes(6) → base64url gives only 8 chars (6/3*4=8), so we need 7+ bytes.
15
+ */
16
+ export declare function generateShareId(): string;
17
+ export declare function createSharePublicRouter(pool: pg.Pool | null): Router;
18
+ export declare function createShareRouter(pool: pg.Pool | null): Router;
@@ -0,0 +1,462 @@
1
+ /**
2
+ * Shareable read links — short public URLs for fetched content
3
+ *
4
+ * POST /v1/share — create a short link (auth required, 50/day limit)
5
+ * GET /s/:id — serve shared content (public, no auth)
6
+ *
7
+ * IDs are 9-char base64url strings (crypto.randomBytes(7).toString('base64url').slice(0, 9))
8
+ * Shares expire after 30 days. view_count is incremented on every public read.
9
+ */
10
+ import { Router } from 'express';
11
+ import crypto from 'crypto';
12
+ import { createLogger } from '../logger.js';
13
+ import { peel } from '../../index.js';
14
+ import { validateUrlForSSRF, SSRFError } from '../middleware/url-validator.js';
15
+ const log = createLogger('share');
16
+ // ─── Helpers ──────────────────────────────────────────────────────────────────
17
/**
 * Produce a random 9-character base64url share ID.
 *
 * Seven random bytes encode to 10 base64url characters, of which the first
 * nine are kept. Six bytes would encode to only 8 characters, hence 7.
 */
export function generateShareId() {
    const idLength = 9;
    const encoded = crypto.randomBytes(7).toString('base64url');
    return encoded.slice(0, idLength);
}
24
/**
 * Base URL used when building share links: the API_BASE_URL environment
 * variable when set to a non-empty value, otherwise the production API host.
 */
function getBaseUrl() {
    const configured = process.env.API_BASE_URL;
    if (configured) {
        return configured;
    }
    return 'https://api.webpeel.dev';
}
28
/**
 * Simple markdown → HTML renderer (no external deps).
 *
 * The input is untrusted page content that ends up on a public page, so:
 *  - all of `& < > "` are escaped before any markup is generated, which
 *    keeps link/image URLs from breaking out of their attribute;
 *  - URLs with a scheme other than http, https or mailto (e.g. `javascript:`)
 *    are replaced by `#`;
 *  - fenced code, inline code and generated `<a>`/`<img>` tags are stashed
 *    behind NUL-delimited placeholders so later passes (emphasis, `<br>`
 *    insertion) cannot rewrite their contents.
 *
 * Pass order matters: images run before links (a link pattern would
 * otherwise swallow the image syntax), line-level constructs run before
 * emphasis (so a `* item` bullet is not read as italics), and list items
 * are grouped into `<ul>` before newlines become `<br>`.
 *
 * @param {string} md - Markdown source
 * @returns {string} HTML fragment wrapped in a `<p>` element
 */
function markdownToHtml(md) {
    const escapeHtml = (text) => text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');
    // Reject any explicit scheme that is not on the allow-list. Whitespace and
    // control characters are ignored for the check because browsers strip them.
    const sanitizeUrl = (url) => {
        const compact = url.replace(/[\u0000-\u0020\u007f]+/g, '');
        const scheme = /^([a-z][a-z0-9+.-]*):/i.exec(compact);
        if (scheme && !['http', 'https', 'mailto'].includes(scheme[1].toLowerCase())) {
            return '#';
        }
        return url;
    };
    // Finished HTML fragments are parked here and referenced by placeholder.
    const stash = [];
    const protect = (fragment) => {
        stash.push(fragment);
        return `\u0000${stash.length - 1}\u0000`;
    };
    // A stashed fragment may itself reference earlier fragments (e.g. inline
    // code inside an image alt text), so restoration is recursive.
    const restore = (text) => text.replace(/\u0000(\d+)\u0000/g, (_m, index) => restore(stash[Number(index)]));
    let html = String(md ?? '')
        // NUL is reserved for placeholders; drop it from the input.
        .replace(/\u0000/g, '')
        .replace(/\r\n?/g, '\n')
        // Code blocks (``` ... ```)
        .replace(/```[\w]*\n([\s\S]*?)```/g, (_m, code) => protect(`<pre><code>${escapeHtml(code.trim())}</code></pre>`));
    html = escapeHtml(html)
        // Inline code
        .replace(/`([^`]+)`/g, (_m, code) => protect(`<code>${code}</code>`))
        // Images (must run before links)
        .replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_m, alt, src) => protect(`<img src="${sanitizeUrl(src)}" alt="${alt}">`))
        // Links
        .replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_m, label, href) => `${protect(`<a href="${sanitizeUrl(href)}" rel="noopener noreferrer">`)}${label}</a>`)
        // Headings
        .replace(/^### (.+)$/gm, '<h3>$1</h3>')
        .replace(/^## (.+)$/gm, '<h2>$1</h2>')
        .replace(/^# (.+)$/gm, '<h1>$1</h1>')
        // Horizontal rule
        .replace(/^---$/gm, '<hr>')
        // Blockquote ('>' has already been escaped to '&gt;')
        .replace(/^&gt; (.+)$/gm, '<blockquote>$1</blockquote>')
        // Unordered list items
        .replace(/^[*-] (.+)$/gm, '<li>$1</li>')
        // Ordered list items
        .replace(/^\d+\. (.+)$/gm, '<li>$1</li>')
        // Bold + italic, bold, italic (never spanning lines)
        .replace(/\*\*\*([^*\n]+)\*\*\*/g, '<strong><em>$1</em></strong>')
        .replace(/\*\*([^*\n]+)\*\*/g, '<strong>$1</strong>')
        .replace(/\*([^*\n]+)\*/g, '<em>$1</em>')
        // Wrap runs of consecutive <li> lines in a single <ul>
        .replace(/(?:<li>.*<\/li>\n?)+/g, (items) => {
            const trailing = items.endsWith('\n') ? '\n' : '';
            return `<ul>\n${items.trimEnd()}\n</ul>${trailing}`;
        })
        // Blank lines → paragraph breaks
        .replace(/\n\n+/g, '</p>\n<p>')
        // Remaining newlines → <br>, except right after block-level tags
        .replace(/(?<!<\/(?:h[1-3]|li|ul|blockquote)>|<hr>|<ul>|<\/?p>)\n/g, '<br>\n');
    return `<p>\n${restore(html)}\n</p>`;
}
69
/**
 * Build the full HTML page for a shared read.
 *
 * Every value interpolated into the page (title, description, source URL,
 * canonical URL) is fully HTML-escaped, including double quotes, because
 * these values originate from fetched third-party pages and are placed in
 * both text and attribute positions. The source link falls back to `#`
 * unless the stored URL is http(s).
 *
 * @param share - Share row; reads `id`, `url`, `title`, `content`, `tokens`,
 *   `view_count` and `expires_at`
 * @returns {string} Complete HTML document
 */
function buildHtmlPage(share) {
    const escapeHtml = (value) => String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
    const pageTitle = escapeHtml(share.title ? `${share.title} — WebPeel` : 'Shared Read — WebPeel');
    const socialTitle = escapeHtml(share.title || 'Shared Read');
    const description = escapeHtml(share.content.slice(0, 200).replace(/\n/g, ' ')) + '…';
    const canonicalUrl = escapeHtml(`${getBaseUrl()}/s/${share.id}`);
    const originalUrl = escapeHtml(share.url);
    // Only link out to http(s) sources; anything else is shown as text but not linked.
    const originalHref = /^https?:\/\//i.test(String(share.url).trim()) ? originalUrl : '#';
    const bodyHtml = markdownToHtml(share.content);
    return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${pageTitle}</title>
<meta name="description" content="${description}">
<link rel="canonical" href="${canonicalUrl}">

<!-- Open Graph -->
<meta property="og:title" content="${socialTitle}">
<meta property="og:description" content="${description}">
<meta property="og:url" content="${canonicalUrl}">
<meta property="og:type" content="article">
<meta property="og:site_name" content="WebPeel">

<!-- Twitter Card -->
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="${socialTitle}">
<meta name="twitter:description" content="${description}">
<meta name="twitter:site" content="@webpeel">

<style>
*, *::before, *::after { box-sizing: border-box; }
:root {
--bg: #0f0f11;
--surface: #1a1a1f;
--border: #2a2a35;
--text: #e4e4e7;
--muted: #71717a;
--accent: #818cf8;
--link: #6366f1;
--code-bg: #1e1e28;
--max-w: 760px;
}
html { background: var(--bg); }
body {
margin: 0;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
font-size: 16px;
line-height: 1.75;
color: var(--text);
background: var(--bg);
padding: 0 16px;
}

/* Top bar */
.topbar {
display: flex;
align-items: center;
justify-content: space-between;
max-width: var(--max-w);
margin: 0 auto;
padding: 20px 0 16px;
border-bottom: 1px solid var(--border);
gap: 12px;
flex-wrap: wrap;
}
.logo { display: flex; align-items: center; gap: 8px; text-decoration: none; color: var(--text); }
.logo-mark {
width: 28px; height: 28px;
background: var(--accent);
border-radius: 7px;
display: flex; align-items: center; justify-content: center;
font-size: 14px; font-weight: 700; color: #fff; letter-spacing: -0.5px;
}
.logo-name { font-weight: 600; font-size: 15px; }
.source-link {
font-size: 12px; color: var(--muted);
text-decoration: none; max-width: 300px;
overflow: hidden; text-overflow: ellipsis; white-space: nowrap;
}
.source-link:hover { color: var(--accent); }

/* Main content */
main {
max-width: var(--max-w);
margin: 32px auto;
}
h1 { font-size: 1.75rem; font-weight: 700; line-height: 1.25; margin: 0 0 24px; color: var(--text); }
h2 { font-size: 1.35rem; font-weight: 600; margin: 28px 0 12px; color: var(--text); }
h3 { font-size: 1.1rem; font-weight: 600; margin: 24px 0 10px; color: var(--text); }
p { margin: 0 0 16px; color: #d4d4d8; }
a { color: var(--link); text-decoration: underline; text-underline-offset: 3px; }
a:hover { color: var(--accent); }
ul, ol { padding-left: 24px; margin: 0 0 16px; }
li { margin-bottom: 6px; color: #d4d4d8; }
blockquote {
border-left: 3px solid var(--accent); margin: 16px 0;
padding: 4px 16px; color: var(--muted); font-style: italic;
}
code {
background: var(--code-bg); padding: 2px 6px; border-radius: 4px;
font-family: 'Fira Code', 'Cascadia Code', monospace; font-size: 0.875em;
color: var(--accent);
}
pre {
background: var(--code-bg); padding: 16px; border-radius: 8px;
overflow-x: auto; margin: 16px 0; border: 1px solid var(--border);
}
pre code { background: none; padding: 0; color: #e4e4e7; }
img { max-width: 100%; border-radius: 6px; margin: 8px 0; }
hr { border: none; border-top: 1px solid var(--border); margin: 28px 0; }

/* Meta info */
.meta {
display: flex; gap: 16px; flex-wrap: wrap;
font-size: 12px; color: var(--muted);
margin-bottom: 28px; padding-bottom: 20px;
border-bottom: 1px solid var(--border);
}
.meta span { display: flex; align-items: center; gap: 4px; }

/* Footer */
footer {
max-width: var(--max-w);
margin: 48px auto 32px;
padding-top: 24px;
border-top: 1px solid var(--border);
display: flex;
align-items: center;
justify-content: space-between;
gap: 12px;
flex-wrap: wrap;
}
.footer-left { font-size: 13px; color: var(--muted); }
.cta-btn {
display: inline-flex; align-items: center; gap: 6px;
padding: 8px 16px; border-radius: 8px;
background: var(--accent); color: #fff;
font-size: 13px; font-weight: 600;
text-decoration: none; transition: opacity 0.15s;
}
.cta-btn:hover { opacity: 0.85; color: #fff; }

@media (max-width: 600px) {
h1 { font-size: 1.4rem; }
.topbar { flex-direction: column; align-items: flex-start; }
}
</style>
</head>
<body>
<!-- Top bar -->
<div class="topbar">
<a class="logo" href="https://webpeel.dev" target="_blank" rel="noopener">
<div class="logo-mark">W</div>
<span class="logo-name">WebPeel</span>
</a>
<a class="source-link" href="${originalHref}" target="_blank" rel="noopener noreferrer" title="${originalUrl}">
↗ ${originalUrl}
</a>
</div>

<!-- Article -->
<main>
${share.title ? `<h1>${escapeHtml(share.title)}</h1>` : ''}
<div class="meta">
${share.tokens != null ? `<span>📝 ${share.tokens.toLocaleString()} tokens</span>` : ''}
<span>👁 ${(share.view_count ?? 0).toLocaleString()} views</span>
<span>⏰ Expires ${new Date(share.expires_at).toLocaleDateString('en-US', { month: 'short', day: 'numeric', year: 'numeric' })}</span>
</div>
<div class="content">
${bodyHtml}
</div>
</main>

<!-- Footer -->
<footer>
<span class="footer-left">Powered by <a href="https://webpeel.dev" target="_blank" rel="noopener">WebPeel</a> — clean web reading for humans &amp; AI</span>
<a class="cta-btn" href="https://app.webpeel.dev" target="_blank" rel="noopener">
Try WebPeel →
</a>
</footer>
</body>
</html>`;
}
255
// ─── Rate limit: 50 shares per day per user ───────────────────────────────────
// In-memory counters keyed by user id. Each value is { count, resetAt } where
// resetAt is an epoch-millisecond timestamp. State lives in this process only.
const shareRateMap = new Map();
const SHARE_DAY_LIMIT = 50;
const SHARE_DAY_MS = 24 * 60 * 60 * 1000;
// Expired windows are swept at most this often so the map cannot grow forever.
const SHARE_SWEEP_INTERVAL_MS = 60 * 60 * 1000;
let shareRateLastSweep = 0;
/**
 * Record one share attempt for `userId` and report whether it is allowed.
 *
 * A window starts on the user's first attempt and lasts SHARE_DAY_MS. Up to
 * SHARE_DAY_LIMIT attempts are allowed per window. Once the limit is reached
 * the counter stops increasing (it saturates at the limit) and every further
 * attempt in the window is rejected.
 *
 * @param userId Key identifying the caller.
 * @returns {{ allowed: boolean, remaining: number }} `remaining` is the number
 *   of attempts still available in the current window (0 when rejected).
 */
function checkShareRateLimit(userId) {
    const now = Date.now();
    // Opportunistic cleanup: drop windows that have already ended so users who
    // never come back do not pin memory for the lifetime of the process.
    if (now - shareRateLastSweep >= SHARE_SWEEP_INTERVAL_MS) {
        shareRateLastSweep = now;
        for (const [key, value] of shareRateMap) {
            if (value.resetAt < now) {
                shareRateMap.delete(key);
            }
        }
    }
    const entry = shareRateMap.get(userId);
    if (!entry || entry.resetAt < now) {
        // First attempt, or the previous window has ended: start a new window.
        shareRateMap.set(userId, { count: 1, resetAt: now + SHARE_DAY_MS });
        return { allowed: true, remaining: SHARE_DAY_LIMIT - 1 };
    }
    if (entry.count >= SHARE_DAY_LIMIT) {
        // Already at the limit: reject without growing the counter further.
        return { allowed: false, remaining: 0 };
    }
    entry.count += 1;
    return { allowed: true, remaining: SHARE_DAY_LIMIT - entry.count };
}
272
// ─── Public router: GET /s/:id ────────────────────────────────────────────────
/**
 * Create the public router that serves a stored share at GET /s/:id.
 *
 * The request is handed to the next handler (`next()`) when the id is not a
 * 9-character base64url token, when no database pool is configured, or when
 * no unexpired share with that id exists.
 *
 * The representation is chosen from the Accept header: JSON when it contains
 * `application/json`, raw markdown when it contains `text/markdown`, otherwise
 * the rendered HTML page. Because the body depends on Accept, `Vary: Accept`
 * is emitted so caches never serve one representation in place of another.
 *
 * @param pool Database pool exposing `query(sql, params)`; may be falsy.
 * @returns The configured router.
 */
export function createSharePublicRouter(pool) {
    const router = Router();
    router.get('/s/:id', async (req, res, next) => {
        const id = String(req.params['id'] || '');
        // Only intercept valid-looking 9-char base64url IDs
        if (!/^[A-Za-z0-9_-]{9}$/.test(id)) {
            return next();
        }
        if (!pool) {
            // No DB: fall through to reader's search handler
            return next();
        }
        try {
            // Fetch share and increment view count atomically
            const result = await pool.query(`UPDATE shared_reads
                SET view_count = view_count + 1
                WHERE id = $1
                AND expires_at > NOW()
                RETURNING id, url, title, content, tokens, created_at, expires_at, view_count`, [id]);
            if (result.rows.length === 0) {
                // Not found or expired — fall through to reader's /s/* search handler
                return next();
            }
            const share = result.rows[0];
            // The same URL yields JSON, markdown or HTML depending on Accept, and the
            // HTML variant is publicly cacheable; caches must key on Accept.
            res.vary('Accept');
            // Respond based on Accept header
            const accept = req.headers.accept || '';
            if (accept.includes('application/json')) {
                return res.json({
                    success: true,
                    shareId: share.id,
                    url: share.url,
                    title: share.title,
                    content: share.content,
                    tokens: share.tokens,
                    viewCount: share.view_count,
                    createdAt: share.created_at,
                    expiresAt: share.expires_at,
                });
            }
            if (accept.includes('text/markdown')) {
                res.setHeader('Content-Type', 'text/markdown; charset=utf-8');
                return res.send(share.content);
            }
            // Default: return HTML page (also covers text/html)
            // Override CSP to allow inline styles for the share page
            res.setHeader('Content-Security-Policy', "default-src 'none'; style-src 'unsafe-inline'; img-src https: data:; " +
                "frame-ancestors 'none'; base-uri 'none'; form-action 'none'; " +
                "script-src 'none'");
            res.setHeader('Content-Type', 'text/html; charset=utf-8');
            res.setHeader('Cache-Control', 'public, max-age=60, stale-while-revalidate=300');
            return res.send(buildHtmlPage(share));
        }
        catch (err) {
            // A thrown value is not guaranteed to be an Error instance.
            const detail = err instanceof Error ? err.message : String(err);
            log.error('Share GET error:', detail);
            return res.status(500).json({
                success: false,
                error: { type: 'server_error', message: 'Failed to retrieve share' },
            });
        }
    });
    return router;
}
335
// ─── Protected router: POST /v1/share ─────────────────────────────────────────
/**
 * Create the authenticated router that stores a new share via POST /v1/share.
 *
 * Request body: `{ url, content?, title? }`. When `content` is a non-empty
 * string it is stored as-is; otherwise the URL is fetched with `peel()`.
 *
 * Responses produced here:
 *   201 share created (`shareId`, `shareUrl`)
 *   400 missing/invalid `url`, or URL rejected by SSRF validation
 *   401 no authenticated user on the request
 *   422 fetch failed or produced no content
 *   429 per-user daily share limit exceeded
 *   500 no unique id could be allocated
 *   503 no database pool configured
 * Any other failure (database error, unexpected validation error) is logged
 * and forwarded with `next(err)` so the request is always terminated instead
 * of being left hanging on a rejected promise.
 *
 * @param pool Database pool exposing `query(sql, params)`; may be falsy.
 * @returns The configured router.
 */
export function createShareRouter(pool) {
    const router = Router();
    router.post('/v1/share', async (req, res, next) => {
        try {
            // Require auth
            const userId = req.auth?.keyInfo?.accountId || req.user?.userId;
            if (!userId) {
                return res.status(401).json({
                    success: false,
                    error: {
                        type: 'unauthorized',
                        message: 'Authentication required to create share links.',
                        hint: 'Include an Authorization: Bearer <token> header.',
                        docs: 'https://webpeel.dev/docs/errors#unauthorized',
                    },
                });
            }
            if (!pool) {
                return res.status(503).json({
                    success: false,
                    error: {
                        type: 'unavailable',
                        message: 'Share links require a PostgreSQL database.',
                    },
                });
            }
            // Rate limit: 50 shares per day per user
            const { allowed, remaining } = checkShareRateLimit(userId);
            res.setHeader('X-Share-Limit-Remaining', remaining.toString());
            if (!allowed) {
                return res.status(429).json({
                    success: false,
                    error: {
                        type: 'rate_limited',
                        message: 'Share limit exceeded. Maximum 50 shares per day.',
                        hint: 'Wait until tomorrow to create more share links.',
                    },
                });
            }
            // A request without a parsed body is treated as an empty one so it gets
            // the regular 400 below rather than a destructuring TypeError.
            const { url, content, title } = req.body ?? {};
            if (!url || typeof url !== 'string') {
                return res.status(400).json({
                    success: false,
                    error: {
                        type: 'invalid_request',
                        message: 'url is required.',
                    },
                });
            }
            // SECURITY: SSRF validation
            try {
                validateUrlForSSRF(url);
            }
            catch (err) {
                if (err instanceof SSRFError) {
                    return res.status(400).json({
                        success: false,
                        error: { type: 'ssrf_blocked', message: err.message },
                    });
                }
                // Anything else is unexpected; the outer catch forwards it.
                throw err;
            }
            let shareContent;
            let shareTitle;
            let tokens;
            if (content && typeof content === 'string') {
                // Content provided directly (user already fetched it in dashboard)
                shareContent = content;
                // Only accept a string title; other JSON types are ignored.
                shareTitle = typeof title === 'string' ? title : undefined;
                // Whitespace-separated word count, stored in the tokens column.
                tokens = content.split(/\s+/).filter(Boolean).length;
            }
            else {
                // Fetch the URL via peel()
                try {
                    const result = await peel(url, { timeout: 15000, noEscalate: true });
                    shareContent = result.content || '';
                    shareTitle = result.title;
                    tokens = result.tokens ?? undefined;
                }
                catch (err) {
                    const detail = err instanceof Error ? err.message : String(err);
                    log.error('Share: peel failed', { url, error: detail });
                    return res.status(422).json({
                        success: false,
                        error: {
                            type: 'fetch_failed',
                            message: `Failed to fetch URL: ${detail}`,
                        },
                    });
                }
            }
            if (!shareContent) {
                return res.status(422).json({
                    success: false,
                    error: {
                        type: 'empty_content',
                        message: 'No content could be extracted from the URL.',
                    },
                });
            }
            // Allocate an id and insert, retrying on collisions (extremely rare).
            let shareId = '';
            for (let attempt = 0; attempt < 5 && !shareId; attempt++) {
                const candidate = generateShareId();
                const exists = await pool.query('SELECT 1 FROM shared_reads WHERE id = $1', [candidate]);
                if (exists.rows.length > 0) {
                    continue;
                }
                try {
                    // Insert share into DB
                    await pool.query(`INSERT INTO shared_reads (id, url, title, content, tokens, created_by)
                        VALUES ($1, $2, $3, $4, $5, $6)`, [candidate, url, shareTitle ?? null, shareContent, tokens ?? null, userId]);
                    shareId = candidate;
                }
                catch (err) {
                    // 23505 is PostgreSQL's unique_violation: another request took this
                    // id between the SELECT and the INSERT, so try a new candidate.
                    // NOTE(review): assumes the only unique constraint that can fire
                    // here is the one on shared_reads.id; confirm against the schema.
                    if (err?.code !== '23505') {
                        throw err;
                    }
                }
            }
            if (!shareId) {
                return res.status(500).json({
                    success: false,
                    error: { type: 'server_error', message: 'Failed to generate unique share ID.' },
                });
            }
            const shareUrl = `${getBaseUrl()}/s/${shareId}`;
            log.info('Share created', { shareId, url, userId });
            return res.status(201).json({
                success: true,
                shareId,
                shareUrl,
            });
        }
        catch (err) {
            // Without this, a rejection inside the async handler would never reach
            // the framework's error handling and the client would wait forever.
            const detail = err instanceof Error ? err.message : String(err);
            log.error('Share POST error:', detail);
            return next(err);
        }
    });
    return router;
}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Transcript export endpoint
3
+ *
4
+ * GET /v1/transcript/export?url=<youtube_url>&format=srt|txt|md|json
5
+ *
6
+ * Downloads a YouTube transcript in the requested format with appropriate
7
+ * Content-Type and Content-Disposition headers.
8
+ */
9
+ import { Router } from 'express';
10
+ export declare function createTranscriptExportRouter(): Router;
@@ -0,0 +1,178 @@
1
+ /**
2
+ * Transcript export endpoint
3
+ *
4
+ * GET /v1/transcript/export?url=<youtube_url>&format=srt|txt|md|json
5
+ *
6
+ * Downloads a YouTube transcript in the requested format with appropriate
7
+ * Content-Type and Content-Disposition headers.
8
+ */
9
+ import { Router } from 'express';
10
+ import crypto from 'crypto';
11
+ import { getYouTubeTranscript, parseYouTubeUrl } from '../../core/youtube.js';
12
+ import { toSRT, toTXT, toMarkdownDoc, toJSON } from '../../core/transcript-export.js';
13
// Content-Type and file extension per export format. This table is the single
// source of truth for which formats exist.
const FORMAT_META = {
    srt: { contentType: 'text/plain; charset=utf-8', ext: 'srt' },
    txt: { contentType: 'text/plain; charset=utf-8', ext: 'txt' },
    md: { contentType: 'text/markdown; charset=utf-8', ext: 'md' },
    json: { contentType: 'application/json; charset=utf-8', ext: 'json' },
};
// Valid export format values, derived from FORMAT_META so the two can never
// drift apart. Order follows the table: srt, txt, md, json.
const VALID_FORMATS = Object.keys(FORMAT_META);
22
/**
 * Sanitise a video title so it is safe to use as a filename.
 *
 * Keeps only word characters, whitespace, `.` and `-`; turns each run of
 * whitespace into a single underscore; truncates to 80 characters; then trims
 * leading and trailing underscores and dots. Trimming dots prevents results
 * such as `..`, hidden-file names (`.foo`) and names with a trailing dot.
 *
 * @param title Raw title; anything that is not a non-empty string is ignored.
 * @param fallback Name used when no title is given or nothing usable remains.
 * @returns The sanitised base name (without extension), or `fallback`.
 */
function safeFilename(title, fallback) {
    // A non-string title would have no .replace(); fall back instead of throwing.
    const source = typeof title === 'string' && title ? title : fallback;
    const base = String(source ?? '')
        .replace(/[^\w\s.-]/g, '')
        .replace(/\s+/g, '_')
        .replace(/_+/g, '_')
        .slice(0, 80)
        // Trim after truncation so a cut that lands on a separator is cleaned too.
        .replace(/^[_.]+|[_.]+$/g, '');
    return base || fallback;
}
35
export function createTranscriptExportRouter() {
    const router = Router();
    // Send a JSON failure body: { success, error, ...extra, requestId }.
    const fail = (req, res, status, error, extra = {}) => {
        res.status(status).json({
            success: false,
            error,
            ...extra,
            requestId: req.requestId || crypto.randomUUID(),
        });
    };
    // One serializer per export format; each receives the extracted transcript.
    const serializers = {
        srt: (t) => toSRT(t.segments),
        txt: (t) => toTXT(t.segments),
        md: (t) => toMarkdownDoc(t.title, t.channel, t.segments),
        json: (t) => toJSON(t),
    };
    /**
     * GET /v1/transcript/export
     *
     * Query params:
     *   url      - YouTube video URL (required)
     *   format   - Output format: srt | txt | md | json (default: txt)
     *   language - Preferred transcript language code, e.g. "en" (default: "en")
     *
     * Response:
     *   - 200 file download with appropriate Content-Type / Content-Disposition
     *   - 400 invalid URL or format
     *   - 401 missing API key
     *   - 404 video has no captions
     *   - 500 extraction failure
     */
    router.get('/v1/transcript/export', async (req, res) => {
        // ── Auth ───────────────────────────────────────────────────────────────
        const callerId = req.auth?.keyInfo?.accountId || req.user?.userId;
        if (!callerId) {
            fail(req, res, 401, {
                type: 'authentication_required',
                message: 'API key required. Get one at https://app.webpeel.dev/keys',
                hint: 'Pass your API key in the Authorization header: Bearer <key>',
                docs: 'https://webpeel.dev/docs/errors#authentication-required',
            });
            return;
        }
        const { url, format, language } = req.query;
        // ── URL validation ─────────────────────────────────────────────────────
        if (!url || typeof url !== 'string') {
            fail(req, res, 400, {
                type: 'invalid_request',
                message: 'Missing or invalid "url" parameter. Pass a YouTube URL: GET /v1/transcript/export?url=https://youtu.be/VIDEO_ID&format=srt',
                docs: 'https://webpeel.dev/docs/errors#invalid-request',
            });
            return;
        }
        const videoId = parseYouTubeUrl(url);
        if (!videoId) {
            fail(req, res, 400, {
                type: 'invalid_youtube_url',
                message: 'The provided URL is not a valid YouTube video URL.',
                hint: 'Supported formats: https://www.youtube.com/watch?v=VIDEO_ID, https://youtu.be/VIDEO_ID',
                docs: 'https://webpeel.dev/docs/errors#invalid-youtube-url',
            });
            return;
        }
        // ── Format validation ──────────────────────────────────────────────────
        // A missing or non-string format falls back to "txt".
        const fmt = (typeof format === 'string' ? format : 'txt').toLowerCase();
        if (!VALID_FORMATS.includes(fmt)) {
            fail(req, res, 400, {
                type: 'invalid_format',
                message: `Invalid format "${format}". Supported formats: ${VALID_FORMATS.join(', ')}`,
                docs: 'https://webpeel.dev/docs/errors#invalid-format',
            });
            return;
        }
        // ── Extract transcript ─────────────────────────────────────────────────
        try {
            const lang = typeof language === 'string' ? language : 'en';
            const transcript = await getYouTubeTranscript(url, { language: lang });
            // ── Convert to requested format ──────────────────────────────────────
            const content = serializers[fmt](transcript);
            const { contentType, ext } = FORMAT_META[fmt];
            const filename = safeFilename(transcript.title, videoId);
            res.setHeader('Content-Type', contentType);
            res.setHeader('Content-Disposition', `attachment; filename="${filename}.${ext}"`);
            res.send(content);
        }
        catch (error) {
            // Known failure modes are recognised by substrings of the error message.
            const message = error?.message ?? 'Failed to extract YouTube transcript';
            if (message.includes('No captions available')) {
                fail(req, res, 404, {
                    type: 'no_captions',
                    message: 'No captions are available for this video. The video may not have subtitles enabled.',
                    hint: 'Try a different video or check if captions are enabled on YouTube.',
                    docs: 'https://webpeel.dev/docs/errors#no-captions',
                }, { videoId });
                return;
            }
            if (message.includes('Not a valid YouTube URL')) {
                fail(req, res, 400, {
                    type: 'invalid_youtube_url',
                    message,
                    docs: 'https://webpeel.dev/docs/errors#invalid-youtube-url',
                });
                return;
            }
            fail(req, res, 500, {
                type: 'extraction_failed',
                message: 'Failed to extract YouTube transcript. The video page may have changed or the video is unavailable.',
                // The raw error text is only exposed outside production.
                hint: process.env.NODE_ENV !== 'production' ? message : undefined,
                docs: 'https://webpeel.dev/docs/errors#extraction-failed',
            });
        }
    });
    return router;
}
@@ -671,7 +671,10 @@ export function createUserRouter() {
671
671
  router.post('/v1/keys', jwtAuth, async (req, res) => {
672
672
  try {
673
673
  const { userId } = req.user;
674
- const { name, expiresIn } = req.body;
674
+ const { name, expiresIn, scope } = req.body;
675
+ // Validate scope — only allow known values; default to 'full'
676
+ const validScopes = ['full', 'read', 'restricted'];
677
+ const keyScope = validScopes.includes(scope) ? scope : 'full';
675
678
  // Parse optional expiration
676
679
  const expiresAt = parseExpiresIn(expiresIn);
677
680
  // Generate API key
@@ -679,15 +682,16 @@ export function createUserRouter() {
679
682
  const keyHash = crypto.createHash('sha256').update(apiKey).digest('hex');
680
683
  const keyPrefix = PostgresAuthStore.getKeyPrefix(apiKey);
681
684
  // Store API key
682
- const result = await pool.query(`INSERT INTO api_keys (user_id, key_hash, key_prefix, name, expires_at)
683
- VALUES ($1, $2, $3, $4, $5)
684
- RETURNING id, key_prefix, name, created_at, expires_at`, [userId, keyHash, keyPrefix, name || 'Unnamed Key', expiresAt]);
685
+ const result = await pool.query(`INSERT INTO api_keys (user_id, key_hash, key_prefix, name, expires_at, scope)
686
+ VALUES ($1, $2, $3, $4, $5, $6)
687
+ RETURNING id, key_prefix, name, created_at, expires_at, scope`, [userId, keyHash, keyPrefix, name || 'Unnamed Key', expiresAt, keyScope]);
685
688
  const key = result.rows[0];
686
689
  res.status(201).json({
687
690
  id: key.id,
688
691
  key: apiKey, // SECURITY: Only returned once
689
692
  prefix: key.key_prefix,
690
693
  name: key.name,
694
+ scope: key.scope,
691
695
  createdAt: key.created_at,
692
696
  expiresAt: key.expires_at,
693
697
  });
@@ -731,7 +735,7 @@ export function createUserRouter() {
731
735
  router.get('/v1/keys', jwtAuth, async (req, res) => {
732
736
  try {
733
737
  const { userId } = req.user;
734
- const result = await pool.query(`SELECT id, key_prefix, name, is_active, created_at, last_used_at, expires_at
738
+ const result = await pool.query(`SELECT id, key_prefix, name, is_active, created_at, last_used_at, expires_at, scope
735
739
  FROM api_keys
736
740
  WHERE user_id = $1
737
741
  ORDER BY created_at DESC`, [userId]);
@@ -745,6 +749,7 @@ export function createUserRouter() {
745
749
  prefix: key.key_prefix,
746
750
  name: key.name,
747
751
  isActive: key.is_active,
752
+ scope: key.scope || 'full',
748
753
  createdAt: key.created_at,
749
754
  lastUsedAt: key.last_used_at,
750
755
  expiresAt: key.expires_at,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.20.21",
3
+ "version": "0.21.0",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",