webpeel 0.20.21 → 0.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/fetch.js +51 -0
- package/dist/core/dns-cache.js +15 -0
- package/dist/core/http-fetch.js +4 -2
- package/dist/core/pipeline.d.ts +2 -0
- package/dist/core/pipeline.js +14 -2
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/server/app.js +32 -4
- package/dist/server/auth-store.d.ts +1 -0
- package/dist/server/middleware/auth.d.ts +7 -0
- package/dist/server/middleware/auth.js +4 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/pg-auth-store.d.ts +7 -0
- package/dist/server/pg-auth-store.js +32 -0
- package/dist/server/routes/activity.js +5 -0
- package/dist/server/routes/fetch.js +22 -0
- package/dist/server/routes/reader.js +5 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/users.js +10 -5
- package/package.json +1 -1
|
@@ -205,6 +205,55 @@ export async function runFetch(url, options) {
|
|
|
205
205
|
console.error(usageCheck.message);
|
|
206
206
|
process.exit(1);
|
|
207
207
|
}
|
|
208
|
+
// ── --export: YouTube transcript download (early exit) ────────────────
|
|
209
|
+
if (options.export) {
|
|
210
|
+
const exportFmt = options.export.toLowerCase();
|
|
211
|
+
const validExportFmts = ['srt', 'txt', 'md', 'json'];
|
|
212
|
+
if (!validExportFmts.includes(exportFmt)) {
|
|
213
|
+
console.error(`Error: --export format must be one of: ${validExportFmts.join(', ')}`);
|
|
214
|
+
process.exit(1);
|
|
215
|
+
}
|
|
216
|
+
const exportCfg = loadConfig();
|
|
217
|
+
const exportApiKey = exportCfg.apiKey || process.env.WEBPEEL_API_KEY;
|
|
218
|
+
const exportApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
|
|
219
|
+
if (!exportApiKey) {
|
|
220
|
+
console.error('No API key configured. Run: webpeel auth <your-key>');
|
|
221
|
+
console.error('Get a free key at: https://app.webpeel.dev/keys');
|
|
222
|
+
process.exit(2);
|
|
223
|
+
}
|
|
224
|
+
const lang = options.language || 'en';
|
|
225
|
+
const exportUrl = `${exportApiUrl}/v1/transcript/export?url=${encodeURIComponent(url)}&format=${exportFmt}&language=${lang}`;
|
|
226
|
+
const exportRes = await fetch(exportUrl, {
|
|
227
|
+
headers: { 'Authorization': `Bearer ${exportApiKey}` },
|
|
228
|
+
signal: AbortSignal.timeout(options.timeout ?? 90000),
|
|
229
|
+
});
|
|
230
|
+
if (!exportRes.ok) {
|
|
231
|
+
const errBody = await exportRes.text().catch(() => '');
|
|
232
|
+
try {
|
|
233
|
+
const errJson = JSON.parse(errBody);
|
|
234
|
+
const msg = errJson?.error?.message || errJson?.message || exportRes.statusText;
|
|
235
|
+
console.error(`Export failed (${exportRes.status}): ${msg}`);
|
|
236
|
+
}
|
|
237
|
+
catch {
|
|
238
|
+
console.error(`Export failed (${exportRes.status}): ${exportRes.statusText}`);
|
|
239
|
+
}
|
|
240
|
+
process.exit(1);
|
|
241
|
+
}
|
|
242
|
+
const exportContent = await exportRes.text();
|
|
243
|
+
if (options.output) {
|
|
244
|
+
writeFileSync(options.output, exportContent, 'utf-8');
|
|
245
|
+
if (!options.silent) {
|
|
246
|
+
console.error(`Transcript saved to: ${options.output}`);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
else {
|
|
250
|
+
process.stdout.write(exportContent);
|
|
251
|
+
if (!exportContent.endsWith('\n'))
|
|
252
|
+
process.stdout.write('\n');
|
|
253
|
+
}
|
|
254
|
+
await cleanup();
|
|
255
|
+
process.exit(0);
|
|
256
|
+
}
|
|
208
257
|
// Check cache first (before spinner/network)
|
|
209
258
|
// Default: 5m TTL for all CLI fetches unless --no-cache is set
|
|
210
259
|
let cacheTtlMs;
|
|
@@ -1077,6 +1126,8 @@ export function registerFetchCommands(program) {
|
|
|
1077
1126
|
.option('--content-only', 'Output only the raw content field (no metadata, no JSON wrapper) — ideal for piping to LLMs')
|
|
1078
1127
|
.option('--progress', 'Show engine escalation steps (simple → browser → stealth) with timing')
|
|
1079
1128
|
.option('--stdin', 'Read HTML from stdin instead of fetching a URL — converts to markdown')
|
|
1129
|
+
.option('--export <format>', 'Export YouTube transcript in the given format: srt, txt, md, json')
|
|
1130
|
+
.option('--output <file>', 'Write output to a file instead of stdout')
|
|
1080
1131
|
.action(async (url, options) => {
|
|
1081
1132
|
if (options.stdin) {
|
|
1082
1133
|
await runStdin(options);
|
package/dist/core/dns-cache.js
CHANGED
|
@@ -40,6 +40,21 @@ const DNS_WARMUP_DOMAINS = [
|
|
|
40
40
|
'tools.ietf.org',
|
|
41
41
|
'unicode.org',
|
|
42
42
|
'www.bbc.com',
|
|
43
|
+
'bbc.co.uk',
|
|
44
|
+
'stripe.com',
|
|
45
|
+
'docs.stripe.com',
|
|
46
|
+
'vuejs.org',
|
|
47
|
+
'angular.io',
|
|
48
|
+
'www.washingtonpost.com',
|
|
49
|
+
'www.theguardian.com',
|
|
50
|
+
'techcrunch.com',
|
|
51
|
+
'www.wired.com',
|
|
52
|
+
'arstechnica.com',
|
|
53
|
+
'docs.google.com',
|
|
54
|
+
'drive.google.com',
|
|
55
|
+
'www.notion.so',
|
|
56
|
+
'www.producthunt.com',
|
|
57
|
+
'www.crunchbase.com',
|
|
43
58
|
'news.google.com',
|
|
44
59
|
'www.youtube.com',
|
|
45
60
|
'example.com',
|
package/dist/core/http-fetch.js
CHANGED
|
@@ -42,11 +42,13 @@ const HTTP_STATUS_TEXT = {
|
|
|
42
42
|
// ── HTTP connection pool ──────────────────────────────────────────────────────
|
|
43
43
|
function createHttpPool() {
|
|
44
44
|
return new Agent({
|
|
45
|
-
connections:
|
|
46
|
-
pipelining:
|
|
45
|
+
connections: 50,
|
|
46
|
+
pipelining: 10,
|
|
47
47
|
keepAliveTimeout: 60000,
|
|
48
48
|
keepAliveMaxTimeout: 60000,
|
|
49
49
|
allowH2: true,
|
|
50
|
+
headersTimeout: 10000,
|
|
51
|
+
bodyTimeout: 30000,
|
|
50
52
|
connect: {
|
|
51
53
|
lookup: cachedLookup,
|
|
52
54
|
},
|
package/dist/core/pipeline.d.ts
CHANGED
|
@@ -75,6 +75,8 @@ export interface PipelineContext {
|
|
|
75
75
|
domainApiHandled?: boolean;
|
|
76
76
|
/** True when server returned pre-rendered markdown (Content-Type: text/markdown) */
|
|
77
77
|
serverMarkdown?: boolean;
|
|
78
|
+
/** True when HTTP fetch completed in < 500ms — enables fast path (skip challenge detection) */
|
|
79
|
+
fastPath?: boolean;
|
|
78
80
|
/** Non-fatal warnings accumulated during the pipeline run */
|
|
79
81
|
warnings: string[];
|
|
80
82
|
/** Raw HTML size in characters (measured from fetched content before any conversion) */
|
package/dist/core/pipeline.js
CHANGED
|
@@ -399,7 +399,17 @@ export async function fetchContent(ctx) {
|
|
|
399
399
|
}
|
|
400
400
|
throw fetchError;
|
|
401
401
|
}
|
|
402
|
-
ctx.timer.end('fetch');
|
|
402
|
+
const fetchDuration = ctx.timer.end('fetch');
|
|
403
|
+
// Fast path: if a plain HTTP fetch completed quickly with real HTML content,
|
|
404
|
+
// mark it so post-processing can skip expensive heuristics (challenge detection).
|
|
405
|
+
// Only applies to non-browser fetches that succeeded with HTML content.
|
|
406
|
+
if (fetchDuration < 500 &&
|
|
407
|
+
!ctx.render &&
|
|
408
|
+
fetchResult.statusCode === 200 &&
|
|
409
|
+
(fetchResult.contentType || '').includes('html') &&
|
|
410
|
+
(fetchResult.html?.length || 0) > 200) {
|
|
411
|
+
ctx.fastPath = true;
|
|
412
|
+
}
|
|
403
413
|
// Auto-scroll to load lazy content, then grab fresh HTML
|
|
404
414
|
if (needsAutoScroll && fetchResult.page) {
|
|
405
415
|
try {
|
|
@@ -927,7 +937,9 @@ export async function postProcess(ctx) {
|
|
|
927
937
|
// === Challenge / bot-protection page detection ===
|
|
928
938
|
// If the extracted content looks like a challenge page (not real content),
|
|
929
939
|
// mark it and try the search-as-proxy fallback to get the real info.
|
|
930
|
-
|
|
940
|
+
// Fast path: skip this check for HTTP fetches that completed in < 500ms —
|
|
941
|
+
// a fast successful response is virtually never a challenge page.
|
|
942
|
+
if (!ctx.fastPath && ctx.content && ctx.content.length < 2000) {
|
|
931
943
|
const lowerContent = ctx.content.toLowerCase();
|
|
932
944
|
const challengeSignals = [
|
|
933
945
|
'please verify you are a human',
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transcript export format converters.
|
|
3
|
+
*
|
|
4
|
+
* Converts YouTube transcript data into SRT, plain text, Markdown, or JSON
|
|
5
|
+
* so users can download transcripts in their preferred format.
|
|
6
|
+
*/
|
|
7
|
+
import type { TranscriptSegment, YouTubeTranscript } from './youtube.js';
|
|
8
|
+
export type { TranscriptSegment, YouTubeTranscript as TranscriptResult };
|
|
9
|
+
/**
|
|
10
|
+
* Format seconds as an SRT timestamp: HH:MM:SS,mmm
|
|
11
|
+
*
|
|
12
|
+
* @example formatSRTTimestamp(3661.5) → "01:01:01,500"
|
|
13
|
+
*/
|
|
14
|
+
export declare function formatSRTTimestamp(seconds: number): string;
|
|
15
|
+
/**
|
|
16
|
+
* Convert transcript segments to SRT subtitle format.
|
|
17
|
+
*
|
|
18
|
+
* SRT structure:
|
|
19
|
+
* ```
|
|
20
|
+
* 1
|
|
21
|
+
* 00:00:01,000 --> 00:00:04,500
|
|
22
|
+
* We're no strangers to love
|
|
23
|
+
*
|
|
24
|
+
* 2
|
|
25
|
+
* 00:00:04,500 --> 00:00:08,000
|
|
26
|
+
* You know the rules and so do I
|
|
27
|
+
* ```
|
|
28
|
+
*/
|
|
29
|
+
export declare function toSRT(segments: TranscriptSegment[]): string;
|
|
30
|
+
/**
|
|
31
|
+
* Convert transcript segments to plain text.
|
|
32
|
+
* One line per segment, no timestamps.
|
|
33
|
+
*/
|
|
34
|
+
export declare function toTXT(segments: TranscriptSegment[]): string;
|
|
35
|
+
/**
|
|
36
|
+
* Convert transcript to a clean Markdown document.
|
|
37
|
+
* Includes title, channel header, and timestamped transcript lines.
|
|
38
|
+
*
|
|
39
|
+
* @param title - Video title
|
|
40
|
+
* @param channel - Channel name
|
|
41
|
+
* @param segments - Transcript segments
|
|
42
|
+
*/
|
|
43
|
+
export declare function toMarkdownDoc(title: string, channel: string, segments: TranscriptSegment[]): string;
|
|
44
|
+
/**
|
|
45
|
+
* Convert full transcript result to pretty-printed JSON.
|
|
46
|
+
*/
|
|
47
|
+
export declare function toJSON(result: YouTubeTranscript): string;
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transcript export format converters.
|
|
3
|
+
*
|
|
4
|
+
* Converts YouTube transcript data into SRT, plain text, Markdown, or JSON
|
|
5
|
+
* so users can download transcripts in their preferred format.
|
|
6
|
+
*/
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
// Timestamp helpers
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
/**
 * Format seconds as an SRT timestamp: HH:MM:SS,mmm
 *
 * Negative and non-finite inputs (NaN, ±Infinity, undefined) are clamped to
 * zero so the result is always a well-formed timestamp instead of
 * "NaN:NaN:NaN,NaN".
 *
 * @param {number} seconds - Offset in seconds (fractions are kept to the millisecond).
 * @returns {string} Timestamp such as "01:01:01,500"; the hour field widens past two digits when needed.
 * @example formatSRTTimestamp(3661.5) → "01:01:01,500"
 */
export function formatSRTTimestamp(seconds) {
    const numeric = Number(seconds);
    const safeSeconds = Number.isFinite(numeric) ? Math.max(0, numeric) : 0;
    // Round once at millisecond resolution so carries propagate upward
    // (e.g. 59.9996 s becomes 00:01:00,000 rather than 00:00:59,1000).
    const totalMs = Math.round(safeSeconds * 1000);
    const ms = totalMs % 1000;
    const totalSec = Math.floor(totalMs / 1000);
    const s = totalSec % 60;
    const totalMin = Math.floor(totalSec / 60);
    const m = totalMin % 60;
    const h = Math.floor(totalMin / 60);
    const pad = (value, width) => String(value).padStart(width, '0');
    return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)},${pad(ms, 3)}`;
}
|
|
28
|
+
/**
 * Format seconds as a human-readable timestamp: M:SS or H:MM:SS
 *
 * Fractions of a second are truncated. Negative and non-finite inputs
 * (NaN, ±Infinity, undefined) are treated as zero so the result never
 * contains "NaN".
 *
 * @param {number} seconds - Offset in seconds.
 * @returns {string} "M:SS" below one hour, otherwise "H:MM:SS".
 * @example formatReadableTimestamp(125.3) → "2:05"
 */
function formatReadableTimestamp(seconds) {
    const numeric = Number(seconds);
    const totalSec = Number.isFinite(numeric) ? Math.floor(Math.max(0, numeric)) : 0;
    const h = Math.floor(totalSec / 3600);
    const m = Math.floor((totalSec % 3600) / 60);
    const s = totalSec % 60;
    const ss = String(s).padStart(2, '0');
    if (h > 0) {
        // Minutes are zero-padded only when an hour field precedes them.
        return `${h}:${String(m).padStart(2, '0')}:${ss}`;
    }
    return `${m}:${ss}`;
}
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
// Export functions
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
/**
 * Convert transcript segments to SRT subtitle format.
 *
 * SRT structure:
 * ```
 * 1
 * 00:00:01,000 --> 00:00:04,500
 * We're no strangers to love
 *
 * 2
 * 00:00:04,500 --> 00:00:08,000
 * You know the rules and so do I
 * ```
 *
 * @param {Array<{start: number, duration: number, text: string}>} segments - Transcript segments, in playback order.
 * @returns {string} SRT document with cues separated by one blank line; '' when there are no segments.
 */
export function toSRT(segments) {
    if (!Array.isArray(segments) || segments.length === 0)
        return '';
    return segments
        .map((seg, i) => {
            // A missing or non-numeric start/duration must not leak "NaN" into the cue timing line.
            const startSec = Number.isFinite(Number(seg.start)) ? Number(seg.start) : 0;
            const durationSec = Number.isFinite(Number(seg.duration)) ? Math.max(0, Number(seg.duration)) : 0;
            const start = formatSRTTimestamp(startSec);
            const end = formatSRTTimestamp(startSec + durationSec);
            // A blank line terminates an SRT cue, so blank lines inside the caption text
            // would make players read the remainder as a new (malformed) cue. Drop them.
            const text = String(seg.text ?? '')
                .split(/\r?\n/)
                .filter((line) => line.trim() !== '')
                .join('\n');
            return `${i + 1}\n${start} --> ${end}\n${text}`;
        })
        .join('\n\n');
}
|
|
71
|
+
/**
 * Render a transcript as plain text.
 *
 * Emits the text of every segment on its own line; timing information is dropped.
 *
 * @param {Array<{text: string}>} segments - Transcript segments.
 * @returns {string} Segment texts joined with '\n' ('' for an empty list).
 */
export function toTXT(segments) {
    const texts = segments.map(({ text }) => text);
    return texts.join('\n');
}
|
|
78
|
+
/**
 * Build a Markdown document from a transcript.
 *
 * Layout: an H1 with the title (falls back to "Transcript"), an optional
 * bold "Channel" line, a "## Transcript" heading, then one
 * `**[timestamp]** text` line per segment.
 *
 * @param title - Video title
 * @param channel - Channel name; the channel line is omitted when falsy
 * @param segments - Transcript segments
 * @returns The Markdown document, lines joined with '\n'
 */
export function toMarkdownDoc(title, channel, segments) {
    const heading = [`# ${title || 'Transcript'}`, ''];
    const byline = channel ? [`**Channel:** ${channel}`, ''] : [];
    const transcriptLines = [];
    for (const { start, text } of segments) {
        transcriptLines.push(`**[${formatReadableTimestamp(start)}]** ${text}`);
    }
    return [...heading, ...byline, '## Transcript', '', ...transcriptLines].join('\n');
}
|
|
102
|
+
/**
 * Serialize the full transcript result as human-readable JSON.
 *
 * @param result - Transcript object to serialize
 * @returns JSON text indented with two spaces per level
 */
export function toJSON(result) {
    const indentWidth = 2;
    return JSON.stringify(result, null, indentWidth);
}
|
package/dist/server/app.js
CHANGED
|
@@ -31,6 +31,7 @@ import { createAskRouter } from './routes/ask.js';
|
|
|
31
31
|
import { createMcpRouter } from './routes/mcp.js';
|
|
32
32
|
import { createDoRouter } from './routes/do.js';
|
|
33
33
|
import { createYouTubeRouter } from './routes/youtube.js';
|
|
34
|
+
import { createTranscriptExportRouter } from './routes/transcript-export.js';
|
|
34
35
|
import { createDeepFetchRouter } from './routes/deep-fetch.js';
|
|
35
36
|
import { createWatchRouter } from './routes/watch.js';
|
|
36
37
|
import pg from 'pg';
|
|
@@ -38,6 +39,7 @@ import { createScreenshotRouter } from './routes/screenshot.js';
|
|
|
38
39
|
import { createDemoRouter } from './routes/demo.js';
|
|
39
40
|
import { createPlaygroundRouter } from './routes/playground.js';
|
|
40
41
|
import { createReaderRouter } from './routes/reader.js';
|
|
42
|
+
import { createSharePublicRouter, createShareRouter } from './routes/share.js';
|
|
41
43
|
import { createJobQueue } from './job-queue.js';
|
|
42
44
|
import { createCompatRouter } from './routes/compat.js';
|
|
43
45
|
import { createCrawlRouter } from './routes/crawl.js';
|
|
@@ -46,6 +48,7 @@ import { createExtractRouter } from './routes/extract.js';
|
|
|
46
48
|
import { createAgentRouter } from './routes/agent.js';
|
|
47
49
|
import { createSessionRouter } from './routes/session.js';
|
|
48
50
|
import { createSentryHooks } from './sentry.js';
|
|
51
|
+
import { requireScope } from './middleware/scope-guard.js';
|
|
49
52
|
import { warmup, cleanup as cleanupFetcher } from '../core/fetcher.js';
|
|
50
53
|
import { registerPremiumHooks } from './premium/index.js';
|
|
51
54
|
import { readFileSync } from 'fs';
|
|
@@ -239,6 +242,9 @@ export function createApp(config = {}) {
|
|
|
239
242
|
app.use(createDemoRouter());
|
|
240
243
|
// Playground endpoint — unauthenticated, CORS-locked to webpeel.dev/localhost
|
|
241
244
|
app.use('/v1/playground', createPlaygroundRouter());
|
|
245
|
+
// Public share endpoint — GET /s/:id (no auth required, must be before reader router)
|
|
246
|
+
// Registered first so valid share IDs are served before falling through to reader's /s/* search
|
|
247
|
+
app.use(createSharePublicRouter(pool));
|
|
242
248
|
// Zero-auth reader API — Jina-style URL prefix (/r/URL) and search (/s/query)
|
|
243
249
|
// Must be BEFORE auth middleware so no API key is required
|
|
244
250
|
app.use(createReaderRouter());
|
|
@@ -246,17 +252,33 @@ export function createApp(config = {}) {
|
|
|
246
252
|
app.use(createAuthMiddleware(authStore));
|
|
247
253
|
// Apply rate limiting middleware globally
|
|
248
254
|
app.use(createRateLimitMiddleware(rateLimiter));
|
|
255
|
+
// Share links — POST /v1/share (auth required, after auth middleware)
|
|
256
|
+
app.use(createShareRouter(pool));
|
|
249
257
|
// First-class native routes (registered before compat so they take precedence)
|
|
250
|
-
|
|
251
|
-
|
|
258
|
+
//
|
|
259
|
+
// Scope guards enforce API key permission scopes; JWT sessions bypass them.
|
|
260
|
+
// For routers with relative paths: app.use(path, guard, router) ← prefix stripped, relative paths match
|
|
261
|
+
// For routers with absolute paths: app.use(path, guard) then app.use(router) ← guard at path, router sees full path
|
|
262
|
+
// /v1/crawl — full or read only (router uses relative paths)
|
|
263
|
+
app.use('/v1/crawl', requireScope('full', 'read'), createCrawlRouter(jobQueue));
|
|
264
|
+
// /v1/map — full or read only (router uses relative paths)
|
|
265
|
+
app.use('/v1/map', requireScope('full', 'read'), createMapRouter());
|
|
266
|
+
// Compat routes (/v1/scrape, /v1/search) — all scopes allowed, no guard needed
|
|
252
267
|
app.use(createCompatRouter(jobQueue));
|
|
253
268
|
app.use(createSessionRouter());
|
|
254
269
|
app.use(createExtractRouter());
|
|
270
|
+
// /v1/deep-fetch — full or read only (router uses absolute paths, guard before router)
|
|
271
|
+
app.use('/v1/deep-fetch', requireScope('full', 'read'));
|
|
255
272
|
app.use(createDeepFetchRouter());
|
|
273
|
+
// /v1/watch — full or read only (router uses absolute paths, guard before router)
|
|
256
274
|
if (pool) {
|
|
275
|
+
app.use('/v1/watch', requireScope('full', 'read'));
|
|
257
276
|
app.use(createWatchRouter(pool));
|
|
258
277
|
}
|
|
278
|
+
// /v1/fetch, /v1/search — all scopes allowed, no guard needed
|
|
259
279
|
app.use(createFetchRouter(authStore));
|
|
280
|
+
// /v1/screenshot — full or read only (router uses absolute paths, guard before router)
|
|
281
|
+
app.use('/v1/screenshot', requireScope('full', 'read'));
|
|
260
282
|
app.use(createScreenshotRouter(authStore));
|
|
261
283
|
app.use(createSearchRouter(authStore));
|
|
262
284
|
app.use(createBillingPortalRouter(pool));
|
|
@@ -266,6 +288,8 @@ export function createApp(config = {}) {
|
|
|
266
288
|
app.use(createActivityRouter(authStore));
|
|
267
289
|
app.use(createCLIUsageRouter());
|
|
268
290
|
app.use(createJobsRouter(jobQueue, authStore));
|
|
291
|
+
// /v1/batch — full or read only (router uses absolute paths, guard before router)
|
|
292
|
+
app.use('/v1/batch', requireScope('full', 'read'));
|
|
269
293
|
app.use(createBatchRouter(jobQueue));
|
|
270
294
|
// Deprecation headers for declining endpoints
|
|
271
295
|
app.use('/v1/answer', (_req, res, next) => {
|
|
@@ -274,11 +298,15 @@ export function createApp(config = {}) {
|
|
|
274
298
|
res.set('Link', '</v1/ask>; rel="successor-version"');
|
|
275
299
|
next();
|
|
276
300
|
});
|
|
301
|
+
// /v1/answer, /v1/ask — all scopes allowed, no guard needed
|
|
277
302
|
app.use(createAnswerRouter());
|
|
278
303
|
app.use(createAskRouter());
|
|
279
|
-
|
|
280
|
-
app.use('/v1/
|
|
304
|
+
// /v1/agent — full or read only (router uses relative paths)
|
|
305
|
+
app.use('/v1/agent', requireScope('full', 'read'), createAgentRouter());
|
|
306
|
+
// /v1/do — full only (router uses relative paths; admin-level operation)
|
|
307
|
+
app.use('/v1/do', requireScope('full'), createDoRouter());
|
|
281
308
|
app.use(createYouTubeRouter());
|
|
309
|
+
app.use(createTranscriptExportRouter());
|
|
282
310
|
app.use(createMcpRouter(authStore, pool));
|
|
283
311
|
// 404 handler
|
|
284
312
|
app.use((req, res) => {
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import { Request, Response, NextFunction } from 'express';
|
|
13
13
|
import { AuthStore, ApiKeyInfo } from '../auth-store.js';
|
|
14
|
+
import { KeyScope } from '../pg-auth-store.js';
|
|
14
15
|
import '../types.js';
|
|
15
16
|
declare global {
|
|
16
17
|
namespace Express {
|
|
@@ -22,6 +23,12 @@ declare global {
|
|
|
22
23
|
softLimited: boolean;
|
|
23
24
|
extraUsageAvailable: boolean;
|
|
24
25
|
};
|
|
26
|
+
/**
|
|
27
|
+
* Permission scope of the authenticated API key.
|
|
28
|
+
* Undefined when authenticated via JWT (dashboard session) — JWT users bypass scope enforcement.
|
|
29
|
+
* Set to 'full' | 'read' | 'restricted' for API key requests.
|
|
30
|
+
*/
|
|
31
|
+
keyScope?: KeyScope;
|
|
25
32
|
}
|
|
26
33
|
}
|
|
27
34
|
}
|
|
@@ -204,6 +204,10 @@ export function createAuthMiddleware(authStore) {
|
|
|
204
204
|
softLimited,
|
|
205
205
|
extraUsageAvailable,
|
|
206
206
|
};
|
|
207
|
+
// Attach API key scope (only for API key auth; JWT users get undefined = bypass scope checks)
|
|
208
|
+
if (keyInfo) {
|
|
209
|
+
req.keyScope = keyInfo.scope || 'full';
|
|
210
|
+
}
|
|
207
211
|
next();
|
|
208
212
|
}
|
|
209
213
|
catch (_error) {
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scope enforcement middleware for API key permission scoping.
|
|
3
|
+
*
|
|
4
|
+
* Keys have one of three scopes:
|
|
5
|
+
* 'full' — all endpoints (default)
|
|
6
|
+
* 'read' — read/fetch operations only
|
|
7
|
+
* 'restricted' — /v1/scrape only (for limited sharing)
|
|
8
|
+
*
|
|
9
|
+
* JWT-authenticated requests (dashboard sessions) bypass scope enforcement:
|
|
10
|
+
* req.keyScope is undefined for JWT requests, which are always allowed through.
|
|
11
|
+
*/
|
|
12
|
+
import { Request, Response, NextFunction } from 'express';
|
|
13
|
+
import { KeyScope } from '../pg-auth-store.js';
|
|
14
|
+
/**
|
|
15
|
+
* Middleware factory that enforces API key scope.
|
|
16
|
+
* Pass the set of scopes that are permitted to access the guarded route.
|
|
17
|
+
*
|
|
18
|
+
* @example
|
|
19
|
+
* // Only full-access keys may manage billing:
|
|
20
|
+
* router.post('/v1/billing', requireScope('full'), handler);
|
|
21
|
+
*
|
|
22
|
+
 * // Full, read, and restricted keys may all scrape:
|
|
23
|
+
* app.use('/v1/scrape', requireScope('full', 'read', 'restricted'), scrapeRouter);
|
|
24
|
+
*/
|
|
25
|
+
export declare function requireScope(...allowedScopes: KeyScope[]): (req: Request, res: Response, next: NextFunction) => void;
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scope enforcement middleware for API key permission scoping.
|
|
3
|
+
*
|
|
4
|
+
* Keys have one of three scopes:
|
|
5
|
+
* 'full' — all endpoints (default)
|
|
6
|
+
* 'read' — read/fetch operations only
|
|
7
|
+
* 'restricted' — /v1/scrape only (for limited sharing)
|
|
8
|
+
*
|
|
9
|
+
* JWT-authenticated requests (dashboard sessions) bypass scope enforcement:
|
|
10
|
+
* req.keyScope is undefined for JWT requests, which are always allowed through.
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Build an Express-style middleware that only lets through API keys whose
 * scope is one of `allowedScopes`.
 *
 * Requests without `req.keyScope` (undefined) are passed straight through.
 * A request whose scope is not in the list is answered with HTTP 403 and an
 * `insufficient_scope` error body; `next` is not called in that case.
 *
 * @example
 * // Only full-access keys may manage billing:
 * router.post('/v1/billing', requireScope('full'), handler);
 *
 * // Full, read, and restricted keys may all scrape:
 * app.use('/v1/scrape', requireScope('full', 'read', 'restricted'), scrapeRouter);
 */
export function requireScope(...allowedScopes) {
    return (req, res, next) => {
        const scope = req.keyScope;
        // No scope attached: nothing to enforce for this request.
        if (scope === undefined) {
            return next();
        }
        const permitted = allowedScopes.includes(scope);
        if (permitted) {
            next();
            return;
        }
        const error = {
            type: 'insufficient_scope',
            message: `This API key has '${req.keyScope}' scope. This endpoint requires: ${allowedScopes.join(' or ')}.`,
            docs: 'https://webpeel.dev/docs/authentication#scopes',
            hint: 'Create a new API key with the required scope in your dashboard.',
        };
        res.status(403).json({
            success: false,
            error,
            requestId: req.requestId,
        });
    };
}
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
* Uses SHA-256 hashing for API keys and tracks WEEKLY usage with burst limits
|
|
4
4
|
*/
|
|
5
5
|
import { AuthStore, ApiKeyInfo } from './auth-store.js';
|
|
6
|
+
/** Permission scope for an API key */
|
|
7
|
+
export type KeyScope = 'full' | 'read' | 'restricted';
|
|
6
8
|
export interface WeeklyUsageInfo {
|
|
7
9
|
week: string;
|
|
8
10
|
basicCount: number;
|
|
@@ -39,6 +41,11 @@ export interface ExtraUsageInfo {
|
|
|
39
41
|
export declare class PostgresAuthStore implements AuthStore {
|
|
40
42
|
private pool;
|
|
41
43
|
constructor(connectionString?: string);
|
|
44
|
+
/**
|
|
45
|
+
* Run idempotent schema migrations.
|
|
46
|
+
* Safe to call on every startup — all statements use IF NOT EXISTS / IF EXISTS.
|
|
47
|
+
*/
|
|
48
|
+
private ensureSchema;
|
|
42
49
|
/**
|
|
43
50
|
* Hash API key with SHA-256
|
|
44
51
|
* SECURITY: Never store raw API keys
|
|
@@ -34,6 +34,36 @@ export class PostgresAuthStore {
|
|
|
34
34
|
idleTimeoutMillis: 30000,
|
|
35
35
|
connectionTimeoutMillis: 10000,
|
|
36
36
|
});
|
|
37
|
+
// Run idempotent schema migrations on startup
|
|
38
|
+
this.ensureSchema().catch(err => console.error('[pg-auth-store] Schema migration failed:', err));
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Run idempotent schema migrations.
|
|
42
|
+
* Safe to call on every startup — all statements use IF NOT EXISTS / IF EXISTS.
|
|
43
|
+
*/
|
|
44
|
+
async ensureSchema() {
|
|
45
|
+
await this.pool.query(`
|
|
46
|
+
ALTER TABLE api_keys ADD COLUMN IF NOT EXISTS scope VARCHAR(20) NOT NULL DEFAULT 'full';
|
|
47
|
+
`);
|
|
48
|
+
await this.pool.query(`
|
|
49
|
+
CREATE TABLE IF NOT EXISTS shared_reads (
|
|
50
|
+
id VARCHAR(12) PRIMARY KEY,
|
|
51
|
+
url TEXT NOT NULL,
|
|
52
|
+
title TEXT,
|
|
53
|
+
content TEXT NOT NULL,
|
|
54
|
+
tokens INTEGER,
|
|
55
|
+
created_by TEXT REFERENCES users(id),
|
|
56
|
+
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
57
|
+
expires_at TIMESTAMPTZ DEFAULT NOW() + INTERVAL '30 days',
|
|
58
|
+
view_count INTEGER DEFAULT 0
|
|
59
|
+
);
|
|
60
|
+
`);
|
|
61
|
+
await this.pool.query(`
|
|
62
|
+
CREATE INDEX IF NOT EXISTS idx_shared_reads_url ON shared_reads(url);
|
|
63
|
+
`);
|
|
64
|
+
await this.pool.query(`
|
|
65
|
+
CREATE INDEX IF NOT EXISTS idx_shared_reads_created_by ON shared_reads(created_by);
|
|
66
|
+
`);
|
|
37
67
|
}
|
|
38
68
|
/**
|
|
39
69
|
* Hash API key with SHA-256
|
|
@@ -108,6 +138,7 @@ export class PostgresAuthStore {
|
|
|
108
138
|
ak.user_id,
|
|
109
139
|
ak.key_prefix,
|
|
110
140
|
ak.name,
|
|
141
|
+
ak.scope,
|
|
111
142
|
u.tier,
|
|
112
143
|
u.rate_limit,
|
|
113
144
|
u.weekly_limit,
|
|
@@ -129,6 +160,7 @@ export class PostgresAuthStore {
|
|
|
129
160
|
rateLimit: row.rate_limit,
|
|
130
161
|
accountId: row.user_id,
|
|
131
162
|
createdAt: new Date(),
|
|
163
|
+
scope: row.scope || 'full',
|
|
132
164
|
};
|
|
133
165
|
}
|
|
134
166
|
catch (error) {
|
|
@@ -34,10 +34,12 @@ export function createActivityRouter(authStore) {
|
|
|
34
34
|
SELECT
|
|
35
35
|
id,
|
|
36
36
|
url,
|
|
37
|
+
endpoint,
|
|
37
38
|
method,
|
|
38
39
|
status_code,
|
|
39
40
|
processing_time_ms,
|
|
40
41
|
tokens_used,
|
|
42
|
+
ip_address,
|
|
41
43
|
created_at
|
|
42
44
|
FROM usage_logs
|
|
43
45
|
WHERE user_id = $1
|
|
@@ -49,11 +51,14 @@ export function createActivityRouter(authStore) {
|
|
|
49
51
|
const requests = result.rows.map((row) => ({
|
|
50
52
|
id: row.id,
|
|
51
53
|
url: row.url || 'N/A',
|
|
54
|
+
endpoint: row.endpoint || null,
|
|
52
55
|
status: (row.status_code >= 200 && row.status_code < 300) ? 'success' : 'error',
|
|
53
56
|
responseTime: row.processing_time_ms || 0,
|
|
54
57
|
mode: row.method || 'basic',
|
|
55
58
|
timestamp: row.created_at,
|
|
56
59
|
tokensUsed: row.tokens_used || null,
|
|
60
|
+
ipAddress: row.ip_address || null,
|
|
61
|
+
statusCode: row.status_code || null,
|
|
57
62
|
}));
|
|
58
63
|
res.json({ requests });
|
|
59
64
|
}
|