webpeel 0.21.0 → 0.21.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/dns-cache.js +15 -0
- package/dist/core/http-fetch.js +4 -2
- package/dist/core/pipeline.d.ts +2 -0
- package/dist/core/pipeline.js +14 -2
- package/dist/server/app.js +17 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +210 -0
- package/dist/server/routes/fetch.js +22 -0
- package/dist/server/routes/reader.js +5 -0
- package/package.json +1 -1
package/dist/core/dns-cache.js
CHANGED
|
@@ -40,6 +40,21 @@ const DNS_WARMUP_DOMAINS = [
|
|
|
40
40
|
'tools.ietf.org',
|
|
41
41
|
'unicode.org',
|
|
42
42
|
'www.bbc.com',
|
|
43
|
+
'bbc.co.uk',
|
|
44
|
+
'stripe.com',
|
|
45
|
+
'docs.stripe.com',
|
|
46
|
+
'vuejs.org',
|
|
47
|
+
'angular.io',
|
|
48
|
+
'www.washingtonpost.com',
|
|
49
|
+
'www.theguardian.com',
|
|
50
|
+
'techcrunch.com',
|
|
51
|
+
'www.wired.com',
|
|
52
|
+
'arstechnica.com',
|
|
53
|
+
'docs.google.com',
|
|
54
|
+
'drive.google.com',
|
|
55
|
+
'www.notion.so',
|
|
56
|
+
'www.producthunt.com',
|
|
57
|
+
'www.crunchbase.com',
|
|
43
58
|
'news.google.com',
|
|
44
59
|
'www.youtube.com',
|
|
45
60
|
'example.com',
|
package/dist/core/http-fetch.js
CHANGED
|
@@ -42,11 +42,13 @@ const HTTP_STATUS_TEXT = {
|
|
|
42
42
|
// ── HTTP connection pool ──────────────────────────────────────────────────────
|
|
43
43
|
function createHttpPool() {
|
|
44
44
|
return new Agent({
|
|
45
|
-
connections:
|
|
46
|
-
pipelining:
|
|
45
|
+
connections: 50,
|
|
46
|
+
pipelining: 10,
|
|
47
47
|
keepAliveTimeout: 60000,
|
|
48
48
|
keepAliveMaxTimeout: 60000,
|
|
49
49
|
allowH2: true,
|
|
50
|
+
headersTimeout: 10000,
|
|
51
|
+
bodyTimeout: 30000,
|
|
50
52
|
connect: {
|
|
51
53
|
lookup: cachedLookup,
|
|
52
54
|
},
|
package/dist/core/pipeline.d.ts
CHANGED
|
@@ -75,6 +75,8 @@ export interface PipelineContext {
|
|
|
75
75
|
domainApiHandled?: boolean;
|
|
76
76
|
/** True when server returned pre-rendered markdown (Content-Type: text/markdown) */
|
|
77
77
|
serverMarkdown?: boolean;
|
|
78
|
+
/** True when HTTP fetch completed in < 500ms — enables fast path (skip challenge detection) */
|
|
79
|
+
fastPath?: boolean;
|
|
78
80
|
/** Non-fatal warnings accumulated during the pipeline run */
|
|
79
81
|
warnings: string[];
|
|
80
82
|
/** Raw HTML size in characters (measured from fetched content before any conversion) */
|
package/dist/core/pipeline.js
CHANGED
|
@@ -399,7 +399,17 @@ export async function fetchContent(ctx) {
|
|
|
399
399
|
}
|
|
400
400
|
throw fetchError;
|
|
401
401
|
}
|
|
402
|
-
ctx.timer.end('fetch');
|
|
402
|
+
const fetchDuration = ctx.timer.end('fetch');
|
|
403
|
+
// Fast path: if a plain HTTP fetch completed quickly with real HTML content,
|
|
404
|
+
// mark it so post-processing can skip expensive heuristics (challenge detection).
|
|
405
|
+
// Only applies to non-browser fetches that succeeded with HTML content.
|
|
406
|
+
if (fetchDuration < 500 &&
|
|
407
|
+
!ctx.render &&
|
|
408
|
+
fetchResult.statusCode === 200 &&
|
|
409
|
+
(fetchResult.contentType || '').includes('html') &&
|
|
410
|
+
(fetchResult.html?.length || 0) > 200) {
|
|
411
|
+
ctx.fastPath = true;
|
|
412
|
+
}
|
|
403
413
|
// Auto-scroll to load lazy content, then grab fresh HTML
|
|
404
414
|
if (needsAutoScroll && fetchResult.page) {
|
|
405
415
|
try {
|
|
@@ -927,7 +937,9 @@ export async function postProcess(ctx) {
|
|
|
927
937
|
// === Challenge / bot-protection page detection ===
|
|
928
938
|
// If the extracted content looks like a challenge page (not real content),
|
|
929
939
|
// mark it and try the search-as-proxy fallback to get the real info.
|
|
930
|
-
|
|
940
|
+
// Fast path: skip this check for HTTP fetches that completed in < 500ms —
|
|
941
|
+
// a fast successful response is virtually never a challenge page.
|
|
942
|
+
if (!ctx.fastPath && ctx.content && ctx.content.length < 2000) {
|
|
931
943
|
const lowerContent = ctx.content.toLowerCase();
|
|
932
944
|
const challengeSignals = [
|
|
933
945
|
'please verify you are a human',
|
package/dist/server/app.js
CHANGED
|
@@ -49,6 +49,7 @@ import { createAgentRouter } from './routes/agent.js';
|
|
|
49
49
|
import { createSessionRouter } from './routes/session.js';
|
|
50
50
|
import { createSentryHooks } from './sentry.js';
|
|
51
51
|
import { requireScope } from './middleware/scope-guard.js';
|
|
52
|
+
import { createCacheWarmRouter, startCacheWarmer } from './routes/cache-warm.js';
|
|
52
53
|
import { warmup, cleanup as cleanupFetcher } from '../core/fetcher.js';
|
|
53
54
|
import { registerPremiumHooks } from './premium/index.js';
|
|
54
55
|
import { readFileSync } from 'fs';
|
|
@@ -238,6 +239,9 @@ export function createApp(config = {}) {
|
|
|
238
239
|
app.get('/docs/api', (_req, res) => {
|
|
239
240
|
res.redirect('/openapi.yaml');
|
|
240
241
|
});
|
|
242
|
+
// Internal cache-warming endpoints — unauthenticated (self-auth via bearer token)
|
|
243
|
+
// Must be BEFORE auth middleware so the CF Worker can call without an API key
|
|
244
|
+
app.use(createCacheWarmRouter(pool));
|
|
241
245
|
// Demo endpoint — unauthenticated, must be before auth middleware
|
|
242
246
|
app.use(createDemoRouter());
|
|
243
247
|
// Playground endpoint — unauthenticated, CORS-locked to webpeel.dev/localhost
|
|
@@ -398,9 +402,22 @@ export function startServer(config = {}) {
|
|
|
398
402
|
void warmup().catch((error) => {
|
|
399
403
|
log.warn('Browser warmup failed', { error: error instanceof Error ? error.message : String(error) });
|
|
400
404
|
});
|
|
405
|
+
// Build a dedicated pool for the cache warmer (separate from the app pool inside createApp)
|
|
406
|
+
const warmerPool = process.env.DATABASE_URL
|
|
407
|
+
? new pg.Pool({
|
|
408
|
+
connectionString: process.env.DATABASE_URL,
|
|
409
|
+
ssl: process.env.NODE_ENV === 'production' ? { rejectUnauthorized: true } : false,
|
|
410
|
+
max: 2, // small pool — warmer only needs occasional queries
|
|
411
|
+
})
|
|
412
|
+
: null;
|
|
401
413
|
const server = app.listen(port, () => {
|
|
402
414
|
log.info(`WebPeel API server listening on port ${port}`);
|
|
403
415
|
log.info(`Health: http://localhost:${port}/health Fetch: /v1/fetch Search: /v1/search`);
|
|
416
|
+
// Start cache warmer only when opted-in
|
|
417
|
+
if (process.env.ENABLE_CACHE_WARM === 'true') {
|
|
418
|
+
log.info('Cache warming enabled (ENABLE_CACHE_WARM=true)');
|
|
419
|
+
startCacheWarmer(warmerPool);
|
|
420
|
+
}
|
|
404
421
|
});
|
|
405
422
|
// Graceful shutdown
|
|
406
423
|
const shutdown = () => {
|
|
package/dist/server/routes/cache-warm.d.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cache Pre-Warming Routes
|
|
3
|
+
*
|
|
4
|
+
* GET /internal/popular-urls — Top N URLs fetched in the last 24h (for CF Worker)
|
|
5
|
+
* GET /internal/cache-status — Current warmer state (warmed URLs, last run time)
|
|
6
|
+
*
|
|
7
|
+
* Both routes are mounted BEFORE auth middleware so they're accessible internally.
|
|
8
|
+
* /internal/popular-urls is protected by CACHE_WARM_SECRET bearer token when set.
|
|
9
|
+
*
|
|
10
|
+
* startCacheWarmer() — server-side self-warming (opt-in via ENABLE_CACHE_WARM=true)
|
|
11
|
+
*/
|
|
12
|
+
import { Router } from 'express';
|
|
13
|
+
import pg from 'pg';
|
|
14
|
+
export declare function createCacheWarmRouter(pool: pg.Pool | null): Router;
|
|
15
|
+
/**
|
|
16
|
+
* startCacheWarmer — server-side self-warming (fallback when no CF Worker).
|
|
17
|
+
*
|
|
18
|
+
* Every `intervalMs` (default 2 min):
|
|
19
|
+
* 1. Queries /internal/popular-urls (via the DB, not HTTP)
|
|
20
|
+
* 2. Fetches each URL through /r/<url> with concurrency 5
|
|
21
|
+
* 3. Updates warmerState for /internal/cache-status
|
|
22
|
+
*
|
|
23
|
+
* Only started if ENABLE_CACHE_WARM=true.
|
|
24
|
+
*/
|
|
25
|
+
export declare function startCacheWarmer(pool: pg.Pool | null, intervalMs?: number): void;
|
|
package/dist/server/routes/cache-warm.js
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cache Pre-Warming Routes
|
|
3
|
+
*
|
|
4
|
+
* GET /internal/popular-urls — Top N URLs fetched in the last 24h (for CF Worker)
|
|
5
|
+
* GET /internal/cache-status — Current warmer state (warmed URLs, last run time)
|
|
6
|
+
*
|
|
7
|
+
* Both routes are mounted BEFORE auth middleware so they're accessible internally.
|
|
8
|
+
* /internal/popular-urls is protected by CACHE_WARM_SECRET bearer token when set.
|
|
9
|
+
*
|
|
10
|
+
* startCacheWarmer() — server-side self-warming (opt-in via ENABLE_CACHE_WARM=true)
|
|
11
|
+
*/
|
|
12
|
+
import { Router } from 'express';
|
|
13
|
+
import { createLogger } from '../logger.js';
|
|
14
|
+
const log = createLogger('cache-warm');
|
|
15
|
+
// ─── Fallback URLs ────────────────────────────────────────────────────────────
|
|
16
|
+
// Used when the DB has no usage data yet (new deployment / empty DB).
|
|
17
|
+
const FALLBACK_URLS = [
|
|
18
|
+
'https://www.bbc.com/news',
|
|
19
|
+
'https://news.ycombinator.com',
|
|
20
|
+
'https://github.com',
|
|
21
|
+
'https://en.wikipedia.org/wiki/Main_Page',
|
|
22
|
+
'https://www.reuters.com',
|
|
23
|
+
'https://techcrunch.com',
|
|
24
|
+
'https://stripe.com/docs',
|
|
25
|
+
'https://developer.mozilla.org',
|
|
26
|
+
'https://react.dev',
|
|
27
|
+
'https://docs.python.org/3/',
|
|
28
|
+
'https://nodejs.org/en/docs',
|
|
29
|
+
'https://www.npmjs.com',
|
|
30
|
+
'https://vercel.com/docs',
|
|
31
|
+
'https://nextjs.org/docs',
|
|
32
|
+
'https://tailwindcss.com/docs',
|
|
33
|
+
'https://www.typescriptlang.org/docs/',
|
|
34
|
+
'https://docs.render.com',
|
|
35
|
+
'https://cloudflare.com/docs',
|
|
36
|
+
'https://aws.amazon.com/documentation/',
|
|
37
|
+
'https://docs.github.com',
|
|
38
|
+
'https://www.nytimes.com',
|
|
39
|
+
'https://www.theguardian.com',
|
|
40
|
+
'https://arstechnica.com',
|
|
41
|
+
'https://www.wired.com',
|
|
42
|
+
'https://www.bloomberg.com/technology',
|
|
43
|
+
'https://lobste.rs',
|
|
44
|
+
'https://www.producthunt.com',
|
|
45
|
+
'https://stackoverflow.com',
|
|
46
|
+
'https://css-tricks.com',
|
|
47
|
+
'https://web.dev',
|
|
48
|
+
];
|
|
49
|
+
// ─── In-memory warmer state ───────────────────────────────────────────────────
|
|
50
|
+
const warmerState = {
|
|
51
|
+
warmedUrls: new Set(),
|
|
52
|
+
lastWarmTime: null,
|
|
53
|
+
};
|
|
54
|
+
// ─── Router ──────────────────────────────────────────────────────────────────
|
|
55
|
+
export function createCacheWarmRouter(pool) {
|
|
56
|
+
const router = Router();
|
|
57
|
+
// GET /internal/popular-urls
|
|
58
|
+
router.get('/internal/popular-urls', async (req, res) => {
|
|
59
|
+
// Auth check — if CACHE_WARM_SECRET is set, require it
|
|
60
|
+
const secret = process.env.CACHE_WARM_SECRET;
|
|
61
|
+
if (secret) {
|
|
62
|
+
const authHeader = req.headers['authorization'] || '';
|
|
63
|
+
const token = authHeader.startsWith('Bearer ') ? authHeader.slice(7) : '';
|
|
64
|
+
if (token !== secret) {
|
|
65
|
+
res.status(401).json({
|
|
66
|
+
success: false,
|
|
67
|
+
error: { type: 'unauthorized', message: 'Invalid or missing bearer token.' },
|
|
68
|
+
});
|
|
69
|
+
return;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
let urls = [];
|
|
73
|
+
// Query DB if available
|
|
74
|
+
if (pool) {
|
|
75
|
+
try {
|
|
76
|
+
const result = await pool.query(`
|
|
77
|
+
SELECT url, COUNT(*) as fetch_count
|
|
78
|
+
FROM usage_logs
|
|
79
|
+
WHERE created_at > NOW() - INTERVAL '24 hours'
|
|
80
|
+
AND url IS NOT NULL
|
|
81
|
+
AND status_code >= 200 AND status_code < 300
|
|
82
|
+
AND url NOT LIKE '%localhost%'
|
|
83
|
+
AND url NOT LIKE '%127.0.0.1%'
|
|
84
|
+
AND url NOT LIKE '%169.254%'
|
|
85
|
+
GROUP BY url
|
|
86
|
+
ORDER BY fetch_count DESC
|
|
87
|
+
LIMIT 50
|
|
88
|
+
`);
|
|
89
|
+
urls = result.rows.map((row) => ({
|
|
90
|
+
url: row.url,
|
|
91
|
+
count: parseInt(row.fetch_count, 10),
|
|
92
|
+
}));
|
|
93
|
+
}
|
|
94
|
+
catch (err) {
|
|
95
|
+
log.warn('Failed to query usage_logs, falling back to static list', {
|
|
96
|
+
error: err?.message,
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
// Fall back to static list if no DB data
|
|
101
|
+
if (urls.length === 0) {
|
|
102
|
+
log.info('No usage data found, using fallback URL list');
|
|
103
|
+
urls = FALLBACK_URLS.map((url) => ({ url, count: 0 }));
|
|
104
|
+
}
|
|
105
|
+
res.json({
|
|
106
|
+
urls,
|
|
107
|
+
total: urls.length,
|
|
108
|
+
generatedAt: new Date().toISOString(),
|
|
109
|
+
});
|
|
110
|
+
});
|
|
111
|
+
// GET /internal/cache-status
|
|
112
|
+
router.get('/internal/cache-status', (_req, res) => {
|
|
113
|
+
res.json({
|
|
114
|
+
warmedUrls: Array.from(warmerState.warmedUrls),
|
|
115
|
+
urlCount: warmerState.warmedUrls.size,
|
|
116
|
+
lastWarmTime: warmerState.lastWarmTime?.toISOString() ?? null,
|
|
117
|
+
});
|
|
118
|
+
});
|
|
119
|
+
return router;
|
|
120
|
+
}
|
|
121
|
+
// ─── Self-Warming ─────────────────────────────────────────────────────────────
|
|
122
|
+
/**
|
|
123
|
+
* startCacheWarmer — server-side self-warming (fallback when no CF Worker).
|
|
124
|
+
*
|
|
125
|
+
* Every `intervalMs` (default 2 min):
|
|
126
|
+
* 1. Queries /internal/popular-urls (via the DB, not HTTP)
|
|
127
|
+
* 2. Fetches each URL through /r/<url> with concurrency 5
|
|
128
|
+
* 3. Updates warmerState for /internal/cache-status
|
|
129
|
+
*
|
|
130
|
+
* Only started if ENABLE_CACHE_WARM=true.
|
|
131
|
+
*/
|
|
132
|
+
export function startCacheWarmer(pool, intervalMs = 120_000) {
|
|
133
|
+
log.info('Cache warmer started', { intervalMs });
|
|
134
|
+
const runWarm = async () => {
|
|
135
|
+
const t0 = Date.now();
|
|
136
|
+
log.info('Cache warm cycle starting');
|
|
137
|
+
// Determine base URL
|
|
138
|
+
const base = process.env.RENDER_EXTERNAL_URL?.replace(/\/$/, '') ||
|
|
139
|
+
`http://localhost:${process.env.PORT || 3000}`;
|
|
140
|
+
// Step 1: Fetch popular URLs (same logic as the endpoint)
|
|
141
|
+
let urls = [];
|
|
142
|
+
if (pool) {
|
|
143
|
+
try {
|
|
144
|
+
const result = await pool.query(`
|
|
145
|
+
SELECT url, COUNT(*) as fetch_count
|
|
146
|
+
FROM usage_logs
|
|
147
|
+
WHERE created_at > NOW() - INTERVAL '24 hours'
|
|
148
|
+
AND url IS NOT NULL
|
|
149
|
+
AND status_code >= 200 AND status_code < 300
|
|
150
|
+
AND url NOT LIKE '%localhost%'
|
|
151
|
+
AND url NOT LIKE '%127.0.0.1%'
|
|
152
|
+
AND url NOT LIKE '%169.254%'
|
|
153
|
+
GROUP BY url
|
|
154
|
+
ORDER BY fetch_count DESC
|
|
155
|
+
LIMIT 50
|
|
156
|
+
`);
|
|
157
|
+
urls = result.rows.map((row) => ({
|
|
158
|
+
url: row.url,
|
|
159
|
+
count: parseInt(row.fetch_count, 10),
|
|
160
|
+
}));
|
|
161
|
+
}
|
|
162
|
+
catch (err) {
|
|
163
|
+
log.warn('Warm cycle: DB query failed, using fallback', { error: err?.message });
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
if (urls.length === 0) {
|
|
167
|
+
urls = FALLBACK_URLS.map((u) => ({ url: u, count: 0 }));
|
|
168
|
+
}
|
|
169
|
+
// Step 2: Warm each URL with concurrency 5
|
|
170
|
+
const concurrency = 5;
|
|
171
|
+
let warmed = 0;
|
|
172
|
+
let failed = 0;
|
|
173
|
+
const newWarmedSet = new Set();
|
|
174
|
+
for (let i = 0; i < urls.length; i += concurrency) {
|
|
175
|
+
const batch = urls.slice(i, i + concurrency);
|
|
176
|
+
const results = await Promise.allSettled(batch.map(({ url }) => fetch(`${base}/r/${encodeURIComponent(url)}`, {
|
|
177
|
+
headers: { 'User-Agent': 'WebPeel-CacheWarmer/1.0' },
|
|
178
|
+
signal: AbortSignal.timeout(15_000),
|
|
179
|
+
}).then((r) => {
|
|
180
|
+
if (r.ok) {
|
|
181
|
+
newWarmedSet.add(url);
|
|
182
|
+
warmed++;
|
|
183
|
+
}
|
|
184
|
+
else {
|
|
185
|
+
failed++;
|
|
186
|
+
}
|
|
187
|
+
})));
|
|
188
|
+
// Count settled rejections as failures
|
|
189
|
+
results.forEach((r) => {
|
|
190
|
+
if (r.status === 'rejected') {
|
|
191
|
+
failed++;
|
|
192
|
+
}
|
|
193
|
+
});
|
|
194
|
+
}
|
|
195
|
+
// Step 3: Update state
|
|
196
|
+
warmerState.warmedUrls = newWarmedSet;
|
|
197
|
+
warmerState.lastWarmTime = new Date();
|
|
198
|
+
const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
|
|
199
|
+
log.info(`Warmed ${warmed}/${urls.length} URLs in ${elapsed}s`, { failed });
|
|
200
|
+
};
|
|
201
|
+
// Run once immediately, then on interval
|
|
202
|
+
void runWarm().catch((err) => {
|
|
203
|
+
log.error('Cache warm cycle error', { error: err?.message });
|
|
204
|
+
});
|
|
205
|
+
setInterval(() => {
|
|
206
|
+
void runWarm().catch((err) => {
|
|
207
|
+
log.error('Cache warm cycle error', { error: err?.message });
|
|
208
|
+
});
|
|
209
|
+
}, intervalMs);
|
|
210
|
+
}
|
|
package/dist/server/routes/fetch.js
CHANGED
|
@@ -229,7 +229,10 @@ export function createFetchRouter(authStore) {
|
|
|
229
229
|
const cacheAge = Date.now() - cached.timestamp;
|
|
230
230
|
if (cacheAge < maxAgeMs && cacheAge < cacheTtlMs) {
|
|
231
231
|
res.setHeader('X-Cache', 'HIT');
|
|
232
|
+
res.setHeader('X-Cache-Status', 'HIT');
|
|
232
233
|
res.setHeader('X-Cache-Age', Math.floor(cacheAge / 1000).toString());
|
|
234
|
+
// Cache-Control: allow Cloudflare edge to cache successful GET responses
|
|
235
|
+
res.setHeader('Cache-Control', 'public, s-maxage=60, stale-while-revalidate=300');
|
|
233
236
|
if (wantsEnvelope(req)) {
|
|
234
237
|
successResponse(res, cached.result, {
|
|
235
238
|
requestId: req.requestId,
|
|
@@ -467,9 +470,19 @@ export function createFetchRouter(authStore) {
|
|
|
467
470
|
: undefined;
|
|
468
471
|
// Add usage headers (kept for backward compat; also surfaced in envelope metadata)
|
|
469
472
|
res.setHeader('X-Cache', 'MISS');
|
|
473
|
+
res.setHeader('X-Cache-Status', 'MISS');
|
|
470
474
|
res.setHeader('X-Credits-Used', '1');
|
|
471
475
|
res.setHeader('X-Processing-Time', elapsed.toString());
|
|
472
476
|
res.setHeader('X-Fetch-Type', fetchType);
|
|
477
|
+
// Cache-Control: allow Cloudflare edge to cache successful GET responses for 60s
|
|
478
|
+
res.setHeader('Cache-Control', 'public, s-maxage=60, stale-while-revalidate=300');
|
|
479
|
+
// Response timing headers — let customers see exactly where time is spent
|
|
480
|
+
const timingFetch = result.timing?.fetch ?? 0;
|
|
481
|
+
const timingParse = (result.timing?.convert ?? 0) + (result.timing?.metadata ?? 0) + (result.timing?.prune ?? 0);
|
|
482
|
+
res.setHeader('X-Response-Time', `${elapsed}ms`);
|
|
483
|
+
res.setHeader('X-Fetch-Time', `${timingFetch}ms`);
|
|
484
|
+
res.setHeader('X-Parse-Time', `${timingParse}ms`);
|
|
485
|
+
res.setHeader('Server-Timing', `fetch;dur=${timingFetch}, parse;dur=${timingParse}, total;dur=${elapsed}`);
|
|
473
486
|
// Build response — extend result with optional answer/summary fields
|
|
474
487
|
const getResponseBody = { ...result };
|
|
475
488
|
if (getAnswerResult !== undefined)
|
|
@@ -676,6 +689,7 @@ export function createFetchRouter(authStore) {
|
|
|
676
689
|
const cacheAge = Date.now() - cached.timestamp;
|
|
677
690
|
if (cacheAge < postCacheTtlMs) {
|
|
678
691
|
res.setHeader('X-Cache', 'HIT');
|
|
692
|
+
res.setHeader('X-Cache-Status', 'HIT');
|
|
679
693
|
res.setHeader('X-Cache-Age', Math.floor(cacheAge / 1000).toString());
|
|
680
694
|
if (wantsEnvelope(req)) {
|
|
681
695
|
successResponse(res, cached.result, {
|
|
@@ -932,9 +946,17 @@ export function createFetchRouter(authStore) {
|
|
|
932
946
|
// --- Build response ------------------------------------------------------
|
|
933
947
|
// Headers kept for backward compat; also surfaced in envelope metadata.
|
|
934
948
|
res.setHeader('X-Cache', 'MISS');
|
|
949
|
+
res.setHeader('X-Cache-Status', 'MISS');
|
|
935
950
|
res.setHeader('X-Credits-Used', '1');
|
|
936
951
|
res.setHeader('X-Processing-Time', elapsed.toString());
|
|
937
952
|
res.setHeader('X-Fetch-Type', fetchType);
|
|
953
|
+
// Response timing headers — let customers see exactly where time is spent
|
|
954
|
+
const postTimingFetch = result.timing?.fetch ?? 0;
|
|
955
|
+
const postTimingParse = (result.timing?.convert ?? 0) + (result.timing?.metadata ?? 0) + (result.timing?.prune ?? 0);
|
|
956
|
+
res.setHeader('X-Response-Time', `${elapsed}ms`);
|
|
957
|
+
res.setHeader('X-Fetch-Time', `${postTimingFetch}ms`);
|
|
958
|
+
res.setHeader('X-Parse-Time', `${postTimingParse}ms`);
|
|
959
|
+
res.setHeader('Server-Timing', `fetch;dur=${postTimingFetch}, parse;dur=${postTimingParse}, total;dur=${elapsed}`);
|
|
938
960
|
const responseBody = { ...result };
|
|
939
961
|
if (jsonData !== undefined) {
|
|
940
962
|
responseBody.json = jsonData;
|
|
package/dist/server/routes/reader.js
CHANGED
|
@@ -96,6 +96,11 @@ export function createReaderRouter() {
|
|
|
96
96
|
selector: targetSelector,
|
|
97
97
|
waitSelector: waitForSelector,
|
|
98
98
|
});
|
|
99
|
+
// Cache-Control: this endpoint is public and heavily cacheable.
|
|
100
|
+
// Cloudflare edge caches for 2 min; serves stale for up to 10 min while revalidating.
|
|
101
|
+
res.setHeader('Cache-Control', 'public, s-maxage=120, stale-while-revalidate=600');
|
|
102
|
+
// Vary on Accept so different content-type representations are cached separately.
|
|
103
|
+
res.setHeader('Vary', 'Accept');
|
|
99
104
|
// Return based on format
|
|
100
105
|
const responseFormat = format.toLowerCase();
|
|
101
106
|
if (responseFormat === 'text') {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.0",
|
|
3
|
+
"version": "0.21.2",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|