webpeel 0.21.63 → 0.21.64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/core/http-fetch.js
CHANGED
|
@@ -559,10 +559,14 @@ export async function simpleFetch(url, userAgent, timeoutMs = 30000, customHeade
|
|
|
559
559
|
try {
|
|
560
560
|
const requestHeaders = { ...mergedHeaders };
|
|
561
561
|
const validators = getConditionalValidators(currentUrl);
|
|
562
|
-
|
|
562
|
+
// Only send conditional headers if we actually have the cached body
|
|
563
|
+
// In server/worker mode, the in-memory cache may have been cleared (pod restart)
|
|
564
|
+
// and sending If-None-Match without a cached body would cause a 304 crash
|
|
565
|
+
const cachedBody = getCachedResultFor304(currentUrl, url);
|
|
566
|
+
if (validators?.etag && cachedBody && !hasHeader(requestHeaders, 'if-none-match')) {
|
|
563
567
|
requestHeaders['If-None-Match'] = validators.etag;
|
|
564
568
|
}
|
|
565
|
-
if (validators?.lastModified && !hasHeader(requestHeaders, 'if-modified-since')) {
|
|
569
|
+
if (validators?.lastModified && cachedBody && !hasHeader(requestHeaders, 'if-modified-since')) {
|
|
566
570
|
requestHeaders['If-Modified-Since'] = validators.lastModified;
|
|
567
571
|
}
|
|
568
572
|
// Use proxy if provided or auto-selected, otherwise use shared connection pool
|
|
@@ -176,6 +176,81 @@ export function createQueueFetchRouter() {
|
|
|
176
176
|
pollUrl: `/v1/jobs/${jobId}`,
|
|
177
177
|
});
|
|
178
178
|
}
|
|
179
|
+
/**
 * GET/POST /v1/fetch/sync — Synchronous fetch, no queue.
 *
 * Runs `peel` inline and returns its result directly in the JSON response
 * (no jobId/polling). Every option is read from the JSON body first and
 * falls back to the query string, so one handler serves both GET and POST.
 *
 * A fixed `timeout: 25000` is handed to `peel`; there is no fallback to the
 * queue, so a failed fetch is reported as an error response.
 *
 * Callers discard the returned promise with `void`, so this function must
 * not reject: every failure (including a throwing URL validator) is
 * converted into a JSON error response here.
 *
 * @param req - Request; reads `requestId`, `body`, `query`, `auth` and `user`.
 * @param res - Response used to send the JSON result or the JSON error.
 * @returns {Promise<void>} Settles once a response has been written or skipped.
 */
async function handleSyncFetch(req, res) {
    const requestId = req.requestId || randomUUID();
    // Body value wins; the query string is only consulted when the body value is falsy.
    const fromBodyOrQuery = (name) => req.body?.[name] || req.query?.[name];
    // Flags are only on for a literal `true` in the body or the string 'true' in the query.
    const flag = (name) => req.body?.[name] === true || req.query?.[name] === 'true';
    // Numeric options: unset/falsy stays undefined, and so does anything that
    // does not parse to a finite number (previously NaN leaked into `peel`).
    const finiteNumber = (name) => {
        const raw = fromBodyOrQuery(name);
        if (!raw) {
            return undefined;
        }
        const parsed = Number(raw);
        return Number.isFinite(parsed) ? parsed : undefined;
    };
    try {
        const url = validateUrl(fromBodyOrQuery('url'), res, requestId);
        if (!url) {
            // validateUrl receives `res`; presumably it has already replied — TODO confirm.
            return;
        }
        const userId = req.auth?.keyInfo?.accountId || req.user?.userId;
        if (!userId) {
            res.status(401).json({
                success: false,
                error: { type: 'unauthorized', message: 'API key required.' },
                requestId,
            });
            return;
        }
        // Import peel dynamically to avoid circular deps
        const { peel } = await import('../../index.js');
        const options = {
            format: fromBodyOrQuery('format') || 'markdown',
            render: flag('render'),
            stealth: flag('stealth'),
            budget: finiteNumber('budget'),
            selector: fromBodyOrQuery('selector'),
            readable: flag('readable'),
            wait: finiteNumber('wait'),
            question: fromBodyOrQuery('question'),
            timeout: 25000, // 25s max (leave 5s buffer for response)
        };
        const result = await peel(url, options);
        res.json({
            success: true,
            ...result,
            requestId,
            mode: 'sync',
        });
    }
    catch (err) {
        if (res.headersSent) {
            // A response has already started; writing a second one would throw
            // and turn into an unhandled rejection in the `void` callers.
            return;
        }
        // `err` may be any thrown value (even null), so read it defensively and
        // only trust a status that is a real integer in the 4xx/5xx range.
        const candidateStatus = Number(err?.statusCode);
        const isHttpErrorStatus = Number.isInteger(candidateStatus)
            && candidateStatus >= 400
            && candidateStatus < 600;
        res.status(isHttpErrorStatus ? candidateStatus : 500).json({
            success: false,
            error: {
                type: err?.errorType || 'fetch_error',
                message: err?.message || 'Fetch failed',
            },
            requestId,
        });
    }
}
|
|
232
|
+
// GET /v1/fetch/sync?url=... — copy the recognised query params onto req.body
// (coercing flags and numbers) so the shared handler sees the same shape as POST.
router.get('/v1/fetch/sync', (req, res) => {
    const keepAsIs = (raw) => raw;
    const toFlag = (raw) => raw === 'true';
    // [query key, coercion] pairs, listed in the order they are written to the body.
    const queryMappings = [
        ['url', keepAsIs],
        ['format', keepAsIs],
        ['render', toFlag],
        ['stealth', toFlag],
        ['budget', (raw) => Number(raw)],
        ['selector', keepAsIs],
        ['readable', toFlag],
        ['question', keepAsIs],
    ];
    req.body = req.body || {};
    for (const [key, coerce] of queryMappings) {
        const raw = req.query[key];
        // Only params that are present (truthy) override the body.
        if (raw) {
            req.body[key] = coerce(raw);
        }
    }
    // The handler's promise is intentionally not awaited by the route.
    void handleSyncFetch(req, res);
});
// POST /v1/fetch/sync — body is passed through to the shared handler untouched.
router.post('/v1/fetch/sync', (req, res) => {
    void handleSyncFetch(req, res);
});
|
|
179
254
|
// GET /v1/fetch?url=... — CLI and backward-compatible GET requests
|
|
180
255
|
// Maps query params into req.body so handleEnqueue works uniformly
|
|
181
256
|
router.get('/v1/fetch', (req, res) => {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.63",
|
|
3
|
+
"version": "0.21.64",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|
|
@@ -59,6 +59,7 @@
|
|
|
59
59
|
"prepublishOnly": "bash scripts/pre-publish.sh",
|
|
60
60
|
"serve": "node dist/server/app.js",
|
|
61
61
|
"mcp": "node dist/mcp/server.js",
|
|
62
|
+
"preversion": "npm run build && npm test && bash scripts/pre-publish-gate.sh",
|
|
62
63
|
"version": "bash scripts/postversion.sh"
|
|
63
64
|
},
|
|
64
65
|
"repository": {
|