webpeel 0.21.61 → 0.21.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -7
- package/dist/cli/utils.js +1 -1
- package/dist/server/routes/fetch-queue.js +40 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
<a href="https://github.com/webpeel/webpeel/actions/workflows/ci.yml"><img src="https://github.com/webpeel/webpeel/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
|
|
9
9
|
<a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/v/webpeel.svg?style=flat-square" alt="npm version"></a>
|
|
10
10
|
<a href="https://pypi.org/project/webpeel/"><img src="https://img.shields.io/pypi/v/webpeel.svg?style=flat-square" alt="PyPI version"></a>
|
|
11
|
-
<a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square" alt="License: MIT"></a>
|
|
12
11
|
<a href="LICENSE"><img src="https://img.shields.io/badge/license-WebPeel%20SDK-blue.svg?style=flat-square" alt="License"></a>
|
|
13
12
|
<a href="https://webpeel.dev/status"><img src="https://img.shields.io/badge/status-operational-brightgreen.svg?style=flat-square" alt="Status"></a>
|
|
14
13
|
</p>
|
|
@@ -305,20 +304,20 @@ webpeel "https://news.ycombinator.com"
|
|
|
305
304
|
# Search the web
|
|
306
305
|
webpeel search "typescript orm comparison 2025"
|
|
307
306
|
|
|
308
|
-
# Extract structured data
|
|
309
|
-
webpeel
|
|
307
|
+
# Extract structured data with a JSON schema
|
|
308
|
+
webpeel "https://stripe.com/pricing" --extract-schema pricing-schema.json
|
|
310
309
|
|
|
311
|
-
# Crawl a site
|
|
312
|
-
webpeel crawl "https://docs.example.com" --
|
|
310
|
+
# Crawl a site
|
|
311
|
+
webpeel crawl "https://docs.example.com" --max-pages 100
|
|
313
312
|
|
|
314
313
|
# Screenshot
|
|
315
314
|
webpeel screenshot "https://webpeel.dev" --full-page --output screenshot.png
|
|
316
315
|
|
|
317
316
|
# YouTube transcript
|
|
318
|
-
webpeel
|
|
317
|
+
webpeel "https://youtube.com/watch?v=dQw4w9WgXcQ" --json
|
|
319
318
|
|
|
320
319
|
# Ask a question about a page
|
|
321
|
-
webpeel
|
|
320
|
+
webpeel ask "https://openai.com/pricing" "How much does GPT-4o cost per million tokens?"
|
|
322
321
|
|
|
323
322
|
# Output as JSON
|
|
324
323
|
webpeel "https://example.com" --json
|
package/dist/cli/utils.js
CHANGED
|
@@ -398,7 +398,7 @@ export function buildCondensedHelp() {
|
|
|
398
398
|
` --raw Full page (disable auto reader mode)`,
|
|
399
399
|
` --full Full page, no budget limit`,
|
|
400
400
|
` --json JSON output with metadata`,
|
|
401
|
-
` --budget: 4000)`,
|
|
401
|
+
` --budget <n> Token budget (default: 4000 in pipe mode)`,
|
|
402
402
|
` -q, --question <q> Ask about the content`,
|
|
403
403
|
` -s, --silent No spinner output`,
|
|
404
404
|
'',
|
package/dist/server/routes/fetch-queue.js
CHANGED
|
@@ -176,6 +176,33 @@ export function createQueueFetchRouter() {
|
|
|
176
176
|
pollUrl: `/v1/jobs/${jobId}`,
|
|
177
177
|
});
|
|
178
178
|
}
|
|
179
|
+
// GET /v1/fetch?url=... — CLI and backward-compatible GET requests
|
|
180
|
+
// Maps query params into req.body so handleEnqueue works uniformly
|
|
181
|
+
router.get('/v1/fetch', (req, res) => {
|
|
182
|
+
// Map query string to body for uniform handling
|
|
183
|
+
req.body = {
|
|
184
|
+
url: req.query.url,
|
|
185
|
+
format: req.query.format || 'markdown',
|
|
186
|
+
render: req.query.render === 'true',
|
|
187
|
+
stealth: req.query.stealth === 'true',
|
|
188
|
+
wait: req.query.wait ? Number(req.query.wait) : undefined,
|
|
189
|
+
selector: req.query.selector,
|
|
190
|
+
readable: req.query.readable === 'true',
|
|
191
|
+
budget: req.query.budget ? Number(req.query.budget) : undefined,
|
|
192
|
+
question: req.query.question,
|
|
193
|
+
screenshot: req.query.screenshot === 'true',
|
|
194
|
+
fullPage: req.query.fullPage === 'true' || req.query['full-page'] === 'true',
|
|
195
|
+
maxTokens: req.query.maxTokens ? Number(req.query.maxTokens) : undefined,
|
|
196
|
+
lite: req.query.lite === 'true',
|
|
197
|
+
raw: req.query.raw === 'true',
|
|
198
|
+
images: req.query.images === 'true',
|
|
199
|
+
};
|
|
200
|
+
void handleEnqueue(req, res, false);
|
|
201
|
+
});
|
|
202
|
+
router.get('/v1/render', (req, res) => {
|
|
203
|
+
req.body = { url: req.query.url, format: req.query.format || 'markdown' };
|
|
204
|
+
void handleEnqueue(req, res, true);
|
|
205
|
+
});
|
|
179
206
|
router.post('/v1/fetch', (req, res) => void handleEnqueue(req, res, false));
|
|
180
207
|
router.post('/v1/render', (req, res) => void handleEnqueue(req, res, true));
|
|
181
208
|
/**
|
|
@@ -186,6 +213,19 @@ export function createQueueFetchRouter() {
|
|
|
186
213
|
router.get('/v1/jobs/:id', async (req, res) => {
|
|
187
214
|
const { id } = req.params;
|
|
188
215
|
const requestId = req.requestId || randomUUID();
|
|
216
|
+
// Auth required — prevent IDOR (unauthenticated access to job results)
|
|
217
|
+
if (!req.auth?.keyInfo) {
|
|
218
|
+
res.status(401).json({
|
|
219
|
+
success: false,
|
|
220
|
+
error: {
|
|
221
|
+
type: 'unauthorized',
|
|
222
|
+
message: 'API key required to poll job results.',
|
|
223
|
+
docs: 'https://webpeel.dev/docs/errors#unauthorized',
|
|
224
|
+
},
|
|
225
|
+
requestId,
|
|
226
|
+
});
|
|
227
|
+
return;
|
|
228
|
+
}
|
|
189
229
|
if (!id || typeof id !== 'string') {
|
|
190
230
|
res.status(400).json({
|
|
191
231
|
success: false,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "webpeel",
|
|
3
|
-
"version": "0.21.61",
|
|
3
|
+
"version": "0.21.63",
|
|
4
4
|
"description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
|
|
5
5
|
"author": "Jake Liu",
|
|
6
6
|
"license": "AGPL-3.0-only",
|