webpeel 0.21.61 → 0.21.63

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,7 +8,6 @@
8
8
  <a href="https://github.com/webpeel/webpeel/actions/workflows/ci.yml"><img src="https://github.com/webpeel/webpeel/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
9
9
  <a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/v/webpeel.svg?style=flat-square" alt="npm version"></a>
10
10
  <a href="https://pypi.org/project/webpeel/"><img src="https://img.shields.io/pypi/v/webpeel.svg?style=flat-square" alt="PyPI version"></a>
11
- <a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square" alt="License: MIT"></a>
12
11
  <a href="LICENSE"><img src="https://img.shields.io/badge/license-WebPeel%20SDK-blue.svg?style=flat-square" alt="License"></a>
13
12
  <a href="https://webpeel.dev/status"><img src="https://img.shields.io/badge/status-operational-brightgreen.svg?style=flat-square" alt="Status"></a>
14
13
  </p>
@@ -305,20 +304,20 @@ webpeel "https://news.ycombinator.com"
305
304
  # Search the web
306
305
  webpeel search "typescript orm comparison 2025"
307
306
 
308
- # Extract structured data
309
- webpeel extract "https://stripe.com/pricing" --schema pricing-schema.json
307
+ # Extract structured data with a JSON schema
308
+ webpeel "https://stripe.com/pricing" --extract-schema pricing-schema.json
310
309
 
311
- # Crawl a site, save to folder
312
- webpeel crawl "https://docs.example.com" --output ./docs-dump --max-pages 100
310
+ # Crawl a site
311
+ webpeel crawl "https://docs.example.com" --max-pages 100
313
312
 
314
313
  # Screenshot
315
314
  webpeel screenshot "https://webpeel.dev" --full-page --output screenshot.png
316
315
 
317
316
  # YouTube transcript
318
- webpeel youtube "https://youtube.com/watch?v=dQw4w9WgXcQ"
317
+ webpeel "https://youtube.com/watch?v=dQw4w9WgXcQ" --json
319
318
 
320
319
  # Ask a question about a page
321
- webpeel qa "https://openai.com/pricing" --question "How much does GPT-4o cost per million tokens?"
320
+ webpeel ask "https://openai.com/pricing" "How much does GPT-4o cost per million tokens?"
322
321
 
323
322
  # Output as JSON
324
323
  webpeel "https://example.com" --json
package/dist/cli/utils.js CHANGED
@@ -398,7 +398,7 @@ export function buildCondensedHelp() {
398
398
  ` --raw Full page (disable auto reader mode)`,
399
399
  ` --full Full page, no budget limit`,
400
400
  ` --json JSON output with metadata`,
401
- ` --budget: 4000)`,
401
+ ` --budget <n> Token budget (default: 4000 in pipe mode)`,
402
402
  ` -q, --question <q> Ask about the content`,
403
403
  ` -s, --silent No spinner output`,
404
404
  '',
@@ -176,6 +176,33 @@ export function createQueueFetchRouter() {
176
176
  pollUrl: `/v1/jobs/${jobId}`,
177
177
  });
178
178
  }
179
+ // GET /v1/fetch?url=... — CLI and backward-compatible GET requests
180
+ // Maps query params into req.body so handleEnqueue works uniformly
181
+ router.get('/v1/fetch', (req, res) => {
182
+ // Map query string to body for uniform handling
183
+ req.body = {
184
+ url: req.query.url,
185
+ format: req.query.format || 'markdown',
186
+ render: req.query.render === 'true',
187
+ stealth: req.query.stealth === 'true',
188
+ wait: req.query.wait ? Number(req.query.wait) : undefined,
189
+ selector: req.query.selector,
190
+ readable: req.query.readable === 'true',
191
+ budget: req.query.budget ? Number(req.query.budget) : undefined,
192
+ question: req.query.question,
193
+ screenshot: req.query.screenshot === 'true',
194
+ fullPage: req.query.fullPage === 'true' || req.query['full-page'] === 'true',
195
+ maxTokens: req.query.maxTokens ? Number(req.query.maxTokens) : undefined,
196
+ lite: req.query.lite === 'true',
197
+ raw: req.query.raw === 'true',
198
+ images: req.query.images === 'true',
199
+ };
200
+ void handleEnqueue(req, res, false);
201
+ });
202
+ router.get('/v1/render', (req, res) => {
203
+ req.body = { url: req.query.url, format: req.query.format || 'markdown' };
204
+ void handleEnqueue(req, res, true);
205
+ });
179
206
  router.post('/v1/fetch', (req, res) => void handleEnqueue(req, res, false));
180
207
  router.post('/v1/render', (req, res) => void handleEnqueue(req, res, true));
181
208
  /**
@@ -186,6 +213,19 @@ export function createQueueFetchRouter() {
186
213
  router.get('/v1/jobs/:id', async (req, res) => {
187
214
  const { id } = req.params;
188
215
  const requestId = req.requestId || randomUUID();
216
+ // Auth required — prevent IDOR (unauthenticated access to job results)
217
+ if (!req.auth?.keyInfo) {
218
+ res.status(401).json({
219
+ success: false,
220
+ error: {
221
+ type: 'unauthorized',
222
+ message: 'API key required to poll job results.',
223
+ docs: 'https://webpeel.dev/docs/errors#unauthorized',
224
+ },
225
+ requestId,
226
+ });
227
+ return;
228
+ }
189
229
  if (!id || typeof id !== 'string') {
190
230
  res.status(400).json({
191
231
  success: false,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "webpeel",
3
- "version": "0.21.61",
3
+ "version": "0.21.63",
4
4
  "description": "Fast web fetcher for AI agents - stealth mode, crawl mode, page actions, structured extraction, PDF parsing, smart escalation from simple HTTP to headless browser",
5
5
  "author": "Jake Liu",
6
6
  "license": "AGPL-3.0-only",