@upcrawl/sdk 1.3.1 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +214 -65
- package/dist/index.d.mts +355 -225
- package/dist/index.d.ts +355 -225
- package/dist/index.js +311 -0
- package/dist/index.mjs +290 -0
- package/package.json +9 -4
package/README.md
CHANGED
|
@@ -314,71 +314,220 @@ const result: ScrapeResponse = await Upcrawl.scrape(options);
|
|
|
314
314
|
| `Upcrawl.generatePdfFromUrl(options)` | Generate PDF from a URL |
|
|
315
315
|
| `Upcrawl.executeCode(options)` | Execute code in an isolated sandbox |
|
|
316
316
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
|
323
|
-
|
|
324
|
-
| `
|
|
325
|
-
| `
|
|
326
|
-
| `
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
|
342
|
-
|
|
343
|
-
| `
|
|
344
|
-
| `
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
|
349
|
-
|
|
350
|
-
| `
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
|
360
|
-
|
|
361
|
-
| `
|
|
362
|
-
| `
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
|
367
|
-
|
|
368
|
-
| `
|
|
369
|
-
| `
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
|
377
|
-
|
|
378
|
-
| `
|
|
379
|
-
| `
|
|
380
|
-
| `
|
|
381
|
-
| `
|
|
317
|
+
<!-- TYPES-START -->
|
|
318
|
+
<!-- Auto-generated by `pnpm generate:docs` — do not edit manually -->
|
|
319
|
+
|
|
320
|
+
### UpcrawlConfig
|
|
321
|
+
|
|
322
|
+
| Field | Type | Required | Description |
|
|
323
|
+
|-------|------|----------|-------------|
|
|
324
|
+
| `apiKey` | string | No | Your Upcrawl API key |
|
|
325
|
+
| `baseUrl` | string | No | Custom API base URL |
|
|
326
|
+
| `timeout` | number | No | Request timeout in milliseconds |
|
|
327
|
+
|
|
328
|
+
### SummaryQuery
|
|
329
|
+
|
|
330
|
+
| Field | Type | Required | Description |
|
|
331
|
+
|-------|------|----------|-------------|
|
|
332
|
+
| `query` | string | Yes | Query/instruction for content summarization |
|
|
333
|
+
|
|
334
|
+
### ScrapeOptions
|
|
335
|
+
|
|
336
|
+
| Field | Type | Required | Description |
|
|
337
|
+
|-------|------|----------|-------------|
|
|
338
|
+
| `url` | string | Yes | URL to scrape (required) |
|
|
339
|
+
| `type` | `"html"` \| `"markdown"` | No | Output format: html or markdown. Defaults to "html" |
|
|
340
|
+
| `onlyMainContent` | boolean | No | Extract only main content (removes nav, ads, footers). Defaults to true |
|
|
341
|
+
| `extractMetadata` | boolean | No | Whether to extract page metadata |
|
|
342
|
+
| `summary` | object | No | Summary query for LLM summarization |
|
|
343
|
+
| `timeoutMs` | number | No | Custom timeout in milliseconds (1000-120000) |
|
|
344
|
+
| `waitUntil` | `"load"` \| `"domcontentloaded"` \| `"networkidle"` | No | Wait strategy for page load |
|
|
345
|
+
|
|
346
|
+
### ScrapeMetadata
|
|
347
|
+
|
|
348
|
+
| Field | Type | Required | Description |
|
|
349
|
+
|-------|------|----------|-------------|
|
|
350
|
+
| `title` | string | No | |
|
|
351
|
+
| `description` | string | No | |
|
|
352
|
+
| `canonicalUrl` | string | No | |
|
|
353
|
+
| `finalUrl` | string | No | |
|
|
354
|
+
| `contentType` | string | No | |
|
|
355
|
+
| `contentLength` | number | No | |
|
|
356
|
+
|
|
357
|
+
### ScrapeResponse
|
|
358
|
+
|
|
359
|
+
| Field | Type | Required | Description |
|
|
360
|
+
|-------|------|----------|-------------|
|
|
361
|
+
| `url` | string | Yes | Original URL that was scraped |
|
|
362
|
+
| `html` | string \| null | No | Rendered HTML content (when type is html) |
|
|
363
|
+
| `markdown` | string \| null | No | Content converted to Markdown (when type is markdown) |
|
|
364
|
+
| `statusCode` | number \| null | Yes | HTTP status code |
|
|
365
|
+
| `success` | boolean | Yes | Whether scraping was successful |
|
|
366
|
+
| `error` | string | No | Error message if scraping failed |
|
|
367
|
+
| `timestamp` | string | Yes | ISO timestamp when scraping completed |
|
|
368
|
+
| `loadTimeMs` | number | Yes | Time taken to load and render the page in milliseconds |
|
|
369
|
+
| `metadata` | object | No | Additional page metadata |
|
|
370
|
+
| `retryCount` | number | Yes | Number of retry attempts made |
|
|
371
|
+
| `cost` | number | No | Cost in USD for this scrape operation |
|
|
372
|
+
| `content` | string \| null | No | Content after summarization (when summary query provided) |
|
|
373
|
+
|
|
374
|
+
### BatchScrapeOptions
|
|
375
|
+
|
|
376
|
+
| Field | Type | Required | Description |
|
|
377
|
+
|-------|------|----------|-------------|
|
|
378
|
+
| `urls` | (string \| object)[] | Yes | Array of URLs to scrape (strings or detailed request objects) |
|
|
379
|
+
| `type` | `"html"` \| `"markdown"` | No | Output format: html or markdown |
|
|
380
|
+
| `onlyMainContent` | boolean | No | Extract only main content (removes nav, ads, footers) |
|
|
381
|
+
| `summary` | object | No | Summary query for LLM summarization |
|
|
382
|
+
| `batchTimeoutMs` | number | No | Global timeout for entire batch operation in milliseconds (10000-600000) |
|
|
383
|
+
| `failFast` | boolean | No | Whether to stop on first error |
|
|
384
|
+
|
|
385
|
+
### BatchScrapeResponse
|
|
386
|
+
|
|
387
|
+
| Field | Type | Required | Description |
|
|
388
|
+
|-------|------|----------|-------------|
|
|
389
|
+
| `results` | object[] | Yes | Array of scrape results |
|
|
390
|
+
| `total` | number | Yes | Total number of URLs processed |
|
|
391
|
+
| `successful` | number | Yes | Number of successful scrapes |
|
|
392
|
+
| `failed` | number | Yes | Number of failed scrapes |
|
|
393
|
+
| `totalTimeMs` | number | Yes | Total time taken for batch operation in milliseconds |
|
|
394
|
+
| `timestamp` | string | Yes | Timestamp when batch operation completed |
|
|
395
|
+
| `cost` | number | No | Total cost in USD for all scrape operations |
|
|
396
|
+
|
|
397
|
+
### SearchOptions
|
|
398
|
+
|
|
399
|
+
| Field | Type | Required | Description |
|
|
400
|
+
|-------|------|----------|-------------|
|
|
401
|
+
| `queries` | string[] | Yes | Array of search queries to execute (1-20) |
|
|
402
|
+
| `limit` | number | No | Number of results per query (1-100). Defaults to 10 |
|
|
403
|
+
| `location` | string | No | Location for search (e.g., "IN", "US") |
|
|
404
|
+
| `includeDomains` | string[] | No | Domains to include (will add site: to query) |
|
|
405
|
+
| `excludeDomains` | string[] | No | Domains to exclude (will add -site: to query) |
|
|
406
|
+
|
|
407
|
+
### SearchResultWeb
|
|
408
|
+
|
|
409
|
+
| Field | Type | Required | Description |
|
|
410
|
+
|-------|------|----------|-------------|
|
|
411
|
+
| `url` | string | Yes | URL of the search result |
|
|
412
|
+
| `title` | string | Yes | Title of the search result |
|
|
413
|
+
| `description` | string | Yes | Description/snippet of the search result |
|
|
414
|
+
|
|
415
|
+
### SearchResultItem
|
|
416
|
+
|
|
417
|
+
| Field | Type | Required | Description |
|
|
418
|
+
|-------|------|----------|-------------|
|
|
419
|
+
| `query` | string | Yes | The search query |
|
|
420
|
+
| `success` | boolean | Yes | Whether the search was successful |
|
|
421
|
+
| `results` | object[] | Yes | Parsed search result links |
|
|
422
|
+
| `error` | string | No | Error message if failed |
|
|
423
|
+
| `loadTimeMs` | number | No | Time taken in milliseconds |
|
|
424
|
+
| `cost` | number | No | Cost in USD for this query |
|
|
425
|
+
|
|
426
|
+
### SearchResponse
|
|
427
|
+
|
|
428
|
+
| Field | Type | Required | Description |
|
|
429
|
+
|-------|------|----------|-------------|
|
|
430
|
+
| `results` | object[] | Yes | Array of search results per query |
|
|
431
|
+
| `total` | number | Yes | Total number of queries |
|
|
432
|
+
| `successful` | number | Yes | Number of successful searches |
|
|
433
|
+
| `failed` | number | Yes | Number of failed searches |
|
|
434
|
+
| `totalTimeMs` | number | Yes | Total time in milliseconds |
|
|
435
|
+
| `timestamp` | string | Yes | ISO timestamp |
|
|
436
|
+
| `cost` | number | No | Total cost in USD |
|
|
437
|
+
|
|
438
|
+
### PdfMargin
|
|
439
|
+
|
|
440
|
+
| Field | Type | Required | Description |
|
|
441
|
+
|-------|------|----------|-------------|
|
|
442
|
+
| `top` | string | No | |
|
|
443
|
+
| `right` | string | No | |
|
|
444
|
+
| `bottom` | string | No | |
|
|
445
|
+
| `left` | string | No | |
|
|
446
|
+
|
|
447
|
+
### GeneratePdfOptions
|
|
448
|
+
|
|
449
|
+
| Field | Type | Required | Description |
|
|
450
|
+
|-------|------|----------|-------------|
|
|
451
|
+
| `html` | string | Yes | Complete HTML content to convert to PDF (required) |
|
|
452
|
+
| `title` | string | No | Title used for the exported filename |
|
|
453
|
+
| `pageSize` | `"A4"` \| `"Letter"` \| `"Legal"` | No | Page size. Defaults to "A4" |
|
|
454
|
+
| `landscape` | boolean | No | Landscape orientation. Defaults to false |
|
|
455
|
+
| `margin` | object | No | Page margins (e.g., { top: "20mm", right: "20mm", bottom: "20mm", left: "20mm" }) |
|
|
456
|
+
| `printBackground` | boolean | No | Print background graphics and colors. Defaults to true |
|
|
457
|
+
| `skipChartWait` | boolean | No | Skip waiting for chart rendering signal. Defaults to false |
|
|
458
|
+
| `timeoutMs` | number | No | Timeout in milliseconds (5000-120000). Defaults to 30000 |
|
|
459
|
+
|
|
460
|
+
### GeneratePdfFromUrlOptions
|
|
461
|
+
|
|
462
|
+
| Field | Type | Required | Description |
|
|
463
|
+
|-------|------|----------|-------------|
|
|
464
|
+
| `url` | string | Yes | URL to navigate to and convert to PDF (required) |
|
|
465
|
+
| `title` | string | No | Title used for the exported filename |
|
|
466
|
+
| `pageSize` | `"A4"` \| `"Letter"` \| `"Legal"` | No | Page size. Defaults to "A4" |
|
|
467
|
+
| `landscape` | boolean | No | Landscape orientation. Defaults to false |
|
|
468
|
+
| `margin` | object | No | Page margins |
|
|
469
|
+
| `printBackground` | boolean | No | Print background graphics and colors. Defaults to true |
|
|
470
|
+
| `timeoutMs` | number | No | Timeout in milliseconds (5000-120000). Defaults to 30000 |
|
|
471
|
+
|
|
472
|
+
### PdfResponse
|
|
473
|
+
|
|
474
|
+
| Field | Type | Required | Description |
|
|
475
|
+
|-------|------|----------|-------------|
|
|
476
|
+
| `success` | boolean | Yes | Whether PDF generation succeeded |
|
|
477
|
+
| `url` | string | No | Public URL of the generated PDF |
|
|
478
|
+
| `filename` | string | No | Generated filename |
|
|
479
|
+
| `blobName` | string | No | Blob storage path |
|
|
480
|
+
| `error` | string | No | Error message on failure |
|
|
481
|
+
| `durationMs` | number | Yes | Total time taken in milliseconds |
|
|
482
|
+
|
|
483
|
+
### ExecuteCodeOptions
|
|
484
|
+
|
|
485
|
+
| Field | Type | Required | Description |
|
|
486
|
+
|-------|------|----------|-------------|
|
|
487
|
+
| `code` | string | Yes | Code to execute (required) |
|
|
488
|
+
| `language` | `"python"` | No | Language runtime. Defaults to "python" |
|
|
489
|
+
|
|
490
|
+
### ExecuteCodeResponse
|
|
491
|
+
|
|
492
|
+
| Field | Type | Required | Description |
|
|
493
|
+
|-------|------|----------|-------------|
|
|
494
|
+
| `stdout` | string | Yes | Standard output from the executed code |
|
|
495
|
+
| `stderr` | string | Yes | Standard error from the executed code |
|
|
496
|
+
| `exitCode` | number | Yes | Process exit code (0 = success, 124 = timeout) |
|
|
497
|
+
| `executionTimeMs` | number | Yes | Execution time in milliseconds |
|
|
498
|
+
| `timedOut` | boolean | Yes | Whether execution was killed due to timeout |
|
|
499
|
+
| `memoryUsageMb` | number | No | Peak memory usage in megabytes |
|
|
500
|
+
| `error` | string | No | Error message if execution infrastructure failed |
|
|
501
|
+
| `cost` | number | No | Cost in USD for this execution |
|
|
502
|
+
|
|
503
|
+
### UpcrawlErrorResponse
|
|
504
|
+
|
|
505
|
+
| Field | Type | Required | Description |
|
|
506
|
+
|-------|------|----------|-------------|
|
|
507
|
+
| `error` | object | Yes | |
|
|
508
|
+
| `statusCode` | number | No | |
|
|
509
|
+
|
|
510
|
+
### CreateBrowserSessionOptions
|
|
511
|
+
|
|
512
|
+
| Field | Type | Required | Description |
|
|
513
|
+
|-------|------|----------|-------------|
|
|
514
|
+
| `width` | number | No | Browser viewport width (800-3840). Defaults to 1280 |
|
|
515
|
+
| `height` | number | No | Browser viewport height (600-2160). Defaults to 720 |
|
|
516
|
+
| `headless` | boolean | No | Run browser in headless mode. Defaults to true |
|
|
517
|
+
|
|
518
|
+
### BrowserSession
|
|
519
|
+
|
|
520
|
+
| Field | Type | Required | Description |
|
|
521
|
+
|-------|------|----------|-------------|
|
|
522
|
+
| `sessionId` | string | Yes | Unique session identifier |
|
|
523
|
+
| `wsEndpoint` | string | Yes | WebSocket URL for connecting with Playwright/Puppeteer |
|
|
524
|
+
| `vncUrl` | string \| null | Yes | VNC URL for viewing the browser (if available) |
|
|
525
|
+
| `affinityCookie` | string | No | Affinity cookie for sticky session routing (format: SCRAPER_AFFINITY=xxx) - extracted from response headers |
|
|
526
|
+
| `createdAt` | Date | Yes | Session creation timestamp |
|
|
527
|
+
| `width` | number | Yes | Browser viewport width |
|
|
528
|
+
| `height` | number | Yes | Browser viewport height |
|
|
529
|
+
|
|
530
|
+
<!-- TYPES-END -->
|
|
382
531
|
|
|
383
532
|
## License
|
|
384
533
|
|