@dealcrawl/sdk 2.7.0 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +370 -83
- package/dist/index.d.mts +131 -6
- package/dist/index.d.ts +131 -6
- package/dist/index.js +21 -7
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +21 -7
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -6,6 +6,15 @@ Official TypeScript SDK for the DealCrawl web scraping and crawling API.
|
|
|
6
6
|
[](https://www.typescriptlang.org/)
|
|
7
7
|
[](https://opensource.org/licenses/MIT)
|
|
8
8
|
|
|
9
|
+
## What's New in January 2026 🎉
|
|
10
|
+
|
|
11
|
+
- **📸 Screenshot Storage (Phase 4)** - Automatic screenshot capture and storage via Supabase, private by default with signed URLs (public URLs are an Enterprise-only opt-in)
|
|
12
|
+
- **🎯 Priority Crawl System (Phase 5)** - 3-tier queue system (high/medium/low) based on SmartFrontier deal scores for optimized resource allocation
|
|
13
|
+
- **🤖 AI Deal Extraction** - LLM-powered deal extraction with customizable score thresholds and automatic database storage
|
|
14
|
+
- **💾 Enhanced Data Persistence** - New `crawled_pages` and `crawled_deals` tables for comprehensive deal tracking
|
|
15
|
+
- **📝 Markdown Output** - Convert scraped content to clean Markdown with GFM support
|
|
16
|
+
- **🎬 Browser Actions** - Execute preset actions (click, scroll, write, etc.) before scraping for dynamic content
|
|
17
|
+
|
|
9
18
|
## Features
|
|
10
19
|
|
|
11
20
|
- 🚀 **Full API Coverage** - Access all 50+ DealCrawl API endpoints
|
|
@@ -40,15 +49,172 @@ const client = new DealCrawl({
|
|
|
40
49
|
apiKey: process.env.DEALCRAWL_API_KEY!,
|
|
41
50
|
});
|
|
42
51
|
|
|
43
|
-
// Scrape a single page with deal extraction
|
|
52
|
+
// Scrape a single page with deal extraction and screenshot
|
|
44
53
|
const job = await client.scrape.create({
|
|
45
54
|
url: "https://shop.example.com/product",
|
|
46
55
|
extractDeal: true,
|
|
56
|
+
screenshot: { enabled: true },
|
|
57
|
+
outputMarkdown: true, // NEW: Get clean markdown output
|
|
47
58
|
});
|
|
48
59
|
|
|
49
60
|
// Wait for result with automatic polling
|
|
50
61
|
const result = await client.waitForResult(job.jobId);
|
|
51
|
-
console.log(result);
|
|
62
|
+
console.log(result.data.parsed.markdown); // Markdown content
|
|
63
|
+
console.log(result.data.screenshot); // Screenshot URL (private signed URL by default)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## January 2026 Features in Detail
|
|
67
|
+
|
|
68
|
+
### 📸 Screenshot Storage (SEC-011)
|
|
69
|
+
|
|
70
|
+
**Private by default** with configurable signed URL expiration:
|
|
71
|
+
|
|
72
|
+
```typescript
|
|
73
|
+
// Basic screenshot (private with tier-specific TTL)
|
|
74
|
+
const job = await client.scrape.create({
|
|
75
|
+
url: "https://example.com",
|
|
76
|
+
screenshot: {
|
|
77
|
+
enabled: true,
|
|
78
|
+
fullPage: true,
|
|
79
|
+
format: "webp",
|
|
80
|
+
quality: 85,
|
|
81
|
+
signedUrlTtl: 604800, // 7 days (default for Pro/Enterprise)
|
|
82
|
+
},
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
const result = await client.waitForResult(job.jobId);
|
|
86
|
+
console.log(result.data.screenshotMetadata);
|
|
87
|
+
// {
|
|
88
|
+
// url: "https://...supabase.co/storage/v1/object/sign/screenshots-private/...",
|
|
89
|
+
// isPublic: false,
|
|
90
|
+
// expiresAt: "2026-01-25T12:00:00Z",
|
|
91
|
+
// width: 1280,
|
|
92
|
+
// height: 720,
|
|
93
|
+
// format: "webp",
|
|
94
|
+
// sizeBytes: 125000
|
|
95
|
+
// }
|
|
96
|
+
|
|
97
|
+
// Refresh signed URL before expiration
|
|
98
|
+
const refreshed = await client.screenshots.refresh({
|
|
99
|
+
path: "job_abc123/1234567890_nanoid_example.png",
|
|
100
|
+
ttl: 604800 // Extend for another 7 days
|
|
101
|
+
});
|
|
102
|
+
console.log(refreshed.url); // New signed URL
|
|
103
|
+
console.log(refreshed.expiresAt); // "2026-02-01T12:00:00Z"
|
|
104
|
+
|
|
105
|
+
// Get tier-specific TTL limits
|
|
106
|
+
const limits = await client.screenshots.getLimits();
|
|
107
|
+
console.log(limits);
|
|
108
|
+
// {
|
|
109
|
+
// tier: "pro",
|
|
110
|
+
// limits: { min: 3600, max: 604800, default: 604800 },
|
|
111
|
+
// formattedLimits: { min: "1 hour", max: "7 days", default: "7 days" }
|
|
112
|
+
// }
|
|
113
|
+
|
|
114
|
+
// Enterprise: Public URLs (opt-in)
|
|
115
|
+
const jobPublic = await client.scrape.create({
|
|
116
|
+
url: "https://example.com",
|
|
117
|
+
screenshot: {
|
|
118
|
+
enabled: true,
|
|
119
|
+
publicUrl: true, // ⚠️ Enterprise only - exposes data publicly
|
|
120
|
+
},
|
|
121
|
+
});
|
|
122
|
+
// → Public URL without expiration (Enterprise tier only)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
**Security Note:** Screenshots are private by default to prevent exposure of personal data, copyrighted content, or sensitive tokens. Public URLs require Enterprise tier + explicit opt-in.
|
|
126
|
+
|
|
127
|
+
### 🎯 Priority Crawl System
|
|
128
|
+
|
|
129
|
+
A 3-tier queue system automatically prioritizes high-value pages:
|
|
130
|
+
|
|
131
|
+
```typescript
|
|
132
|
+
// Crawl with automatic prioritization
|
|
133
|
+
const job = await client.crawl.create({
|
|
134
|
+
url: "https://shop.example.com",
|
|
135
|
+
extractDeal: true,
|
|
136
|
+
minDealScore: 50, // Only extract deals scoring 50+
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
// Behind the scenes:
|
|
140
|
+
// - Pages scoring 70+ → High priority queue (5 workers, 30/min)
|
|
141
|
+
// - Pages scoring 40-69 → Medium priority queue (10 workers, 60/min)
|
|
142
|
+
// - Pages scoring <40 → Low priority queue (20 workers, 120/min)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### 🤖 AI Deal Extraction
|
|
146
|
+
|
|
147
|
+
Extract deals with LLM-powered analysis:
|
|
148
|
+
|
|
149
|
+
```typescript
|
|
150
|
+
// Extract deals during crawl
|
|
151
|
+
const job = await client.crawl.create({
|
|
152
|
+
url: "https://marketplace.example.com",
|
|
153
|
+
extractDeal: true,
|
|
154
|
+
minDealScore: 30, // Only extract if score >= 30
|
|
155
|
+
maxPages: 200,
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
// Get extracted deals
|
|
159
|
+
const deals = await client.status.getDeals(job.jobId, {
|
|
160
|
+
minScore: 70, // Filter for high-quality deals
|
|
161
|
+
limit: 50,
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
console.log(deals.deals); // Array of ExtractedDeal objects
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### 📝 Markdown Output
|
|
168
|
+
|
|
169
|
+
Convert HTML to clean, structured markdown:
|
|
170
|
+
|
|
171
|
+
```typescript
|
|
172
|
+
// Single page markdown
|
|
173
|
+
const job = await client.scrape.create({
|
|
174
|
+
url: "https://blog.example.com/article",
|
|
175
|
+
outputMarkdown: true,
|
|
176
|
+
markdownBaseUrl: "https://blog.example.com", // Resolve relative URLs
|
|
177
|
+
onlyMainContent: true,
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
const result = await client.waitForResult(job.jobId);
|
|
181
|
+
console.log(result.data.parsed.markdown);
|
|
182
|
+
// Clean markdown with:
|
|
183
|
+
// - GFM tables, strikethrough, task lists
|
|
184
|
+
// - Code blocks with syntax detection
|
|
185
|
+
// - Absolute URLs
|
|
186
|
+
// - Noise removal (ads, navigation)
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### 🎬 Browser Actions
|
|
190
|
+
|
|
191
|
+
Execute actions before scraping for dynamic content:
|
|
192
|
+
|
|
193
|
+
```typescript
|
|
194
|
+
// Handle cookie popups and load more content
|
|
195
|
+
const job = await client.scrape.create({
|
|
196
|
+
url: "https://shop.example.com/products",
|
|
197
|
+
actions: [
|
|
198
|
+
{ type: "click", selector: "#accept-cookies", optional: true },
|
|
199
|
+
{ type: "wait", milliseconds: 500 },
|
|
200
|
+
{ type: "scroll", direction: "down", amount: 500 },
|
|
201
|
+
{ type: "click", selector: ".load-more", retries: 3 },
|
|
202
|
+
{ type: "wait", selector: ".products-loaded" },
|
|
203
|
+
],
|
|
204
|
+
extractMultipleDeals: true,
|
|
205
|
+
});
|
|
206
|
+
|
|
207
|
+
// Search and extract
|
|
208
|
+
const job2 = await client.scrape.create({
|
|
209
|
+
url: "https://marketplace.com",
|
|
210
|
+
actions: [
|
|
211
|
+
{ type: "write", selector: "input[name='search']", text: "laptop deals" },
|
|
212
|
+
{ type: "press", key: "Enter" },
|
|
213
|
+
{ type: "wait", selector: ".results" },
|
|
214
|
+
],
|
|
215
|
+
extractMultipleDeals: true,
|
|
216
|
+
maxDeals: 30,
|
|
217
|
+
});
|
|
52
218
|
```
|
|
53
219
|
|
|
54
220
|
## Configuration
|
|
@@ -86,25 +252,29 @@ const job = await client.scrape.withScreenshot("https://example.com", {
|
|
|
86
252
|
```
|
|
87
253
|
|
|
88
254
|
**Options:**
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
|
92
|
-
| `
|
|
93
|
-
| `
|
|
94
|
-
| `
|
|
95
|
-
| `
|
|
96
|
-
| `
|
|
97
|
-
| `
|
|
98
|
-
| `
|
|
99
|
-
| `
|
|
100
|
-
| `
|
|
101
|
-
| `
|
|
102
|
-
| `
|
|
103
|
-
| `
|
|
104
|
-
| `
|
|
105
|
-
| `
|
|
106
|
-
|
|
107
|
-
|
|
255
|
+
|
|
256
|
+
| Option | Type | Default | Description |
|
|
257
|
+
| ---------------------- | -------- | -------- | --------------------------------------------------------- |
|
|
258
|
+
| `url` | string | required | URL to scrape |
|
|
259
|
+
| `noStore` | boolean | false | Zero Data Retention - don't save results (Pro/Enterprise) |
|
|
260
|
+
| `detectSignals` | boolean | true | Detect prices, discounts, urgency |
|
|
261
|
+
| `extractDeal` | boolean | false | Extract deal information |
|
|
262
|
+
| `extractMultipleDeals` | boolean | false | Extract multiple deals from list pages |
|
|
263
|
+
| `maxDeals` | number | 20 | Max deals to extract (max: 50) |
|
|
264
|
+
| `extractWithAI` | boolean | false | Use AI for extraction |
|
|
265
|
+
| `useAdvancedModel` | boolean | false | Use GPT-4o (higher cost) |
|
|
266
|
+
| `minDealScore` | number | 0 | Minimum deal score (0-100) |
|
|
267
|
+
| `screenshot` | object | - | Screenshot options |
|
|
268
|
+
| `excludeTags` | string[] | - | HTML tags to exclude |
|
|
269
|
+
| `excludeSelectors` | string[] | - | CSS selectors to exclude |
|
|
270
|
+
| `onlyMainContent` | boolean | true | Extract main content only |
|
|
271
|
+
| `headers` | object | - | Custom HTTP headers |
|
|
272
|
+
| `timeout` | number | 30000 | Request timeout in ms (max: 120000) |
|
|
273
|
+
| `outputMarkdown` | boolean | false | Convert content to Markdown (GFM) |
|
|
274
|
+
| `markdownBaseUrl` | string | - | Base URL for resolving relative URLs in markdown |
|
|
275
|
+
| `actions` | array | - | Browser actions to execute before scraping |
|
|
276
|
+
|
|
277
|
+
### Batch Scrape - Bulk URL Scraping
|
|
108
278
|
|
|
109
279
|
```typescript
|
|
110
280
|
// Scrape multiple URLs in one request (1-100 URLs)
|
|
@@ -128,16 +298,17 @@ const results = await client.waitForAll(batch.jobIds);
|
|
|
128
298
|
```
|
|
129
299
|
|
|
130
300
|
**Batch Options:**
|
|
131
|
-
| Option | Type | Default | Description |
|
|
132
|
-
|--------|------|---------|-------------|
|
|
133
|
-
| `urls` | array | required | 1-100 URL objects with optional overrides |
|
|
134
|
-
| `defaults` | object | - | Default options applied to all URLs |
|
|
135
|
-
| `priority` | number | 5 | Priority 1-10 (higher = faster) |
|
|
136
|
-
| `delay` | number | 0 | Delay between URLs (0-5000ms) |
|
|
137
|
-
| `webhookUrl` | string | - | Webhook for batch completion |
|
|
138
|
-
| `ref` | string | - | Custom reference ID for tracking |
|
|
139
301
|
|
|
140
|
-
|
|
302
|
+
| Option | Type | Default | Description |
|
|
303
|
+
| ------------ | ------ | -------- | ----------------------------------------- |
|
|
304
|
+
| `urls` | array | required | 1-100 URL objects with optional overrides |
|
|
305
|
+
| `defaults` | object | - | Default options applied to all URLs |
|
|
306
|
+
| `priority` | number | 5 | Priority 1-10 (higher = faster) |
|
|
307
|
+
| `delay` | number | 0 | Delay between URLs (0-5000ms) |
|
|
308
|
+
| `webhookUrl` | string | - | Webhook for batch completion |
|
|
309
|
+
| `ref` | string | - | Custom reference ID for tracking |
|
|
310
|
+
|
|
311
|
+
### Search - Web Search with AI
|
|
141
312
|
|
|
142
313
|
```typescript
|
|
143
314
|
// Basic search
|
|
@@ -184,17 +355,18 @@ const result = await client.searchAndWait({
|
|
|
184
355
|
```
|
|
185
356
|
|
|
186
357
|
**Search Options:**
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
|
190
|
-
| `
|
|
191
|
-
| `
|
|
192
|
-
| `
|
|
193
|
-
| `
|
|
194
|
-
| `
|
|
195
|
-
| `
|
|
196
|
-
| `
|
|
197
|
-
| `
|
|
358
|
+
|
|
359
|
+
| Option | Type | Default | Description |
|
|
360
|
+
| ------------------- | ------- | -------- | ----------------------------------------------- |
|
|
361
|
+
| `query` | string | required | Search query |
|
|
362
|
+
| `maxResults` | number | 10 | Results to return (1-100) |
|
|
363
|
+
| `useAiOptimization` | boolean | false | AI-enhance the query |
|
|
364
|
+
| `aiProvider` | string | "openai" | "openai" or "anthropic" |
|
|
365
|
+
| `aiModel` | string | - | Model ID (gpt-4o-mini, claude-3-5-sonnet, etc.) |
|
|
366
|
+
| `useDealScoring` | boolean | false | Score results for deal relevance |
|
|
367
|
+
| `autoScrape` | boolean | false | Auto-scrape top results |
|
|
368
|
+
| `autoScrapeLimit` | number | 3 | Number of results to scrape |
|
|
369
|
+
| `filters` | object | - | Location, language, date, domains |
|
|
198
370
|
|
|
199
371
|
### Crawl - Website Crawling
|
|
200
372
|
|
|
@@ -253,26 +425,30 @@ const job = await client.crawl.create({
|
|
|
253
425
|
- `custom` - No preset, use your own settings
|
|
254
426
|
|
|
255
427
|
**Crawl Options:**
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
|
259
|
-
| `
|
|
260
|
-
| `
|
|
261
|
-
| `
|
|
262
|
-
| `
|
|
263
|
-
| `
|
|
264
|
-
| `
|
|
265
|
-
| `
|
|
266
|
-
| `
|
|
267
|
-
| `
|
|
268
|
-
| `
|
|
269
|
-
| `
|
|
270
|
-
| `
|
|
271
|
-
| `
|
|
272
|
-
| `
|
|
273
|
-
| `
|
|
274
|
-
| `
|
|
275
|
-
| `
|
|
428
|
+
|
|
429
|
+
| Option | Type | Default | Description |
|
|
430
|
+
| ------------------ | -------- | -------- | ---------------------------------------------------- |
|
|
431
|
+
| `url` | string | required | Starting URL |
|
|
432
|
+
| `maxDepth` | number | 3 | Max crawl depth (1-5) |
|
|
433
|
+
| `maxPages` | number | 100 | Max pages to crawl (1-1000) |
|
|
434
|
+
| `detectSignals` | boolean | true | Detect prices, discounts |
|
|
435
|
+
| `extractDeal` | boolean | false | Extract deal info with AI |
|
|
436
|
+
| `minDealScore` | number | 30 | Min deal score threshold (0-100) |
|
|
437
|
+
| `categories` | array | - | Filter: courses, software, physical, services, other |
|
|
438
|
+
| `priceRange` | object | - | Filter: { min, max } price |
|
|
439
|
+
| `onlyHighQuality` | boolean | false | Only deals scoring 70+ |
|
|
440
|
+
| `allowedMerchants` | string[] | - | Only these merchants |
|
|
441
|
+
| `blockedMerchants` | string[] | - | Exclude these merchants |
|
|
442
|
+
| `webhookUrl` | string | - | Real-time notifications URL |
|
|
443
|
+
| `syncToDealup` | boolean | false | Auto-sync to DealUp |
|
|
444
|
+
| `template` | string | - | Job template to use |
|
|
445
|
+
| `useSmartRouting` | boolean | true | Auto-detect best settings |
|
|
446
|
+
| `priority` | string | - | Queue priority (Enterprise only) |
|
|
447
|
+
| `requireJS` | boolean | false | Force JavaScript rendering |
|
|
448
|
+
| `bypassAntiBot` | boolean | false | Advanced anti-bot techniques |
|
|
449
|
+
| `outputMarkdown` | boolean | false | Convert pages to Markdown (GFM) |
|
|
450
|
+
| `markdownBaseUrl` | string | - | Base URL for relative links in markdown |
|
|
451
|
+
| `noStore` | boolean | false | Zero Data Retention (Pro/Enterprise only) |
|
|
276
452
|
|
|
277
453
|
### Extract - LLM-Based Extraction
|
|
278
454
|
|
|
@@ -327,7 +503,7 @@ const query = client.dork.buildQuery({
|
|
|
327
503
|
// Returns: "laptop deals site:amazon.com intitle:discount"
|
|
328
504
|
```
|
|
329
505
|
|
|
330
|
-
### Agent - AI Autonomous Navigation
|
|
506
|
+
### Agent - AI Autonomous Navigation
|
|
331
507
|
|
|
332
508
|
Create AI agents that can navigate websites, interact with elements, and extract structured data using natural language instructions.
|
|
333
509
|
|
|
@@ -335,7 +511,8 @@ Create AI agents that can navigate websites, interact with elements, and extract
|
|
|
335
511
|
// Basic agent - navigate and extract data
|
|
336
512
|
const job = await client.agent.create({
|
|
337
513
|
url: "https://amazon.com",
|
|
338
|
-
prompt:
|
|
514
|
+
prompt:
|
|
515
|
+
"Search for wireless headphones under $50 and extract the top 5 results",
|
|
339
516
|
schema: {
|
|
340
517
|
type: "object",
|
|
341
518
|
properties: {
|
|
@@ -399,28 +576,64 @@ const job = await client.agent.withClaude(
|
|
|
399
576
|
```
|
|
400
577
|
|
|
401
578
|
**Agent Options:**
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
|
405
|
-
| `
|
|
406
|
-
| `
|
|
407
|
-
| `
|
|
408
|
-
| `
|
|
409
|
-
| `
|
|
410
|
-
| `
|
|
411
|
-
| `
|
|
412
|
-
| `
|
|
579
|
+
|
|
580
|
+
| Option | Type | Default | Description |
|
|
581
|
+
| ----------------- | ------- | -------- | --------------------------------------------- |
|
|
582
|
+
| `url` | string | required | Starting URL |
|
|
583
|
+
| `prompt` | string | required | Natural language instructions (10-2000 chars) |
|
|
584
|
+
| `schema` | object | - | JSON Schema for structured output |
|
|
585
|
+
| `maxSteps` | number | 10 | Maximum navigation steps (max: 25) |
|
|
586
|
+
| `actions` | array | - | Preset actions to execute first |
|
|
587
|
+
| `model` | string | "openai" | LLM provider: "openai" or "anthropic" |
|
|
588
|
+
| `timeout` | number | 30000 | Per-step timeout in ms (max: 60000) |
|
|
589
|
+
| `takeScreenshots` | boolean | false | Capture screenshot at each step |
|
|
590
|
+
| `onlyMainContent` | boolean | true | Extract main content only |
|
|
413
591
|
|
|
414
592
|
**Action Types:**
|
|
415
593
|
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
594
|
+
| Action | Key Parameters | Description |
|
|
595
|
+
|--------------|---------------------------------------------------|--------------------------|
|
|
596
|
+
| `click` | `selector`, `waitAfter?`, `button?`, `force?` | Click an element |
|
|
597
|
+
| `scroll` | `direction`, `amount?`, `smooth?` | Scroll page/to element |
|
|
598
|
+
| `write` | `selector`, `text`, `clearFirst?`, `typeDelay?` | Type text into input |
|
|
599
|
+
| `wait` | `milliseconds?`, `selector?`, `condition?` | Wait for time or element |
|
|
600
|
+
| `press` | `key`, `modifiers?` | Press keyboard key |
|
|
601
|
+
| `screenshot` | `fullPage?`, `selector?`, `name?` | Capture screenshot |
|
|
602
|
+
| `hover` | `selector`, `duration?` | Hover over element |
|
|
603
|
+
| `select` | `selector`, `value`, `byLabel?` | Select dropdown option |
|
|
604
|
+
|
|
605
|
+
**Action Resilience (supported by all actions):**
|
|
606
|
+
|
|
607
|
+
- `optional: boolean` - Don't fail job if action fails
|
|
608
|
+
- `retries: number` - Retry failed action (1-5 times)
|
|
609
|
+
- `delayBefore: number` - Delay before executing action (ms)
|
|
610
|
+
|
|
611
|
+
**Schema Generation:**
|
|
612
|
+
|
|
613
|
+
```typescript
|
|
614
|
+
// Generate JSON Schema from natural language
|
|
615
|
+
const schemaResult = await client.agent.generateSchema({
|
|
616
|
+
prompt: "Find e-commerce product deals with prices and discounts",
|
|
617
|
+
context: {
|
|
618
|
+
domains: ["e-commerce", "retail"], // Help AI understand context
|
|
619
|
+
dataTypes: ["prices", "discounts"], // Expected data types
|
|
620
|
+
format: "json", // Output format
|
|
621
|
+
clarifications: ["Include shipping info"] // Additional requirements
|
|
622
|
+
},
|
|
623
|
+
});
|
|
624
|
+
|
|
625
|
+
// Use the generated schema
|
|
626
|
+
const job = await client.agent.create({
|
|
627
|
+
url: "https://shop.example.com",
|
|
628
|
+
prompt: schemaResult.refinedPrompt, // AI-improved prompt
|
|
629
|
+
schema: schemaResult.schema, // Generated JSON Schema
|
|
630
|
+
});
|
|
631
|
+
|
|
632
|
+
// Check confidence - if low, ask clarifying questions
|
|
633
|
+
if (schemaResult.confidence < 0.7) {
|
|
634
|
+
console.log("Consider clarifying:", schemaResult.suggestedQuestions);
|
|
635
|
+
}
|
|
636
|
+
```
|
|
424
637
|
|
|
425
638
|
### Status - Job Management
|
|
426
639
|
|
|
@@ -512,6 +725,44 @@ await client.webhooks.delete(webhookId);
|
|
|
512
725
|
- `crawl.completed` - Crawl job finished
|
|
513
726
|
- `crawl.failed` - Crawl job failed
|
|
514
727
|
|
|
728
|
+
### Screenshots - Signed URL Management
|
|
729
|
+
|
|
730
|
+
Manage screenshot signed URLs with configurable TTL and on-demand refresh:
|
|
731
|
+
|
|
732
|
+
```typescript
|
|
733
|
+
// Refresh a signed URL before expiration
|
|
734
|
+
const refreshed = await client.screenshots.refresh({
|
|
735
|
+
path: "job_abc123/1234567890_nanoid_example.png",
|
|
736
|
+
ttl: 604800 // Optional: 7 days (defaults to tier default)
|
|
737
|
+
});
|
|
738
|
+
console.log(refreshed.url); // New signed URL
|
|
739
|
+
console.log(refreshed.expiresAt); // "2026-01-25T12:00:00Z"
|
|
740
|
+
console.log(refreshed.tierLimits); // { min: 3600, max: 604800, default: 604800 }
|
|
741
|
+
|
|
742
|
+
// Get tier-specific TTL limits
|
|
743
|
+
const limits = await client.screenshots.getLimits();
|
|
744
|
+
console.log(limits.tier); // "pro"
|
|
745
|
+
console.log(limits.limits); // { min: 3600, max: 604800, default: 604800 }
|
|
746
|
+
console.log(limits.formattedLimits); // { min: "1 hour", max: "7 days", default: "7 days" }
|
|
747
|
+
|
|
748
|
+
// Specify custom bucket (defaults to 'screenshots-private')
|
|
749
|
+
const refreshedFromBucket = await client.screenshots.refresh({
|
|
750
|
+
path: "job_xyz/screenshot.png",
|
|
751
|
+
ttl: 86400, // 1 day
|
|
752
|
+
bucket: "screenshots-private"
|
|
753
|
+
});
|
|
754
|
+
```
|
|
755
|
+
|
|
756
|
+
**TTL Limits by Tier:**
|
|
757
|
+
|
|
758
|
+
| Tier | Min TTL | Max TTL | Default TTL |
|
|
759
|
+
|------------|---------|---------|-------------|
|
|
760
|
+
| Free | 1 hour | 24 hours| 24 hours |
|
|
761
|
+
| Pro | 1 hour | 7 days | 7 days |
|
|
762
|
+
| Enterprise | 1 hour | 30 days | 7 days |
|
|
763
|
+
|
|
764
|
+
**Security Note:** All screenshots are private by default. Public URLs (Enterprise only) don't require refresh as they don't expire.
|
|
765
|
+
|
|
515
766
|
### Keys - API Key Management
|
|
516
767
|
|
|
517
768
|
```typescript
|
|
@@ -650,7 +901,7 @@ const result = await client.crawlAndWait({
|
|
|
650
901
|
});
|
|
651
902
|
```
|
|
652
903
|
|
|
653
|
-
## Field Selection
|
|
904
|
+
## Field Selection
|
|
654
905
|
|
|
655
906
|
Reduce response payload size by selecting only the fields you need:
|
|
656
907
|
|
|
@@ -670,6 +921,22 @@ const deals = await client.data.listDeals({
|
|
|
670
921
|
const jobs = await client.data.listJobs({
|
|
671
922
|
fields: ["id", "status", "result.deals.title", "result.deals.price"],
|
|
672
923
|
});
|
|
924
|
+
|
|
925
|
+
// Agent job field selection
|
|
926
|
+
const agentStatus = await client.status.get(agentJobId, {
|
|
927
|
+
fields: [
|
|
928
|
+
"id",
|
|
929
|
+
"status",
|
|
930
|
+
"data.extractedData", // Final extracted data
|
|
931
|
+
"data.steps.action", // Just action details (skip observations)
|
|
932
|
+
"data.totalSteps",
|
|
933
|
+
],
|
|
934
|
+
});
|
|
935
|
+
|
|
936
|
+
// Markdown content selection
|
|
937
|
+
const scrapeResult = await client.status.get(scrapeJobId, {
|
|
938
|
+
fields: ["id", "status", "result.parsed.markdown", "result.parsed.title"],
|
|
939
|
+
});
|
|
673
940
|
```
|
|
674
941
|
|
|
675
942
|
**Benefits:**
|
|
@@ -757,11 +1024,29 @@ import type {
|
|
|
757
1024
|
SearchJobResponse,
|
|
758
1025
|
BatchScrapeResponse,
|
|
759
1026
|
|
|
1027
|
+
// Action Types
|
|
1028
|
+
ActionInput,
|
|
1029
|
+
ClickAction,
|
|
1030
|
+
ScrollAction,
|
|
1031
|
+
WriteAction,
|
|
1032
|
+
WaitAction,
|
|
1033
|
+
PressAction,
|
|
1034
|
+
HoverAction,
|
|
1035
|
+
SelectAction,
|
|
1036
|
+
|
|
1037
|
+
// Screenshot Options & Responses
|
|
1038
|
+
ScreenshotOptions,
|
|
1039
|
+
ScreenshotResult,
|
|
1040
|
+
RefreshScreenshotOptions,
|
|
1041
|
+
ScreenshotRefreshResponse,
|
|
1042
|
+
ScreenshotLimitsResponse,
|
|
1043
|
+
|
|
760
1044
|
// Re-exports from @dealcrawl/shared
|
|
761
1045
|
ScrapeResult,
|
|
762
1046
|
CrawlResult,
|
|
763
1047
|
ExtractedDeal,
|
|
764
1048
|
Signal,
|
|
1049
|
+
ParsedPage, // Includes markdown field
|
|
765
1050
|
} from "@dealcrawl/sdk";
|
|
766
1051
|
```
|
|
767
1052
|
|
|
@@ -876,4 +1161,6 @@ const client = new DealCrawl({
|
|
|
876
1161
|
|
|
877
1162
|
## License
|
|
878
1163
|
|
|
1164
|
+
By @Shipfastgo
|
|
1165
|
+
|
|
879
1166
|
MIT © [DealUp](https://dealup.cc)
|