@dealcrawl/sdk 2.10.0 → 2.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +246 -37
- package/dist/index.d.mts +1174 -50
- package/dist/index.d.ts +1174 -50
- package/dist/index.js +668 -52
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +665 -53
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -6,14 +6,48 @@ Official TypeScript SDK for the DealCrawl web scraping and crawling API.
  [](https://www.typescriptlang.org/)
  [](https://opensource.org/licenses/MIT)

- ## What's New in January 2026
+ ## What's New in v2.11.1 (January 2026) 🐛

+ ### Bug Fixes
+
+ - **DataResource**: Fixed syntax error in `getDealsByCategory()` method (unclosed docstring + duplicate line)
+ - **SDK-API Alignment**: Verified 87% endpoint coverage with detailed alignment report
+
+ ### Known Gaps
+
+ The following API endpoints do not have SDK methods yet (see [API-SDK Alignment Report](../../docs/API-SDK-ALIGNMENT-REPORT.md)):
+
+ - `GET /v1/status/:jobId/errors` - Get job errors
+ - `GET /v1/data/jobs/:jobId` - Get full job details
+ - `GET /v1/data/jobs/:jobId/result` - Get job result
+ - `GET /v1/data/jobs/:jobId/export` - Export job in multiple formats
+ - `POST /v1/webhooks/:id/rotate` - Rotate webhook secret
+ - `GET /v1/webhooks/:id/secret-status` - Get webhook secret status
+ - `POST /v1/webhooks/verify` - Verify webhook signature
+
+ These methods will be added in a future release.
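Until those methods land, the gap endpoints can be called over plain HTTP. The sketch below is illustrative only: the base URL is a placeholder and the `Authorization: Bearer` header is an assumption, so check the API reference for the real host and auth scheme.

```typescript
// Hypothetical direct call to a not-yet-wrapped endpoint.
// Placeholders: the host (api.dealcrawl.example) and Bearer auth are assumptions.
const jobId = "job_123";
const res = await fetch(`https://api.dealcrawl.example/v1/status/${jobId}/errors`, {
  headers: { Authorization: `Bearer ${process.env.DEALCRAWL_API_KEY}` },
});
if (!res.ok) throw new Error(`Request failed: ${res.status}`);
console.log(await res.json()); // error-list shape is not documented in this diff
```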
+
+ ---
+
+ ## What's New in v2.11.0 (January 2026) 🎉
+
+ ### Breaking Changes ⚠️
+
+ - **SearchOptions**: `maxResults` → `limit`, `autoScrape` → `scrapeResults`, `autoScrapeLimit` → `maxScrapeResults`
+ - **BatchScrapeOptions**: `delay` → `delayMs`
+ - **ExtractModel**: Updated to match API (`claude-3-5-haiku-20241022`, `claude-3-5-sonnet-20241022`, etc.)
+ - **ApiKeyScope**: Removed `scrape:batch` and `search` (use `scrape` scope for both)
+
+ ### New Features
+
+ - **📸 Screenshot Storage (SEC-011)** - Private by default with configurable signed URL TTL
+ - **🎯 Priority Crawl System** - 3-tier queue system (high/medium/low) based on SmartFrontier deal scores
+ - **🤖 AI Deal Extraction** - LLM-powered extraction with customizable score thresholds
  - **📝 Markdown Output** - Convert scraped content to clean Markdown with GFM support
- - **🎬 Browser Actions** - Execute preset actions (click, scroll, write, etc.) before scraping
+ - **🎬 Browser Actions** - Execute preset actions (click, scroll, write, etc.) before scraping
+ - **🔴 Real-Time SSE Events** - Track jobs in real-time with Server-Sent Events (browser only)
+ - **🛡️ Batch Scrape** - Added `ignoreInvalidURLs` for Firecrawl-compatible error handling
+ - **🔄 HTML to Markdown** - New `client.convert.htmlToMarkdown()` utility

  ## Features
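The v2.11.0 notes above name `client.convert.htmlToMarkdown()` without showing its signature. A minimal sketch, assuming it takes an HTML string and resolves to a Markdown string (both assumptions):

```typescript
// Assumed signature: htmlToMarkdown(html: string) => Promise<string>.
const markdown = await client.convert.htmlToMarkdown(
  "<h1>50% off laptops</h1><p>Today <strong>only</strong>!</p>"
);
console.log(markdown); // e.g. "# 50% off laptops\n\nToday **only**!" (assumed output)
```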
@@ -63,6 +97,130 @@ console.log(result.data.parsed.markdown); // Markdown content
  console.log(result.data.screenshot); // Public screenshot URL
  ```

+ ## Real-Time Events (SSE) - Browser Only 🔴
+
+ Track jobs in real-time using Server-Sent Events (SSE). **Browser only** - for Node.js, use polling via `client.waitForResult()`.
+
+ ```typescript
+ // 1. Generate SSE token (required for EventSource)
+ const { token, expiresAt } = await client.auth.generateSSEToken();
+ console.log(`Token expires at: ${expiresAt}`); // 5 minutes
+
+ // 2. Subscribe to all events
+ const eventSource = client.events.subscribe(token, {
+   onEvent: (event) => {
+     console.log('Event:', event.type);
+     const data = JSON.parse(event.data);
+     console.log('Data:', data);
+   },
+   onError: (error) => {
+     console.error('SSE error:', error);
+   }
+ });
+
+ // 3. Listen for specific event types
+ eventSource.addEventListener('job.completed', (event) => {
+   const data = JSON.parse(event.data);
+   console.log('Job completed!', data.summary);
+   eventSource.close(); // Clean up
+ });
+
+ eventSource.addEventListener('job.progress', (event) => {
+   const data = JSON.parse(event.data);
+   console.log(`Progress: ${data.progress}%`);
+ });
+
+ eventSource.addEventListener('deal.found', (event) => {
+   const data = JSON.parse(event.data);
+   console.log('Deal found!', data.title, data.score);
+ });
+
+ // 4. Subscribe to specific job only
+ const job = await client.scrape.create({ url: "https://example.com" });
+ const jobToken = await client.auth.generateSSEToken({ jobId: job.jobId });
+
+ const jobEvents = client.events.subscribeToJob(job.jobId, jobToken.token, {
+   onEvent: (event) => {
+     const data = JSON.parse(event.data);
+     console.log(`[${event.type}]`, data);
+   }
+ });
+
+ // 5. Check connection limits before subscribing
+ const limits = await client.auth.getLimits();
+ console.log(`Available SSE connections: ${limits.sse.available}/${limits.sse.maxConnections}`);
+ // Free: 2 concurrent, Pro: 10 concurrent, Enterprise: 50 concurrent
+
+ // 6. Helper: Wait for completion via SSE
+ const result = await client.events.waitForCompletion(job.jobId, (progress) => {
+   console.log(`Progress: ${progress}%`);
+ });
+ ```
+
+ **Available Event Types:**
+
+ | Event Type | Description |
+ | ---------- | ----------- |
+ | `job.created` | Job was created |
+ | `job.queued` | Job entered queue |
+ | `job.started` | Worker picked up job |
+ | `job.progress` | Progress update (includes `progress`, `stats`, `eta`) |
+ | `job.status` | Status changed |
+ | `job.completed` | Job finished successfully |
+ | `job.failed` | Job failed (includes error details) |
+ | `job.cancelled` | Job was cancelled |
+ | `job.log` | Important log message |
+ | `job.metric` | Performance/business metric |
+ | `job.alert` | Important alert (quota warning, etc.) |
+ | `job.checkpoint` | Checkpoint saved (for resumable jobs) |
+ | `deal.found` | Deal detected during crawl |
+ | `deal.validated` | Deal scored/validated |
+ | `ping` | Keepalive (every 15 seconds) |
+ | `connection.open` | SSE connection established |
+ | `connection.close` | SSE connection closing |
+ | `error` | Error occurred |
+
+ **TypeScript Support:**
+
+ ```typescript
+ import type {
+   SSEEvent,
+   JobProgressEvent,
+   JobCompletedEvent,
+   DealFoundEvent
+ } from "@dealcrawl/sdk";
+
+ // Type-safe event handling
+ eventSource.addEventListener('job.progress', (event: MessageEvent) => {
+   const data = JSON.parse(event.data) as JobProgressEvent['data'];
+   console.log(`Progress: ${data.progress}%`);
+   console.log(`ETA: ${data.eta?.remainingFormatted}`);
+   console.log(`Deals found: ${data.stats?.dealsFound}`);
+ });
+
+ eventSource.addEventListener('job.completed', (event: MessageEvent) => {
+   const data = JSON.parse(event.data) as JobCompletedEvent['data'];
+   console.log('Completed in:', data.durationMs, 'ms');
+   console.log('Summary:', data.summary);
+ });
+ ```
+
+ **Features:**
+
+ - ✅ Automatic reconnection on disconnect
+ - ✅ Event replay via `Last-Event-ID` (up to 50 missed events)
+ - ✅ Keepalive pings every 15 seconds
+ - ✅ Max connection time: 1 hour (auto-reconnect after)
+ - ✅ Multi-tenant isolation (only see your events)
+ - ✅ Token-based auth (works with EventSource)
+
+ **Security:**
+
+ - Tokens expire after 5 minutes
+ - Tokens can be restricted to specific jobs
+ - Tokens stored in Redis (revocable)
+ - Connection limits per tier (Free: 2, Pro: 10, Enterprise: 50)

  ## January 2026 Features in Detail

  ### 📸 Screenshot Storage (SEC-011)
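For Node.js, where `EventSource` is unavailable, the section above points to polling with `client.waitForResult()`. A minimal sketch of that pattern; the result's exact shape is not shown in this diff:

```typescript
// Node.js alternative to SSE: create a job, then poll until it reaches a
// terminal state via the waitForResult() helper referenced above.
const job = await client.scrape.create({ url: "https://example.com" });
const result = await client.waitForResult(job.jobId);
console.log(result); // result shape is not documented in this diff
```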
@@ -288,6 +446,8 @@ const batch = await client.scrape.batch({
      detectSignals: true,
      timeout: 30000,
    },
+   delayMs: 500, // ✨ Was: delay
+   ignoreInvalidURLs: true, // ✨ NEW: Skip invalid URLs instead of failing
  });

  // Get batch status
@@ -299,14 +459,14 @@ const results = await client.waitForAll(batch.jobIds);

  **Batch Options:**

+ | Option              | Type    | Default  | Description                                     |
+ | ------------------- | ------- | -------- | ----------------------------------------------- |
+ | `urls`              | array   | required | 1-100 URL objects with optional overrides       |
+ | `defaults`          | object  | -        | Default options applied to all URLs             |
+ | `priority`          | number  | 5        | Priority 1-10 (higher = faster)                 |
+ | `delayMs`           | number  | 0        | Delay between URLs (0-5000ms)                   |
+ | `webhookUrl`        | string  | -        | Webhook for batch completion                    |
+ | `ignoreInvalidURLs` | boolean | false    | Continue on invalid URLs (Firecrawl-compatible) |

  ### Search - Web Search with AI
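The `urls` row above says entries are URL objects with optional overrides. A sketch of that, assuming a per-URL entry accepts the same option fields as `defaults` (the override shape is an assumption; only the option names in the table are confirmed):

```typescript
// Sketch: shared defaults plus a per-URL override (override shape assumed).
const batch = await client.scrape.batch({
  urls: [
    { url: "https://example.com/deals" },
    { url: "https://example.org/promo", timeout: 60000 }, // assumed per-URL override
  ],
  defaults: { timeout: 30000 },
  priority: 7,             // 1-10, higher = faster
  delayMs: 250,            // pause between URLs (0-5000ms)
  ignoreInvalidURLs: true, // skip invalid URLs instead of failing the batch
});
```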
@@ -314,7 +474,7 @@ const results = await client.waitForAll(batch.jobIds);
  // Basic search
  const job = await client.search.create({
    query: "laptop deals black friday",
-   maxResults: 20,
+   limit: 20, // ✨ Was: maxResults
  });

  // AI-optimized search with deal scoring
@@ -329,8 +489,8 @@ const job = await client.search.create({
  // Search with auto-scraping of results
  const job = await client.search.create({
    query: "promo codes electronics",
-   autoScrape: true,
-   autoScrapeLimit: 5,
+   scrapeResults: true, // ✨ Was: autoScrape
+   maxScrapeResults: 5, // ✨ Was: autoScrapeLimit
  });

  // Filtered search
@@ -340,7 +500,7 @@ const job = await client.search.create({
      location: "fr",
      language: "fr",
      dateRange: "month",
+     domain: "amazon.fr", // Single domain filter
    },
  });

@@ -359,14 +519,14 @@ const result = await client.searchAndWait({
  | Option              | Type    | Default  | Description                                     |
  | ------------------- | ------- | -------- | ----------------------------------------------- |
  | `query`             | string  | required | Search query                                    |
+ | `limit`             | number  | 10       | Results to return (1-100)                       |
  | `useAiOptimization` | boolean | false    | AI-enhance the query                            |
  | `aiProvider`        | string  | "openai" | "openai" or "anthropic"                         |
  | `aiModel`           | string  | -        | Model ID (gpt-4o-mini, claude-3-5-sonnet, etc.) |
  | `useDealScoring`    | boolean | false    | Score results for deal relevance                |
+ | `scrapeResults`     | boolean | false    | Auto-scrape top results                         |
+ | `maxScrapeResults`  | number  | 5        | Number of results to scrape (1-10)              |
+ | `filters`           | object  | -        | Location, language, date, domain                |

  ### Crawl - Website Crawling

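Combining the renamed options, a sketch using `client.searchAndWait()` from the hunk header above; every option name comes from the table, while the logged result's shape is not shown in this diff:

```typescript
// Sketch: one-shot search with the v2.11.0 option names from the table above.
const result = await client.searchAndWait({
  query: "gaming monitor deals",
  limit: 25,            // was maxResults
  useDealScoring: true,
  scrapeResults: true,  // was autoScrape
  maxScrapeResults: 3,  // was autoScrapeLimit
  filters: { location: "us", language: "en", dateRange: "month", domain: "bestbuy.com" },
});
console.log(result); // result shape is not documented in this diff
```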
@@ -793,20 +953,18 @@ const stats = await client.keys.getStats(keyId, { days: 30 });

  **Available Scopes:**

+ | Scope             | Endpoint                              | Description            |
+ | ----------------- | ------------------------------------- | ---------------------- |
+ | `scrape`          | `POST /v1/scrape`, `/v1/scrape/batch` | Create scrape jobs     |
+ | `crawl`           | `POST /v1/crawl`                      | Create crawl jobs      |
+ | `dork`            | `POST /v1/dork`                       | Create dork searches   |
+ | `extract`         | `POST /v1/extract`                    | Create extraction jobs |
+ | `agent`           | `POST /v1/agent`                      | Create AI agent jobs   |
+ | `status`          | `GET /v1/status/:id`                  | Read job status        |
+ | `data:read`       | `GET /v1/data/*`                      | Read jobs/deals        |
+ | `data:export`     | `GET /v1/data/export`                 | Export data            |
+ | `keys:manage`     | `/v1/keys`                            | Manage API keys        |
+ | `webhooks:manage` | `/v1/webhooks`                        | Manage webhooks        |

  **Scope Examples:**

@@ -832,7 +990,6 @@ await client.keys.create({
    "dork",
    "extract",
    "agent",
-   "search",
    "status",
    "data:read",
    "data:export",
@@ -1152,6 +1309,58 @@ const client = new DealCrawl({

  > **Warning:** Never expose your API key in client-side code. Use a backend proxy or edge function.

+ ## Migration Guide (v2.10.x → v2.11.0)
+
+ ### SearchOptions
+
+ ```diff
+ const result = await client.search.create({
+   query: "laptop deals",
+ - maxResults: 20,
+ + limit: 20,
+ - autoScrape: true,
+ + scrapeResults: true,
+ - autoScrapeLimit: 5,
+ + maxScrapeResults: 5,
+ });
+ ```
+
+ ### BatchScrapeOptions
+
+ ```diff
+ const batch = await client.scrape.batch({
+   urls: [...],
+ - delay: 500,
+ + delayMs: 500,
+ + ignoreInvalidURLs: true, // NEW: Firecrawl-compatible
+ });
+ ```
+
+ ### ExtractModel
+
+ ```diff
+ const job = await client.extract.create({
+   url: "...",
+ - model: "claude-3-haiku",
+ + model: "claude-3-5-haiku-20241022",
+ });
+ ```
+
+ ### ApiKeyScope
+
+ ```diff
+ await client.keys.create({
+   name: "My Key",
+   scopes: [
+     "scrape",
+ -   "scrape:batch", // REMOVED - use "scrape" instead
+ -   "search", // REMOVED - use "scrape" instead
+     "crawl",
+     "status",
+   ],
+ });
+ ```

  ## Compatibility

  - **Node.js**: 18.0+