@dealcrawl/sdk 2.10.0 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +223 -37
- package/dist/index.d.mts +1145 -26
- package/dist/index.d.ts +1145 -26
- package/dist/index.js +631 -9
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +628 -10
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/README.md (CHANGED)

````diff
@@ -6,14 +6,25 @@ Official TypeScript SDK for the DealCrawl web scraping and crawling API.
 [](https://www.typescriptlang.org/)
 [](https://opensource.org/licenses/MIT)
 
-## What's New in January 2026 🎉
+## What's New in v2.11.0 (January 2026) 🎉
 
-
-
--
--
+### Breaking Changes ⚠️
+
+- **SearchOptions**: `maxResults` → `limit`, `autoScrape` → `scrapeResults`, `autoScrapeLimit` → `maxScrapeResults`
+- **BatchScrapeOptions**: `delay` → `delayMs`
+- **ExtractModel**: Updated to match API (`claude-3-5-haiku-20241022`, `claude-3-5-sonnet-20241022`, etc.)
+- **ApiKeyScope**: Removed `scrape:batch` and `search` (use `scrape` scope for both)
+
+### New Features
+
+- **📸 Screenshot Storage (SEC-011)** - Private by default with configurable signed URL TTL
+- **🎯 Priority Crawl System** - 3-tier queue system (high/medium/low) based on SmartFrontier deal scores
+- **🤖 AI Deal Extraction** - LLM-powered extraction with customizable score thresholds
 - **📝 Markdown Output** - Convert scraped content to clean Markdown with GFM support
-- **🎬 Browser Actions** - Execute preset actions (click, scroll, write, etc.) before scraping
+- **🎬 Browser Actions** - Execute preset actions (click, scroll, write, etc.) before scraping
+- **🔴 Real-Time SSE Events** - Track jobs in real-time with Server-Sent Events (browser only)
+- **🛡️ Batch Scrape** - Added `ignoreInvalidURLs` for Firecrawl-compatible error handling
+- **🔄 HTML to Markdown** - New `client.convert.htmlToMarkdown()` utility
 
 ## Features
 
````
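
For reference, the last bullet in the hunk above names `client.convert.htmlToMarkdown()` but the diff shown here never demonstrates it. A minimal sketch of how a call might look; the single-string-argument signature, the plain-string return, the constructor's `apiKey` option, and the env var name are assumptions, since the diff only names the method:

```typescript
import { DealCrawl } from "@dealcrawl/sdk";

// Constructor shape appears later in this diff; the apiKey option and
// DEALCRAWL_API_KEY env var are assumptions for illustration.
const client = new DealCrawl({ apiKey: process.env.DEALCRAWL_API_KEY! });

// Assumed signature: htmlToMarkdown(html: string) => Promise<string>.
const markdown = await client.convert.htmlToMarkdown(
  "<h1>Deals</h1><ul><li><strong>50% off</strong> laptops</li></ul>"
);

console.log(markdown); // expected shape (assumption): "# Deals\n\n- **50% off** laptops"
```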
````diff
@@ -63,6 +74,130 @@ console.log(result.data.parsed.markdown); // Markdown content
 console.log(result.data.screenshot); // Public screenshot URL
 ```
 
+## Real-Time Events (SSE) - Browser Only 🔴
+
+Track jobs in real-time using Server-Sent Events (SSE). **Browser only** - for Node.js, use polling via `client.waitForResult()`.
+
+```typescript
+// 1. Generate SSE token (required for EventSource)
+const { token, expiresAt } = await client.auth.generateSSEToken();
+console.log(`Token expires at: ${expiresAt}`); // 5 minutes
+
+// 2. Subscribe to all events
+const eventSource = client.events.subscribe(token, {
+  onEvent: (event) => {
+    console.log('Event:', event.type);
+    const data = JSON.parse(event.data);
+    console.log('Data:', data);
+  },
+  onError: (error) => {
+    console.error('SSE error:', error);
+  }
+});
+
+// 3. Listen for specific event types
+eventSource.addEventListener('job.completed', (event) => {
+  const data = JSON.parse(event.data);
+  console.log('Job completed!', data.summary);
+  eventSource.close(); // Clean up
+});
+
+eventSource.addEventListener('job.progress', (event) => {
+  const data = JSON.parse(event.data);
+  console.log(`Progress: ${data.progress}%`);
+});
+
+eventSource.addEventListener('deal.found', (event) => {
+  const data = JSON.parse(event.data);
+  console.log('Deal found!', data.title, data.score);
+});
+
+// 4. Subscribe to specific job only
+const job = await client.scrape.create({ url: "https://example.com" });
+const jobToken = await client.auth.generateSSEToken({ jobId: job.jobId });
+
+const jobEvents = client.events.subscribeToJob(job.jobId, jobToken.token, {
+  onEvent: (event) => {
+    const data = JSON.parse(event.data);
+    console.log(`[${event.type}]`, data);
+  }
+});
+
+// 5. Check connection limits before subscribing
+const limits = await client.auth.getLimits();
+console.log(`Available SSE connections: ${limits.sse.available}/${limits.sse.maxConnections}`);
+// Free: 2 concurrent, Pro: 10 concurrent, Enterprise: 50 concurrent
+
+// 6. Helper: Wait for completion via SSE
+const result = await client.events.waitForCompletion(job.jobId, (progress) => {
+  console.log(`Progress: ${progress}%`);
+});
+```
+
+**Available Event Types:**
+
+| Event Type | Description |
+| ---------- | ----------- |
+| `job.created` | Job was created |
+| `job.queued` | Job entered queue |
+| `job.started` | Worker picked up job |
+| `job.progress` | Progress update (includes `progress`, `stats`, `eta`) |
+| `job.status` | Status changed |
+| `job.completed` | Job finished successfully |
+| `job.failed` | Job failed (includes error details) |
+| `job.cancelled` | Job was cancelled |
+| `job.log` | Important log message |
+| `job.metric` | Performance/business metric |
+| `job.alert` | Important alert (quota warning, etc.) |
+| `job.checkpoint` | Checkpoint saved (for resumable jobs) |
+| `deal.found` | Deal detected during crawl |
+| `deal.validated` | Deal scored/validated |
+| `ping` | Keepalive (every 15 seconds) |
+| `connection.open` | SSE connection established |
+| `connection.close` | SSE connection closing |
+| `error` | Error occurred |
+
+**TypeScript Support:**
+
+```typescript
+import type {
+  SSEEvent,
+  JobProgressEvent,
+  JobCompletedEvent,
+  DealFoundEvent
+} from "@dealcrawl/sdk";
+
+// Type-safe event handling
+eventSource.addEventListener('job.progress', (event: MessageEvent) => {
+  const data = JSON.parse(event.data) as JobProgressEvent['data'];
+  console.log(`Progress: ${data.progress}%`);
+  console.log(`ETA: ${data.eta?.remainingFormatted}`);
+  console.log(`Deals found: ${data.stats?.dealsFound}`);
+});
+
+eventSource.addEventListener('job.completed', (event: MessageEvent) => {
+  const data = JSON.parse(event.data) as JobCompletedEvent['data'];
+  console.log('Completed in:', data.durationMs, 'ms');
+  console.log('Summary:', data.summary);
+});
+```
+
+**Features:**
+
+- ✅ Automatic reconnection on disconnect
+- ✅ Event replay via `Last-Event-ID` (up to 50 missed events)
+- ✅ Keepalive pings every 15 seconds
+- ✅ Max connection time: 1 hour (auto-reconnect after)
+- ✅ Multi-tenant isolation (only see your events)
+- ✅ Token-based auth (works with EventSource)
+
+**Security:**
+
+- Tokens expire after 5 minutes
+- Tokens can be restricted to specific jobs
+- Tokens stored in Redis (revocable)
+- Connection limits per tier (Free: 2, Pro: 10, Enterprise: 50)
+
 ## January 2026 Features in Detail
 
 ### 📸 Screenshot Storage (SEC-011)
````
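
The SSE section in the hunk above is browser-only and points Node.js users at polling instead. A minimal sketch of that path, assuming the `client` from the setup example; `client.waitForResult()` is named in the SSE intro, and the result fields come from this diff's quick-start context lines, but the bare one-argument call is an assumption since no polling options appear in the diff:

```typescript
// Node.js alternative to SSE: create a job, then poll until it finishes.
const job = await client.scrape.create({ url: "https://example.com" });

// Assumed call shape; the diff names waitForResult() without showing options.
const result = await client.waitForResult(job.jobId);

// Field names as in the quick-start context lines of this diff.
console.log(result.data.parsed.markdown); // Markdown content
console.log(result.data.screenshot);      // Screenshot URL
```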
````diff
@@ -288,6 +423,8 @@ const batch = await client.scrape.batch({
     detectSignals: true,
     timeout: 30000,
   },
+  delayMs: 500,            // ✨ Was: delay
+  ignoreInvalidURLs: true, // ✨ NEW: Skip invalid URLs instead of failing
 });
 
 // Get batch status
````
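
The `ignoreInvalidURLs` flag added above is described as Firecrawl-compatible error handling. A sketch of consuming it, assuming the batch response reports skipped entries in an `invalidURLs` array the way Firecrawl's batch endpoint does; that field name is not confirmed anywhere in this diff, so check the SDK's batch response type:

```typescript
// With ignoreInvalidURLs: true, malformed entries are skipped instead of
// rejecting the whole batch.
const batch = await client.scrape.batch({
  urls: [
    { url: "https://example.com/deals" },
    { url: "not-a-valid-url" }, // would previously fail the entire request
  ],
  ignoreInvalidURLs: true,
});

// Assumption: skipped inputs are surfaced Firecrawl-style under invalidURLs.
if (batch.invalidURLs?.length) {
  console.warn("Skipped invalid URLs:", batch.invalidURLs);
}

const results = await client.waitForAll(batch.jobIds);
```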
````diff
@@ -299,14 +436,14 @@ const results = await client.waitForAll(batch.jobIds);
 
 **Batch Options:**
 
-| Option
-|
-| `urls`
-| `defaults`
-| `priority`
-| `
-| `webhookUrl`
-| `
+| Option | Type | Default | Description |
+| ------------------ | ------- | -------- | ---------------------------------------------------- |
+| `urls` | array | required | 1-100 URL objects with optional overrides |
+| `defaults` | object | - | Default options applied to all URLs |
+| `priority` | number | 5 | Priority 1-10 (higher = faster) |
+| `delayMs` | number | 0 | Delay between URLs (0-5000ms) |
+| `webhookUrl` | string | - | Webhook for batch completion |
+| `ignoreInvalidURLs`| boolean | false | Continue on invalid URLs (Firecrawl-compatible) |
 
 ### Search - Web Search with AI
 
````
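
The `urls` row above says each of the 1-100 entries may carry its own overrides on top of `defaults`. A sketch of that composition; the `defaults` fields are the ones shown earlier in this diff, while the per-URL `timeout` override and the merge behavior are assumptions based on the "optional overrides" wording:

```typescript
// defaults apply to every URL; an entry's own fields win where they overlap
// (assumed merge behavior, per the "optional overrides" wording above).
const batch = await client.scrape.batch({
  urls: [
    { url: "https://example.com/deals" },                   // uses defaults as-is
    { url: "https://example.com/coupons", timeout: 60000 }, // per-URL override
  ],
  defaults: {
    detectSignals: true,
    timeout: 30000,
  },
  priority: 7,  // 1-10, higher = faster (default 5)
  delayMs: 500, // 0-5000ms between URLs
});
```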
````diff
@@ -314,7 +451,7 @@ const results = await client.waitForAll(batch.jobIds);
 // Basic search
 const job = await client.search.create({
   query: "laptop deals black friday",
-  maxResults: 20,
+  limit: 20, // ✨ Was: maxResults
 });
 
 // AI-optimized search with deal scoring
````
````diff
@@ -329,8 +466,8 @@ const job = await client.search.create({
 // Search with auto-scraping of results
 const job = await client.search.create({
   query: "promo codes electronics",
-  autoScrape: true,
-  autoScrapeLimit: 5,
+  scrapeResults: true,  // ✨ Was: autoScrape
+  maxScrapeResults: 5,  // ✨ Was: autoScrapeLimit
 });
 
 // Filtered search
````
````diff
@@ -340,7 +477,7 @@ const job = await client.search.create({
     location: "fr",
     language: "fr",
     dateRange: "month",
-
+    domain: "amazon.fr", // Single domain filter
   },
 });
 
````
````diff
@@ -359,14 +496,14 @@ const result = await client.searchAndWait({
 | Option | Type | Default | Description |
 | ------------------- | ------- | -------- | ----------------------------------------------- |
 | `query` | string | required | Search query |
-| `
+| `limit` | number | 10 | Results to return (1-100) |
 | `useAiOptimization` | boolean | false | AI-enhance the query |
 | `aiProvider` | string | "openai" | "openai" or "anthropic" |
 | `aiModel` | string | - | Model ID (gpt-4o-mini, claude-3-5-sonnet, etc.) |
 | `useDealScoring` | boolean | false | Score results for deal relevance |
-| `
-| `
-| `filters` | object | - | Location, language, date,
+| `scrapeResults` | boolean | false | Auto-scrape top results |
+| `maxScrapeResults` | number | 5 | Number of results to scrape (1-10) |
+| `filters` | object | - | Location, language, date, domain |
 
 ### Crawl - Website Crawling
 
````
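
The search options table above lists several options the snippets never combine. A sketch pairing AI query optimization with deal scoring and auto-scraping through `client.searchAndWait()` (named in this hunk's header); option names, defaults, and filter fields come from the table and snippets above, while the shape of the awaited result is an assumption:

```typescript
// One call: AI-enhanced query, deal-relevance scoring, and scraping of the
// top hits. Option names are from the Search Options table above.
const result = await client.searchAndWait({
  query: "mechanical keyboard discount",
  limit: 10,               // results to return (1-100), default 10
  useAiOptimization: true, // AI-enhance the query
  aiProvider: "openai",    // "openai" or "anthropic"
  useDealScoring: true,    // score results for deal relevance
  scrapeResults: true,     // auto-scrape top results
  maxScrapeResults: 3,     // 1-10, default 5
  filters: {
    location: "fr",
    language: "fr",
    dateRange: "month",
    domain: "amazon.fr",
  },
});

// Assumption: completed jobs expose their payload under result.data, as the
// scrape examples in this README do.
console.log(result.data);
```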
````diff
@@ -793,20 +930,18 @@ const stats = await client.keys.getStats(keyId, { days: 30 });
 
 **Available Scopes:**
 
-| Scope | Endpoint
-| ----------------- |
-| `scrape` | `POST /v1/scrape`
-| `
-| `
-| `
-| `
-| `
-| `
-| `
-| `
-| `
-| `keys:manage` | `/v1/keys` | Manage API keys |
-| `webhooks:manage` | `/v1/webhooks` | Manage webhooks |
+| Scope | Endpoint | Description |
+| ----------------- | ------------------------------------- | ------------------------- |
+| `scrape` | `POST /v1/scrape`, `/v1/scrape/batch` | Create scrape jobs |
+| `crawl` | `POST /v1/crawl` | Create crawl jobs |
+| `dork` | `POST /v1/dork` | Create dork searches |
+| `extract` | `POST /v1/extract` | Create extraction jobs |
+| `agent` | `POST /v1/agent` | Create AI agent jobs |
+| `status` | `GET /v1/status/:id` | Read job status |
+| `data:read` | `GET /v1/data/*` | Read jobs/deals |
+| `data:export` | `GET /v1/data/export` | Export data |
+| `keys:manage` | `/v1/keys` | Manage API keys |
+| `webhooks:manage` | `/v1/webhooks` | Manage webhooks |
 
 **Scope Examples:**
 
````
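
The scopes table above lends itself to least-privilege keys. A sketch of a read-only key; the `keys.create({ name, scopes })` shape matches the Scope Examples hunk below, and the scope strings come from the table, but the specific combination is illustrative rather than taken from the README:

```typescript
// A key for a reporting dashboard: it can poll jobs and read data, but
// cannot create scrape/crawl jobs or manage keys and webhooks.
await client.keys.create({
  name: "Read-Only Dashboard Key",
  scopes: [
    "status",    // GET /v1/status/:id
    "data:read", // GET /v1/data/*
  ],
});
```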
````diff
@@ -832,7 +967,6 @@ await client.keys.create({
     "dork",
     "extract",
     "agent",
-    "search",
     "status",
     "data:read",
     "data:export",
````
````diff
@@ -1152,6 +1286,58 @@ const client = new DealCrawl({
 
 > **Warning:** Never expose your API key in client-side code. Use a backend proxy or edge function.
 
+## Migration Guide (v2.10.x → v2.11.0)
+
+### SearchOptions
+
+```diff
+const result = await client.search.create({
+  query: "laptop deals",
+-  maxResults: 20,
++  limit: 20,
+-  autoScrape: true,
++  scrapeResults: true,
+-  autoScrapeLimit: 5,
++  maxScrapeResults: 5,
+});
+```
+
+### BatchScrapeOptions
+
+```diff
+const batch = await client.scrape.batch({
+  urls: [...],
+-  delay: 500,
++  delayMs: 500,
++  ignoreInvalidURLs: true, // NEW: Firecrawl-compatible
+});
+```
+
+### ExtractModel
+
+```diff
+const job = await client.extract.create({
+  url: "...",
+-  model: "claude-3-haiku",
++  model: "claude-3-5-haiku-20241022",
+});
+```
+
+### ApiKeyScope
+
+```diff
+await client.keys.create({
+  name: "My Key",
+  scopes: [
+    "scrape",
+-    "scrape:batch", // REMOVED - use "scrape" instead
+-    "search",       // REMOVED - use "scrape" instead
+    "crawl",
+    "status",
+  ],
+});
+```
+
 ## Compatibility
 
 - **Node.js**: 18.0+
````