@dealcrawl/sdk 2.4.0 → 2.5.0

package/README.md CHANGED
@@ -8,7 +8,7 @@ Official TypeScript SDK for the DealCrawl web scraping and crawling API.
 
  ## Features
 
- - 🚀 **Full API Coverage** - Access all 45 DealCrawl API endpoints
+ - 🚀 **Full API Coverage** - Access all 50+ DealCrawl API endpoints
  - 📦 **Zero Dependencies** - Uses native `fetch`, works everywhere
  - 🔒 **Type-Safe** - Complete TypeScript definitions
  - ⚡ **Automatic Retries** - Built-in retry logic with exponential backoff
@@ -89,14 +89,20 @@ const job = await client.scrape.withScreenshot("https://example.com", {
  | Option | Type | Default | Description |
  |--------|------|---------|-------------|
  | `url` | string | required | URL to scrape |
+ | `noStore` | boolean | false | Zero Data Retention - don't save results (Pro/Enterprise) |
  | `detectSignals` | boolean | true | Detect prices, discounts, urgency |
  | `extractDeal` | boolean | false | Extract deal information |
+ | `extractMultipleDeals` | boolean | false | Extract multiple deals from list pages |
+ | `maxDeals` | number | 20 | Max deals to extract (max: 50) |
  | `extractWithAI` | boolean | false | Use AI for extraction |
  | `useAdvancedModel` | boolean | false | Use GPT-4o (higher cost) |
  | `minDealScore` | number | 0 | Minimum deal score (0-100) |
  | `screenshot` | object | - | Screenshot options |
  | `excludeTags` | string[] | - | HTML tags to exclude |
+ | `excludeSelectors` | string[] | - | CSS selectors to exclude |
  | `onlyMainContent` | boolean | true | Extract main content only |
+ | `headers` | object | - | Custom HTTP headers |
+ | `timeout` | number | 30000 | Request timeout in ms (max: 120000) |
 
  ### Batch Scrape - Bulk URL Scraping (NEW)
 
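The new options above combine in a single request. A minimal sketch of a zero-retention, multi-deal scrape; note that `client.scrape.create` is an assumed entry-point name, since this diff only shows `client.scrape.withScreenshot`:

```typescript
// Sketch only: `client.scrape.create` is an assumed method name.
const job = await client.scrape.create({
  url: "https://example.com/deals",
  noStore: true,              // Zero Data Retention (Pro/Enterprise)
  extractMultipleDeals: true, // pull every deal from a list page
  maxDeals: 50,               // the option table caps this at 50
  excludeSelectors: [".cookie-banner", "#newsletter-modal"],
  headers: { "Accept-Language": "en-US" },
  timeout: 60000,             // ms; the table caps this at 120000
});
```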
@@ -215,6 +221,27 @@ console.log(analysis.estimatedPages);
  const job = await client.crawl.forDeals("https://shop.example.com", {
    minDealScore: 70,
  });
+
+ // Advanced crawl with filtering
+ const job = await client.crawl.create({
+   url: "https://marketplace.example.com",
+   maxDepth: 4,
+   maxPages: 500,
+   extractDeal: true,
+   minDealScore: 50,
+   categories: ["software", "courses"],
+   priceRange: { min: 0, max: 100 },
+   onlyHighQuality: true,
+   webhookUrl: "https://my-server.com/crawl-updates",
+   syncToDealup: true,
+ });
+
+ // Enterprise: priority queue override
+ const job = await client.crawl.create({
+   url: "https://time-sensitive-deals.com",
+   priority: "high", // Enterprise only
+   onlyHighQuality: true,
+ });
  ```
 
  **Available Templates:**
@@ -225,6 +252,28 @@ const job = await client.crawl.forDeals("https://shop.example.com", {
  - `docs` - Documentation sites
  - `custom` - No preset, use your own settings
 
+ **Crawl Options:**
+ | Option | Type | Default | Description |
+ |--------|------|---------|-------------|
+ | `url` | string | required | Starting URL |
+ | `maxDepth` | number | 3 | Max crawl depth (1-5) |
+ | `maxPages` | number | 100 | Max pages to crawl (1-1000) |
+ | `detectSignals` | boolean | true | Detect prices, discounts |
+ | `extractDeal` | boolean | false | Extract deal info with AI |
+ | `minDealScore` | number | 30 | Min deal score threshold (0-100) |
+ | `categories` | array | - | Filter: courses, software, physical, services, other |
+ | `priceRange` | object | - | Filter: { min, max } price |
+ | `onlyHighQuality` | boolean | false | Only deals scoring 70+ |
+ | `allowedMerchants` | string[] | - | Only these merchants |
+ | `blockedMerchants` | string[] | - | Exclude these merchants |
+ | `webhookUrl` | string | - | Real-time notifications URL |
+ | `syncToDealup` | boolean | false | Auto-sync to DealUp |
+ | `template` | string | - | Job template to use |
+ | `useSmartRouting` | boolean | true | Auto-detect best settings |
+ | `priority` | string | - | Queue priority (Enterprise only) |
+ | `requireJS` | boolean | false | Force JavaScript rendering |
+ | `bypassAntiBot` | boolean | false | Advanced anti-bot techniques |
+
  ### Extract - LLM-Based Extraction
 
  ```typescript
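The merchant and template filters in the crawl options table compose with the `client.crawl.create` call shown earlier; a sketch, with merchant names and URLs purely illustrative:

```typescript
// Sketch: explicit template plus merchant filtering; values are illustrative.
const job = await client.crawl.create({
  url: "https://deals.example.com",
  template: "ecommerce",             // preset from the template list above
  useSmartRouting: false,            // template is chosen explicitly here
  allowedMerchants: ["ExampleMart"], // only crawl pages from these merchants
  blockedMerchants: ["SpamStore"],   // skip pages from these merchants
  requireJS: true,                   // force JS rendering for dynamic listings
});
```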
@@ -278,6 +327,101 @@ const query = client.dork.buildQuery({
  // Returns: "laptop deals site:amazon.com intitle:discount"
  ```
 
+ ### Agent - AI Autonomous Navigation (NEW)
+
+ Create AI agents that can navigate websites, interact with elements, and extract structured data using natural language instructions.
+
+ ```typescript
+ // Basic agent - navigate and extract data
+ const job = await client.agent.create({
+   url: "https://amazon.com",
+   prompt: "Search for wireless headphones under $50 and extract the top 5 results",
+   schema: {
+     type: "object",
+     properties: {
+       products: {
+         type: "array",
+         items: {
+           type: "object",
+           properties: {
+             name: { type: "string" },
+             price: { type: "number" },
+             rating: { type: "number" },
+           },
+         },
+       },
+     },
+   },
+   maxSteps: 15,
+ });
+
+ // Wait for result
+ const result = await client.agentAndWait({
+   url: "https://booking.com",
+   prompt: "Find hotels in Paris for 2 adults, March 15-17",
+   takeScreenshots: true,
+ });
+
+ // Generate schema from natural language (helper)
+ const schemaResult = await client.agent.generateSchema({
+   prompt: "Find student deals on marketing courses with price and discount",
+ });
+ // Returns: { schema, refinedPrompt, confidence, suggestedQuestions? }
+
+ // Use generated schema
+ const job = await client.agent.create({
+   url: "https://coursera.org",
+   prompt: schemaResult.refinedPrompt,
+   schema: schemaResult.schema,
+ });
+
+ // Preset actions (handle popups, cookies, etc.)
+ const job = await client.agent.withPresetActions(
+   "https://shop.com",
+   "Find the best discounts",
+   [
+     { type: "click", selector: "#accept-cookies" },
+     { type: "wait", milliseconds: 1000 },
+   ]
+ );
+
+ // Deal-focused agent with pre-built schema
+ const job = await client.agent.forDeals(
+   "https://slickdeals.net",
+   "Find the top 10 tech deals posted today"
+ );
+
+ // Use Claude instead of GPT
+ const job = await client.agent.withClaude(
+   "https://complex-site.com",
+   "Navigate the checkout process"
+ );
+ ```
+
+ **Agent Options:**
+ | Option | Type | Default | Description |
+ |--------|------|---------|-------------|
+ | `url` | string | required | Starting URL |
+ | `prompt` | string | required | Natural language instructions (10-2000 chars) |
+ | `schema` | object | - | JSON Schema for structured output |
+ | `maxSteps` | number | 10 | Maximum navigation steps (max: 25) |
+ | `actions` | array | - | Preset actions to execute first |
+ | `model` | string | "openai" | LLM provider: "openai" or "anthropic" |
+ | `timeout` | number | 30000 | Per-step timeout in ms (max: 60000) |
+ | `takeScreenshots` | boolean | false | Capture screenshot at each step |
+ | `onlyMainContent` | boolean | true | Extract main content only |
+
+ **Action Types:**
+
+ - `click` - Click an element
+ - `scroll` - Scroll page or to element
+ - `write` - Type text into input
+ - `wait` - Wait for time or element
+ - `press` - Press keyboard key
+ - `screenshot` - Capture screenshot
+ - `hover` - Hover over element
+ - `select` - Select dropdown option
+
  ### Status - Job Management
 
  ```typescript
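The action types listed above feed the `actions` array accepted by `client.agent.withPresetActions`. A sketch of a longer sequence; the diff documents only `selector` and `milliseconds` as action fields, so the `text`, `key`, and `value` fields below are assumptions:

```typescript
// Sketch only: the `text`, `key`, and `value` fields are assumed, not
// documented in this diff.
const job = await client.agent.withPresetActions(
  "https://shop.example.com",
  "Find the best discounts in the electronics section",
  [
    { type: "click", selector: "#accept-cookies" },
    { type: "write", selector: "#search", text: "electronics" }, // assumed field
    { type: "press", key: "Enter" },                             // assumed field
    { type: "select", selector: "#sort", value: "price-asc" },   // assumed field
    { type: "wait", milliseconds: 1500 },
  ]
);
```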
@@ -406,6 +550,7 @@ const stats = await client.keys.getStats(keyId, { days: 30 });
  | `crawl` | `POST /v1/crawl` | Create crawl jobs |
  | `dork` | `POST /v1/dork` | Create dork searches |
  | `extract` | `POST /v1/extract` | Create extraction jobs |
+ | `agent` | `POST /v1/agent` | Create AI agent jobs |
  | `status` | `GET /v1/status/:id` | Read job status |
  | `data:read` | `GET /v1/data/*` | Read jobs/deals |
  | `data:export` | `GET /v1/data/export` | Export data |
@@ -435,10 +580,13 @@ await client.keys.create({
      "crawl",
      "dork",
      "extract",
+     "agent",
+     "search",
      "status",
      "data:read",
      "data:export",
      "keys:manage",
+     "webhooks:manage",
    ],
  });
  ```
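Because `ApiKeyScope` is a closed union (see the type changes below), a narrowed scope list is checked at compile time. A sketch, assuming `ApiKeyScope` is exported from the package root:

```typescript
import type { ApiKeyScope } from "@dealcrawl/sdk";

// Compile-time check: each literal must be a member of the ApiKeyScope union;
// a typo like "serch" would fail to type-check.
const limitedScopes: ApiKeyScope[] = ["agent", "search", "scrape:batch", "status"];
```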
@@ -587,14 +735,27 @@ import type {
 
  // Request Options
  ScrapeOptions,
+ BatchScrapeOptions,
  CrawlOptions,
+ CrawlPriority,
+ CrawlCategory,
+ PriceRange,
+ SearchOptions,
  ExtractOptions,
  DorkOptions,
+ AgentOptions,
+ SchemaGenerationOptions,
 
  // Responses
  JobStatusResponse,
  ListDealsResponse,
  DealItem,
+ AgentJobResponse,
+ AgentStatusResponse,
+ AgentResultResponse,
+ SchemaGenerationResponse,
+ SearchJobResponse,
+ BatchScrapeResponse,
 
  // Re-exports from @dealcrawl/shared
  ScrapeResult,
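The new type exports make the filter shapes reusable outside of call sites. A sketch typing standalone values with `PriceRange`, `CrawlOptions`, and `AgentOptions`; required fields follow the option tables above, and the values are illustrative:

```typescript
import type { AgentOptions, CrawlOptions, PriceRange } from "@dealcrawl/sdk";

// A budget filter that can be shared across crawl configurations.
const budget: PriceRange = { min: 0, max: 100 };

const crawl: CrawlOptions = {
  url: "https://marketplace.example.com", // the only required field
  categories: ["software", "courses"],
  priceRange: budget,
  onlyHighQuality: true,
};

const agent: AgentOptions = {
  url: "https://shop.example.com",                         // required
  prompt: "Collect every bundle deal on the landing page", // required
  maxSteps: 10,
};
```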
package/dist/index.d.mts CHANGED
@@ -932,6 +932,8 @@ interface ScreenshotOptions {
  interface ScrapeOptions {
      /** URL to scrape (required) */
      url: string;
+     /** Don't save scrape results - Zero Data Retention (Pro/Enterprise only) */
+     noStore?: boolean;
      /** Detect signals like prices, discounts, urgency (default: true) */
      detectSignals?: boolean;
      /** Extract content using AI */
@@ -1071,6 +1073,17 @@ interface SearchOptions {
          headers?: Record<string, string>;
      };
  }
+ /** Crawl priority queue (enterprise only) */
+ type CrawlPriority = "high" | "medium" | "low";
+ /** Product category filter for crawls */
+ type CrawlCategory = "courses" | "software" | "physical" | "services" | "other";
+ /** Price range filter for crawls */
+ interface PriceRange {
+     /** Minimum price */
+     min?: number;
+     /** Maximum price */
+     max?: number;
+ }
  /** Options for crawling a website */
  interface CrawlOptions {
      /** Starting URL for the crawl (required) */
@@ -1091,7 +1104,7 @@ interface CrawlOptions {
      extractWithAI?: boolean;
      /** Extract deal-specific information from each page */
      extractDeal?: boolean;
-     /** Minimum deal score threshold (0-100) */
+     /** Minimum deal score threshold (0-100, default: 30) */
      minDealScore?: number;
      /** Prioritize pages likely to contain deals (default: true) */
      prioritizeDealPages?: boolean;
@@ -1101,6 +1114,34 @@ interface CrawlOptions {
      allowedDomains?: string[];
      /** URL patterns to exclude from crawling */
      excludePatterns?: string[];
+     /** Filter by product categories */
+     categories?: CrawlCategory[];
+     /** Filter by price range */
+     priceRange?: PriceRange;
+     /** Prefer static scraping (faster, default: true) */
+     preferStatic?: boolean;
+     /** Require JavaScript rendering for all pages */
+     requireJS?: boolean;
+     /** Use advanced anti-bot bypass techniques */
+     bypassAntiBot?: boolean;
+     /** Only return high-quality deals (score >= 70) */
+     onlyHighQuality?: boolean;
+     /** Only crawl pages from these merchants */
+     allowedMerchants?: string[];
+     /** Skip pages from these merchants */
+     blockedMerchants?: string[];
+     /** Webhook URL for real-time notifications */
+     webhookUrl?: string;
+     /** Auto-sync discovered deals to DealUp */
+     syncToDealup?: boolean;
+     /** Site-specific config name from registry */
+     siteConfig?: string;
+     /** Job template to use (ecommerce, blog, docs, marketplace, custom) */
+     template?: CrawlTemplateId;
+     /** Use smart job routing to auto-detect best settings (default: true) */
+     useSmartRouting?: boolean;
+     /** Priority queue override (Enterprise only) */
+     priority?: CrawlPriority;
  }
  /** Crawl template identifier */
  type CrawlTemplateId = "ecommerce" | "blog" | "docs" | "marketplace" | "custom";
@@ -1273,7 +1314,7 @@ interface UpdateWebhookOptions {
   * API key scope - Must match @dealcrawl/shared/src/types/api-key.types.ts
   * These are the actual scopes enforced by the backend via requireScope() middleware
   */
- type ApiKeyScope = "scrape" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
+ type ApiKeyScope = "scrape" | "scrape:batch" | "search" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
  /**
   * All available scopes (for reference and validation)
   */
package/dist/index.d.ts CHANGED
@@ -932,6 +932,8 @@ interface ScreenshotOptions {
  interface ScrapeOptions {
      /** URL to scrape (required) */
      url: string;
+     /** Don't save scrape results - Zero Data Retention (Pro/Enterprise only) */
+     noStore?: boolean;
      /** Detect signals like prices, discounts, urgency (default: true) */
      detectSignals?: boolean;
      /** Extract content using AI */
@@ -1071,6 +1073,17 @@ interface SearchOptions {
          headers?: Record<string, string>;
      };
  }
+ /** Crawl priority queue (enterprise only) */
+ type CrawlPriority = "high" | "medium" | "low";
+ /** Product category filter for crawls */
+ type CrawlCategory = "courses" | "software" | "physical" | "services" | "other";
+ /** Price range filter for crawls */
+ interface PriceRange {
+     /** Minimum price */
+     min?: number;
+     /** Maximum price */
+     max?: number;
+ }
  /** Options for crawling a website */
  interface CrawlOptions {
      /** Starting URL for the crawl (required) */
@@ -1091,7 +1104,7 @@ interface CrawlOptions {
      extractWithAI?: boolean;
      /** Extract deal-specific information from each page */
      extractDeal?: boolean;
-     /** Minimum deal score threshold (0-100) */
+     /** Minimum deal score threshold (0-100, default: 30) */
      minDealScore?: number;
      /** Prioritize pages likely to contain deals (default: true) */
      prioritizeDealPages?: boolean;
@@ -1101,6 +1114,34 @@ interface CrawlOptions {
      allowedDomains?: string[];
      /** URL patterns to exclude from crawling */
      excludePatterns?: string[];
+     /** Filter by product categories */
+     categories?: CrawlCategory[];
+     /** Filter by price range */
+     priceRange?: PriceRange;
+     /** Prefer static scraping (faster, default: true) */
+     preferStatic?: boolean;
+     /** Require JavaScript rendering for all pages */
+     requireJS?: boolean;
+     /** Use advanced anti-bot bypass techniques */
+     bypassAntiBot?: boolean;
+     /** Only return high-quality deals (score >= 70) */
+     onlyHighQuality?: boolean;
+     /** Only crawl pages from these merchants */
+     allowedMerchants?: string[];
+     /** Skip pages from these merchants */
+     blockedMerchants?: string[];
+     /** Webhook URL for real-time notifications */
+     webhookUrl?: string;
+     /** Auto-sync discovered deals to DealUp */
+     syncToDealup?: boolean;
+     /** Site-specific config name from registry */
+     siteConfig?: string;
+     /** Job template to use (ecommerce, blog, docs, marketplace, custom) */
+     template?: CrawlTemplateId;
+     /** Use smart job routing to auto-detect best settings (default: true) */
+     useSmartRouting?: boolean;
+     /** Priority queue override (Enterprise only) */
+     priority?: CrawlPriority;
  }
  /** Crawl template identifier */
  type CrawlTemplateId = "ecommerce" | "blog" | "docs" | "marketplace" | "custom";
@@ -1273,7 +1314,7 @@ interface UpdateWebhookOptions {
   * API key scope - Must match @dealcrawl/shared/src/types/api-key.types.ts
   * These are the actual scopes enforced by the backend via requireScope() middleware
   */
- type ApiKeyScope = "scrape" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
+ type ApiKeyScope = "scrape" | "scrape:batch" | "search" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
  /**
   * All available scopes (for reference and validation)
   */
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@dealcrawl/sdk",
-   "version": "2.4.0",
+   "version": "2.5.0",
    "private": false,
    "description": "Official SDK for DealCrawl web scraping, crawling and AI agent API",
    "author": "DealUp <contact@dealup.cc>",