@dealcrawl/sdk 2.4.0 → 2.6.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,7 +8,7 @@ Official TypeScript SDK for the DealCrawl web scraping and crawling API.
8
8
 
9
9
  ## Features
10
10
 
11
- - 🚀 **Full API Coverage** - Access all 45 DealCrawl API endpoints
11
+ - 🚀 **Full API Coverage** - Access all 50+ DealCrawl API endpoints
12
12
  - 📦 **Zero Dependencies** - Uses native `fetch`, works everywhere
13
13
  - 🔒 **Type-Safe** - Complete TypeScript definitions
14
14
  - ⚡ **Automatic Retries** - Built-in retry logic with exponential backoff
@@ -89,14 +89,20 @@ const job = await client.scrape.withScreenshot("https://example.com", {
89
89
  | Option | Type | Default | Description |
90
90
  |--------|------|---------|-------------|
91
91
  | `url` | string | required | URL to scrape |
92
+ | `noStore` | boolean | false | Zero Data Retention - don't save results (Pro/Enterprise) |
92
93
  | `detectSignals` | boolean | true | Detect prices, discounts, urgency |
93
94
  | `extractDeal` | boolean | false | Extract deal information |
95
+ | `extractMultipleDeals` | boolean | false | Extract multiple deals from list pages |
96
+ | `maxDeals` | number | 20 | Max deals to extract (max: 50) |
94
97
  | `extractWithAI` | boolean | false | Use AI for extraction |
95
98
  | `useAdvancedModel` | boolean | false | Use GPT-4o (higher cost) |
96
99
  | `minDealScore` | number | 0 | Minimum deal score (0-100) |
97
100
  | `screenshot` | object | - | Screenshot options |
98
101
  | `excludeTags` | string[] | - | HTML tags to exclude |
102
+ | `excludeSelectors` | string[] | - | CSS selectors to exclude |
99
103
  | `onlyMainContent` | boolean | true | Extract main content only |
104
+ | `headers` | object | - | Custom HTTP headers |
105
+ | `timeout` | number | 30000 | Request timeout in ms (max: 120000) |
100
106
 
101
107
  ### Batch Scrape - Bulk URL Scraping (NEW)
102
108
 
@@ -215,6 +221,27 @@ console.log(analysis.estimatedPages);
215
221
  const job = await client.crawl.forDeals("https://shop.example.com", {
216
222
  minDealScore: 70,
217
223
  });
224
+
225
+ // Advanced crawl with filtering
226
+ const job = await client.crawl.create({
227
+ url: "https://marketplace.example.com",
228
+ maxDepth: 4,
229
+ maxPages: 500,
230
+ extractDeal: true,
231
+ minDealScore: 50,
232
+ categories: ["software", "courses"],
233
+ priceRange: { min: 0, max: 100 },
234
+ onlyHighQuality: true,
235
+ webhookUrl: "https://my-server.com/crawl-updates",
236
+ syncToDealup: true,
237
+ });
238
+
239
+ // Enterprise: priority queue override
240
+ const job = await client.crawl.create({
241
+ url: "https://time-sensitive-deals.com",
242
+ priority: "high", // Enterprise only
243
+ onlyHighQuality: true,
244
+ });
218
245
  ```
219
246
 
220
247
  **Available Templates:**
@@ -225,6 +252,28 @@ const job = await client.crawl.forDeals("https://shop.example.com", {
225
252
  - `docs` - Documentation sites
226
253
  - `custom` - No preset, use your own settings
227
254
 
255
+ **Crawl Options:**
256
+ | Option | Type | Default | Description |
257
+ |--------|------|---------|-------------|
258
+ | `url` | string | required | Starting URL |
259
+ | `maxDepth` | number | 3 | Max crawl depth (1-5) |
260
+ | `maxPages` | number | 100 | Max pages to crawl (1-1000) |
261
+ | `detectSignals` | boolean | true | Detect prices, discounts |
262
+ | `extractDeal` | boolean | false | Extract deal info with AI |
263
+ | `minDealScore` | number | 30 | Min deal score threshold (0-100) |
264
+ | `categories` | array | - | Filter: courses, software, physical, services, other |
265
+ | `priceRange` | object | - | Filter: { min, max } price |
266
+ | `onlyHighQuality` | boolean | false | Only deals scoring 70+ |
267
+ | `allowedMerchants` | string[] | - | Only these merchants |
268
+ | `blockedMerchants` | string[] | - | Exclude these merchants |
269
+ | `webhookUrl` | string | - | Real-time notifications URL |
270
+ | `syncToDealup` | boolean | false | Auto-sync to DealUp |
271
+ | `template` | string | - | Job template to use |
272
+ | `useSmartRouting` | boolean | true | Auto-detect best settings |
273
+ | `priority` | string | - | Queue priority (Enterprise only) |
274
+ | `requireJS` | boolean | false | Force JavaScript rendering |
275
+ | `bypassAntiBot` | boolean | false | Advanced anti-bot techniques |
276
+
228
277
  ### Extract - LLM-Based Extraction
229
278
 
230
279
  ```typescript
@@ -278,6 +327,101 @@ const query = client.dork.buildQuery({
278
327
  // Returns: "laptop deals site:amazon.com intitle:discount"
279
328
  ```
280
329
 
330
+ ### Agent - AI Autonomous Navigation (NEW)
331
+
332
+ Create AI agents that can navigate websites, interact with elements, and extract structured data using natural language instructions.
333
+
334
+ ```typescript
335
+ // Basic agent - navigate and extract data
336
+ const job = await client.agent.create({
337
+ url: "https://amazon.com",
338
+ prompt: "Search for wireless headphones under $50 and extract the top 5 results",
339
+ schema: {
340
+ type: "object",
341
+ properties: {
342
+ products: {
343
+ type: "array",
344
+ items: {
345
+ type: "object",
346
+ properties: {
347
+ name: { type: "string" },
348
+ price: { type: "number" },
349
+ rating: { type: "number" },
350
+ },
351
+ },
352
+ },
353
+ },
354
+ },
355
+ maxSteps: 15,
356
+ });
357
+
358
+ // Wait for result
359
+ const result = await client.agentAndWait({
360
+ url: "https://booking.com",
361
+ prompt: "Find hotels in Paris for 2 adults, March 15-17",
362
+ takeScreenshots: true,
363
+ });
364
+
365
+ // Generate schema from natural language (helper)
366
+ const schemaResult = await client.agent.generateSchema({
367
+ prompt: "Find student deals on marketing courses with price and discount",
368
+ });
369
+ // Returns: { schema, refinedPrompt, confidence, suggestedQuestions? }
370
+
371
+ // Use generated schema
372
+ const job = await client.agent.create({
373
+ url: "https://coursera.org",
374
+ prompt: schemaResult.refinedPrompt,
375
+ schema: schemaResult.schema,
376
+ });
377
+
378
+ // Preset actions (handle popups, cookies, etc.)
379
+ const job = await client.agent.withPresetActions(
380
+ "https://shop.com",
381
+ "Find the best discounts",
382
+ [
383
+ { type: "click", selector: "#accept-cookies" },
384
+ { type: "wait", milliseconds: 1000 },
385
+ ]
386
+ );
387
+
388
+ // Deal-focused agent with pre-built schema
389
+ const job = await client.agent.forDeals(
390
+ "https://slickdeals.net",
391
+ "Find the top 10 tech deals posted today"
392
+ );
393
+
394
+ // Use Claude instead of GPT
395
+ const job = await client.agent.withClaude(
396
+ "https://complex-site.com",
397
+ "Navigate the checkout process"
398
+ );
399
+ ```
400
+
401
+ **Agent Options:**
402
+ | Option | Type | Default | Description |
403
+ |--------|------|---------|-------------|
404
+ | `url` | string | required | Starting URL |
405
+ | `prompt` | string | required | Natural language instructions (10-2000 chars) |
406
+ | `schema` | object | - | JSON Schema for structured output |
407
+ | `maxSteps` | number | 10 | Maximum navigation steps (max: 25) |
408
+ | `actions` | array | - | Preset actions to execute first |
409
+ | `model` | string | "openai" | LLM provider: "openai" or "anthropic" |
410
+ | `timeout` | number | 30000 | Per-step timeout in ms (max: 60000) |
411
+ | `takeScreenshots` | boolean | false | Capture screenshot at each step |
412
+ | `onlyMainContent` | boolean | true | Extract main content only |
413
+
414
+ **Action Types:**
415
+
416
+ - `click` - Click an element
417
+ - `scroll` - Scroll the page or scroll to an element
418
+ - `write` - Type text into input
419
+ - `wait` - Wait for a duration or for an element
420
+ - `press` - Press keyboard key
421
+ - `screenshot` - Capture screenshot
422
+ - `hover` - Hover over element
423
+ - `select` - Select dropdown option
424
+
281
425
  ### Status - Job Management
282
426
 
283
427
  ```typescript
@@ -406,6 +550,7 @@ const stats = await client.keys.getStats(keyId, { days: 30 });
406
550
  | `crawl` | `POST /v1/crawl` | Create crawl jobs |
407
551
  | `dork` | `POST /v1/dork` | Create dork searches |
408
552
  | `extract` | `POST /v1/extract` | Create extraction jobs |
553
+ | `agent` | `POST /v1/agent` | Create AI agent jobs |
409
554
  | `status` | `GET /v1/status/:id` | Read job status |
410
555
  | `data:read` | `GET /v1/data/*` | Read jobs/deals |
411
556
  | `data:export` | `GET /v1/data/export` | Export data |
@@ -435,10 +580,13 @@ await client.keys.create({
435
580
  "crawl",
436
581
  "dork",
437
582
  "extract",
583
+ "agent",
584
+ "search",
438
585
  "status",
439
586
  "data:read",
440
587
  "data:export",
441
588
  "keys:manage",
589
+ "webhooks:manage",
442
590
  ],
443
591
  });
444
592
  ```
@@ -587,14 +735,27 @@ import type {
587
735
 
588
736
  // Request Options
589
737
  ScrapeOptions,
738
+ BatchScrapeOptions,
590
739
  CrawlOptions,
740
+ CrawlPriority,
741
+ CrawlCategory,
742
+ PriceRange,
743
+ SearchOptions,
591
744
  ExtractOptions,
592
745
  DorkOptions,
746
+ AgentOptions,
747
+ SchemaGenerationOptions,
593
748
 
594
749
  // Responses
595
750
  JobStatusResponse,
596
751
  ListDealsResponse,
597
752
  DealItem,
753
+ AgentJobResponse,
754
+ AgentStatusResponse,
755
+ AgentResultResponse,
756
+ SchemaGenerationResponse,
757
+ SearchJobResponse,
758
+ BatchScrapeResponse,
598
759
 
599
760
  // Re-exports from @dealcrawl/shared
600
761
  ScrapeResult,
package/dist/index.d.mts CHANGED
@@ -81,6 +81,16 @@ interface OpenGraphMetadata {
81
81
  locale?: string;
82
82
  localeAlternate?: string[];
83
83
  }
84
+ /** Scraping engine type */
85
+ type EngineType = "static" | "dynamic" | "stealth";
86
+ /** Engine selection mode for crawling */
87
+ type CrawlMode = "static" | "auto";
88
+ /** Engine selection details */
89
+ interface EngineSelection {
90
+ engine: EngineType;
91
+ reason: string;
92
+ confidence: number;
93
+ }
84
94
  interface ParsedPage {
85
95
  url: string;
86
96
  depth?: number;
@@ -99,6 +109,10 @@ interface ParsedPage {
99
109
  statusCode?: number;
100
110
  crawlDurationMs?: number;
101
111
  signals?: Signal[];
112
+ /** Engine used to fetch this page */
113
+ engine?: EngineType;
114
+ /** Engine selection details */
115
+ engineSelection?: EngineSelection;
102
116
  }
103
117
  interface DealScoreSummary {
104
118
  score: number;
@@ -222,6 +236,69 @@ interface DorkResult {
222
236
  totalResults?: number;
223
237
  searchedAt: string;
224
238
  }
239
+ /** Source of extraction result */
240
+ type FallbackSource = "primary" | "retry" | "heuristic" | "default";
241
+ /** Result from fallback extraction */
242
+ interface FallbackResult<T> {
243
+ /** Extracted data */
244
+ data: T;
245
+ /** Source of the extraction */
246
+ source: FallbackSource;
247
+ /** Confidence score (0-1) */
248
+ confidence: number;
249
+ /** Metadata about the extraction process */
250
+ metadata: FallbackMetadata;
251
+ }
252
+ /** Metadata about fallback extraction */
253
+ interface FallbackMetadata {
254
+ /** Total attempts made */
255
+ attempts: number;
256
+ /** Errors encountered during extraction */
257
+ errors: string[];
258
+ /** Time taken in milliseconds */
259
+ durationMs: number;
260
+ /** Fields that used default values */
261
+ defaultedFields?: string[];
262
+ /** Fields that were transformed */
263
+ transformedFields?: string[];
264
+ }
265
+ /** Configuration for fallback behavior */
266
+ interface FallbackConfig {
267
+ /** Enable automatic retry (default: true) */
268
+ enableRetry?: boolean;
269
+ /** Maximum retry attempts (default: 2) */
270
+ maxRetries?: number;
271
+ /** Enable heuristic fallback (default: true) */
272
+ enableHeuristic?: boolean;
273
+ /** Enable default value fallback (default: true) */
274
+ enableDefault?: boolean;
275
+ /** Minimum confidence to accept result (default: 0.5) */
276
+ minConfidence?: number;
277
+ }
278
+ /** Validation result for extracted data */
279
+ interface ValidationResult<T> {
280
+ /** Whether validation passed */
281
+ success: boolean;
282
+ /** Validated and transformed data */
283
+ data?: T;
284
+ /** Validation errors */
285
+ errors: ValidationError[];
286
+ /** Fields that received default values */
287
+ defaultedFields: string[];
288
+ /** Fields that were transformed */
289
+ transformedFields: string[];
290
+ }
291
+ /** Individual validation error */
292
+ interface ValidationError {
293
+ /** Path to the field that failed validation */
294
+ path: string;
295
+ /** Error message */
296
+ message: string;
297
+ /** Expected value or type */
298
+ expected?: string;
299
+ /** Received value */
300
+ received?: unknown;
301
+ }
225
302
 
226
303
  /**
227
304
  * SDK Configuration options
@@ -932,6 +1009,8 @@ interface ScreenshotOptions {
932
1009
  interface ScrapeOptions {
933
1010
  /** URL to scrape (required) */
934
1011
  url: string;
1012
+ /** Don't save scrape results - Zero Data Retention (Pro/Enterprise only) */
1013
+ noStore?: boolean;
935
1014
  /** Detect signals like prices, discounts, urgency (default: true) */
936
1015
  detectSignals?: boolean;
937
1016
  /** Extract content using AI */
@@ -1071,6 +1150,17 @@ interface SearchOptions {
1071
1150
  headers?: Record<string, string>;
1072
1151
  };
1073
1152
  }
1153
+ /** Crawl priority queue (Enterprise only) */
1154
+ type CrawlPriority = "high" | "medium" | "low";
1155
+ /** Product category filter for crawls */
1156
+ type CrawlCategory = "courses" | "software" | "physical" | "services" | "other";
1157
+ /** Price range filter for crawls */
1158
+ interface PriceRange {
1159
+ /** Minimum price */
1160
+ min?: number;
1161
+ /** Maximum price */
1162
+ max?: number;
1163
+ }
1074
1164
  /** Options for crawling a website */
1075
1165
  interface CrawlOptions {
1076
1166
  /** Starting URL for the crawl (required) */
@@ -1091,7 +1181,7 @@ interface CrawlOptions {
1091
1181
  extractWithAI?: boolean;
1092
1182
  /** Extract deal-specific information from each page */
1093
1183
  extractDeal?: boolean;
1094
- /** Minimum deal score threshold (0-100) */
1184
+ /** Minimum deal score threshold (0-100, default: 30) */
1095
1185
  minDealScore?: number;
1096
1186
  /** Prioritize pages likely to contain deals (default: true) */
1097
1187
  prioritizeDealPages?: boolean;
@@ -1101,6 +1191,34 @@ interface CrawlOptions {
1101
1191
  allowedDomains?: string[];
1102
1192
  /** URL patterns to exclude from crawling */
1103
1193
  excludePatterns?: string[];
1194
+ /** Filter by product categories */
1195
+ categories?: CrawlCategory[];
1196
+ /** Filter by price range */
1197
+ priceRange?: PriceRange;
1198
+ /** Prefer static scraping (faster, default: true) */
1199
+ preferStatic?: boolean;
1200
+ /** Require JavaScript rendering for all pages */
1201
+ requireJS?: boolean;
1202
+ /** Use advanced anti-bot bypass techniques */
1203
+ bypassAntiBot?: boolean;
1204
+ /** Only return high-quality deals (score >= 70) */
1205
+ onlyHighQuality?: boolean;
1206
+ /** Only crawl pages from these merchants */
1207
+ allowedMerchants?: string[];
1208
+ /** Skip pages from these merchants */
1209
+ blockedMerchants?: string[];
1210
+ /** Webhook URL for real-time notifications */
1211
+ webhookUrl?: string;
1212
+ /** Auto-sync discovered deals to DealUp */
1213
+ syncToDealup?: boolean;
1214
+ /** Site-specific config name from registry */
1215
+ siteConfig?: string;
1216
+ /** Job template to use (ecommerce, blog, docs, marketplace, custom) */
1217
+ template?: CrawlTemplateId;
1218
+ /** Use smart job routing to auto-detect best settings (default: true) */
1219
+ useSmartRouting?: boolean;
1220
+ /** Priority queue override (Enterprise only) */
1221
+ priority?: CrawlPriority;
1104
1222
  }
1105
1223
  /** Crawl template identifier */
1106
1224
  type CrawlTemplateId = "ecommerce" | "blog" | "docs" | "marketplace" | "custom";
@@ -1273,7 +1391,7 @@ interface UpdateWebhookOptions {
1273
1391
  * API key scope - Must match @dealcrawl/shared/src/types/api-key.types.ts
1274
1392
  * These are the actual scopes enforced by the backend via requireScope() middleware
1275
1393
  */
1276
- type ApiKeyScope = "scrape" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
1394
+ type ApiKeyScope = "scrape" | "scrape:batch" | "search" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
1277
1395
  /**
1278
1396
  * All available scopes (for reference and validation)
1279
1397
  */
@@ -3127,33 +3245,162 @@ declare class DealCrawl {
3127
3245
  }
3128
3246
 
3129
3247
  /**
3130
- * Error codes used by DealCrawl API
3131
- * Copied from @dealcrawl/shared to avoid bundling the entire package
3248
+ * Error codes used by DealCrawl API (SEC-005 compliant)
3249
+ *
3250
+ * Machine-readable error codes for API responses.
3251
+ * These codes are stable identifiers that clients can programmatically handle.
3252
+ *
3253
+ * @see https://docs.dealcrawl.dev/api/errors for full documentation
3132
3254
  */
3133
3255
  declare const ERROR_CODES: {
3134
- readonly INVALID_API_KEY: "INVALID_API_KEY";
3135
- readonly MISSING_API_KEY: "MISSING_API_KEY";
3136
- readonly API_KEY_EXPIRED: "API_KEY_EXPIRED";
3137
- readonly ACCOUNT_SUSPENDED: "ACCOUNT_SUSPENDED";
3256
+ /** Missing or invalid Authorization header */
3257
+ readonly AUTH_MISSING_HEADER: "AUTH_MISSING_HEADER";
3258
+ /** The provided API key is invalid or expired */
3259
+ readonly AUTH_INVALID_API_KEY: "AUTH_INVALID_API_KEY";
3260
+ /** Your API key has expired */
3261
+ readonly AUTH_API_KEY_EXPIRED: "AUTH_API_KEY_EXPIRED";
3262
+ /** Your API key has been revoked */
3263
+ readonly AUTH_API_KEY_REVOKED: "AUTH_API_KEY_REVOKED";
3264
+ /** API key does not have required permissions */
3265
+ readonly AUTH_INSUFFICIENT_SCOPE: "AUTH_INSUFFICIENT_SCOPE";
3266
+ /** Your account has been suspended */
3267
+ readonly AUTH_ACCOUNT_SUSPENDED: "AUTH_ACCOUNT_SUSPENDED";
3268
+ /** @deprecated Use AUTH_INVALID_API_KEY instead */
3269
+ readonly INVALID_API_KEY: "AUTH_INVALID_API_KEY";
3270
+ /** @deprecated Use AUTH_MISSING_HEADER instead */
3271
+ readonly MISSING_API_KEY: "AUTH_MISSING_HEADER";
3272
+ /** @deprecated Use AUTH_API_KEY_EXPIRED instead */
3273
+ readonly API_KEY_EXPIRED: "AUTH_API_KEY_EXPIRED";
3274
+ /** @deprecated Use AUTH_ACCOUNT_SUSPENDED instead */
3275
+ readonly ACCOUNT_SUSPENDED: "AUTH_ACCOUNT_SUSPENDED";
3276
+ /** Rate limit exceeded. Please try again later */
3138
3277
  readonly RATE_LIMIT_EXCEEDED: "RATE_LIMIT_EXCEEDED";
3139
- readonly QUOTA_EXCEEDED: "QUOTA_EXCEEDED";
3140
- readonly INVALID_URL: "INVALID_URL";
3141
- readonly INVALID_REQUEST: "INVALID_REQUEST";
3142
- readonly MISSING_REQUIRED_FIELD: "MISSING_REQUIRED_FIELD";
3143
- readonly JOB_NOT_FOUND: "JOB_NOT_FOUND";
3144
- readonly JOB_FAILED: "JOB_FAILED";
3278
+ /** Monthly quota exceeded. Upgrade your plan for more */
3279
+ readonly RATE_QUOTA_EXCEEDED: "RATE_QUOTA_EXCEEDED";
3280
+ /** Concurrent job limit reached */
3281
+ readonly RATE_CONCURRENT_LIMIT: "RATE_CONCURRENT_LIMIT";
3282
+ /** @deprecated Use RATE_QUOTA_EXCEEDED instead */
3283
+ readonly QUOTA_EXCEEDED: "RATE_QUOTA_EXCEEDED";
3284
+ /** The provided input is invalid */
3285
+ readonly VALID_INVALID_INPUT: "VALID_INVALID_INPUT";
3286
+ /** Required field is missing */
3287
+ readonly VALID_MISSING_FIELD: "VALID_MISSING_FIELD";
3288
+ /** The provided URL is not valid */
3289
+ readonly VALID_INVALID_URL: "VALID_INVALID_URL";
3290
+ /** Invalid schema format */
3291
+ readonly VALID_INVALID_SCHEMA: "VALID_INVALID_SCHEMA";
3292
+ /** Invalid format */
3293
+ readonly VALID_INVALID_FORMAT: "VALID_INVALID_FORMAT";
3294
+ /** @deprecated Use VALID_INVALID_URL instead */
3295
+ readonly INVALID_URL: "VALID_INVALID_URL";
3296
+ /** @deprecated Use VALID_INVALID_INPUT instead */
3297
+ readonly INVALID_REQUEST: "VALID_INVALID_INPUT";
3298
+ /** @deprecated Use VALID_MISSING_FIELD instead */
3299
+ readonly MISSING_REQUIRED_FIELD: "VALID_MISSING_FIELD";
3300
+ /** The requested resource was not found */
3301
+ readonly RESOURCE_NOT_FOUND: "RESOURCE_NOT_FOUND";
3302
+ /** Job not found */
3303
+ readonly RESOURCE_JOB_NOT_FOUND: "RESOURCE_JOB_NOT_FOUND";
3304
+ /** Webhook not found */
3305
+ readonly RESOURCE_WEBHOOK_NOT_FOUND: "RESOURCE_WEBHOOK_NOT_FOUND";
3306
+ /** API key not found */
3307
+ readonly RESOURCE_KEY_NOT_FOUND: "RESOURCE_KEY_NOT_FOUND";
3308
+ /** @deprecated Use RESOURCE_JOB_NOT_FOUND instead */
3309
+ readonly JOB_NOT_FOUND: "RESOURCE_JOB_NOT_FOUND";
3310
+ /** Failed to create job */
3311
+ readonly JOB_CREATION_FAILED: "JOB_CREATION_FAILED";
3312
+ /** Job timed out */
3145
3313
  readonly JOB_TIMEOUT: "JOB_TIMEOUT";
3146
- readonly FETCH_FAILED: "FETCH_FAILED";
3147
- readonly PARSE_FAILED: "PARSE_FAILED";
3148
- readonly BLOCKED_BY_ROBOTS: "BLOCKED_BY_ROBOTS";
3149
- readonly CAPTCHA_DETECTED: "CAPTCHA_DETECTED";
3150
- readonly SITE_UNREACHABLE: "SITE_UNREACHABLE";
3314
+ /** Job processing failed */
3315
+ readonly JOB_FAILED: "JOB_FAILED";
3316
+ /** Job was cancelled */
3317
+ readonly JOB_CANCELLED: "JOB_CANCELLED";
3318
+ /** Failed to fetch URL */
3319
+ readonly SCRAPE_FETCH_FAILED: "SCRAPE_FETCH_FAILED";
3320
+ /** Failed to parse page content */
3321
+ readonly SCRAPE_PARSE_FAILED: "SCRAPE_PARSE_FAILED";
3322
+ /** URL is blocked by robots.txt */
3323
+ readonly SCRAPE_BLOCKED_BY_ROBOTS: "SCRAPE_BLOCKED_BY_ROBOTS";
3324
+ /** CAPTCHA detected on page */
3325
+ readonly SCRAPE_CAPTCHA_DETECTED: "SCRAPE_CAPTCHA_DETECTED";
3326
+ /** Site is unreachable */
3327
+ readonly SCRAPE_SITE_UNREACHABLE: "SCRAPE_SITE_UNREACHABLE";
3328
+ /** Scraping operation timed out */
3329
+ readonly SCRAPE_TIMEOUT: "SCRAPE_TIMEOUT";
3330
+ /** @deprecated Use SCRAPE_FETCH_FAILED instead */
3331
+ readonly FETCH_FAILED: "SCRAPE_FETCH_FAILED";
3332
+ /** @deprecated Use SCRAPE_PARSE_FAILED instead */
3333
+ readonly PARSE_FAILED: "SCRAPE_PARSE_FAILED";
3334
+ /** @deprecated Use SCRAPE_BLOCKED_BY_ROBOTS instead */
3335
+ readonly BLOCKED_BY_ROBOTS: "SCRAPE_BLOCKED_BY_ROBOTS";
3336
+ /** @deprecated Use SCRAPE_CAPTCHA_DETECTED instead */
3337
+ readonly CAPTCHA_DETECTED: "SCRAPE_CAPTCHA_DETECTED";
3338
+ /** @deprecated Use SCRAPE_SITE_UNREACHABLE instead */
3339
+ readonly SITE_UNREACHABLE: "SCRAPE_SITE_UNREACHABLE";
3340
+ /** A webhook with this event and URL already exists */
3341
+ readonly WEBHOOK_ALREADY_EXISTS: "WEBHOOK_ALREADY_EXISTS";
3342
+ /** Webhook URL is not valid or not publicly accessible */
3343
+ readonly WEBHOOK_INVALID_URL: "WEBHOOK_INVALID_URL";
3344
+ /** Webhook replay attack detected */
3345
+ readonly WEBHOOK_REPLAY_DETECTED: "WEBHOOK_REPLAY_DETECTED";
3346
+ /** Webhook request is too old */
3347
+ readonly WEBHOOK_EXPIRED: "WEBHOOK_EXPIRED";
3348
+ /** Webhook signature is invalid */
3349
+ readonly WEBHOOK_INVALID_SIGNATURE: "WEBHOOK_INVALID_SIGNATURE";
3350
+ /** AI extraction failed to process content */
3351
+ readonly AI_EXTRACTION_FAILED: "AI_EXTRACTION_FAILED";
3352
+ /** Failed to parse JSON from AI response */
3353
+ readonly AI_JSON_PARSE_FAILED: "AI_JSON_PARSE_FAILED";
3354
+ /** AI response did not match expected schema */
3355
+ readonly AI_SCHEMA_VALIDATION_FAILED: "AI_SCHEMA_VALIDATION_FAILED";
3356
+ /** No price information could be extracted */
3357
+ readonly AI_NO_PRICE_FOUND: "AI_NO_PRICE_FOUND";
3358
+ /** Extraction confidence is below acceptable threshold */
3359
+ readonly AI_LOW_CONFIDENCE: "AI_LOW_CONFIDENCE";
3360
+ /** Only partial data could be extracted */
3361
+ readonly AI_PARTIAL_RESULT: "AI_PARTIAL_RESULT";
3362
+ /** Fallback extraction was used */
3363
+ readonly AI_FALLBACK_USED: "AI_FALLBACK_USED";
3364
+ /** All fallback strategies failed */
3365
+ readonly AI_FALLBACK_FAILED: "AI_FALLBACK_FAILED";
3366
+ /** LLM provider rate limit exceeded */
3367
+ readonly AI_PROVIDER_RATE_LIMIT: "AI_PROVIDER_RATE_LIMIT";
3368
+ /** LLM provider authentication failed */
3369
+ readonly AI_PROVIDER_AUTH_FAILED: "AI_PROVIDER_AUTH_FAILED";
3370
+ /** LLM provider timeout */
3371
+ readonly AI_PROVIDER_TIMEOUT: "AI_PROVIDER_TIMEOUT";
3372
+ /** An internal server error occurred */
3151
3373
  readonly INTERNAL_ERROR: "INTERNAL_ERROR";
3152
- readonly SERVICE_UNAVAILABLE: "SERVICE_UNAVAILABLE";
3153
- readonly REDIS_ERROR: "REDIS_ERROR";
3374
+ /** Database operation failed */
3154
3375
  readonly DATABASE_ERROR: "DATABASE_ERROR";
3376
+ /** Redis operation failed */
3377
+ readonly REDIS_ERROR: "REDIS_ERROR";
3378
+ /** External service error */
3379
+ readonly EXTERNAL_SERVICE_ERROR: "EXTERNAL_SERVICE_ERROR";
3380
+ /** Service is temporarily unavailable */
3381
+ readonly SERVICE_UNAVAILABLE: "SERVICE_UNAVAILABLE";
3155
3382
  };
3156
3383
  type ErrorCode = (typeof ERROR_CODES)[keyof typeof ERROR_CODES];
3384
+ /**
3385
+ * Details for AI extraction errors
3386
+ */
3387
+ interface ExtractionErrorDetails {
3388
+ /** Fields that failed validation */
3389
+ failedFields?: string[];
3390
+ /** Fields that could not be extracted */
3391
+ missingFields?: string[];
3392
+ /** Validation errors from schema */
3393
+ validationErrors?: Array<{
3394
+ field: string;
3395
+ message: string;
3396
+ }>;
3397
+ /** Extraction confidence score (0-1) */
3398
+ confidence?: number;
3399
+ /** Whether fallback was used */
3400
+ fallbackUsed?: boolean;
3401
+ /** Source of the extraction */
3402
+ source?: "ai" | "heuristic" | "fallback";
3403
+ }
3157
3404
  /**
3158
3405
  * Custom error class for DealCrawl SDK
3159
3406
  * Provides structured error handling with error codes
@@ -3176,7 +3423,7 @@ declare class DealCrawlError extends Error {
3176
3423
  });
3177
3424
  /**
3178
3425
  * Check if the error is retryable
3179
- * Rate limits and transient errors are retryable
3426
+ * Rate limits, transient errors, and AI provider issues are retryable
3180
3427
  */
3181
3428
  isRetryable(): boolean;
3182
3429
  /**
@@ -3191,6 +3438,54 @@ declare class DealCrawlError extends Error {
3191
3438
  * Check if the error is due to quota exceeded
3192
3439
  */
3193
3440
  isQuotaExceeded(): boolean;
3441
+ /**
3442
+ * Check if the error is due to AI extraction issues
3443
+ */
3444
+ isExtractionError(): boolean;
3445
+ /**
3446
+ * Check if result is partial (some data extracted but incomplete)
3447
+ */
3448
+ isPartialResult(): boolean;
3449
+ /**
3450
+ * Check if extraction failed due to schema validation
3451
+ */
3452
+ isSchemaValidationError(): boolean;
3453
+ /**
3454
+ * Check if the error is due to validation issues
3455
+ */
3456
+ isValidationError(): boolean;
3457
+ /**
3458
+ * Check if the error is due to scraping issues
3459
+ */
3460
+ isScrapeError(): boolean;
3461
+ /**
3462
+ * Check if the error is due to webhook issues
3463
+ */
3464
+ isWebhookError(): boolean;
3465
+ /**
3466
+ * Check if the error is a server error (5xx)
3467
+ */
3468
+ isServerError(): boolean;
3469
+ /**
3470
+ * Check if the error is a client error (4xx)
3471
+ */
3472
+ isClientError(): boolean;
3473
+ /**
3474
+ * Check if fallback was used during extraction
3475
+ */
3476
+ isFallbackUsed(): boolean;
3477
+ /**
3478
+ * Check if all fallback strategies failed
3479
+ */
3480
+ isFallbackFailed(): boolean;
3481
+ /**
3482
+ * Check if error is due to AI provider issues
3483
+ */
3484
+ isAIProviderError(): boolean;
3485
+ /**
3486
+ * Get extraction-specific error details
3487
+ */
3488
+ getExtractionDetails(): ExtractionErrorDetails | undefined;
3194
3489
  /**
3195
3490
  * Convert error to JSON-serializable object
3196
3491
  */
@@ -3206,5 +3501,13 @@ declare class DealCrawlError extends Error {
3206
3501
  retryAfter?: number;
3207
3502
  }, retryAfter?: number | string | null): DealCrawlError;
3208
3503
  }
3504
+ /**
3505
+ * Human-readable error messages for all error codes
3506
+ */
3507
+ declare const ERROR_MESSAGES: Record<ErrorCode, string>;
3508
+ /**
3509
+ * Get human-readable message for error code
3510
+ */
3511
+ declare function getErrorMessage(code: ErrorCode): string;
3209
3512
 
3210
- export { ALL_API_KEY_SCOPES, type AccountInfoResponse, type AccountMetricsResponse, AccountResource, type AgentAction, type AgentActionType, type AgentCompletionReason, type AgentJobResponse, type AgentModel, type AgentOptions, AgentResource, type AgentResultResponse, type AgentStatusResponse, type AgentStepResponse, type ApiError, type ApiKeyInfo, type ApiKeyScope, type ApiResponse, type BatchScrapeDefaults, type BatchScrapeItem, type BatchScrapeOptions, type BatchScrapeResponse, type BatchScrapeResultItem, type BatchStatusResponse, type CancelJobResponse, type CheckpointInfo, type ClickAction, type ClientPreferences, type ClientStatsResponse, type CrawlAnalysisResponse, type CrawlJobResponse, type CrawlOptions, type CrawlRecommendation, CrawlResource, type CrawlResult, type CrawlTemplate, type CrawlTemplateId, type CreateApiKeyOptions, type CreateKeyResponse, type CreateWebhookOptions, type CreateWebhookResponse, type CreatedApiKey, DEFAULT_API_KEY_SCOPES, DEFAULT_CONFIG, DataResource, DealCrawl, type DealCrawlConfig, DealCrawlError, type DealDetails, type DealItem, type DealMetrics, type DealScoreSummary, type DealSummary, type DealUpMetrics, type DealUpMetricsResponse, type DeleteKeyResponse, type DeleteWebhookResponse, type DiscountSignal, type DorkJobResponse, type DorkOptions, DorkResource, type DorkResult, ERROR_CODES, type ErrorCode, type ExportDealsOptions, type ExportFormat, type ExportJobsOptions, type ExtractJobResponse, type ExtractModel, type ExtractOptions, ExtractResource, type ExtractedDeal, type GetApiKeyStatsOptions, type GetDealsOptions, type HoverAction, type JobDealsResponse, type JobMetricsResponse, type JobResponse, type JobStatus, type JobStatusFilter, type JobStatusResponse, type JobSummary, type JobTypeFilter, type KeyStatsResponse, KeysResource, type ListApiKeysOptions, type ListDealsOptions, type ListDealsResponse, type ListJobsOptions, type ListJobsResponse, type ListKeysResponse, type ListWebhooksResponse, type PaginatedResponse, 
type PaginationInfo, type ParsedPage, type PreferencesResponse, type PressAction, type PriceSignal, type PricingInfo, type ProductCategory, type ProductInfo, type RateLimitInfo, type RecommendationsResponse, type RequestContext, type ResumeJobResponse, type RevokeApiKeyOptions, type RotateApiKeyOptions, type RotateKeyResponse, type ScrapeJobResponse, type ScrapeOptions, ScrapeResource, type ScrapeResult, type ScreenshotAgentAction, type ScreenshotOptions, type ScrollAction, type SearchAiModel, type SearchAiProvider, type SearchData, type SearchDateRange, type SearchFilters, type SearchJobResponse, type SearchOptions, SearchResource, type SearchResultItem, type SearchStatusResponse, type SelectAction, type Signal, type SortOrder, StatusResource, type TestWebhookResponse, type UpdatePreferencesOptions, type UpdatePreferencesResponse, type UpdateWebhookOptions, type UpdateWebhookResponse, type UsageStats, type WaitAction, type WaitOptions, type WaitResult, type WebhookEvent, type WebhookItem, WebhooksResource, type WriteAction, DealCrawl as default, pollUntil, waitForAll, waitForAny, waitForResult };
3513
+ export { ALL_API_KEY_SCOPES, type AccountInfoResponse, type AccountMetricsResponse, AccountResource, type AgentAction, type AgentActionType, type AgentCompletionReason, type AgentJobResponse, type AgentModel, type AgentOptions, AgentResource, type AgentResultResponse, type AgentStatusResponse, type AgentStepResponse, type ApiError, type ApiKeyInfo, type ApiKeyScope, type ApiResponse, type BatchScrapeDefaults, type BatchScrapeItem, type BatchScrapeOptions, type BatchScrapeResponse, type BatchScrapeResultItem, type BatchStatusResponse, type CancelJobResponse, type CheckpointInfo, type ClickAction, type ClientPreferences, type ClientStatsResponse, type CrawlAnalysisResponse, type CrawlError, type CrawlJobResponse, type CrawlMode, type CrawlOptions, type CrawlRecommendation, CrawlResource, type CrawlResult, type CrawlStats, type CrawlTemplate, type CrawlTemplateId, type CreateApiKeyOptions, type CreateKeyResponse, type CreateWebhookOptions, type CreateWebhookResponse, type CreatedApiKey, DEFAULT_API_KEY_SCOPES, DEFAULT_CONFIG, DataResource, DealCrawl, type DealCrawlConfig, DealCrawlError, type DealDetails, type DealItem, type DealMetrics, type DealScoreSummary, type DealSummary, type DealUpMetrics, type DealUpMetricsResponse, type DeleteKeyResponse, type DeleteWebhookResponse, type DiscountSignal, type DorkJobResponse, type DorkOptions, DorkResource, type DorkResult, ERROR_CODES, ERROR_MESSAGES, type EngineSelection, type EngineType, type ErrorCode, type ExportDealsOptions, type ExportFormat, type ExportJobsOptions, type ExtractJobResponse, type ExtractModel, type ExtractOptions, ExtractResource, type ExtractedDeal, type ExtractionErrorDetails, type FallbackConfig, type FallbackMetadata, type FallbackResult, type FallbackSource, type GetApiKeyStatsOptions, type GetDealsOptions, type HoverAction, type JobDealsResponse, type JobMetricsResponse, type JobResponse, type JobStatus, type JobStatusFilter, type JobStatusResponse, type JobSummary, type JobTypeFilter, type 
KeyStatsResponse, KeysResource, type ListApiKeysOptions, type ListDealsOptions, type ListDealsResponse, type ListJobsOptions, type ListJobsResponse, type ListKeysResponse, type ListWebhooksResponse, type PaginatedResponse, type PaginationInfo, type ParsedPage, type PreferencesResponse, type PressAction, type PriceSignal, type PricingInfo, type ProductCategory, type ProductInfo, type RateLimitInfo, type RecommendationsResponse, type RequestContext, type ResumeJobResponse, type RevokeApiKeyOptions, type RotateApiKeyOptions, type RotateKeyResponse, type ScrapeJobResponse, type ScrapeOptions, ScrapeResource, type ScrapeResult, type ScreenshotAgentAction, type ScreenshotOptions, type ScreenshotResult, type ScrollAction, type SearchAiModel, type SearchAiProvider, type SearchData, type SearchDateRange, type SearchFilters, type SearchJobResponse, type SearchOptions, SearchResource, type SearchResultItem, type SearchStatusResponse, type SelectAction, type Signal, type SortOrder, StatusResource, type TestWebhookResponse, type UpdatePreferencesOptions, type UpdatePreferencesResponse, type UpdateWebhookOptions, type UpdateWebhookResponse, type UrgencyLevel, type UsageStats, type ValidationError, type ValidationResult, type WaitAction, type WaitOptions, type WaitResult, type WebhookEvent, type WebhookItem, WebhooksResource, type WriteAction, DealCrawl as default, getErrorMessage, pollUntil, waitForAll, waitForAny, waitForResult };