@dealcrawl/sdk 2.4.0 → 2.5.0

package/README.md CHANGED
@@ -8,7 +8,7 @@ Official TypeScript SDK for the DealCrawl web scraping and crawling API.
 
  ## Features
 
- - 🚀 **Full API Coverage** - Access all 45 DealCrawl API endpoints
+ - 🚀 **Full API Coverage** - Access all 50+ DealCrawl API endpoints
  - 📦 **Zero Dependencies** - Uses native `fetch`, works everywhere
  - 🔒 **Type-Safe** - Complete TypeScript definitions
  - ⚡ **Automatic Retries** - Built-in retry logic with exponential backoff
@@ -89,14 +89,20 @@ const job = await client.scrape.withScreenshot("https://example.com", {
  | Option | Type | Default | Description |
  |--------|------|---------|-------------|
  | `url` | string | required | URL to scrape |
+ | `noStore` | boolean | false | Zero Data Retention - don't save results (Pro/Enterprise) |
  | `detectSignals` | boolean | true | Detect prices, discounts, urgency |
  | `extractDeal` | boolean | false | Extract deal information |
+ | `extractMultipleDeals` | boolean | false | Extract multiple deals from list pages |
+ | `maxDeals` | number | 20 | Max deals to extract (max: 50) |
  | `extractWithAI` | boolean | false | Use AI for extraction |
  | `useAdvancedModel` | boolean | false | Use GPT-4o (higher cost) |
  | `minDealScore` | number | 0 | Minimum deal score (0-100) |
  | `screenshot` | object | - | Screenshot options |
  | `excludeTags` | string[] | - | HTML tags to exclude |
+ | `excludeSelectors` | string[] | - | CSS selectors to exclude |
  | `onlyMainContent` | boolean | true | Extract main content only |
+ | `headers` | object | - | Custom HTTP headers |
+ | `timeout` | number | 30000 | Request timeout in ms (max: 120000) |
 
  ### Batch Scrape - Bulk URL Scraping (NEW)
 
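The new options above combine in a single request. A minimal sketch of a zero-retention, multi-deal scrape; note that `client.scrape.create` is an assumed entry-point name, since this diff only shows `client.scrape.withScreenshot`:

```typescript
// Sketch only: `client.scrape.create` is an assumed method name.
const job = await client.scrape.create({
  url: "https://example.com/deals",
  noStore: true,              // Zero Data Retention (Pro/Enterprise)
  extractMultipleDeals: true, // pull every deal from a list page
  maxDeals: 50,               // the option table caps this at 50
  excludeSelectors: [".cookie-banner", "#newsletter-modal"],
  headers: { "Accept-Language": "en-US" },
  timeout: 60000,             // ms; the table caps this at 120000
});
```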
@@ -215,6 +221,27 @@ console.log(analysis.estimatedPages);
  const job = await client.crawl.forDeals("https://shop.example.com", {
    minDealScore: 70,
  });
+
+ // Advanced crawl with filtering
+ const job = await client.crawl.create({
+   url: "https://marketplace.example.com",
+   maxDepth: 4,
+   maxPages: 500,
+   extractDeal: true,
+   minDealScore: 50,
+   categories: ["software", "courses"],
+   priceRange: { min: 0, max: 100 },
+   onlyHighQuality: true,
+   webhookUrl: "https://my-server.com/crawl-updates",
+   syncToDealup: true,
+ });
+
+ // Enterprise: priority queue override
+ const job = await client.crawl.create({
+   url: "https://time-sensitive-deals.com",
+   priority: "high", // Enterprise only
+   onlyHighQuality: true,
+ });
  ```
 
  **Available Templates:**
@@ -225,6 +252,28 @@ const job = await client.crawl.forDeals("https://shop.example.com", {
  - `docs` - Documentation sites
  - `custom` - No preset, use your own settings
 
+ **Crawl Options:**
+ | Option | Type | Default | Description |
+ |--------|------|---------|-------------|
+ | `url` | string | required | Starting URL |
+ | `maxDepth` | number | 3 | Max crawl depth (1-5) |
+ | `maxPages` | number | 100 | Max pages to crawl (1-1000) |
+ | `detectSignals` | boolean | true | Detect prices, discounts |
+ | `extractDeal` | boolean | false | Extract deal info with AI |
+ | `minDealScore` | number | 30 | Min deal score threshold (0-100) |
+ | `categories` | array | - | Filter: courses, software, physical, services, other |
+ | `priceRange` | object | - | Filter: { min, max } price |
+ | `onlyHighQuality` | boolean | false | Only deals scoring 70+ |
+ | `allowedMerchants` | string[] | - | Only these merchants |
+ | `blockedMerchants` | string[] | - | Exclude these merchants |
+ | `webhookUrl` | string | - | Real-time notifications URL |
+ | `syncToDealup` | boolean | false | Auto-sync to DealUp |
+ | `template` | string | - | Job template to use |
+ | `useSmartRouting` | boolean | true | Auto-detect best settings |
+ | `priority` | string | - | Queue priority (Enterprise only) |
+ | `requireJS` | boolean | false | Force JavaScript rendering |
+ | `bypassAntiBot` | boolean | false | Advanced anti-bot techniques |
+
  ### Extract - LLM-Based Extraction
 
  ```typescript
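The merchant and template filters in the crawl options table compose with the `client.crawl.create` call shown earlier; a sketch, with merchant names and URLs purely illustrative:

```typescript
// Sketch: explicit template plus merchant filtering; values are illustrative.
const job = await client.crawl.create({
  url: "https://deals.example.com",
  template: "ecommerce",             // preset from the template list above
  useSmartRouting: false,            // template is chosen explicitly here
  allowedMerchants: ["ExampleMart"], // only crawl pages from these merchants
  blockedMerchants: ["SpamStore"],   // skip pages from these merchants
  requireJS: true,                   // force JS rendering for dynamic listings
});
```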
@@ -278,6 +327,101 @@ const query = client.dork.buildQuery({
  // Returns: "laptop deals site:amazon.com intitle:discount"
  ```
 
+ ### Agent - AI Autonomous Navigation (NEW)
+
+ Create AI agents that can navigate websites, interact with elements, and extract structured data using natural language instructions.
+
+ ```typescript
+ // Basic agent - navigate and extract data
+ const job = await client.agent.create({
+   url: "https://amazon.com",
+   prompt: "Search for wireless headphones under $50 and extract the top 5 results",
+   schema: {
+     type: "object",
+     properties: {
+       products: {
+         type: "array",
+         items: {
+           type: "object",
+           properties: {
+             name: { type: "string" },
+             price: { type: "number" },
+             rating: { type: "number" },
+           },
+         },
+       },
+     },
+   },
+   maxSteps: 15,
+ });
+
+ // Wait for result
+ const result = await client.agentAndWait({
+   url: "https://booking.com",
+   prompt: "Find hotels in Paris for 2 adults, March 15-17",
+   takeScreenshots: true,
+ });
+
+ // Generate schema from natural language (helper)
+ const schemaResult = await client.agent.generateSchema({
+   prompt: "Find student deals on marketing courses with price and discount",
+ });
+ // Returns: { schema, refinedPrompt, confidence, suggestedQuestions? }
+
+ // Use generated schema
+ const job = await client.agent.create({
+   url: "https://coursera.org",
+   prompt: schemaResult.refinedPrompt,
+   schema: schemaResult.schema,
+ });
+
+ // Preset actions (handle popups, cookies, etc.)
+ const job = await client.agent.withPresetActions(
+   "https://shop.com",
+   "Find the best discounts",
+   [
+     { type: "click", selector: "#accept-cookies" },
+     { type: "wait", milliseconds: 1000 },
+   ]
+ );
+
+ // Deal-focused agent with pre-built schema
+ const job = await client.agent.forDeals(
+   "https://slickdeals.net",
+   "Find the top 10 tech deals posted today"
+ );
+
+ // Use Claude instead of GPT
+ const job = await client.agent.withClaude(
+   "https://complex-site.com",
+   "Navigate the checkout process"
+ );
+ ```
+
+ **Agent Options:**
+ | Option | Type | Default | Description |
+ |--------|------|---------|-------------|
+ | `url` | string | required | Starting URL |
+ | `prompt` | string | required | Natural language instructions (10-2000 chars) |
+ | `schema` | object | - | JSON Schema for structured output |
+ | `maxSteps` | number | 10 | Maximum navigation steps (max: 25) |
+ | `actions` | array | - | Preset actions to execute first |
+ | `model` | string | "openai" | LLM provider: "openai" or "anthropic" |
+ | `timeout` | number | 30000 | Per-step timeout in ms (max: 60000) |
+ | `takeScreenshots` | boolean | false | Capture screenshot at each step |
+ | `onlyMainContent` | boolean | true | Extract main content only |
+
+ **Action Types:**
+
+ - `click` - Click an element
+ - `scroll` - Scroll page or to element
+ - `write` - Type text into input
+ - `wait` - Wait for time or element
+ - `press` - Press keyboard key
+ - `screenshot` - Capture screenshot
+ - `hover` - Hover over element
+ - `select` - Select dropdown option
+
  ### Status - Job Management
 
  ```typescript
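The action types listed above feed the `actions` array accepted by `client.agent.withPresetActions`. A sketch of a longer sequence; the diff documents only `selector` and `milliseconds` as action fields, so the `text`, `key`, and `value` fields below are assumptions:

```typescript
// Sketch only: the `text`, `key`, and `value` fields are assumed, not
// documented in this diff.
const job = await client.agent.withPresetActions(
  "https://shop.example.com",
  "Find the best discounts in the electronics section",
  [
    { type: "click", selector: "#accept-cookies" },
    { type: "write", selector: "#search", text: "electronics" }, // assumed field
    { type: "press", key: "Enter" },                             // assumed field
    { type: "select", selector: "#sort", value: "price-asc" },   // assumed field
    { type: "wait", milliseconds: 1500 },
  ]
);
```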
@@ -406,6 +550,7 @@ const stats = await client.keys.getStats(keyId, { days: 30 });
  | `crawl` | `POST /v1/crawl` | Create crawl jobs |
  | `dork` | `POST /v1/dork` | Create dork searches |
  | `extract` | `POST /v1/extract` | Create extraction jobs |
+ | `agent` | `POST /v1/agent` | Create AI agent jobs |
  | `status` | `GET /v1/status/:id` | Read job status |
  | `data:read` | `GET /v1/data/*` | Read jobs/deals |
  | `data:export` | `GET /v1/data/export` | Export data |
@@ -435,10 +580,13 @@ await client.keys.create({
      "crawl",
      "dork",
      "extract",
+     "agent",
+     "search",
      "status",
      "data:read",
      "data:export",
      "keys:manage",
+     "webhooks:manage",
    ],
  });
  ```
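Because `ApiKeyScope` is a closed union (see the type changes below), a narrowed scope list is checked at compile time. A sketch, assuming `ApiKeyScope` is exported from the package root:

```typescript
import type { ApiKeyScope } from "@dealcrawl/sdk";

// Compile-time check: each literal must be a member of the ApiKeyScope union;
// a typo like "serch" would fail to type-check.
const limitedScopes: ApiKeyScope[] = ["agent", "search", "scrape:batch", "status"];
```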
@@ -587,14 +735,27 @@ import type {
 
  // Request Options
  ScrapeOptions,
+ BatchScrapeOptions,
  CrawlOptions,
+ CrawlPriority,
+ CrawlCategory,
+ PriceRange,
+ SearchOptions,
  ExtractOptions,
  DorkOptions,
+ AgentOptions,
+ SchemaGenerationOptions,
 
  // Responses
  JobStatusResponse,
  ListDealsResponse,
  DealItem,
+ AgentJobResponse,
+ AgentStatusResponse,
+ AgentResultResponse,
+ SchemaGenerationResponse,
+ SearchJobResponse,
+ BatchScrapeResponse,
 
  // Re-exports from @dealcrawl/shared
  ScrapeResult,
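The new type exports make the filter shapes reusable outside of call sites. A sketch typing standalone values with `PriceRange`, `CrawlOptions`, and `AgentOptions`; required fields follow the option tables above, and the values are illustrative:

```typescript
import type { AgentOptions, CrawlOptions, PriceRange } from "@dealcrawl/sdk";

// A budget filter that can be shared across crawl configurations.
const budget: PriceRange = { min: 0, max: 100 };

const crawl: CrawlOptions = {
  url: "https://marketplace.example.com", // the only required field
  categories: ["software", "courses"],
  priceRange: budget,
  onlyHighQuality: true,
};

const agent: AgentOptions = {
  url: "https://shop.example.com",                         // required
  prompt: "Collect every bundle deal on the landing page", // required
  maxSteps: 10,
};
```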
package/dist/index.d.mts CHANGED
@@ -932,6 +932,8 @@ interface ScreenshotOptions {
  interface ScrapeOptions {
      /** URL to scrape (required) */
      url: string;
+     /** Don't save scrape results - Zero Data Retention (Pro/Enterprise only) */
+     noStore?: boolean;
      /** Detect signals like prices, discounts, urgency (default: true) */
      detectSignals?: boolean;
      /** Extract content using AI */
@@ -1071,6 +1073,17 @@ interface SearchOptions {
          headers?: Record<string, string>;
      };
  }
+ /** Crawl priority queue (enterprise only) */
+ type CrawlPriority = "high" | "medium" | "low";
+ /** Product category filter for crawls */
+ type CrawlCategory = "courses" | "software" | "physical" | "services" | "other";
+ /** Price range filter for crawls */
+ interface PriceRange {
+     /** Minimum price */
+     min?: number;
+     /** Maximum price */
+     max?: number;
+ }
  /** Options for crawling a website */
  interface CrawlOptions {
      /** Starting URL for the crawl (required) */
@@ -1091,7 +1104,7 @@ interface CrawlOptions {
      extractWithAI?: boolean;
      /** Extract deal-specific information from each page */
      extractDeal?: boolean;
-     /** Minimum deal score threshold (0-100) */
+     /** Minimum deal score threshold (0-100, default: 30) */
      minDealScore?: number;
      /** Prioritize pages likely to contain deals (default: true) */
      prioritizeDealPages?: boolean;
@@ -1101,6 +1114,34 @@ interface CrawlOptions {
      allowedDomains?: string[];
      /** URL patterns to exclude from crawling */
      excludePatterns?: string[];
+     /** Filter by product categories */
+     categories?: CrawlCategory[];
+     /** Filter by price range */
+     priceRange?: PriceRange;
+     /** Prefer static scraping (faster, default: true) */
+     preferStatic?: boolean;
+     /** Require JavaScript rendering for all pages */
+     requireJS?: boolean;
+     /** Use advanced anti-bot bypass techniques */
+     bypassAntiBot?: boolean;
+     /** Only return high-quality deals (score >= 70) */
+     onlyHighQuality?: boolean;
+     /** Only crawl pages from these merchants */
+     allowedMerchants?: string[];
+     /** Skip pages from these merchants */
+     blockedMerchants?: string[];
+     /** Webhook URL for real-time notifications */
+     webhookUrl?: string;
+     /** Auto-sync discovered deals to DealUp */
+     syncToDealup?: boolean;
+     /** Site-specific config name from registry */
+     siteConfig?: string;
+     /** Job template to use (ecommerce, blog, docs, marketplace, custom) */
+     template?: CrawlTemplateId;
+     /** Use smart job routing to auto-detect best settings (default: true) */
+     useSmartRouting?: boolean;
+     /** Priority queue override (Enterprise only) */
+     priority?: CrawlPriority;
  }
  /** Crawl template identifier */
  type CrawlTemplateId = "ecommerce" | "blog" | "docs" | "marketplace" | "custom";
@@ -1273,7 +1314,7 @@ interface UpdateWebhookOptions {
   * API key scope - Must match @dealcrawl/shared/src/types/api-key.types.ts
   * These are the actual scopes enforced by the backend via requireScope() middleware
   */
- type ApiKeyScope = "scrape" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
+ type ApiKeyScope = "scrape" | "scrape:batch" | "search" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
  /**
   * All available scopes (for reference and validation)
   */
package/dist/index.d.ts CHANGED
@@ -932,6 +932,8 @@ interface ScreenshotOptions {
  interface ScrapeOptions {
      /** URL to scrape (required) */
      url: string;
+     /** Don't save scrape results - Zero Data Retention (Pro/Enterprise only) */
+     noStore?: boolean;
      /** Detect signals like prices, discounts, urgency (default: true) */
      detectSignals?: boolean;
      /** Extract content using AI */
@@ -1071,6 +1073,17 @@ interface SearchOptions {
          headers?: Record<string, string>;
      };
  }
+ /** Crawl priority queue (enterprise only) */
+ type CrawlPriority = "high" | "medium" | "low";
+ /** Product category filter for crawls */
+ type CrawlCategory = "courses" | "software" | "physical" | "services" | "other";
+ /** Price range filter for crawls */
+ interface PriceRange {
+     /** Minimum price */
+     min?: number;
+     /** Maximum price */
+     max?: number;
+ }
  /** Options for crawling a website */
  interface CrawlOptions {
      /** Starting URL for the crawl (required) */
@@ -1091,7 +1104,7 @@ interface CrawlOptions {
      extractWithAI?: boolean;
      /** Extract deal-specific information from each page */
      extractDeal?: boolean;
-     /** Minimum deal score threshold (0-100) */
+     /** Minimum deal score threshold (0-100, default: 30) */
      minDealScore?: number;
      /** Prioritize pages likely to contain deals (default: true) */
      prioritizeDealPages?: boolean;
@@ -1101,6 +1114,34 @@ interface CrawlOptions {
      allowedDomains?: string[];
      /** URL patterns to exclude from crawling */
      excludePatterns?: string[];
+     /** Filter by product categories */
+     categories?: CrawlCategory[];
+     /** Filter by price range */
+     priceRange?: PriceRange;
+     /** Prefer static scraping (faster, default: true) */
+     preferStatic?: boolean;
+     /** Require JavaScript rendering for all pages */
+     requireJS?: boolean;
+     /** Use advanced anti-bot bypass techniques */
+     bypassAntiBot?: boolean;
+     /** Only return high-quality deals (score >= 70) */
+     onlyHighQuality?: boolean;
+     /** Only crawl pages from these merchants */
+     allowedMerchants?: string[];
+     /** Skip pages from these merchants */
+     blockedMerchants?: string[];
+     /** Webhook URL for real-time notifications */
+     webhookUrl?: string;
+     /** Auto-sync discovered deals to DealUp */
+     syncToDealup?: boolean;
+     /** Site-specific config name from registry */
+     siteConfig?: string;
+     /** Job template to use (ecommerce, blog, docs, marketplace, custom) */
+     template?: CrawlTemplateId;
+     /** Use smart job routing to auto-detect best settings (default: true) */
+     useSmartRouting?: boolean;
+     /** Priority queue override (Enterprise only) */
+     priority?: CrawlPriority;
  }
  /** Crawl template identifier */
  type CrawlTemplateId = "ecommerce" | "blog" | "docs" | "marketplace" | "custom";
@@ -1273,7 +1314,7 @@ interface UpdateWebhookOptions {
   * API key scope - Must match @dealcrawl/shared/src/types/api-key.types.ts
   * These are the actual scopes enforced by the backend via requireScope() middleware
   */
- type ApiKeyScope = "scrape" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
+ type ApiKeyScope = "scrape" | "scrape:batch" | "search" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
  /**
   * All available scopes (for reference and validation)
   */
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@dealcrawl/sdk",
-   "version": "2.4.0",
+   "version": "2.5.0",
    "private": false,
    "description": "Official SDK for DealCrawl web scraping, crawling and AI agent API",
    "author": "DealUp <contact@dealup.cc>",