@dealcrawl/sdk 2.3.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +162 -1
- package/dist/index.d.mts +123 -2
- package/dist/index.d.ts +123 -2
- package/dist/index.js +59 -0
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +59 -0
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -8,7 +8,7 @@ Official TypeScript SDK for the DealCrawl web scraping and crawling API.
|
|
|
8
8
|
|
|
9
9
|
## Features
|
|
10
10
|
|
|
11
|
-
- 🚀 **Full API Coverage** - Access all
|
|
11
|
+
- 🚀 **Full API Coverage** - Access all 50+ DealCrawl API endpoints
|
|
12
12
|
- 📦 **Zero Dependencies** - Uses native `fetch`, works everywhere
|
|
13
13
|
- 🔒 **Type-Safe** - Complete TypeScript definitions
|
|
14
14
|
- ⚡ **Automatic Retries** - Built-in retry logic with exponential backoff
|
|
@@ -89,14 +89,20 @@ const job = await client.scrape.withScreenshot("https://example.com", {
|
|
|
89
89
|
| Option | Type | Default | Description |
|
|
90
90
|
|--------|------|---------|-------------|
|
|
91
91
|
| `url` | string | required | URL to scrape |
|
|
92
|
+
| `noStore` | boolean | false | Zero Data Retention - don't save results (Pro/Enterprise) |
|
|
92
93
|
| `detectSignals` | boolean | true | Detect prices, discounts, urgency |
|
|
93
94
|
| `extractDeal` | boolean | false | Extract deal information |
|
|
95
|
+
| `extractMultipleDeals` | boolean | false | Extract multiple deals from list pages |
|
|
96
|
+
| `maxDeals` | number | 20 | Max deals to extract (max: 50) |
|
|
94
97
|
| `extractWithAI` | boolean | false | Use AI for extraction |
|
|
95
98
|
| `useAdvancedModel` | boolean | false | Use GPT-4o (higher cost) |
|
|
96
99
|
| `minDealScore` | number | 0 | Minimum deal score (0-100) |
|
|
97
100
|
| `screenshot` | object | - | Screenshot options |
|
|
98
101
|
| `excludeTags` | string[] | - | HTML tags to exclude |
|
|
102
|
+
| `excludeSelectors` | string[] | - | CSS selectors to exclude |
|
|
99
103
|
| `onlyMainContent` | boolean | true | Extract main content only |
|
|
104
|
+
| `headers` | object | - | Custom HTTP headers |
|
|
105
|
+
| `timeout` | number | 30000 | Request timeout in ms (max: 120000) |
|
|
100
106
|
|
|
101
107
|
### Batch Scrape - Bulk URL Scraping (NEW)
|
|
102
108
|
|
|
@@ -215,6 +221,27 @@ console.log(analysis.estimatedPages);
|
|
|
215
221
|
const job = await client.crawl.forDeals("https://shop.example.com", {
|
|
216
222
|
minDealScore: 70,
|
|
217
223
|
});
|
|
224
|
+
|
|
225
|
+
// Advanced crawl with filtering
|
|
226
|
+
const job = await client.crawl.create({
|
|
227
|
+
url: "https://marketplace.example.com",
|
|
228
|
+
maxDepth: 4,
|
|
229
|
+
maxPages: 500,
|
|
230
|
+
extractDeal: true,
|
|
231
|
+
minDealScore: 50,
|
|
232
|
+
categories: ["software", "courses"],
|
|
233
|
+
priceRange: { min: 0, max: 100 },
|
|
234
|
+
onlyHighQuality: true,
|
|
235
|
+
webhookUrl: "https://my-server.com/crawl-updates",
|
|
236
|
+
syncToDealup: true,
|
|
237
|
+
});
|
|
238
|
+
|
|
239
|
+
// Enterprise: priority queue override
|
|
240
|
+
const job = await client.crawl.create({
|
|
241
|
+
url: "https://time-sensitive-deals.com",
|
|
242
|
+
priority: "high", // Enterprise only
|
|
243
|
+
onlyHighQuality: true,
|
|
244
|
+
});
|
|
218
245
|
```
|
|
219
246
|
|
|
220
247
|
**Available Templates:**
|
|
@@ -225,6 +252,28 @@ const job = await client.crawl.forDeals("https://shop.example.com", {
|
|
|
225
252
|
- `docs` - Documentation sites
|
|
226
253
|
- `custom` - No preset, use your own settings
|
|
227
254
|
|
|
255
|
+
**Crawl Options:**
|
|
256
|
+
| Option | Type | Default | Description |
|
|
257
|
+
|--------|------|---------|-------------|
|
|
258
|
+
| `url` | string | required | Starting URL |
|
|
259
|
+
| `maxDepth` | number | 3 | Max crawl depth (1-5) |
|
|
260
|
+
| `maxPages` | number | 100 | Max pages to crawl (1-1000) |
|
|
261
|
+
| `detectSignals` | boolean | true | Detect prices, discounts |
|
|
262
|
+
| `extractDeal` | boolean | false | Extract deal info with AI |
|
|
263
|
+
| `minDealScore` | number | 30 | Min deal score threshold (0-100) |
|
|
264
|
+
| `categories` | string[] | - | Filter: courses, software, physical, services, other |
|
|
265
|
+
| `priceRange` | object | - | Filter: { min, max } price |
|
|
266
|
+
| `onlyHighQuality` | boolean | false | Only deals scoring 70+ |
|
|
267
|
+
| `allowedMerchants` | string[] | - | Only these merchants |
|
|
268
|
+
| `blockedMerchants` | string[] | - | Exclude these merchants |
|
|
269
|
+
| `webhookUrl` | string | - | Real-time notifications URL |
|
|
270
|
+
| `syncToDealup` | boolean | false | Auto-sync to DealUp |
|
|
271
|
+
| `template` | string | - | Job template to use |
|
|
272
|
+
| `useSmartRouting` | boolean | true | Auto-detect best settings |
|
|
273
|
+
| `priority` | string | - | Queue priority (Enterprise only) |
|
|
274
|
+
| `requireJS` | boolean | false | Force JavaScript rendering |
|
|
275
|
+
| `bypassAntiBot` | boolean | false | Advanced anti-bot techniques |
|
|
276
|
+
|
|
228
277
|
### Extract - LLM-Based Extraction
|
|
229
278
|
|
|
230
279
|
```typescript
|
|
@@ -278,6 +327,101 @@ const query = client.dork.buildQuery({
|
|
|
278
327
|
// Returns: "laptop deals site:amazon.com intitle:discount"
|
|
279
328
|
```
|
|
280
329
|
|
|
330
|
+
### Agent - AI Autonomous Navigation (NEW)
|
|
331
|
+
|
|
332
|
+
Create AI agents that can navigate websites, interact with elements, and extract structured data using natural language instructions.
|
|
333
|
+
|
|
334
|
+
```typescript
|
|
335
|
+
// Basic agent - navigate and extract data
|
|
336
|
+
const job = await client.agent.create({
|
|
337
|
+
url: "https://amazon.com",
|
|
338
|
+
prompt: "Search for wireless headphones under $50 and extract the top 5 results",
|
|
339
|
+
schema: {
|
|
340
|
+
type: "object",
|
|
341
|
+
properties: {
|
|
342
|
+
products: {
|
|
343
|
+
type: "array",
|
|
344
|
+
items: {
|
|
345
|
+
type: "object",
|
|
346
|
+
properties: {
|
|
347
|
+
name: { type: "string" },
|
|
348
|
+
price: { type: "number" },
|
|
349
|
+
rating: { type: "number" },
|
|
350
|
+
},
|
|
351
|
+
},
|
|
352
|
+
},
|
|
353
|
+
},
|
|
354
|
+
},
|
|
355
|
+
maxSteps: 15,
|
|
356
|
+
});
|
|
357
|
+
|
|
358
|
+
// Wait for result
|
|
359
|
+
const result = await client.agentAndWait({
|
|
360
|
+
url: "https://booking.com",
|
|
361
|
+
prompt: "Find hotels in Paris for 2 adults, March 15-17",
|
|
362
|
+
takeScreenshots: true,
|
|
363
|
+
});
|
|
364
|
+
|
|
365
|
+
// Generate schema from natural language (helper)
|
|
366
|
+
const schemaResult = await client.agent.generateSchema({
|
|
367
|
+
prompt: "Find student deals on marketing courses with price and discount",
|
|
368
|
+
});
|
|
369
|
+
// Returns: { schema, refinedPrompt, schemaDescription, confidence, suggestedQuestions? }
|
|
370
|
+
|
|
371
|
+
// Use generated schema
|
|
372
|
+
const job = await client.agent.create({
|
|
373
|
+
url: "https://coursera.org",
|
|
374
|
+
prompt: schemaResult.refinedPrompt,
|
|
375
|
+
schema: schemaResult.schema,
|
|
376
|
+
});
|
|
377
|
+
|
|
378
|
+
// Preset actions (handle popups, cookies, etc.)
|
|
379
|
+
const job = await client.agent.withPresetActions(
|
|
380
|
+
"https://shop.com",
|
|
381
|
+
"Find the best discounts",
|
|
382
|
+
[
|
|
383
|
+
{ type: "click", selector: "#accept-cookies" },
|
|
384
|
+
{ type: "wait", milliseconds: 1000 },
|
|
385
|
+
]
|
|
386
|
+
);
|
|
387
|
+
|
|
388
|
+
// Deal-focused agent with pre-built schema
|
|
389
|
+
const job = await client.agent.forDeals(
|
|
390
|
+
"https://slickdeals.net",
|
|
391
|
+
"Find the top 10 tech deals posted today"
|
|
392
|
+
);
|
|
393
|
+
|
|
394
|
+
// Use Claude instead of GPT
|
|
395
|
+
const job = await client.agent.withClaude(
|
|
396
|
+
"https://complex-site.com",
|
|
397
|
+
"Navigate the checkout process"
|
|
398
|
+
);
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
**Agent Options:**
|
|
402
|
+
| Option | Type | Default | Description |
|
|
403
|
+
|--------|------|---------|-------------|
|
|
404
|
+
| `url` | string | required | Starting URL |
|
|
405
|
+
| `prompt` | string | required | Natural language instructions (10-2000 chars) |
|
|
406
|
+
| `schema` | object | - | JSON Schema for structured output |
|
|
407
|
+
| `maxSteps` | number | 10 | Maximum navigation steps (max: 25) |
|
|
408
|
+
| `actions` | array | - | Preset actions to execute first |
|
|
409
|
+
| `model` | string | "openai" | LLM provider: "openai" or "anthropic" |
|
|
410
|
+
| `timeout` | number | 30000 | Per-step timeout in ms (max: 60000) |
|
|
411
|
+
| `takeScreenshots` | boolean | false | Capture screenshot at each step |
|
|
412
|
+
| `onlyMainContent` | boolean | true | Extract main content only |
|
|
413
|
+
|
|
414
|
+
**Action Types:**
|
|
415
|
+
|
|
416
|
+
- `click` - Click an element
|
|
417
|
+
- `scroll` - Scroll page or to element
|
|
418
|
+
- `write` - Type text into input
|
|
419
|
+
- `wait` - Wait for time or element
|
|
420
|
+
- `press` - Press keyboard key
|
|
421
|
+
- `screenshot` - Capture screenshot
|
|
422
|
+
- `hover` - Hover over element
|
|
423
|
+
- `select` - Select dropdown option
|
|
424
|
+
|
|
281
425
|
### Status - Job Management
|
|
282
426
|
|
|
283
427
|
```typescript
|
|
@@ -406,6 +550,7 @@ const stats = await client.keys.getStats(keyId, { days: 30 });
|
|
|
406
550
|
| `crawl` | `POST /v1/crawl` | Create crawl jobs |
|
|
407
551
|
| `dork` | `POST /v1/dork` | Create dork searches |
|
|
408
552
|
| `extract` | `POST /v1/extract` | Create extraction jobs |
|
|
553
|
+
| `agent` | `POST /v1/agent` | Create AI agent jobs |
|
|
409
554
|
| `status` | `GET /v1/status/:id` | Read job status |
|
|
410
555
|
| `data:read` | `GET /v1/data/*` | Read jobs/deals |
|
|
411
556
|
| `data:export` | `GET /v1/data/export` | Export data |
|
|
@@ -435,10 +580,13 @@ await client.keys.create({
|
|
|
435
580
|
"crawl",
|
|
436
581
|
"dork",
|
|
437
582
|
"extract",
|
|
583
|
+
"agent",
|
|
584
|
+
"search",
|
|
438
585
|
"status",
|
|
439
586
|
"data:read",
|
|
440
587
|
"data:export",
|
|
441
588
|
"keys:manage",
|
|
589
|
+
"webhooks:manage",
|
|
442
590
|
],
|
|
443
591
|
});
|
|
444
592
|
```
|
|
@@ -587,14 +735,27 @@ import type {
|
|
|
587
735
|
|
|
588
736
|
// Request Options
|
|
589
737
|
ScrapeOptions,
|
|
738
|
+
BatchScrapeOptions,
|
|
590
739
|
CrawlOptions,
|
|
740
|
+
CrawlPriority,
|
|
741
|
+
CrawlCategory,
|
|
742
|
+
PriceRange,
|
|
743
|
+
SearchOptions,
|
|
591
744
|
ExtractOptions,
|
|
592
745
|
DorkOptions,
|
|
746
|
+
AgentOptions,
|
|
747
|
+
SchemaGenerationOptions,
|
|
593
748
|
|
|
594
749
|
// Responses
|
|
595
750
|
JobStatusResponse,
|
|
596
751
|
ListDealsResponse,
|
|
597
752
|
DealItem,
|
|
753
|
+
AgentJobResponse,
|
|
754
|
+
AgentStatusResponse,
|
|
755
|
+
AgentResultResponse,
|
|
756
|
+
SchemaGenerationResponse,
|
|
757
|
+
SearchJobResponse,
|
|
758
|
+
BatchScrapeResponse,
|
|
598
759
|
|
|
599
760
|
// Re-exports from @dealcrawl/shared
|
|
600
761
|
ScrapeResult,
|
package/dist/index.d.mts
CHANGED
|
@@ -839,6 +839,19 @@ interface AgentStatusResponse extends JobStatusResponse {
|
|
|
839
839
|
/** Final result when completed */
|
|
840
840
|
result?: AgentResultResponse;
|
|
841
841
|
}
|
|
842
|
+
/** Schema generation response from /v1/agent/schema */
|
|
843
|
+
interface SchemaGenerationResponse {
|
|
844
|
+
/** Generated JSON Schema for data extraction */
|
|
845
|
+
schema: Record<string, unknown>;
|
|
846
|
+
/** Refined prompt optimized for agent execution */
|
|
847
|
+
refinedPrompt: string;
|
|
848
|
+
/** Human-readable description of the schema */
|
|
849
|
+
schemaDescription: string;
|
|
850
|
+
/** Suggested follow-up questions if prompt was ambiguous */
|
|
851
|
+
suggestedQuestions?: string[];
|
|
852
|
+
/** Confidence score (0-1) in the generated schema */
|
|
853
|
+
confidence: number;
|
|
854
|
+
}
|
|
842
855
|
|
|
843
856
|
/**
|
|
844
857
|
* Polling Utilities
|
|
@@ -919,6 +932,8 @@ interface ScreenshotOptions {
|
|
|
919
932
|
interface ScrapeOptions {
|
|
920
933
|
/** URL to scrape (required) */
|
|
921
934
|
url: string;
|
|
935
|
+
/** Don't save scrape results - Zero Data Retention (Pro/Enterprise only) */
|
|
936
|
+
noStore?: boolean;
|
|
922
937
|
/** Detect signals like prices, discounts, urgency (default: true) */
|
|
923
938
|
detectSignals?: boolean;
|
|
924
939
|
/** Extract content using AI */
|
|
@@ -1058,6 +1073,17 @@ interface SearchOptions {
|
|
|
1058
1073
|
headers?: Record<string, string>;
|
|
1059
1074
|
};
|
|
1060
1075
|
}
|
|
1076
|
+
/** Crawl queue priority level (Enterprise only) */
|
|
1077
|
+
type CrawlPriority = "high" | "medium" | "low";
|
|
1078
|
+
/** Product category filter for crawls */
|
|
1079
|
+
type CrawlCategory = "courses" | "software" | "physical" | "services" | "other";
|
|
1080
|
+
/** Price range filter for crawls */
|
|
1081
|
+
interface PriceRange {
|
|
1082
|
+
/** Minimum price */
|
|
1083
|
+
min?: number;
|
|
1084
|
+
/** Maximum price */
|
|
1085
|
+
max?: number;
|
|
1086
|
+
}
|
|
1061
1087
|
/** Options for crawling a website */
|
|
1062
1088
|
interface CrawlOptions {
|
|
1063
1089
|
/** Starting URL for the crawl (required) */
|
|
@@ -1078,7 +1104,7 @@ interface CrawlOptions {
|
|
|
1078
1104
|
extractWithAI?: boolean;
|
|
1079
1105
|
/** Extract deal-specific information from each page */
|
|
1080
1106
|
extractDeal?: boolean;
|
|
1081
|
-
/** Minimum deal score threshold (0-100) */
|
|
1107
|
+
/** Minimum deal score threshold (0-100, default: 30) */
|
|
1082
1108
|
minDealScore?: number;
|
|
1083
1109
|
/** Prioritize pages likely to contain deals (default: true) */
|
|
1084
1110
|
prioritizeDealPages?: boolean;
|
|
@@ -1088,6 +1114,34 @@ interface CrawlOptions {
|
|
|
1088
1114
|
allowedDomains?: string[];
|
|
1089
1115
|
/** URL patterns to exclude from crawling */
|
|
1090
1116
|
excludePatterns?: string[];
|
|
1117
|
+
/** Filter by product categories */
|
|
1118
|
+
categories?: CrawlCategory[];
|
|
1119
|
+
/** Filter by price range */
|
|
1120
|
+
priceRange?: PriceRange;
|
|
1121
|
+
/** Prefer static scraping (faster, default: true) */
|
|
1122
|
+
preferStatic?: boolean;
|
|
1123
|
+
/** Require JavaScript rendering for all pages */
|
|
1124
|
+
requireJS?: boolean;
|
|
1125
|
+
/** Use advanced anti-bot bypass techniques */
|
|
1126
|
+
bypassAntiBot?: boolean;
|
|
1127
|
+
/** Only return high-quality deals (score >= 70) */
|
|
1128
|
+
onlyHighQuality?: boolean;
|
|
1129
|
+
/** Only crawl pages from these merchants */
|
|
1130
|
+
allowedMerchants?: string[];
|
|
1131
|
+
/** Skip pages from these merchants */
|
|
1132
|
+
blockedMerchants?: string[];
|
|
1133
|
+
/** Webhook URL for real-time notifications */
|
|
1134
|
+
webhookUrl?: string;
|
|
1135
|
+
/** Auto-sync discovered deals to DealUp */
|
|
1136
|
+
syncToDealup?: boolean;
|
|
1137
|
+
/** Site-specific config name from registry */
|
|
1138
|
+
siteConfig?: string;
|
|
1139
|
+
/** Job template to use (ecommerce, blog, docs, marketplace, custom) */
|
|
1140
|
+
template?: CrawlTemplateId;
|
|
1141
|
+
/** Use smart job routing to auto-detect best settings (default: true) */
|
|
1142
|
+
useSmartRouting?: boolean;
|
|
1143
|
+
/** Priority queue override (Enterprise only) */
|
|
1144
|
+
priority?: CrawlPriority;
|
|
1091
1145
|
}
|
|
1092
1146
|
/** Crawl template identifier */
|
|
1093
1147
|
type CrawlTemplateId = "ecommerce" | "blog" | "docs" | "marketplace" | "custom";
|
|
@@ -1260,7 +1314,7 @@ interface UpdateWebhookOptions {
|
|
|
1260
1314
|
* API key scope - Must match @dealcrawl/shared/src/types/api-key.types.ts
|
|
1261
1315
|
* These are the actual scopes enforced by the backend via requireScope() middleware
|
|
1262
1316
|
*/
|
|
1263
|
-
type ApiKeyScope = "scrape" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
|
|
1317
|
+
type ApiKeyScope = "scrape" | "scrape:batch" | "search" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
|
|
1264
1318
|
/**
|
|
1265
1319
|
* All available scopes (for reference and validation)
|
|
1266
1320
|
*/
|
|
@@ -1418,6 +1472,26 @@ interface AgentOptions {
|
|
|
1418
1472
|
headers?: Record<string, string>;
|
|
1419
1473
|
};
|
|
1420
1474
|
}
|
|
1475
|
+
/** Context for schema generation from conversation */
|
|
1476
|
+
interface SchemaGenerationContext {
|
|
1477
|
+
/** Specific domains/topics mentioned (e.g., ['marketing', 'web development']) */
|
|
1478
|
+
domains?: string[];
|
|
1479
|
+
/** Types of data to extract (e.g., ['free courses', 'discounts']) */
|
|
1480
|
+
dataTypes?: string[];
|
|
1481
|
+
/** Preferred output format */
|
|
1482
|
+
format?: "json" | "csv" | "table";
|
|
1483
|
+
/** Additional clarifications from user */
|
|
1484
|
+
clarifications?: string[];
|
|
1485
|
+
}
|
|
1486
|
+
/** Options for generating a JSON Schema from natural language */
|
|
1487
|
+
interface SchemaGenerationOptions {
|
|
1488
|
+
/** Natural language description of what data to extract (required, 5-2000 chars) */
|
|
1489
|
+
prompt: string;
|
|
1490
|
+
/** Optional context from conversation to refine the schema */
|
|
1491
|
+
context?: SchemaGenerationContext;
|
|
1492
|
+
/** LLM provider for generation (default: openai) */
|
|
1493
|
+
model?: AgentModel;
|
|
1494
|
+
}
|
|
1421
1495
|
|
|
1422
1496
|
/**
|
|
1423
1497
|
* Account Resource
|
|
@@ -1713,6 +1787,53 @@ declare class AgentResource {
|
|
|
1713
1787
|
* ```
|
|
1714
1788
|
*/
|
|
1715
1789
|
withClaude(url: string, prompt: string, options?: Omit<AgentOptions, "url" | "prompt" | "model">): Promise<AgentJobResponse>;
|
|
1790
|
+
/**
|
|
1791
|
+
* Generate a JSON Schema from a natural language prompt
|
|
1792
|
+
*
|
|
1793
|
+
* This is useful for building extraction schemas without manual JSON writing.
|
|
1794
|
+
* The generated schema can be used with the main agent.create() method.
|
|
1795
|
+
*
|
|
1796
|
+
* @param options - Schema generation options
|
|
1797
|
+
* @returns Generated schema with refined prompt and confidence score
|
|
1798
|
+
*
|
|
1799
|
+
* @example Basic usage:
|
|
1800
|
+
* ```ts
|
|
1801
|
+
* const result = await client.agent.generateSchema({
|
|
1802
|
+
* prompt: "Find the best student deals on Coursera for marketing courses"
|
|
1803
|
+
* });
|
|
1804
|
+
*
|
|
1805
|
+
* console.log(result.schema);
|
|
1806
|
+
* // { type: "object", properties: { courses: { ... } } }
|
|
1807
|
+
*
|
|
1808
|
+
* console.log(result.refinedPrompt);
|
|
1809
|
+
* // "Extract student offers for marketing courses..."
|
|
1810
|
+
*
|
|
1811
|
+
* // Use the generated schema with an agent
|
|
1812
|
+
* const job = await client.agent.create({
|
|
1813
|
+
* url: "https://coursera.org",
|
|
1814
|
+
* prompt: result.refinedPrompt,
|
|
1815
|
+
* schema: result.schema
|
|
1816
|
+
* });
|
|
1817
|
+
* ```
|
|
1818
|
+
*
|
|
1819
|
+
* @example With context from conversation:
|
|
1820
|
+
* ```ts
|
|
1821
|
+
* const result = await client.agent.generateSchema({
|
|
1822
|
+
* prompt: "Find student deals on online courses",
|
|
1823
|
+
* context: {
|
|
1824
|
+
* domains: ["marketing", "web development"],
|
|
1825
|
+
* dataTypes: ["free courses", "discounts"],
|
|
1826
|
+
* format: "json"
|
|
1827
|
+
* }
|
|
1828
|
+
* });
|
|
1829
|
+
*
|
|
1830
|
+
* if (result.confidence < 0.7 && result.suggestedQuestions) {
|
|
1831
|
+
* // Ask user for clarification
|
|
1832
|
+
* console.log("Please clarify:", result.suggestedQuestions);
|
|
1833
|
+
* }
|
|
1834
|
+
* ```
|
|
1835
|
+
*/
|
|
1836
|
+
generateSchema(options: SchemaGenerationOptions): Promise<SchemaGenerationResponse>;
|
|
1716
1837
|
}
|
|
1717
1838
|
|
|
1718
1839
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -839,6 +839,19 @@ interface AgentStatusResponse extends JobStatusResponse {
|
|
|
839
839
|
/** Final result when completed */
|
|
840
840
|
result?: AgentResultResponse;
|
|
841
841
|
}
|
|
842
|
+
/** Schema generation response from /v1/agent/schema */
|
|
843
|
+
interface SchemaGenerationResponse {
|
|
844
|
+
/** Generated JSON Schema for data extraction */
|
|
845
|
+
schema: Record<string, unknown>;
|
|
846
|
+
/** Refined prompt optimized for agent execution */
|
|
847
|
+
refinedPrompt: string;
|
|
848
|
+
/** Human-readable description of the schema */
|
|
849
|
+
schemaDescription: string;
|
|
850
|
+
/** Suggested follow-up questions if prompt was ambiguous */
|
|
851
|
+
suggestedQuestions?: string[];
|
|
852
|
+
/** Confidence score (0-1) in the generated schema */
|
|
853
|
+
confidence: number;
|
|
854
|
+
}
|
|
842
855
|
|
|
843
856
|
/**
|
|
844
857
|
* Polling Utilities
|
|
@@ -919,6 +932,8 @@ interface ScreenshotOptions {
|
|
|
919
932
|
interface ScrapeOptions {
|
|
920
933
|
/** URL to scrape (required) */
|
|
921
934
|
url: string;
|
|
935
|
+
/** Don't save scrape results - Zero Data Retention (Pro/Enterprise only) */
|
|
936
|
+
noStore?: boolean;
|
|
922
937
|
/** Detect signals like prices, discounts, urgency (default: true) */
|
|
923
938
|
detectSignals?: boolean;
|
|
924
939
|
/** Extract content using AI */
|
|
@@ -1058,6 +1073,17 @@ interface SearchOptions {
|
|
|
1058
1073
|
headers?: Record<string, string>;
|
|
1059
1074
|
};
|
|
1060
1075
|
}
|
|
1076
|
+
/** Crawl queue priority level (Enterprise only) */
|
|
1077
|
+
type CrawlPriority = "high" | "medium" | "low";
|
|
1078
|
+
/** Product category filter for crawls */
|
|
1079
|
+
type CrawlCategory = "courses" | "software" | "physical" | "services" | "other";
|
|
1080
|
+
/** Price range filter for crawls */
|
|
1081
|
+
interface PriceRange {
|
|
1082
|
+
/** Minimum price */
|
|
1083
|
+
min?: number;
|
|
1084
|
+
/** Maximum price */
|
|
1085
|
+
max?: number;
|
|
1086
|
+
}
|
|
1061
1087
|
/** Options for crawling a website */
|
|
1062
1088
|
interface CrawlOptions {
|
|
1063
1089
|
/** Starting URL for the crawl (required) */
|
|
@@ -1078,7 +1104,7 @@ interface CrawlOptions {
|
|
|
1078
1104
|
extractWithAI?: boolean;
|
|
1079
1105
|
/** Extract deal-specific information from each page */
|
|
1080
1106
|
extractDeal?: boolean;
|
|
1081
|
-
/** Minimum deal score threshold (0-100) */
|
|
1107
|
+
/** Minimum deal score threshold (0-100, default: 30) */
|
|
1082
1108
|
minDealScore?: number;
|
|
1083
1109
|
/** Prioritize pages likely to contain deals (default: true) */
|
|
1084
1110
|
prioritizeDealPages?: boolean;
|
|
@@ -1088,6 +1114,34 @@ interface CrawlOptions {
|
|
|
1088
1114
|
allowedDomains?: string[];
|
|
1089
1115
|
/** URL patterns to exclude from crawling */
|
|
1090
1116
|
excludePatterns?: string[];
|
|
1117
|
+
/** Filter by product categories */
|
|
1118
|
+
categories?: CrawlCategory[];
|
|
1119
|
+
/** Filter by price range */
|
|
1120
|
+
priceRange?: PriceRange;
|
|
1121
|
+
/** Prefer static scraping (faster, default: true) */
|
|
1122
|
+
preferStatic?: boolean;
|
|
1123
|
+
/** Require JavaScript rendering for all pages */
|
|
1124
|
+
requireJS?: boolean;
|
|
1125
|
+
/** Use advanced anti-bot bypass techniques */
|
|
1126
|
+
bypassAntiBot?: boolean;
|
|
1127
|
+
/** Only return high-quality deals (score >= 70) */
|
|
1128
|
+
onlyHighQuality?: boolean;
|
|
1129
|
+
/** Only crawl pages from these merchants */
|
|
1130
|
+
allowedMerchants?: string[];
|
|
1131
|
+
/** Skip pages from these merchants */
|
|
1132
|
+
blockedMerchants?: string[];
|
|
1133
|
+
/** Webhook URL for real-time notifications */
|
|
1134
|
+
webhookUrl?: string;
|
|
1135
|
+
/** Auto-sync discovered deals to DealUp */
|
|
1136
|
+
syncToDealup?: boolean;
|
|
1137
|
+
/** Site-specific config name from registry */
|
|
1138
|
+
siteConfig?: string;
|
|
1139
|
+
/** Job template to use (ecommerce, blog, docs, marketplace, custom) */
|
|
1140
|
+
template?: CrawlTemplateId;
|
|
1141
|
+
/** Use smart job routing to auto-detect best settings (default: true) */
|
|
1142
|
+
useSmartRouting?: boolean;
|
|
1143
|
+
/** Priority queue override (Enterprise only) */
|
|
1144
|
+
priority?: CrawlPriority;
|
|
1091
1145
|
}
|
|
1092
1146
|
/** Crawl template identifier */
|
|
1093
1147
|
type CrawlTemplateId = "ecommerce" | "blog" | "docs" | "marketplace" | "custom";
|
|
@@ -1260,7 +1314,7 @@ interface UpdateWebhookOptions {
|
|
|
1260
1314
|
* API key scope - Must match @dealcrawl/shared/src/types/api-key.types.ts
|
|
1261
1315
|
* These are the actual scopes enforced by the backend via requireScope() middleware
|
|
1262
1316
|
*/
|
|
1263
|
-
type ApiKeyScope = "scrape" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
|
|
1317
|
+
type ApiKeyScope = "scrape" | "scrape:batch" | "search" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
|
|
1264
1318
|
/**
|
|
1265
1319
|
* All available scopes (for reference and validation)
|
|
1266
1320
|
*/
|
|
@@ -1418,6 +1472,26 @@ interface AgentOptions {
|
|
|
1418
1472
|
headers?: Record<string, string>;
|
|
1419
1473
|
};
|
|
1420
1474
|
}
|
|
1475
|
+
/** Context for schema generation from conversation */
|
|
1476
|
+
interface SchemaGenerationContext {
|
|
1477
|
+
/** Specific domains/topics mentioned (e.g., ['marketing', 'web development']) */
|
|
1478
|
+
domains?: string[];
|
|
1479
|
+
/** Types of data to extract (e.g., ['free courses', 'discounts']) */
|
|
1480
|
+
dataTypes?: string[];
|
|
1481
|
+
/** Preferred output format */
|
|
1482
|
+
format?: "json" | "csv" | "table";
|
|
1483
|
+
/** Additional clarifications from user */
|
|
1484
|
+
clarifications?: string[];
|
|
1485
|
+
}
|
|
1486
|
+
/** Options for generating a JSON Schema from natural language */
|
|
1487
|
+
interface SchemaGenerationOptions {
|
|
1488
|
+
/** Natural language description of what data to extract (required, 5-2000 chars) */
|
|
1489
|
+
prompt: string;
|
|
1490
|
+
/** Optional context from conversation to refine the schema */
|
|
1491
|
+
context?: SchemaGenerationContext;
|
|
1492
|
+
/** LLM provider for generation (default: openai) */
|
|
1493
|
+
model?: AgentModel;
|
|
1494
|
+
}
|
|
1421
1495
|
|
|
1422
1496
|
/**
|
|
1423
1497
|
* Account Resource
|
|
@@ -1713,6 +1787,53 @@ declare class AgentResource {
|
|
|
1713
1787
|
* ```
|
|
1714
1788
|
*/
|
|
1715
1789
|
withClaude(url: string, prompt: string, options?: Omit<AgentOptions, "url" | "prompt" | "model">): Promise<AgentJobResponse>;
|
|
1790
|
+
/**
|
|
1791
|
+
* Generate a JSON Schema from a natural language prompt
|
|
1792
|
+
*
|
|
1793
|
+
* This is useful for building extraction schemas without manual JSON writing.
|
|
1794
|
+
* The generated schema can be used with the main agent.create() method.
|
|
1795
|
+
*
|
|
1796
|
+
* @param options - Schema generation options
|
|
1797
|
+
* @returns Generated schema with refined prompt and confidence score
|
|
1798
|
+
*
|
|
1799
|
+
* @example Basic usage:
|
|
1800
|
+
* ```ts
|
|
1801
|
+
* const result = await client.agent.generateSchema({
|
|
1802
|
+
* prompt: "Find the best student deals on Coursera for marketing courses"
|
|
1803
|
+
* });
|
|
1804
|
+
*
|
|
1805
|
+
* console.log(result.schema);
|
|
1806
|
+
* // { type: "object", properties: { courses: { ... } } }
|
|
1807
|
+
*
|
|
1808
|
+
* console.log(result.refinedPrompt);
|
|
1809
|
+
* // "Extract student offers for marketing courses..."
|
|
1810
|
+
*
|
|
1811
|
+
* // Use the generated schema with an agent
|
|
1812
|
+
* const job = await client.agent.create({
|
|
1813
|
+
* url: "https://coursera.org",
|
|
1814
|
+
* prompt: result.refinedPrompt,
|
|
1815
|
+
* schema: result.schema
|
|
1816
|
+
* });
|
|
1817
|
+
* ```
|
|
1818
|
+
*
|
|
1819
|
+
* @example With context from conversation:
|
|
1820
|
+
* ```ts
|
|
1821
|
+
* const result = await client.agent.generateSchema({
|
|
1822
|
+
* prompt: "Find student deals on online courses",
|
|
1823
|
+
* context: {
|
|
1824
|
+
* domains: ["marketing", "web development"],
|
|
1825
|
+
* dataTypes: ["free courses", "discounts"],
|
|
1826
|
+
* format: "json"
|
|
1827
|
+
* }
|
|
1828
|
+
* });
|
|
1829
|
+
*
|
|
1830
|
+
* if (result.confidence < 0.7 && result.suggestedQuestions) {
|
|
1831
|
+
* // Ask user for clarification
|
|
1832
|
+
* console.log("Please clarify:", result.suggestedQuestions);
|
|
1833
|
+
* }
|
|
1834
|
+
* ```
|
|
1835
|
+
*/
|
|
1836
|
+
generateSchema(options: SchemaGenerationOptions): Promise<SchemaGenerationResponse>;
|
|
1716
1837
|
}
|
|
1717
1838
|
|
|
1718
1839
|
/**
|
package/dist/index.js
CHANGED
|
@@ -784,6 +784,65 @@ var AgentResource = class {
|
|
|
784
784
|
...options
|
|
785
785
|
});
|
|
786
786
|
}
|
|
787
|
+
/**
|
|
788
|
+
* Generate a JSON Schema from a natural language prompt
|
|
789
|
+
*
|
|
790
|
+
* This is useful for building extraction schemas without manual JSON writing.
|
|
791
|
+
* The generated schema can be used with the main agent.create() method.
|
|
792
|
+
*
|
|
793
|
+
* @param options - Schema generation options
|
|
794
|
+
* @returns Generated schema with refined prompt and confidence score
|
|
795
|
+
*
|
|
796
|
+
* @example Basic usage:
|
|
797
|
+
* ```ts
|
|
798
|
+
* const result = await client.agent.generateSchema({
|
|
799
|
+
* prompt: "Find the best student deals on Coursera for marketing courses"
|
|
800
|
+
* });
|
|
801
|
+
*
|
|
802
|
+
* console.log(result.schema);
|
|
803
|
+
* // { type: "object", properties: { courses: { ... } } }
|
|
804
|
+
*
|
|
805
|
+
* console.log(result.refinedPrompt);
|
|
806
|
+
* // "Extract student offers for marketing courses..."
|
|
807
|
+
*
|
|
808
|
+
* // Use the generated schema with an agent
|
|
809
|
+
* const job = await client.agent.create({
|
|
810
|
+
* url: "https://coursera.org",
|
|
811
|
+
* prompt: result.refinedPrompt,
|
|
812
|
+
* schema: result.schema
|
|
813
|
+
* });
|
|
814
|
+
* ```
|
|
815
|
+
*
|
|
816
|
+
* @example With context from conversation:
|
|
817
|
+
* ```ts
|
|
818
|
+
* const result = await client.agent.generateSchema({
|
|
819
|
+
* prompt: "Find student deals on online courses",
|
|
820
|
+
* context: {
|
|
821
|
+
* domains: ["marketing", "web development"],
|
|
822
|
+
* dataTypes: ["free courses", "discounts"],
|
|
823
|
+
* format: "json"
|
|
824
|
+
* }
|
|
825
|
+
* });
|
|
826
|
+
*
|
|
827
|
+
* if (result.confidence < 0.7 && result.suggestedQuestions) {
|
|
828
|
+
* // Ask user for clarification
|
|
829
|
+
* console.log("Please clarify:", result.suggestedQuestions);
|
|
830
|
+
* }
|
|
831
|
+
* ```
|
|
832
|
+
*/
|
|
833
|
+
async generateSchema(options) {
|
|
834
|
+
const body = {
|
|
835
|
+
prompt: options.prompt,
|
|
836
|
+
context: options.context,
|
|
837
|
+
model: options.model ?? "openai"
|
|
838
|
+
};
|
|
839
|
+
const result = await post(
|
|
840
|
+
this.ctx,
|
|
841
|
+
"/v1/agent/schema",
|
|
842
|
+
body
|
|
843
|
+
);
|
|
844
|
+
return result.data;
|
|
845
|
+
}
|
|
787
846
|
};
|
|
788
847
|
|
|
789
848
|
// src/resources/crawl.ts
|