@dealcrawl/sdk 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,7 +8,7 @@ Official TypeScript SDK for the DealCrawl web scraping and crawling API.
8
8
 
9
9
  ## Features
10
10
 
11
- - 🚀 **Full API Coverage** - Access all 45 DealCrawl API endpoints
11
+ - 🚀 **Full API Coverage** - Access all 50+ DealCrawl API endpoints
12
12
  - 📦 **Zero Dependencies** - Uses native `fetch`, works everywhere
13
13
  - 🔒 **Type-Safe** - Complete TypeScript definitions
14
14
  - ⚡ **Automatic Retries** - Built-in retry logic with exponential backoff
@@ -89,14 +89,20 @@ const job = await client.scrape.withScreenshot("https://example.com", {
89
89
  | Option | Type | Default | Description |
90
90
  |--------|------|---------|-------------|
91
91
  | `url` | string | required | URL to scrape |
92
+ | `noStore` | boolean | false | Zero Data Retention - don't save results (Pro/Enterprise) |
92
93
  | `detectSignals` | boolean | true | Detect prices, discounts, urgency |
93
94
  | `extractDeal` | boolean | false | Extract deal information |
95
+ | `extractMultipleDeals` | boolean | false | Extract multiple deals from list pages |
96
+ | `maxDeals` | number | 20 | Max deals to extract (max: 50) |
94
97
  | `extractWithAI` | boolean | false | Use AI for extraction |
95
98
  | `useAdvancedModel` | boolean | false | Use GPT-4o (higher cost) |
96
99
  | `minDealScore` | number | 0 | Minimum deal score (0-100) |
97
100
  | `screenshot` | object | - | Screenshot options |
98
101
  | `excludeTags` | string[] | - | HTML tags to exclude |
102
+ | `excludeSelectors` | string[] | - | CSS selectors to exclude |
99
103
  | `onlyMainContent` | boolean | true | Extract main content only |
104
+ | `headers` | object | - | Custom HTTP headers |
105
+ | `timeout` | number | 30000 | Request timeout in ms (max: 120000) |
100
106
 
101
107
  ### Batch Scrape - Bulk URL Scraping (NEW)
102
108
 
@@ -215,6 +221,27 @@ console.log(analysis.estimatedPages);
215
221
  const job = await client.crawl.forDeals("https://shop.example.com", {
216
222
  minDealScore: 70,
217
223
  });
224
+
225
+ // Advanced crawl with filtering
226
+ const job = await client.crawl.create({
227
+ url: "https://marketplace.example.com",
228
+ maxDepth: 4,
229
+ maxPages: 500,
230
+ extractDeal: true,
231
+ minDealScore: 50,
232
+ categories: ["software", "courses"],
233
+ priceRange: { min: 0, max: 100 },
234
+ onlyHighQuality: true,
235
+ webhookUrl: "https://my-server.com/crawl-updates",
236
+ syncToDealup: true,
237
+ });
238
+
239
+ // Enterprise: priority queue override
240
+ const job = await client.crawl.create({
241
+ url: "https://time-sensitive-deals.com",
242
+ priority: "high", // Enterprise only
243
+ onlyHighQuality: true,
244
+ });
218
245
  ```
219
246
 
220
247
  **Available Templates:**
@@ -225,6 +252,28 @@ const job = await client.crawl.forDeals("https://shop.example.com", {
225
252
  - `docs` - Documentation sites
226
253
  - `custom` - No preset, use your own settings
227
254
 
255
+ **Crawl Options:**
256
+ | Option | Type | Default | Description |
257
+ |--------|------|---------|-------------|
258
+ | `url` | string | required | Starting URL |
259
+ | `maxDepth` | number | 3 | Max crawl depth (1-5) |
260
+ | `maxPages` | number | 100 | Max pages to crawl (1-1000) |
261
+ | `detectSignals` | boolean | true | Detect prices, discounts |
262
+ | `extractDeal` | boolean | false | Extract deal info with AI |
263
+ | `minDealScore` | number | 30 | Min deal score threshold (0-100) |
264
+ | `categories` | array | - | Filter: courses, software, physical, services, other |
265
+ | `priceRange` | object | - | Filter: { min, max } price |
266
+ | `onlyHighQuality` | boolean | false | Only deals scoring 70+ |
267
+ | `allowedMerchants` | string[] | - | Only these merchants |
268
+ | `blockedMerchants` | string[] | - | Exclude these merchants |
269
+ | `webhookUrl` | string | - | Real-time notifications URL |
270
+ | `syncToDealup` | boolean | false | Auto-sync to DealUp |
271
+ | `template` | string | - | Job template to use |
272
+ | `useSmartRouting` | boolean | true | Auto-detect best settings |
273
+ | `priority` | string | - | Queue priority (Enterprise only) |
274
+ | `requireJS` | boolean | false | Force JavaScript rendering |
275
+ | `bypassAntiBot` | boolean | false | Advanced anti-bot techniques |
276
+
228
277
  ### Extract - LLM-Based Extraction
229
278
 
230
279
  ```typescript
@@ -278,6 +327,101 @@ const query = client.dork.buildQuery({
278
327
  // Returns: "laptop deals site:amazon.com intitle:discount"
279
328
  ```
280
329
 
330
+ ### Agent - AI Autonomous Navigation (NEW)
331
+
332
+ Create AI agents that can navigate websites, interact with elements, and extract structured data using natural language instructions.
333
+
334
+ ```typescript
335
+ // Basic agent - navigate and extract data
336
+ const job = await client.agent.create({
337
+ url: "https://amazon.com",
338
+ prompt: "Search for wireless headphones under $50 and extract the top 5 results",
339
+ schema: {
340
+ type: "object",
341
+ properties: {
342
+ products: {
343
+ type: "array",
344
+ items: {
345
+ type: "object",
346
+ properties: {
347
+ name: { type: "string" },
348
+ price: { type: "number" },
349
+ rating: { type: "number" },
350
+ },
351
+ },
352
+ },
353
+ },
354
+ },
355
+ maxSteps: 15,
356
+ });
357
+
358
+ // Wait for result
359
+ const result = await client.agentAndWait({
360
+ url: "https://booking.com",
361
+ prompt: "Find hotels in Paris for 2 adults, March 15-17",
362
+ takeScreenshots: true,
363
+ });
364
+
365
+ // Generate schema from natural language (helper)
366
+ const schemaResult = await client.agent.generateSchema({
367
+ prompt: "Find student deals on marketing courses with price and discount",
368
+ });
369
+ // Returns: { schema, refinedPrompt, schemaDescription, confidence, suggestedQuestions? }
370
+
371
+ // Use generated schema
372
+ const job = await client.agent.create({
373
+ url: "https://coursera.org",
374
+ prompt: schemaResult.refinedPrompt,
375
+ schema: schemaResult.schema,
376
+ });
377
+
378
+ // Preset actions (handle popups, cookies, etc.)
379
+ const job = await client.agent.withPresetActions(
380
+ "https://shop.com",
381
+ "Find the best discounts",
382
+ [
383
+ { type: "click", selector: "#accept-cookies" },
384
+ { type: "wait", milliseconds: 1000 },
385
+ ]
386
+ );
387
+
388
+ // Deal-focused agent with pre-built schema
389
+ const job = await client.agent.forDeals(
390
+ "https://slickdeals.net",
391
+ "Find the top 10 tech deals posted today"
392
+ );
393
+
394
+ // Use Claude instead of GPT
395
+ const job = await client.agent.withClaude(
396
+ "https://complex-site.com",
397
+ "Navigate the checkout process"
398
+ );
399
+ ```
400
+
401
+ **Agent Options:**
402
+ | Option | Type | Default | Description |
403
+ |--------|------|---------|-------------|
404
+ | `url` | string | required | Starting URL |
405
+ | `prompt` | string | required | Natural language instructions (10-2000 chars) |
406
+ | `schema` | object | - | JSON Schema for structured output |
407
+ | `maxSteps` | number | 10 | Maximum navigation steps (max: 25) |
408
+ | `actions` | array | - | Preset actions to execute first |
409
+ | `model` | string | "openai" | LLM provider: "openai" or "anthropic" |
410
+ | `timeout` | number | 30000 | Per-step timeout in ms (max: 60000) |
411
+ | `takeScreenshots` | boolean | false | Capture screenshot at each step |
412
+ | `onlyMainContent` | boolean | true | Extract main content only |
413
+
414
+ **Action Types:**
415
+
416
+ - `click` - Click an element
417
+ - `scroll` - Scroll page or to element
418
+ - `write` - Type text into input
419
+ - `wait` - Wait for time or element
420
+ - `press` - Press keyboard key
421
+ - `screenshot` - Capture screenshot
422
+ - `hover` - Hover over element
423
+ - `select` - Select dropdown option
424
+
281
425
  ### Status - Job Management
282
426
 
283
427
  ```typescript
@@ -406,6 +550,7 @@ const stats = await client.keys.getStats(keyId, { days: 30 });
406
550
  | `crawl` | `POST /v1/crawl` | Create crawl jobs |
407
551
  | `dork` | `POST /v1/dork` | Create dork searches |
408
552
  | `extract` | `POST /v1/extract` | Create extraction jobs |
553
+ | `agent` | `POST /v1/agent` | Create AI agent jobs |
409
554
  | `status` | `GET /v1/status/:id` | Read job status |
410
555
  | `data:read` | `GET /v1/data/*` | Read jobs/deals |
411
556
  | `data:export` | `GET /v1/data/export` | Export data |
@@ -435,10 +580,13 @@ await client.keys.create({
435
580
  "crawl",
436
581
  "dork",
437
582
  "extract",
583
+ "agent",
584
+ "search",
438
585
  "status",
439
586
  "data:read",
440
587
  "data:export",
441
588
  "keys:manage",
589
+ "webhooks:manage",
442
590
  ],
443
591
  });
444
592
  ```
@@ -587,14 +735,27 @@ import type {
587
735
 
588
736
  // Request Options
589
737
  ScrapeOptions,
738
+ BatchScrapeOptions,
590
739
  CrawlOptions,
740
+ CrawlPriority,
741
+ CrawlCategory,
742
+ PriceRange,
743
+ SearchOptions,
591
744
  ExtractOptions,
592
745
  DorkOptions,
746
+ AgentOptions,
747
+ SchemaGenerationOptions,
593
748
 
594
749
  // Responses
595
750
  JobStatusResponse,
596
751
  ListDealsResponse,
597
752
  DealItem,
753
+ AgentJobResponse,
754
+ AgentStatusResponse,
755
+ AgentResultResponse,
756
+ SchemaGenerationResponse,
757
+ SearchJobResponse,
758
+ BatchScrapeResponse,
598
759
 
599
760
  // Re-exports from @dealcrawl/shared
600
761
  ScrapeResult,
package/dist/index.d.mts CHANGED
@@ -839,6 +839,19 @@ interface AgentStatusResponse extends JobStatusResponse {
839
839
  /** Final result when completed */
840
840
  result?: AgentResultResponse;
841
841
  }
842
+ /** Schema generation response from /v1/agent/schema */
843
+ interface SchemaGenerationResponse {
844
+ /** Generated JSON Schema for data extraction */
845
+ schema: Record<string, unknown>;
846
+ /** Refined prompt optimized for agent execution */
847
+ refinedPrompt: string;
848
+ /** Human-readable description of the schema */
849
+ schemaDescription: string;
850
+ /** Suggested follow-up questions if prompt was ambiguous */
851
+ suggestedQuestions?: string[];
852
+ /** Confidence score (0-1) in the generated schema */
853
+ confidence: number;
854
+ }
842
855
 
843
856
  /**
844
857
  * Polling Utilities
@@ -919,6 +932,8 @@ interface ScreenshotOptions {
919
932
  interface ScrapeOptions {
920
933
  /** URL to scrape (required) */
921
934
  url: string;
935
+ /** Don't save scrape results - Zero Data Retention (Pro/Enterprise only) */
936
+ noStore?: boolean;
922
937
  /** Detect signals like prices, discounts, urgency (default: true) */
923
938
  detectSignals?: boolean;
924
939
  /** Extract content using AI */
@@ -1058,6 +1073,17 @@ interface SearchOptions {
1058
1073
  headers?: Record<string, string>;
1059
1074
  };
1060
1075
  }
1076
+ /** Crawl priority queue (enterprise only) */
1077
+ type CrawlPriority = "high" | "medium" | "low";
1078
+ /** Product category filter for crawls */
1079
+ type CrawlCategory = "courses" | "software" | "physical" | "services" | "other";
1080
+ /** Price range filter for crawls */
1081
+ interface PriceRange {
1082
+ /** Minimum price */
1083
+ min?: number;
1084
+ /** Maximum price */
1085
+ max?: number;
1086
+ }
1061
1087
  /** Options for crawling a website */
1062
1088
  interface CrawlOptions {
1063
1089
  /** Starting URL for the crawl (required) */
@@ -1078,7 +1104,7 @@ interface CrawlOptions {
1078
1104
  extractWithAI?: boolean;
1079
1105
  /** Extract deal-specific information from each page */
1080
1106
  extractDeal?: boolean;
1081
- /** Minimum deal score threshold (0-100) */
1107
+ /** Minimum deal score threshold (0-100, default: 30) */
1082
1108
  minDealScore?: number;
1083
1109
  /** Prioritize pages likely to contain deals (default: true) */
1084
1110
  prioritizeDealPages?: boolean;
@@ -1088,6 +1114,34 @@ interface CrawlOptions {
1088
1114
  allowedDomains?: string[];
1089
1115
  /** URL patterns to exclude from crawling */
1090
1116
  excludePatterns?: string[];
1117
+ /** Filter by product categories */
1118
+ categories?: CrawlCategory[];
1119
+ /** Filter by price range */
1120
+ priceRange?: PriceRange;
1121
+ /** Prefer static scraping (faster, default: true) */
1122
+ preferStatic?: boolean;
1123
+ /** Require JavaScript rendering for all pages */
1124
+ requireJS?: boolean;
1125
+ /** Use advanced anti-bot bypass techniques */
1126
+ bypassAntiBot?: boolean;
1127
+ /** Only return high-quality deals (score >= 70) */
1128
+ onlyHighQuality?: boolean;
1129
+ /** Only crawl pages from these merchants */
1130
+ allowedMerchants?: string[];
1131
+ /** Skip pages from these merchants */
1132
+ blockedMerchants?: string[];
1133
+ /** Webhook URL for real-time notifications */
1134
+ webhookUrl?: string;
1135
+ /** Auto-sync discovered deals to DealUp */
1136
+ syncToDealup?: boolean;
1137
+ /** Site-specific config name from registry */
1138
+ siteConfig?: string;
1139
+ /** Job template to use (ecommerce, blog, docs, marketplace, custom) */
1140
+ template?: CrawlTemplateId;
1141
+ /** Use smart job routing to auto-detect best settings (default: true) */
1142
+ useSmartRouting?: boolean;
1143
+ /** Priority queue override (Enterprise only) */
1144
+ priority?: CrawlPriority;
1091
1145
  }
1092
1146
  /** Crawl template identifier */
1093
1147
  type CrawlTemplateId = "ecommerce" | "blog" | "docs" | "marketplace" | "custom";
@@ -1260,7 +1314,7 @@ interface UpdateWebhookOptions {
1260
1314
  * API key scope - Must match @dealcrawl/shared/src/types/api-key.types.ts
1261
1315
  * These are the actual scopes enforced by the backend via requireScope() middleware
1262
1316
  */
1263
- type ApiKeyScope = "scrape" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
1317
+ type ApiKeyScope = "scrape" | "scrape:batch" | "search" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
1264
1318
  /**
1265
1319
  * All available scopes (for reference and validation)
1266
1320
  */
@@ -1418,6 +1472,26 @@ interface AgentOptions {
1418
1472
  headers?: Record<string, string>;
1419
1473
  };
1420
1474
  }
1475
+ /** Context for schema generation from conversation */
1476
+ interface SchemaGenerationContext {
1477
+ /** Specific domains/topics mentioned (e.g., ['marketing', 'web development']) */
1478
+ domains?: string[];
1479
+ /** Types of data to extract (e.g., ['free courses', 'discounts']) */
1480
+ dataTypes?: string[];
1481
+ /** Preferred output format */
1482
+ format?: "json" | "csv" | "table";
1483
+ /** Additional clarifications from user */
1484
+ clarifications?: string[];
1485
+ }
1486
+ /** Options for generating a JSON Schema from natural language */
1487
+ interface SchemaGenerationOptions {
1488
+ /** Natural language description of what data to extract (required, 5-2000 chars) */
1489
+ prompt: string;
1490
+ /** Optional context from conversation to refine the schema */
1491
+ context?: SchemaGenerationContext;
1492
+ /** LLM provider for generation (default: openai) */
1493
+ model?: AgentModel;
1494
+ }
1421
1495
 
1422
1496
  /**
1423
1497
  * Account Resource
@@ -1713,6 +1787,53 @@ declare class AgentResource {
1713
1787
  * ```
1714
1788
  */
1715
1789
  withClaude(url: string, prompt: string, options?: Omit<AgentOptions, "url" | "prompt" | "model">): Promise<AgentJobResponse>;
1790
+ /**
1791
+ * Generate a JSON Schema from a natural language prompt
1792
+ *
1793
+ * This is useful for building extraction schemas without manual JSON writing.
1794
+ * The generated schema can be used with the main agent.create() method.
1795
+ *
1796
+ * @param options - Schema generation options
1797
+ * @returns Generated schema with refined prompt and confidence score
1798
+ *
1799
+ * @example Basic usage:
1800
+ * ```ts
1801
+ * const result = await client.agent.generateSchema({
1802
+ * prompt: "Find the best student deals on Coursera for marketing courses"
1803
+ * });
1804
+ *
1805
+ * console.log(result.schema);
1806
+ * // { type: "object", properties: { courses: { ... } } }
1807
+ *
1808
+ * console.log(result.refinedPrompt);
1809
+ * // "Extract student offers for marketing courses..."
1810
+ *
1811
+ * // Use the generated schema with an agent
1812
+ * const job = await client.agent.create({
1813
+ * url: "https://coursera.org",
1814
+ * prompt: result.refinedPrompt,
1815
+ * schema: result.schema
1816
+ * });
1817
+ * ```
1818
+ *
1819
+ * @example With context from conversation:
1820
+ * ```ts
1821
+ * const result = await client.agent.generateSchema({
1822
+ * prompt: "Find student deals on online courses",
1823
+ * context: {
1824
+ * domains: ["marketing", "web development"],
1825
+ * dataTypes: ["free courses", "discounts"],
1826
+ * format: "json"
1827
+ * }
1828
+ * });
1829
+ *
1830
+ * if (result.confidence < 0.7 && result.suggestedQuestions) {
1831
+ * // Ask user for clarification
1832
+ * console.log("Please clarify:", result.suggestedQuestions);
1833
+ * }
1834
+ * ```
1835
+ */
1836
+ generateSchema(options: SchemaGenerationOptions): Promise<SchemaGenerationResponse>;
1716
1837
  }
1717
1838
 
1718
1839
  /**
package/dist/index.d.ts CHANGED
@@ -839,6 +839,19 @@ interface AgentStatusResponse extends JobStatusResponse {
839
839
  /** Final result when completed */
840
840
  result?: AgentResultResponse;
841
841
  }
842
+ /** Schema generation response from /v1/agent/schema */
843
+ interface SchemaGenerationResponse {
844
+ /** Generated JSON Schema for data extraction */
845
+ schema: Record<string, unknown>;
846
+ /** Refined prompt optimized for agent execution */
847
+ refinedPrompt: string;
848
+ /** Human-readable description of the schema */
849
+ schemaDescription: string;
850
+ /** Suggested follow-up questions if prompt was ambiguous */
851
+ suggestedQuestions?: string[];
852
+ /** Confidence score (0-1) in the generated schema */
853
+ confidence: number;
854
+ }
842
855
 
843
856
  /**
844
857
  * Polling Utilities
@@ -919,6 +932,8 @@ interface ScreenshotOptions {
919
932
  interface ScrapeOptions {
920
933
  /** URL to scrape (required) */
921
934
  url: string;
935
+ /** Don't save scrape results - Zero Data Retention (Pro/Enterprise only) */
936
+ noStore?: boolean;
922
937
  /** Detect signals like prices, discounts, urgency (default: true) */
923
938
  detectSignals?: boolean;
924
939
  /** Extract content using AI */
@@ -1058,6 +1073,17 @@ interface SearchOptions {
1058
1073
  headers?: Record<string, string>;
1059
1074
  };
1060
1075
  }
1076
+ /** Crawl priority queue (enterprise only) */
1077
+ type CrawlPriority = "high" | "medium" | "low";
1078
+ /** Product category filter for crawls */
1079
+ type CrawlCategory = "courses" | "software" | "physical" | "services" | "other";
1080
+ /** Price range filter for crawls */
1081
+ interface PriceRange {
1082
+ /** Minimum price */
1083
+ min?: number;
1084
+ /** Maximum price */
1085
+ max?: number;
1086
+ }
1061
1087
  /** Options for crawling a website */
1062
1088
  interface CrawlOptions {
1063
1089
  /** Starting URL for the crawl (required) */
@@ -1078,7 +1104,7 @@ interface CrawlOptions {
1078
1104
  extractWithAI?: boolean;
1079
1105
  /** Extract deal-specific information from each page */
1080
1106
  extractDeal?: boolean;
1081
- /** Minimum deal score threshold (0-100) */
1107
+ /** Minimum deal score threshold (0-100, default: 30) */
1082
1108
  minDealScore?: number;
1083
1109
  /** Prioritize pages likely to contain deals (default: true) */
1084
1110
  prioritizeDealPages?: boolean;
@@ -1088,6 +1114,34 @@ interface CrawlOptions {
1088
1114
  allowedDomains?: string[];
1089
1115
  /** URL patterns to exclude from crawling */
1090
1116
  excludePatterns?: string[];
1117
+ /** Filter by product categories */
1118
+ categories?: CrawlCategory[];
1119
+ /** Filter by price range */
1120
+ priceRange?: PriceRange;
1121
+ /** Prefer static scraping (faster, default: true) */
1122
+ preferStatic?: boolean;
1123
+ /** Require JavaScript rendering for all pages */
1124
+ requireJS?: boolean;
1125
+ /** Use advanced anti-bot bypass techniques */
1126
+ bypassAntiBot?: boolean;
1127
+ /** Only return high-quality deals (score >= 70) */
1128
+ onlyHighQuality?: boolean;
1129
+ /** Only crawl pages from these merchants */
1130
+ allowedMerchants?: string[];
1131
+ /** Skip pages from these merchants */
1132
+ blockedMerchants?: string[];
1133
+ /** Webhook URL for real-time notifications */
1134
+ webhookUrl?: string;
1135
+ /** Auto-sync discovered deals to DealUp */
1136
+ syncToDealup?: boolean;
1137
+ /** Site-specific config name from registry */
1138
+ siteConfig?: string;
1139
+ /** Job template to use (ecommerce, blog, docs, marketplace, custom) */
1140
+ template?: CrawlTemplateId;
1141
+ /** Use smart job routing to auto-detect best settings (default: true) */
1142
+ useSmartRouting?: boolean;
1143
+ /** Priority queue override (Enterprise only) */
1144
+ priority?: CrawlPriority;
1091
1145
  }
1092
1146
  /** Crawl template identifier */
1093
1147
  type CrawlTemplateId = "ecommerce" | "blog" | "docs" | "marketplace" | "custom";
@@ -1260,7 +1314,7 @@ interface UpdateWebhookOptions {
1260
1314
  * API key scope - Must match @dealcrawl/shared/src/types/api-key.types.ts
1261
1315
  * These are the actual scopes enforced by the backend via requireScope() middleware
1262
1316
  */
1263
- type ApiKeyScope = "scrape" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
1317
+ type ApiKeyScope = "scrape" | "scrape:batch" | "search" | "crawl" | "dork" | "extract" | "agent" | "status" | "data:read" | "data:export" | "keys:manage" | "webhooks:manage";
1264
1318
  /**
1265
1319
  * All available scopes (for reference and validation)
1266
1320
  */
@@ -1418,6 +1472,26 @@ interface AgentOptions {
1418
1472
  headers?: Record<string, string>;
1419
1473
  };
1420
1474
  }
1475
+ /** Context for schema generation from conversation */
1476
+ interface SchemaGenerationContext {
1477
+ /** Specific domains/topics mentioned (e.g., ['marketing', 'web development']) */
1478
+ domains?: string[];
1479
+ /** Types of data to extract (e.g., ['free courses', 'discounts']) */
1480
+ dataTypes?: string[];
1481
+ /** Preferred output format */
1482
+ format?: "json" | "csv" | "table";
1483
+ /** Additional clarifications from user */
1484
+ clarifications?: string[];
1485
+ }
1486
+ /** Options for generating a JSON Schema from natural language */
1487
+ interface SchemaGenerationOptions {
1488
+ /** Natural language description of what data to extract (required, 5-2000 chars) */
1489
+ prompt: string;
1490
+ /** Optional context from conversation to refine the schema */
1491
+ context?: SchemaGenerationContext;
1492
+ /** LLM provider for generation (default: openai) */
1493
+ model?: AgentModel;
1494
+ }
1421
1495
 
1422
1496
  /**
1423
1497
  * Account Resource
@@ -1713,6 +1787,53 @@ declare class AgentResource {
1713
1787
  * ```
1714
1788
  */
1715
1789
  withClaude(url: string, prompt: string, options?: Omit<AgentOptions, "url" | "prompt" | "model">): Promise<AgentJobResponse>;
1790
+ /**
1791
+ * Generate a JSON Schema from a natural language prompt
1792
+ *
1793
+ * This is useful for building extraction schemas without manual JSON writing.
1794
+ * The generated schema can be used with the main agent.create() method.
1795
+ *
1796
+ * @param options - Schema generation options
1797
+ * @returns Generated schema with refined prompt and confidence score
1798
+ *
1799
+ * @example Basic usage:
1800
+ * ```ts
1801
+ * const result = await client.agent.generateSchema({
1802
+ * prompt: "Find the best student deals on Coursera for marketing courses"
1803
+ * });
1804
+ *
1805
+ * console.log(result.schema);
1806
+ * // { type: "object", properties: { courses: { ... } } }
1807
+ *
1808
+ * console.log(result.refinedPrompt);
1809
+ * // "Extract student offers for marketing courses..."
1810
+ *
1811
+ * // Use the generated schema with an agent
1812
+ * const job = await client.agent.create({
1813
+ * url: "https://coursera.org",
1814
+ * prompt: result.refinedPrompt,
1815
+ * schema: result.schema
1816
+ * });
1817
+ * ```
1818
+ *
1819
+ * @example With context from conversation:
1820
+ * ```ts
1821
+ * const result = await client.agent.generateSchema({
1822
+ * prompt: "Find student deals on online courses",
1823
+ * context: {
1824
+ * domains: ["marketing", "web development"],
1825
+ * dataTypes: ["free courses", "discounts"],
1826
+ * format: "json"
1827
+ * }
1828
+ * });
1829
+ *
1830
+ * if (result.confidence < 0.7 && result.suggestedQuestions) {
1831
+ * // Ask user for clarification
1832
+ * console.log("Please clarify:", result.suggestedQuestions);
1833
+ * }
1834
+ * ```
1835
+ */
1836
+ generateSchema(options: SchemaGenerationOptions): Promise<SchemaGenerationResponse>;
1716
1837
  }
1717
1838
 
1718
1839
  /**
package/dist/index.js CHANGED
@@ -784,6 +784,65 @@ var AgentResource = class {
784
784
  ...options
785
785
  });
786
786
  }
787
+ /**
788
+ * Generate a JSON Schema from a natural language prompt
789
+ *
790
+ * This is useful for building extraction schemas without manual JSON writing.
791
+ * The generated schema can be used with the main agent.create() method.
792
+ *
793
+ * @param options - Schema generation options
794
+ * @returns Generated schema with refined prompt and confidence score
795
+ *
796
+ * @example Basic usage:
797
+ * ```ts
798
+ * const result = await client.agent.generateSchema({
799
+ * prompt: "Find the best student deals on Coursera for marketing courses"
800
+ * });
801
+ *
802
+ * console.log(result.schema);
803
+ * // { type: "object", properties: { courses: { ... } } }
804
+ *
805
+ * console.log(result.refinedPrompt);
806
+ * // "Extract student offers for marketing courses..."
807
+ *
808
+ * // Use the generated schema with an agent
809
+ * const job = await client.agent.create({
810
+ * url: "https://coursera.org",
811
+ * prompt: result.refinedPrompt,
812
+ * schema: result.schema
813
+ * });
814
+ * ```
815
+ *
816
+ * @example With context from conversation:
817
+ * ```ts
818
+ * const result = await client.agent.generateSchema({
819
+ * prompt: "Find student deals on online courses",
820
+ * context: {
821
+ * domains: ["marketing", "web development"],
822
+ * dataTypes: ["free courses", "discounts"],
823
+ * format: "json"
824
+ * }
825
+ * });
826
+ *
827
+ * if (result.confidence < 0.7 && result.suggestedQuestions) {
828
+ * // Ask user for clarification
829
+ * console.log("Please clarify:", result.suggestedQuestions);
830
+ * }
831
+ * ```
832
+ */
833
+ async generateSchema(options) {
834
+ const body = {
835
+ prompt: options.prompt,
836
+ context: options.context,
837
+ model: options.model ?? "openai"
838
+ };
839
+ const result = await post(
840
+ this.ctx,
841
+ "/v1/agent/schema",
842
+ body
843
+ );
844
+ return result.data;
845
+ }
787
846
  };
788
847
 
789
848
  // src/resources/crawl.ts