firecrawl 1.18.0 → 1.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -79,7 +79,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
79
79
  * Defines the options and configurations available for scraping web content.
80
80
  */
81
81
  export interface CrawlScrapeOptions {
82
- formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
82
+ formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
83
83
  headers?: Record<string, string>;
84
84
  includeTags?: string[];
85
85
  excludeTags?: string[];
@@ -413,6 +413,48 @@ export interface DeepResearchStatusResponse {
413
413
  summaries: string[];
414
414
  }
415
415
 
/**
 * Optional settings accepted when requesting an LLMs.txt generation.
 * Every field may be omitted.
 */
export interface GenerateLLMsTextParams {
  /** Experimental flag for streaming. */
  __experimental_stream?: boolean;
  /**
   * When true, the full LLMs-full.txt is shown in the response.
   * @default false
   */
  showFullText?: boolean;
  /**
   * Upper bound on the number of URLs to process (1-100).
   * @default 10
   */
  maxUrls?: number;
}
435
+
/**
 * Payload returned when an LLMs.txt generation job is started.
 */
export interface GenerateLLMsTextResponse {
  /** Identifier of the generation job. */
  id: string;
  /** Success flag reported with the response. */
  success: boolean;
}
443
+
/**
 * Payload describing the state of an LLMs.txt generation job.
 */
export interface GenerateLLMsTextStatusResponse {
  /** Current job state. */
  status: "processing" | "completed" | "failed";
  /** Generated output. */
  data: {
    /** The generated LLMs.txt content. */
    llmstxt: string;
    /** The LLMs-full.txt content; may be absent. */
    llmsfulltxt?: string;
  };
  /** Success flag reported with the response. */
  success: boolean;
  /** Optional error description. */
  error?: string;
  /** NOTE(review): presumably an expiry timestamp serialized as a string; confirm against the API. */
  expiresAt: string;
}
457
+
416
458
  /**
417
459
  * Main class for interacting with the Firecrawl API.
418
460
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -1459,6 +1501,118 @@ export default class FirecrawlApp {
1459
1501
  }
1460
1502
  return { success: false, error: "Internal server error." };
1461
1503
  }
1504
+
/**
 * Generates LLMs.txt for a given URL and polls until completion.
 *
 * Starts a job through `asyncGenerateLLMsText`, then calls
 * `checkGenerateLLMsTextStatus` every 2 seconds for as long as the job
 * reports "processing". There is no upper bound on the polling time.
 *
 * @param url - The URL to generate LLMs.txt from.
 * @param params - Parameters for the LLMs.txt generation operation.
 * @returns The status response once it reports "completed". An
 *   `ErrorResponse` is returned instead when the start request or a status
 *   check yields an unsuccessful error payload, or when the job reports a
 *   status other than "processing", "completed" or "failed".
 * @throws FirecrawlError when no job ID is returned, when the job reports
 *   "failed", or when a request fails. A `FirecrawlError` raised by the
 *   underlying calls is propagated unchanged, so its status code is kept;
 *   any other error is wrapped with status 500.
 */
async generateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
  try {
    const response = await this.asyncGenerateLLMsText(url, params);

    if (!response.success || 'error' in response) {
      return { success: false, error: 'error' in response ? response.error : 'Unknown error' };
    }

    if (!response.id) {
      throw new FirecrawlError(`Failed to start LLMs.txt generation. No job ID returned.`, 500);
    }

    const jobId = response.id;

    while (true) {
      const generationStatus = await this.checkGenerateLLMsTextStatus(jobId);

      if ('error' in generationStatus && !generationStatus.success) {
        return generationStatus;
      }

      if (generationStatus.status === "completed") {
        return generationStatus;
      }

      if (generationStatus.status === "failed") {
        throw new FirecrawlError(
          `LLMs.txt generation job ${generationStatus.status}. Error: ${generationStatus.error}`,
          500
        );
      }

      if (generationStatus.status !== "processing") {
        // Unknown status: stop polling instead of looping forever.
        return { success: false, error: "LLMs.txt generation job terminated unexpectedly" };
      }

      // Still processing: wait before the next status check.
      await new Promise(resolve => setTimeout(resolve, 2000));
    }
  } catch (error: any) {
    // Errors that are already FirecrawlError carry the correct status code;
    // re-wrapping them would overwrite it with 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    throw new FirecrawlError(error.message, 500, error.response?.data?.details);
  }
}
1556
+
/**
 * Initiates an LLMs.txt generation operation without polling.
 *
 * Sends a POST to `${apiUrl}/v1/llmstxt` with `url` merged with `params`.
 *
 * @param url - The URL to generate LLMs.txt from.
 * @param params - Parameters for the LLMs.txt generation operation.
 * @returns The response body when the request answers with HTTP 200. The
 *   trailing "Internal server error." result is only reached if
 *   `handleError` returns without throwing.
 * @throws FirecrawlError carrying the HTTP status when the failure includes
 *   an error payload, or status 500 otherwise. A `FirecrawlError` raised
 *   inside the `try` is propagated unchanged.
 */
async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
  const headers = this.prepareHeaders();
  try {
    const response: AxiosResponse = await this.postRequest(
      `${this.apiUrl}/v1/llmstxt`,
      { url, ...params },
      headers
    );

    if (response.status === 200) {
      return response.data;
    } else {
      // NOTE(review): handleError is defined outside this view; it
      // presumably throws a FirecrawlError carrying response.status.
      this.handleError(response, "start LLMs.txt generation");
    }
  } catch (error: any) {
    // Do not re-wrap an error that is already a FirecrawlError: it has no
    // `response` property, so it would be rethrown with status 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    if (error.response?.data?.error) {
      throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
    } else {
      throw new FirecrawlError(error.message, 500);
    }
  }
  return { success: false, error: "Internal server error." };
}
1586
+
/**
 * Checks the status of an LLMs.txt generation operation.
 *
 * Sends a GET to `${apiUrl}/v1/llmstxt/${id}`.
 *
 * @param id - The ID of the LLMs.txt generation operation.
 * @returns The response body when the request answers with HTTP 200. The
 *   trailing "Internal server error." result is only reached if
 *   `handleError` returns without throwing.
 * @throws FirecrawlError with status 404 when the response status is 404,
 *   with the HTTP status when the failure includes an error payload, or
 *   with status 500 otherwise. A `FirecrawlError` raised inside the `try`
 *   is propagated unchanged.
 */
async checkGenerateLLMsTextStatus(id: string): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
  const headers = this.prepareHeaders();
  try {
    const response: AxiosResponse = await this.getRequest(
      `${this.apiUrl}/v1/llmstxt/${id}`,
      headers
    );

    if (response.status === 200) {
      return response.data;
    } else if (response.status === 404) {
      throw new FirecrawlError("LLMs.txt generation job not found", 404);
    } else {
      this.handleError(response, "check LLMs.txt generation status");
    }
  } catch (error: any) {
    // The 404 thrown above is caught here too. It has no `response`
    // property, so without this passthrough it would be rethrown as 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    if (error.response?.data?.error) {
      throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
    } else {
      throw new FirecrawlError(error.message, 500);
    }
  }
  return { success: false, error: "Internal server error." };
}
1462
1616
  }
1463
1617
 
1464
1618
  interface CrawlWatcherEvents {