firecrawl 1.18.0 → 1.18.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +383 -22
- package/dist/index.d.cts +209 -5
- package/dist/index.d.ts +209 -5
- package/dist/index.js +383 -22
- package/dump.rdb +0 -0
- package/package.json +1 -1
- package/src/index.ts +155 -1
package/src/index.ts
CHANGED
@@ -79,7 +79,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
  * Defines the options and configurations available for scraping web content.
  */
 export interface CrawlScrapeOptions {
-  formats
+  formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
   headers?: Record<string, string>;
   includeTags?: string[];
   excludeTags?: string[];
@@ -413,6 +413,48 @@ export interface DeepResearchStatusResponse {
   summaries: string[];
 }
 
+/**
+ * Parameters for LLMs.txt generation operations.
+ */
+export interface GenerateLLMsTextParams {
+  /**
+   * Maximum number of URLs to process (1-100)
+   * @default 10
+   */
+  maxUrls?: number;
+  /**
+   * Whether to show the full LLMs-full.txt in the response
+   * @default false
+   */
+  showFullText?: boolean;
+  /**
+   * Experimental flag for streaming
+   */
+  __experimental_stream?: boolean;
+}
+
+/**
+ * Response interface for LLMs.txt generation operations.
+ */
+export interface GenerateLLMsTextResponse {
+  success: boolean;
+  id: string;
+}
+
+/**
+ * Status response interface for LLMs.txt generation operations.
+ */
+export interface GenerateLLMsTextStatusResponse {
+  success: boolean;
+  data: {
+    llmstxt: string;
+    llmsfulltxt?: string;
+  };
+  status: "processing" | "completed" | "failed";
+  error?: string;
+  expiresAt: string;
+}
+
 /**
  * Main class for interacting with the Firecrawl API.
  * Provides methods for scraping, searching, crawling, and mapping web content.
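
For orientation (not part of the diff): a minimal sketch of how the three new interfaces relate, assuming they are re-exported from the package entry point as the export declarations above suggest. The values shown are illustrative.

// Illustrative only: the request/response shapes introduced in the hunk above.
import type { GenerateLLMsTextParams, GenerateLLMsTextStatusResponse } from "firecrawl";

const params: GenerateLLMsTextParams = {
  maxUrls: 25,        // optional; documented default is 10
  showFullText: true, // presumably populates the optional data.llmsfulltxt field
};

// "completed" and "failed" are terminal states; "processing" means keep polling.
function isTerminal(res: GenerateLLMsTextStatusResponse): boolean {
  return res.status !== "processing";
}
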
@@ -1459,6 +1501,118 @@ export default class FirecrawlApp {
     }
     return { success: false, error: "Internal server error." };
   }
+
+  /**
+   * Generates LLMs.txt for a given URL and polls until completion.
+   * @param url - The URL to generate LLMs.txt from.
+   * @param params - Parameters for the LLMs.txt generation operation.
+   * @returns The final generation results.
+   */
+  async generateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
+    try {
+      const response = await this.asyncGenerateLLMsText(url, params);
+
+      if (!response.success || 'error' in response) {
+        return { success: false, error: 'error' in response ? response.error : 'Unknown error' };
+      }
+
+      if (!response.id) {
+        throw new FirecrawlError(`Failed to start LLMs.txt generation. No job ID returned.`, 500);
+      }
+
+      const jobId = response.id;
+      let generationStatus;
+
+      while (true) {
+        generationStatus = await this.checkGenerateLLMsTextStatus(jobId);
+
+        if ('error' in generationStatus && !generationStatus.success) {
+          return generationStatus;
+        }
+
+        if (generationStatus.status === "completed") {
+          return generationStatus;
+        }
+
+        if (generationStatus.status === "failed") {
+          throw new FirecrawlError(
+            `LLMs.txt generation job ${generationStatus.status}. Error: ${generationStatus.error}`,
+            500
+          );
+        }
+
+        if (generationStatus.status !== "processing") {
+          break;
+        }
+
+        await new Promise(resolve => setTimeout(resolve, 2000));
+      }
+
+      return { success: false, error: "LLMs.txt generation job terminated unexpectedly" };
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500, error.response?.data?.details);
+    }
+  }
+
+  /**
+   * Initiates a LLMs.txt generation operation without polling.
+   * @param url - The URL to generate LLMs.txt from.
+   * @param params - Parameters for the LLMs.txt generation operation.
+   * @returns The response containing the generation job ID.
+   */
+  async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
+    const headers = this.prepareHeaders();
+    try {
+      const response: AxiosResponse = await this.postRequest(
+        `${this.apiUrl}/v1/llmstxt`,
+        { url, ...params },
+        headers
+      );
+
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "start LLMs.txt generation");
+      }
+    } catch (error: any) {
+      if (error.response?.data?.error) {
+        throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
+      } else {
+        throw new FirecrawlError(error.message, 500);
+      }
+    }
+    return { success: false, error: "Internal server error." };
+  }
+
+  /**
+   * Checks the status of a LLMs.txt generation operation.
+   * @param id - The ID of the LLMs.txt generation operation.
+   * @returns The current status and results of the generation operation.
+   */
+  async checkGenerateLLMsTextStatus(id: string): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
+    const headers = this.prepareHeaders();
+    try {
+      const response: AxiosResponse = await this.getRequest(
+        `${this.apiUrl}/v1/llmstxt/${id}`,
+        headers
+      );
+
+      if (response.status === 200) {
+        return response.data;
+      } else if (response.status === 404) {
+        throw new FirecrawlError("LLMs.txt generation job not found", 404);
+      } else {
+        this.handleError(response, "check LLMs.txt generation status");
+      }
+    } catch (error: any) {
+      if (error.response?.data?.error) {
+        throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
+      } else {
+        throw new FirecrawlError(error.message, 500);
+      }
+    }
+    return { success: false, error: "Internal server error." };
+  }
 }
 
 interface CrawlWatcherEvents {
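
A usage sketch of the methods added in this release (not part of the diff): `generateLLMsText` starts the job and polls every 2 seconds until a terminal state, while the `asyncGenerateLLMsText` / `checkGenerateLLMsTextStatus` pair leaves polling to the caller. The import assumes the package's default export (`FirecrawlApp`, per the hunk header above); the API key and URL are placeholders.

// Sketch only: consumer-side use of the new LLMs.txt generation API.
import FirecrawlApp from "firecrawl";

async function main() {
  const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" }); // placeholder key

  // Blocking variant: resolves once the job completes or fails.
  const result = await app.generateLLMsText("https://example.com", { maxUrls: 5 });
  if (result.success && "data" in result) {
    console.log(result.data.llmstxt);
  }

  // Manual variant: start the job, then check its status on your own schedule.
  const started = await app.asyncGenerateLLMsText("https://example.com", { maxUrls: 5 });
  if (started.success && "id" in started) {
    const status = await app.checkGenerateLLMsTextStatus(started.id);
    if ("status" in status) {
      console.log(status.status); // "processing" | "completed" | "failed"
    }
  }
}

main().catch(console.error);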