@mendable/firecrawl-js 1.18.1 → 1.18.3-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +393 -22
- package/dist/index.d.cts +217 -5
- package/dist/index.d.ts +217 -5
- package/dist/index.js +393 -22
- package/dump.rdb +0 -0
- package/package.json +1 -1
- package/src/index.ts +177 -4
package/src/index.ts
CHANGED
|
@@ -413,6 +413,48 @@ export interface DeepResearchStatusResponse {
|
|
|
413
413
|
summaries: string[];
|
|
414
414
|
}
|
|
415
415
|
|
|
416
|
+
/**
 * Optional settings accepted by the LLMs.txt generation methods.
 * Every field may be omitted.
 */
export interface GenerateLLMsTextParams {
  /**
   * Upper bound on how many URLs are processed (1-100).
   * @default 10
   */
  maxUrls?: number;

  /**
   * When true, the full LLMs-full.txt is included in the response.
   * @default false
   */
  showFullText?: boolean;

  /** Experimental streaming flag. */
  __experimental_stream?: boolean;
}
|
|
435
|
+
|
|
436
|
+
/**
 * Shape of the response returned when an LLMs.txt generation job is started.
 */
export interface GenerateLLMsTextResponse {
  /** Success flag of the response. */
  success: boolean;
  /** Job identifier; callers pass it to the status check to poll the job. */
  id: string;
}
|
|
443
|
+
|
|
444
|
+
/**
 * Shape of the response returned when the status of an LLMs.txt generation
 * job is queried.
 */
export interface GenerateLLMsTextStatusResponse {
  /** Success flag of the response. */
  success: boolean;

  /** Generated output. */
  data: {
    /** The generated LLMs.txt content. */
    llmstxt: string;
    /** The LLMs-full.txt content; optional (presumably tied to `showFullText` - confirm against the API). */
    llmsfulltxt?: string;
  };

  /** Current state of the job. */
  status: "processing" | "completed" | "failed";

  /** Error description, when one is present. */
  error?: string;

  /** Expiry of the job, as a string (format not visible here - confirm against the API). */
  expiresAt: string;
}
|
|
457
|
+
|
|
416
458
|
/**
|
|
417
459
|
* Main class for interacting with the Firecrawl API.
|
|
418
460
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -1349,10 +1391,22 @@ export default class FirecrawlApp {
|
|
|
1349
1391
|
|
|
1350
1392
|
/**
|
|
1351
1393
|
* Initiates a deep research operation on a given topic and polls until completion.
|
|
1394
|
+
* @param topic - The topic to research.
|
|
1352
1395
|
* @param params - Parameters for the deep research operation.
|
|
1396
|
+
* @param onActivity - Optional callback to receive activity updates in real-time.
|
|
1353
1397
|
* @returns The final research results.
|
|
1354
1398
|
*/
|
|
1355
|
-
async __deepResearch(
|
|
1399
|
+
async __deepResearch(
|
|
1400
|
+
topic: string,
|
|
1401
|
+
params: DeepResearchParams,
|
|
1402
|
+
onActivity?: (activity: {
|
|
1403
|
+
type: string;
|
|
1404
|
+
status: string;
|
|
1405
|
+
message: string;
|
|
1406
|
+
timestamp: string;
|
|
1407
|
+
depth: number;
|
|
1408
|
+
}) => void
|
|
1409
|
+
): Promise<DeepResearchStatusResponse | ErrorResponse> {
|
|
1356
1410
|
try {
|
|
1357
1411
|
const response = await this.__asyncDeepResearch(topic, params);
|
|
1358
1412
|
|
|
@@ -1366,16 +1420,24 @@ export default class FirecrawlApp {
|
|
|
1366
1420
|
|
|
1367
1421
|
const jobId = response.id;
|
|
1368
1422
|
let researchStatus;
|
|
1423
|
+
let lastActivityCount = 0;
|
|
1369
1424
|
|
|
1370
1425
|
while (true) {
|
|
1371
|
-
// console.log("Checking research status...");
|
|
1372
1426
|
researchStatus = await this.__checkDeepResearchStatus(jobId);
|
|
1373
|
-
// console.log("Research status:", researchStatus);
|
|
1374
1427
|
|
|
1375
1428
|
if ('error' in researchStatus && !researchStatus.success) {
|
|
1376
1429
|
return researchStatus;
|
|
1377
1430
|
}
|
|
1378
1431
|
|
|
1432
|
+
// Stream new activities through the callback if provided
|
|
1433
|
+
if (onActivity && researchStatus.activities) {
|
|
1434
|
+
const newActivities = researchStatus.activities.slice(lastActivityCount);
|
|
1435
|
+
for (const activity of newActivities) {
|
|
1436
|
+
onActivity(activity);
|
|
1437
|
+
}
|
|
1438
|
+
lastActivityCount = researchStatus.activities.length;
|
|
1439
|
+
}
|
|
1440
|
+
|
|
1379
1441
|
if (researchStatus.status === "completed") {
|
|
1380
1442
|
return researchStatus;
|
|
1381
1443
|
}
|
|
@@ -1393,7 +1455,6 @@ export default class FirecrawlApp {
|
|
|
1393
1455
|
|
|
1394
1456
|
await new Promise(resolve => setTimeout(resolve, 2000));
|
|
1395
1457
|
}
|
|
1396
|
-
// console.log("Research status finished:", researchStatus);
|
|
1397
1458
|
|
|
1398
1459
|
return { success: false, error: "Research job terminated unexpectedly" };
|
|
1399
1460
|
} catch (error: any) {
|
|
@@ -1459,6 +1520,118 @@ export default class FirecrawlApp {
|
|
|
1459
1520
|
}
|
|
1460
1521
|
return { success: false, error: "Internal server error." };
|
|
1461
1522
|
}
|
|
1523
|
+
|
|
1524
|
+
/**
 * Generates LLMs.txt for a given URL and polls until completion.
 *
 * Starts the job with `asyncGenerateLLMsText`, then calls
 * `checkGenerateLLMsTextStatus` every 2 seconds until the job leaves the
 * "processing" state.
 *
 * @param url - The URL to generate LLMs.txt from.
 * @param params - Parameters for the LLMs.txt generation operation.
 * @returns The status response once the job reports "completed". An
 *   `ErrorResponse` is returned instead when the start request is
 *   unsuccessful, when a status check yields an unsuccessful response
 *   carrying an error, or when the job reports a status other than
 *   "processing", "completed" or "failed".
 * @throws FirecrawlError when no job ID is returned, when the job reports
 *   "failed", or when an underlying call throws. A `FirecrawlError` raised by
 *   an underlying call is propagated unchanged so its original status code is
 *   kept; any other error is wrapped with status 500.
 */
async generateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
  try {
    const response = await this.asyncGenerateLLMsText(url, params);

    if (!response.success || 'error' in response) {
      return { success: false, error: 'error' in response ? response.error : 'Unknown error' };
    }

    if (!response.id) {
      throw new FirecrawlError(`Failed to start LLMs.txt generation. No job ID returned.`, 500);
    }

    const jobId = response.id;
    let generationStatus;

    while (true) {
      generationStatus = await this.checkGenerateLLMsTextStatus(jobId);

      // An unsuccessful status check is handed back to the caller as-is.
      if ('error' in generationStatus && !generationStatus.success) {
        return generationStatus;
      }

      if (generationStatus.status === "completed") {
        return generationStatus;
      }

      if (generationStatus.status === "failed") {
        throw new FirecrawlError(
          `LLMs.txt generation job ${generationStatus.status}. Error: ${generationStatus.error}`,
          500
        );
      }

      // Any status other than "processing" is unexpected: stop polling.
      if (generationStatus.status !== "processing") {
        break;
      }

      // Wait 2 seconds between status checks.
      await new Promise(resolve => setTimeout(resolve, 2000));
    }

    return { success: false, error: "LLMs.txt generation job terminated unexpectedly" };
  } catch (error: any) {
    // Errors that are already FirecrawlError (thrown above or by the calls
    // made above) keep their own status code instead of being flattened to 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    throw new FirecrawlError(error.message, 500, error.response?.data?.details);
  }
}
|
|
1575
|
+
|
|
1576
|
+
/**
 * Initiates an LLMs.txt generation operation without polling.
 *
 * Sends a POST request to `/v1/llmstxt` with the URL and any supplied
 * parameters merged into the request body.
 *
 * @param url - The URL to generate LLMs.txt from.
 * @param params - Parameters for the LLMs.txt generation operation.
 * @returns The response containing the generation job ID.
 * @throws FirecrawlError when the request fails. If the failure carries an
 *   HTTP response with an `error` field, the error uses that response's
 *   status code; a `FirecrawlError` raised while handling the response is
 *   propagated unchanged; anything else is wrapped with status 500.
 */
async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
  const headers = this.prepareHeaders();
  try {
    const response: AxiosResponse = await this.postRequest(
      `${this.apiUrl}/v1/llmstxt`,
      { url, ...params },
      headers
    );

    if (response.status === 200) {
      return response.data;
    } else {
      this.handleError(response, "start LLMs.txt generation");
    }
  } catch (error: any) {
    // Keep the status code of errors that are already FirecrawlError rather
    // than rewrapping them as a generic 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    if (error.response?.data?.error) {
      throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
    } else {
      throw new FirecrawlError(error.message, 500);
    }
  }
  // Fallback for a non-200 response when handleError returns without throwing.
  return { success: false, error: "Internal server error." };
}
|
|
1605
|
+
|
|
1606
|
+
/**
 * Checks the status of an LLMs.txt generation operation.
 *
 * Sends a GET request to `/v1/llmstxt/{id}`.
 *
 * @param id - The ID of the LLMs.txt generation operation.
 * @returns The current status and results of the generation operation.
 * @throws FirecrawlError with status 404 when the response status is 404.
 *   If a failed request carries an HTTP response with an `error` field, the
 *   error uses that response's status code; any other `FirecrawlError` is
 *   propagated unchanged; anything else is wrapped with status 500.
 */
async checkGenerateLLMsTextStatus(id: string): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
  const headers = this.prepareHeaders();
  try {
    const response: AxiosResponse = await this.getRequest(
      `${this.apiUrl}/v1/llmstxt/${id}`,
      headers
    );

    if (response.status === 200) {
      return response.data;
    } else if (response.status === 404) {
      throw new FirecrawlError("LLMs.txt generation job not found", 404);
    } else {
      this.handleError(response, "check LLMs.txt generation status");
    }
  } catch (error: any) {
    // The 404 thrown above (and any other FirecrawlError) must reach the
    // caller with its own status code; without this guard the branch below
    // would rethrow it as a 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    if (error.response?.data?.error) {
      throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
    } else {
      throw new FirecrawlError(error.message, 500);
    }
  }
  // Fallback for an unexpected response when handleError returns without throwing.
  return { success: false, error: "Internal server error." };
}
|
|
1462
1635
|
}
|
|
1463
1636
|
|
|
1464
1637
|
interface CrawlWatcherEvents {
|