@mendable/firecrawl-js 1.18.1 → 1.18.3-beta.1

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -413,6 +413,48 @@ export interface DeepResearchStatusResponse {
413
413
  summaries: string[];
414
414
  }
415
415
 
416
+ /**
417
+ * Parameters for LLMs.txt generation operations.
418
+ */
419
+ export interface GenerateLLMsTextParams {
420
+ /**
421
+ * Maximum number of URLs to process (1-100)
422
+ * @default 10
423
+ */
424
+ maxUrls?: number;
425
+ /**
426
+ * Whether to show the full LLMs-full.txt in the response
427
+ * @default false
428
+ */
429
+ showFullText?: boolean;
430
+ /**
431
+ * Experimental flag for streaming
432
+ */
433
+ __experimental_stream?: boolean;
434
+ }
435
+
436
+ /**
437
+ * Response interface for LLMs.txt generation operations.
438
+ */
439
+ export interface GenerateLLMsTextResponse {
440
+ success: boolean;
441
+ id: string;
442
+ }
443
+
444
+ /**
445
+ * Status response interface for LLMs.txt generation operations.
446
+ */
447
+ export interface GenerateLLMsTextStatusResponse {
448
+ success: boolean;
449
+ data: {
450
+ llmstxt: string;
451
+ llmsfulltxt?: string;
452
+ };
453
+ status: "processing" | "completed" | "failed";
454
+ error?: string;
455
+ expiresAt: string;
456
+ }
457
+
416
458
  /**
417
459
  * Main class for interacting with the Firecrawl API.
418
460
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -1349,10 +1391,22 @@ export default class FirecrawlApp {
1349
1391
 
1350
1392
  /**
1351
1393
  * Initiates a deep research operation on a given topic and polls until completion.
1394
+ * @param topic - The topic to research.
1352
1395
  * @param params - Parameters for the deep research operation.
1396
+ * @param onActivity - Optional callback to receive activity updates in real-time.
1353
1397
  * @returns The final research results.
1354
1398
  */
1355
- async __deepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchStatusResponse | ErrorResponse> {
1399
+ async __deepResearch(
1400
+ topic: string,
1401
+ params: DeepResearchParams,
1402
+ onActivity?: (activity: {
1403
+ type: string;
1404
+ status: string;
1405
+ message: string;
1406
+ timestamp: string;
1407
+ depth: number;
1408
+ }) => void
1409
+ ): Promise<DeepResearchStatusResponse | ErrorResponse> {
1356
1410
  try {
1357
1411
  const response = await this.__asyncDeepResearch(topic, params);
1358
1412
 
@@ -1366,16 +1420,24 @@ export default class FirecrawlApp {
1366
1420
 
1367
1421
  const jobId = response.id;
1368
1422
  let researchStatus;
1423
+ let lastActivityCount = 0;
1369
1424
 
1370
1425
  while (true) {
1371
- // console.log("Checking research status...");
1372
1426
  researchStatus = await this.__checkDeepResearchStatus(jobId);
1373
- // console.log("Research status:", researchStatus);
1374
1427
 
1375
1428
  if ('error' in researchStatus && !researchStatus.success) {
1376
1429
  return researchStatus;
1377
1430
  }
1378
1431
 
1432
+ // Stream new activities through the callback if provided
1433
+ if (onActivity && researchStatus.activities) {
1434
+ const newActivities = researchStatus.activities.slice(lastActivityCount);
1435
+ for (const activity of newActivities) {
1436
+ onActivity(activity);
1437
+ }
1438
+ lastActivityCount = researchStatus.activities.length;
1439
+ }
1440
+
1379
1441
  if (researchStatus.status === "completed") {
1380
1442
  return researchStatus;
1381
1443
  }
@@ -1393,7 +1455,6 @@ export default class FirecrawlApp {
1393
1455
 
1394
1456
  await new Promise(resolve => setTimeout(resolve, 2000));
1395
1457
  }
1396
- // console.log("Research status finished:", researchStatus);
1397
1458
 
1398
1459
  return { success: false, error: "Research job terminated unexpectedly" };
1399
1460
  } catch (error: any) {
@@ -1459,6 +1520,118 @@ export default class FirecrawlApp {
1459
1520
  }
1460
1521
  return { success: false, error: "Internal server error." };
1461
1522
  }
1523
+
1524
+ /**
1525
+ * Generates LLMs.txt for a given URL and polls until completion.
1526
+ * @param url - The URL to generate LLMs.txt from.
1527
+ * @param params - Parameters for the LLMs.txt generation operation.
1528
+ * @returns The final generation results.
1529
+ */
1530
+ async generateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
1531
+ try {
1532
+ const response = await this.asyncGenerateLLMsText(url, params);
1533
+
1534
+ if (!response.success || 'error' in response) {
1535
+ return { success: false, error: 'error' in response ? response.error : 'Unknown error' };
1536
+ }
1537
+
1538
+ if (!response.id) {
1539
+ throw new FirecrawlError(`Failed to start LLMs.txt generation. No job ID returned.`, 500);
1540
+ }
1541
+
1542
+ const jobId = response.id;
1543
+ let generationStatus;
1544
+
1545
+ while (true) {
1546
+ generationStatus = await this.checkGenerateLLMsTextStatus(jobId);
1547
+
1548
+ if ('error' in generationStatus && !generationStatus.success) {
1549
+ return generationStatus;
1550
+ }
1551
+
1552
+ if (generationStatus.status === "completed") {
1553
+ return generationStatus;
1554
+ }
1555
+
1556
+ if (generationStatus.status === "failed") {
1557
+ throw new FirecrawlError(
1558
+ `LLMs.txt generation job ${generationStatus.status}. Error: ${generationStatus.error}`,
1559
+ 500
1560
+ );
1561
+ }
1562
+
1563
+ if (generationStatus.status !== "processing") {
1564
+ break;
1565
+ }
1566
+
1567
+ await new Promise(resolve => setTimeout(resolve, 2000));
1568
+ }
1569
+
1570
+ return { success: false, error: "LLMs.txt generation job terminated unexpectedly" };
1571
+ } catch (error: any) {
1572
+ throw new FirecrawlError(error.message, 500, error.response?.data?.details);
1573
+ }
1574
+ }
1575
+
1576
+ /**
1577
+ * Initiates a LLMs.txt generation operation without polling.
1578
+ * @param url - The URL to generate LLMs.txt from.
1579
+ * @param params - Parameters for the LLMs.txt generation operation.
1580
+ * @returns The response containing the generation job ID.
1581
+ */
1582
+ async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
1583
+ const headers = this.prepareHeaders();
1584
+ try {
1585
+ const response: AxiosResponse = await this.postRequest(
1586
+ `${this.apiUrl}/v1/llmstxt`,
1587
+ { url, ...params },
1588
+ headers
1589
+ );
1590
+
1591
+ if (response.status === 200) {
1592
+ return response.data;
1593
+ } else {
1594
+ this.handleError(response, "start LLMs.txt generation");
1595
+ }
1596
+ } catch (error: any) {
1597
+ if (error.response?.data?.error) {
1598
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
1599
+ } else {
1600
+ throw new FirecrawlError(error.message, 500);
1601
+ }
1602
+ }
1603
+ return { success: false, error: "Internal server error." };
1604
+ }
1605
+
1606
+ /**
1607
+ * Checks the status of a LLMs.txt generation operation.
1608
+ * @param id - The ID of the LLMs.txt generation operation.
1609
+ * @returns The current status and results of the generation operation.
1610
+ */
1611
+ async checkGenerateLLMsTextStatus(id: string): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
1612
+ const headers = this.prepareHeaders();
1613
+ try {
1614
+ const response: AxiosResponse = await this.getRequest(
1615
+ `${this.apiUrl}/v1/llmstxt/${id}`,
1616
+ headers
1617
+ );
1618
+
1619
+ if (response.status === 200) {
1620
+ return response.data;
1621
+ } else if (response.status === 404) {
1622
+ throw new FirecrawlError("LLMs.txt generation job not found", 404);
1623
+ } else {
1624
+ this.handleError(response, "check LLMs.txt generation status");
1625
+ }
1626
+ } catch (error: any) {
1627
+ if (error.response?.data?.error) {
1628
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
1629
+ } else {
1630
+ throw new FirecrawlError(error.message, 500);
1631
+ }
1632
+ }
1633
+ return { success: false, error: "Internal server error." };
1634
+ }
1462
1635
  }
1463
1636
 
1464
1637
  interface CrawlWatcherEvents {