firecrawl 1.18.1 → 1.18.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl",
3
- "version": "1.18.1",
3
+ "version": "1.18.4",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
package/src/index.ts CHANGED
@@ -364,6 +364,11 @@ export interface DeepResearchParams {
364
364
  * @default 270
365
365
  */
366
366
  timeLimit?: number;
367
+ /**
368
+ * Maximum number of URLs to analyze (1-1000)
369
+ * @default 20
370
+ */
371
+ maxUrls?: number;
367
372
  /**
368
373
  * Experimental flag for streaming steps
369
374
  */
@@ -413,6 +418,48 @@ export interface DeepResearchStatusResponse {
413
418
  summaries: string[];
414
419
  }
415
420
 
421
+ /**
422
+ * Parameters for LLMs.txt generation operations.
423
+ */
424
+ export interface GenerateLLMsTextParams {
425
+ /**
426
+ * Maximum number of URLs to process (1-100)
427
+ * @default 10
428
+ */
429
+ maxUrls?: number;
430
+ /**
431
+ * Whether to show the full LLMs-full.txt in the response
432
+ * @default false
433
+ */
434
+ showFullText?: boolean;
435
+ /**
436
+ * Experimental flag for streaming
437
+ */
438
+ __experimental_stream?: boolean;
439
+ }
440
+
441
+ /**
442
+ * Response interface for LLMs.txt generation operations.
443
+ */
444
+ export interface GenerateLLMsTextResponse {
445
+ success: boolean;
446
+ id: string;
447
+ }
448
+
449
+ /**
450
+ * Status response interface for LLMs.txt generation operations.
451
+ */
452
+ export interface GenerateLLMsTextStatusResponse {
453
+ success: boolean;
454
+ data: {
455
+ llmstxt: string;
456
+ llmsfulltxt?: string;
457
+ };
458
+ status: "processing" | "completed" | "failed";
459
+ error?: string;
460
+ expiresAt: string;
461
+ }
462
+
416
463
  /**
417
464
  * Main class for interacting with the Firecrawl API.
418
465
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -493,7 +540,7 @@ export default class FirecrawlApp {
493
540
  const response: AxiosResponse = await axios.post(
494
541
  this.apiUrl + `/v1/scrape`,
495
542
  jsonData,
496
- { headers }
543
+ { headers, timeout: params?.timeout !== undefined ? (params.timeout + 5000) : undefined },
497
544
  );
498
545
  if (response.status === 200) {
499
546
  const responseData = response.data;
@@ -1220,7 +1267,7 @@ export default class FirecrawlApp {
1220
1267
  data: any,
1221
1268
  headers: AxiosRequestHeaders
1222
1269
  ): Promise<AxiosResponse> {
1223
- return axios.post(url, data, { headers });
1270
+ return axios.post(url, data, { headers, timeout: (data?.timeout ? (data.timeout + 5000) : undefined) });
1224
1271
  }
1225
1272
 
1226
1273
  /**
@@ -1349,10 +1396,22 @@ export default class FirecrawlApp {
1349
1396
 
1350
1397
  /**
1351
1398
  * Initiates a deep research operation on a given topic and polls until completion.
1399
+ * @param topic - The topic to research.
1352
1400
  * @param params - Parameters for the deep research operation.
1401
+ * @param onActivity - Optional callback to receive activity updates in real-time.
1353
1402
  * @returns The final research results.
1354
1403
  */
1355
- async __deepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchStatusResponse | ErrorResponse> {
1404
+ async __deepResearch(
1405
+ topic: string,
1406
+ params: DeepResearchParams,
1407
+ onActivity?: (activity: {
1408
+ type: string;
1409
+ status: string;
1410
+ message: string;
1411
+ timestamp: string;
1412
+ depth: number;
1413
+ }) => void
1414
+ ): Promise<DeepResearchStatusResponse | ErrorResponse> {
1356
1415
  try {
1357
1416
  const response = await this.__asyncDeepResearch(topic, params);
1358
1417
 
@@ -1366,16 +1425,24 @@ export default class FirecrawlApp {
1366
1425
 
1367
1426
  const jobId = response.id;
1368
1427
  let researchStatus;
1428
+ let lastActivityCount = 0;
1369
1429
 
1370
1430
  while (true) {
1371
- // console.log("Checking research status...");
1372
1431
  researchStatus = await this.__checkDeepResearchStatus(jobId);
1373
- // console.log("Research status:", researchStatus);
1374
1432
 
1375
1433
  if ('error' in researchStatus && !researchStatus.success) {
1376
1434
  return researchStatus;
1377
1435
  }
1378
1436
 
1437
+ // Stream new activities through the callback if provided
1438
+ if (onActivity && researchStatus.activities) {
1439
+ const newActivities = researchStatus.activities.slice(lastActivityCount);
1440
+ for (const activity of newActivities) {
1441
+ onActivity(activity);
1442
+ }
1443
+ lastActivityCount = researchStatus.activities.length;
1444
+ }
1445
+
1379
1446
  if (researchStatus.status === "completed") {
1380
1447
  return researchStatus;
1381
1448
  }
@@ -1393,7 +1460,6 @@ export default class FirecrawlApp {
1393
1460
 
1394
1461
  await new Promise(resolve => setTimeout(resolve, 2000));
1395
1462
  }
1396
- // console.log("Research status finished:", researchStatus);
1397
1463
 
1398
1464
  return { success: false, error: "Research job terminated unexpectedly" };
1399
1465
  } catch (error: any) {
@@ -1459,6 +1525,118 @@ export default class FirecrawlApp {
1459
1525
  }
1460
1526
  return { success: false, error: "Internal server error." };
1461
1527
  }
1528
+
1529
+ /**
1530
+ * Generates LLMs.txt for a given URL and polls until completion.
1531
+ * @param url - The URL to generate LLMs.txt from.
1532
+ * @param params - Parameters for the LLMs.txt generation operation.
1533
+ * @returns The final generation results.
1534
+ */
1535
+ async generateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
1536
+ try {
1537
+ const response = await this.asyncGenerateLLMsText(url, params);
1538
+
1539
+ if (!response.success || 'error' in response) {
1540
+ return { success: false, error: 'error' in response ? response.error : 'Unknown error' };
1541
+ }
1542
+
1543
+ if (!response.id) {
1544
+ throw new FirecrawlError(`Failed to start LLMs.txt generation. No job ID returned.`, 500);
1545
+ }
1546
+
1547
+ const jobId = response.id;
1548
+ let generationStatus;
1549
+
1550
+ while (true) {
1551
+ generationStatus = await this.checkGenerateLLMsTextStatus(jobId);
1552
+
1553
+ if ('error' in generationStatus && !generationStatus.success) {
1554
+ return generationStatus;
1555
+ }
1556
+
1557
+ if (generationStatus.status === "completed") {
1558
+ return generationStatus;
1559
+ }
1560
+
1561
+ if (generationStatus.status === "failed") {
1562
+ throw new FirecrawlError(
1563
+ `LLMs.txt generation job ${generationStatus.status}. Error: ${generationStatus.error}`,
1564
+ 500
1565
+ );
1566
+ }
1567
+
1568
+ if (generationStatus.status !== "processing") {
1569
+ break;
1570
+ }
1571
+
1572
+ await new Promise(resolve => setTimeout(resolve, 2000));
1573
+ }
1574
+
1575
+ return { success: false, error: "LLMs.txt generation job terminated unexpectedly" };
1576
+ } catch (error: any) {
1577
+ throw new FirecrawlError(error.message, 500, error.response?.data?.details);
1578
+ }
1579
+ }
1580
+
1581
+ /**
1582
+ * Initiates a LLMs.txt generation operation without polling.
1583
+ * @param url - The URL to generate LLMs.txt from.
1584
+ * @param params - Parameters for the LLMs.txt generation operation.
1585
+ * @returns The response containing the generation job ID.
1586
+ */
1587
+ async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
1588
+ const headers = this.prepareHeaders();
1589
+ try {
1590
+ const response: AxiosResponse = await this.postRequest(
1591
+ `${this.apiUrl}/v1/llmstxt`,
1592
+ { url, ...params },
1593
+ headers
1594
+ );
1595
+
1596
+ if (response.status === 200) {
1597
+ return response.data;
1598
+ } else {
1599
+ this.handleError(response, "start LLMs.txt generation");
1600
+ }
1601
+ } catch (error: any) {
1602
+ if (error.response?.data?.error) {
1603
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
1604
+ } else {
1605
+ throw new FirecrawlError(error.message, 500);
1606
+ }
1607
+ }
1608
+ return { success: false, error: "Internal server error." };
1609
+ }
1610
+
1611
+ /**
1612
+ * Checks the status of a LLMs.txt generation operation.
1613
+ * @param id - The ID of the LLMs.txt generation operation.
1614
+ * @returns The current status and results of the generation operation.
1615
+ */
1616
+ async checkGenerateLLMsTextStatus(id: string): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
1617
+ const headers = this.prepareHeaders();
1618
+ try {
1619
+ const response: AxiosResponse = await this.getRequest(
1620
+ `${this.apiUrl}/v1/llmstxt/${id}`,
1621
+ headers
1622
+ );
1623
+
1624
+ if (response.status === 200) {
1625
+ return response.data;
1626
+ } else if (response.status === 404) {
1627
+ throw new FirecrawlError("LLMs.txt generation job not found", 404);
1628
+ } else {
1629
+ this.handleError(response, "check LLMs.txt generation status");
1630
+ }
1631
+ } catch (error: any) {
1632
+ if (error.response?.data?.error) {
1633
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
1634
+ } else {
1635
+ throw new FirecrawlError(error.message, 500);
1636
+ }
1637
+ }
1638
+ return { success: false, error: "Internal server error." };
1639
+ }
1462
1640
  }
1463
1641
 
1464
1642
  interface CrawlWatcherEvents {