firecrawl 1.18.1 → 1.18.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.ts +184 -6
- package/dist/index.cjs +0 -778
- package/dist/index.d.cts +0 -452
- package/dist/index.d.ts +0 -452
- package/dist/index.js +0 -742
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -364,6 +364,11 @@ export interface DeepResearchParams {
|
|
|
364
364
|
* @default 270
|
|
365
365
|
*/
|
|
366
366
|
timeLimit?: number;
|
|
367
|
+
/**
|
|
368
|
+
* Maximum number of URLs to analyze (1-1000)
|
|
369
|
+
* @default 20
|
|
370
|
+
*/
|
|
371
|
+
maxUrls?: number;
|
|
367
372
|
/**
|
|
368
373
|
* Experimental flag for streaming steps
|
|
369
374
|
*/
|
|
@@ -413,6 +418,48 @@ export interface DeepResearchStatusResponse {
|
|
|
413
418
|
summaries: string[];
|
|
414
419
|
}
|
|
415
420
|
|
|
421
|
+
/**
 * Options accepted by the LLMs.txt generation calls.
 * Every field is optional.
 */
export interface GenerateLLMsTextParams {
  /**
   * Upper bound on how many URLs are processed (1-100).
   * @default 10
   */
  maxUrls?: number;

  /**
   * When true, the full LLMs-full.txt is included in the response.
   * @default false
   */
  showFullText?: boolean;

  /**
   * Experimental streaming flag.
   */
  __experimental_stream?: boolean;
}
|
|
440
|
+
|
|
441
|
+
/**
 * Shape of the response returned when an LLMs.txt generation job is started.
 */
export interface GenerateLLMsTextResponse {
  /** Success flag of the start request. */
  success: boolean;
  /** Identifier of the generation job. */
  id: string;
}
|
|
448
|
+
|
|
449
|
+
/**
 * Shape of the response returned when polling an LLMs.txt generation job.
 */
export interface GenerateLLMsTextStatusResponse {
  /** Success flag of the status request. */
  success: boolean;
  /** Generated output. */
  data: {
    /** Generated LLMs.txt content. */
    llmstxt: string;
    /** Generated LLMs-full.txt content; optional. */
    llmsfulltxt?: string;
  };
  /** Current state of the job. */
  status: "processing" | "completed" | "failed";
  /** Error text; optional. */
  error?: string;
  /** Expiry of the job, as a string. */
  expiresAt: string;
}
|
|
462
|
+
|
|
416
463
|
/**
|
|
417
464
|
* Main class for interacting with the Firecrawl API.
|
|
418
465
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -493,7 +540,7 @@ export default class FirecrawlApp {
|
|
|
493
540
|
const response: AxiosResponse = await axios.post(
|
|
494
541
|
this.apiUrl + `/v1/scrape`,
|
|
495
542
|
jsonData,
|
|
496
|
-
{ headers }
|
|
543
|
+
{ headers, timeout: params?.timeout !== undefined ? (params.timeout + 5000) : undefined },
|
|
497
544
|
);
|
|
498
545
|
if (response.status === 200) {
|
|
499
546
|
const responseData = response.data;
|
|
@@ -1220,7 +1267,7 @@ export default class FirecrawlApp {
|
|
|
1220
1267
|
data: any,
|
|
1221
1268
|
headers: AxiosRequestHeaders
|
|
1222
1269
|
): Promise<AxiosResponse> {
|
|
1223
|
-
return axios.post(url, data, { headers });
|
|
1270
|
+
return axios.post(url, data, { headers, timeout: (data?.timeout ? (data.timeout + 5000) : undefined) });
|
|
1224
1271
|
}
|
|
1225
1272
|
|
|
1226
1273
|
/**
|
|
@@ -1349,10 +1396,22 @@ export default class FirecrawlApp {
|
|
|
1349
1396
|
|
|
1350
1397
|
/**
|
|
1351
1398
|
* Initiates a deep research operation on a given topic and polls until completion.
|
|
1399
|
+
* @param topic - The topic to research.
|
|
1352
1400
|
* @param params - Parameters for the deep research operation.
|
|
1401
|
+
* @param onActivity - Optional callback to receive activity updates in real-time.
|
|
1353
1402
|
* @returns The final research results.
|
|
1354
1403
|
*/
|
|
1355
|
-
async __deepResearch(
|
|
1404
|
+
async __deepResearch(
|
|
1405
|
+
topic: string,
|
|
1406
|
+
params: DeepResearchParams,
|
|
1407
|
+
onActivity?: (activity: {
|
|
1408
|
+
type: string;
|
|
1409
|
+
status: string;
|
|
1410
|
+
message: string;
|
|
1411
|
+
timestamp: string;
|
|
1412
|
+
depth: number;
|
|
1413
|
+
}) => void
|
|
1414
|
+
): Promise<DeepResearchStatusResponse | ErrorResponse> {
|
|
1356
1415
|
try {
|
|
1357
1416
|
const response = await this.__asyncDeepResearch(topic, params);
|
|
1358
1417
|
|
|
@@ -1366,16 +1425,24 @@ export default class FirecrawlApp {
|
|
|
1366
1425
|
|
|
1367
1426
|
const jobId = response.id;
|
|
1368
1427
|
let researchStatus;
|
|
1428
|
+
let lastActivityCount = 0;
|
|
1369
1429
|
|
|
1370
1430
|
while (true) {
|
|
1371
|
-
// console.log("Checking research status...");
|
|
1372
1431
|
researchStatus = await this.__checkDeepResearchStatus(jobId);
|
|
1373
|
-
// console.log("Research status:", researchStatus);
|
|
1374
1432
|
|
|
1375
1433
|
if ('error' in researchStatus && !researchStatus.success) {
|
|
1376
1434
|
return researchStatus;
|
|
1377
1435
|
}
|
|
1378
1436
|
|
|
1437
|
+
// Stream new activities through the callback if provided
|
|
1438
|
+
if (onActivity && researchStatus.activities) {
|
|
1439
|
+
const newActivities = researchStatus.activities.slice(lastActivityCount);
|
|
1440
|
+
for (const activity of newActivities) {
|
|
1441
|
+
onActivity(activity);
|
|
1442
|
+
}
|
|
1443
|
+
lastActivityCount = researchStatus.activities.length;
|
|
1444
|
+
}
|
|
1445
|
+
|
|
1379
1446
|
if (researchStatus.status === "completed") {
|
|
1380
1447
|
return researchStatus;
|
|
1381
1448
|
}
|
|
@@ -1393,7 +1460,6 @@ export default class FirecrawlApp {
|
|
|
1393
1460
|
|
|
1394
1461
|
await new Promise(resolve => setTimeout(resolve, 2000));
|
|
1395
1462
|
}
|
|
1396
|
-
// console.log("Research status finished:", researchStatus);
|
|
1397
1463
|
|
|
1398
1464
|
return { success: false, error: "Research job terminated unexpectedly" };
|
|
1399
1465
|
} catch (error: any) {
|
|
@@ -1459,6 +1525,118 @@ export default class FirecrawlApp {
|
|
|
1459
1525
|
}
|
|
1460
1526
|
return { success: false, error: "Internal server error." };
|
|
1461
1527
|
}
|
|
1528
|
+
|
|
1529
|
+
/**
 * Generates LLMs.txt for a given URL and polls until completion.
 *
 * Starts a job with `asyncGenerateLLMsText`, then calls
 * `checkGenerateLLMsTextStatus` every 2 seconds until the job is no longer
 * "processing".
 *
 * @param url - The URL to generate LLMs.txt from.
 * @param params - Parameters for the LLMs.txt generation operation.
 * @returns The completed status response, or an error response when the job
 *          could not be started, a status check reports an error, or the job
 *          ends in an unrecognised state.
 * @throws FirecrawlError when no job ID is returned, when the job reports
 *         "failed", or when an underlying request throws. Errors that are
 *         already `FirecrawlError` keep their original status code.
 */
async generateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
  try {
    const response = await this.asyncGenerateLLMsText(url, params);

    if (!response.success || 'error' in response) {
      return { success: false, error: 'error' in response ? response.error : 'Unknown error' };
    }

    if (!response.id) {
      throw new FirecrawlError(`Failed to start LLMs.txt generation. No job ID returned.`, 500);
    }

    const jobId = response.id;
    let generationStatus;

    while (true) {
      generationStatus = await this.checkGenerateLLMsTextStatus(jobId);

      // A failed status check is handed back to the caller as-is.
      if ('error' in generationStatus && !generationStatus.success) {
        return generationStatus;
      }

      if (generationStatus.status === "completed") {
        return generationStatus;
      }

      if (generationStatus.status === "failed") {
        throw new FirecrawlError(
          `LLMs.txt generation job ${generationStatus.status}. Error: ${generationStatus.error}`,
          500
        );
      }

      // Any status other than "processing" ends the polling loop.
      if (generationStatus.status !== "processing") {
        break;
      }

      // Wait 2 seconds between polls.
      await new Promise(resolve => setTimeout(resolve, 2000));
    }

    return { success: false, error: "LLMs.txt generation job terminated unexpectedly" };
  } catch (error: any) {
    // Rethrow FirecrawlError untouched so its status code (e.g. a 404 from
    // the status check) is not overwritten with a generic 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    throw new FirecrawlError(error.message, 500, error.response?.data?.details);
  }
}
|
|
1580
|
+
|
|
1581
|
+
/**
 * Initiates a LLMs.txt generation operation without polling.
 *
 * Posts `{ url, ...params }` to `/v1/llmstxt` and returns the response body
 * when the HTTP status is 200.
 *
 * @param url - The URL to generate LLMs.txt from.
 * @param params - Parameters for the LLMs.txt generation operation.
 * @returns The response containing the generation job ID.
 * @throws FirecrawlError when the request fails. If the failure carries a
 *         server error payload, its HTTP status code is used; otherwise 500.
 */
async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
  const headers = this.prepareHeaders();
  try {
    const response: AxiosResponse = await this.postRequest(
      `${this.apiUrl}/v1/llmstxt`,
      { url, ...params },
      headers
    );

    if (response.status === 200) {
      return response.data;
    } else {
      this.handleError(response, "start LLMs.txt generation");
    }
  } catch (error: any) {
    // An error that is already a FirecrawlError has no `response` property,
    // so without this guard it would be re-wrapped below as a generic 500.
    // NOTE(review): handleError is defined outside this view; presumably it
    // throws FirecrawlError - confirm.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    if (error.response?.data?.error) {
      throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
    } else {
      throw new FirecrawlError(error.message, 500);
    }
  }
  // Fallback for the case where handleError returns without throwing.
  return { success: false, error: "Internal server error." };
}
|
|
1610
|
+
|
|
1611
|
+
/**
 * Checks the status of a LLMs.txt generation operation.
 *
 * Issues a GET to `/v1/llmstxt/{id}` and returns the response body when the
 * HTTP status is 200.
 *
 * @param id - The ID of the LLMs.txt generation operation.
 * @returns The current status and results of the generation operation.
 * @throws FirecrawlError with status 404 when the job is not found, with the
 *         server's status code when the failure carries an error payload,
 *         and with status 500 otherwise.
 */
async checkGenerateLLMsTextStatus(id: string): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
  const headers = this.prepareHeaders();
  try {
    const response: AxiosResponse = await this.getRequest(
      `${this.apiUrl}/v1/llmstxt/${id}`,
      headers
    );

    if (response.status === 200) {
      return response.data;
    } else if (response.status === 404) {
      throw new FirecrawlError("LLMs.txt generation job not found", 404);
    } else {
      this.handleError(response, "check LLMs.txt generation status");
    }
  } catch (error: any) {
    // The 404 error thrown above has no `response` property; rethrow it
    // unchanged so it is not re-wrapped below with status 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    if (error.response?.data?.error) {
      throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
    } else {
      throw new FirecrawlError(error.message, 500);
    }
  }
  // Fallback for the case where handleError returns without throwing.
  return { success: false, error: "Internal server error." };
}
|
|
1462
1640
|
}
|
|
1463
1641
|
|
|
1464
1642
|
interface CrawlWatcherEvents {
|