firecrawl 0.0.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/README.md +195 -0
- package/build/index.js +257 -0
- package/build_and_publish.sh +34 -0
- package/jest.config.cjs +5 -0
- package/package.json +52 -0
- package/src/__tests__/e2e_withAuth/index.test.ts +156 -0
- package/src/__tests__/fixtures/scrape.json +22 -0
- package/src/__tests__/index.test.ts +48 -0
- package/src/index.ts +411 -0
- package/tsconfig.json +111 -0
- package/types/index.d.ts +189 -0
package/types/index.d.ts
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
/**
|
|
4
|
+
* Configuration interface for FirecrawlApp.
|
|
5
|
+
*/
|
|
6
|
+
export interface FirecrawlAppConfig {
|
|
7
|
+
apiKey?: string | null;
|
|
8
|
+
apiUrl?: string | null;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Metadata for a Firecrawl document.
|
|
12
|
+
*/
|
|
13
|
+
export interface FirecrawlDocumentMetadata {
|
|
14
|
+
title?: string;
|
|
15
|
+
description?: string;
|
|
16
|
+
language?: string;
|
|
17
|
+
keywords?: string;
|
|
18
|
+
robots?: string;
|
|
19
|
+
ogTitle?: string;
|
|
20
|
+
ogDescription?: string;
|
|
21
|
+
ogUrl?: string;
|
|
22
|
+
ogImage?: string;
|
|
23
|
+
ogAudio?: string;
|
|
24
|
+
ogDeterminer?: string;
|
|
25
|
+
ogLocale?: string;
|
|
26
|
+
ogLocaleAlternate?: string[];
|
|
27
|
+
ogSiteName?: string;
|
|
28
|
+
ogVideo?: string;
|
|
29
|
+
dctermsCreated?: string;
|
|
30
|
+
dcDateCreated?: string;
|
|
31
|
+
dcDate?: string;
|
|
32
|
+
dctermsType?: string;
|
|
33
|
+
dcType?: string;
|
|
34
|
+
dctermsAudience?: string;
|
|
35
|
+
dctermsSubject?: string;
|
|
36
|
+
dcSubject?: string;
|
|
37
|
+
dcDescription?: string;
|
|
38
|
+
dctermsKeywords?: string;
|
|
39
|
+
modifiedTime?: string;
|
|
40
|
+
publishedTime?: string;
|
|
41
|
+
articleTag?: string;
|
|
42
|
+
articleSection?: string;
|
|
43
|
+
sourceURL?: string;
|
|
44
|
+
pageStatusCode?: number;
|
|
45
|
+
pageError?: string;
|
|
46
|
+
[key: string]: any;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Document interface for Firecrawl.
|
|
50
|
+
*/
|
|
51
|
+
export interface FirecrawlDocument {
|
|
52
|
+
id?: string;
|
|
53
|
+
url?: string;
|
|
54
|
+
content: string;
|
|
55
|
+
markdown?: string;
|
|
56
|
+
html?: string;
|
|
57
|
+
llm_extraction?: Record<string, any>;
|
|
58
|
+
createdAt?: Date;
|
|
59
|
+
updatedAt?: Date;
|
|
60
|
+
type?: string;
|
|
61
|
+
metadata: FirecrawlDocumentMetadata;
|
|
62
|
+
childrenLinks?: string[];
|
|
63
|
+
provider?: string;
|
|
64
|
+
warning?: string;
|
|
65
|
+
index?: number;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Response interface for scraping operations.
|
|
69
|
+
*/
|
|
70
|
+
export interface ScrapeResponse {
|
|
71
|
+
success: boolean;
|
|
72
|
+
data?: FirecrawlDocument;
|
|
73
|
+
error?: string;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Response interface for searching operations.
|
|
77
|
+
*/
|
|
78
|
+
export interface SearchResponse {
|
|
79
|
+
success: boolean;
|
|
80
|
+
data?: FirecrawlDocument[];
|
|
81
|
+
error?: string;
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* Response interface for crawling operations.
|
|
85
|
+
*/
|
|
86
|
+
export interface CrawlResponse {
|
|
87
|
+
success: boolean;
|
|
88
|
+
jobId?: string;
|
|
89
|
+
data?: FirecrawlDocument[];
|
|
90
|
+
error?: string;
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Response interface for job status checks.
|
|
94
|
+
*/
|
|
95
|
+
export interface JobStatusResponse {
|
|
96
|
+
success: boolean;
|
|
97
|
+
status: string;
|
|
98
|
+
jobId?: string;
|
|
99
|
+
data?: FirecrawlDocument[];
|
|
100
|
+
partial_data?: FirecrawlDocument[];
|
|
101
|
+
error?: string;
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* Generic parameter interface.
|
|
105
|
+
*/
|
|
106
|
+
export interface Params {
|
|
107
|
+
[key: string]: any;
|
|
108
|
+
extractorOptions?: {
|
|
109
|
+
extractionSchema: z.ZodSchema | any;
|
|
110
|
+
mode?: "llm-extraction";
|
|
111
|
+
extractionPrompt?: string;
|
|
112
|
+
};
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Main class for interacting with the Firecrawl API.
|
|
116
|
+
*/
|
|
117
|
+
export default class FirecrawlApp {
|
|
118
|
+
private apiKey;
|
|
119
|
+
private apiUrl;
|
|
120
|
+
/**
|
|
121
|
+
* Initializes a new instance of the FirecrawlApp class.
|
|
122
|
+
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
123
|
+
*/
|
|
124
|
+
constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
|
|
125
|
+
/**
|
|
126
|
+
* Scrapes a URL using the Firecrawl API.
|
|
127
|
+
* @param {string} url - The URL to scrape.
|
|
128
|
+
* @param {Params | null} params - Additional parameters for the scrape request.
|
|
129
|
+
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
|
130
|
+
*/
|
|
131
|
+
scrapeUrl(url: string, params?: Params | null): Promise<ScrapeResponse>;
|
|
132
|
+
/**
|
|
133
|
+
* Searches for a query using the Firecrawl API.
|
|
134
|
+
* @param {string} query - The query to search for.
|
|
135
|
+
* @param {Params | null} params - Additional parameters for the search request.
|
|
136
|
+
* @returns {Promise<SearchResponse>} The response from the search operation.
|
|
137
|
+
*/
|
|
138
|
+
search(query: string, params?: Params | null): Promise<SearchResponse>;
|
|
139
|
+
/**
|
|
140
|
+
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
141
|
+
* @param {string} url - The URL to crawl.
|
|
142
|
+
* @param {Params | null} params - Additional parameters for the crawl request.
|
|
143
|
+
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
|
144
|
+
* @param {number} pollInterval - Interval in seconds between job status checks.
|
|
145
|
+
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
|
146
|
+
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
|
147
|
+
*/
|
|
148
|
+
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, pollInterval?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
|
|
149
|
+
/**
|
|
150
|
+
* Checks the status of a crawl job using the Firecrawl API.
|
|
151
|
+
* @param {string} jobId - The job ID of the crawl operation.
|
|
152
|
+
* @returns {Promise<JobStatusResponse>} The response containing the job status.
|
|
153
|
+
*/
|
|
154
|
+
checkCrawlStatus(jobId: string): Promise<JobStatusResponse>;
|
|
155
|
+
/**
|
|
156
|
+
* Prepares the headers for an API request; accepts an optional idempotency key.
|
|
157
|
+
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
158
|
+
*/
|
|
159
|
+
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
|
|
160
|
+
/**
|
|
161
|
+
* Sends a POST request to the specified URL.
|
|
162
|
+
* @param {string} url - The URL to send the request to.
|
|
163
|
+
* @param {Params} data - The data to send in the request.
|
|
164
|
+
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
165
|
+
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
|
166
|
+
*/
|
|
167
|
+
postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
|
|
168
|
+
/**
|
|
169
|
+
* Sends a GET request to the specified URL.
|
|
170
|
+
* @param {string} url - The URL to send the request to.
|
|
171
|
+
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
172
|
+
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
|
173
|
+
*/
|
|
174
|
+
getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
|
|
175
|
+
/**
|
|
176
|
+
* Monitors the status of a crawl job until completion or failure.
|
|
177
|
+
* @param {string} jobId - The job ID of the crawl operation.
|
|
178
|
+
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
179
|
+
* @param {number} checkInterval - Interval in seconds between job status checks.
|
|
180
|
+
* @returns {Promise<any>} The final job status or data.
|
|
181
|
+
*/
|
|
182
|
+
monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<any>;
|
|
183
|
+
/**
|
|
184
|
+
* Handles errors from API responses.
|
|
185
|
+
* @param {AxiosResponse} response - The response from the API.
|
|
186
|
+
* @param {string} action - The action being performed when the error occurred.
|
|
187
|
+
*/
|
|
188
|
+
handleError(response: AxiosResponse, action: string): void;
|
|
189
|
+
}
|