firecrawl 1.9.0 → 1.9.2
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +27 -2
- package/dist/index.d.cts +8 -6
- package/dist/index.d.ts +8 -6
- package/dist/index.js +27 -2
- package/package.json +1 -1
- package/src/index.ts +38 -9
package/dist/index.cjs
CHANGED
|
@@ -297,7 +297,21 @@ var FirecrawlApp = class {
|
|
|
297
297
|
*/
|
|
298
298
|
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
|
|
299
299
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
300
|
-
let jsonData = { urls, ...params
|
|
300
|
+
let jsonData = { urls, ...params };
|
|
301
|
+
if (jsonData?.extract?.schema) {
|
|
302
|
+
let schema = jsonData.extract.schema;
|
|
303
|
+
try {
|
|
304
|
+
schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
|
|
305
|
+
} catch (error) {
|
|
306
|
+
}
|
|
307
|
+
jsonData = {
|
|
308
|
+
...jsonData,
|
|
309
|
+
extract: {
|
|
310
|
+
...jsonData.extract,
|
|
311
|
+
schema
|
|
312
|
+
}
|
|
313
|
+
};
|
|
314
|
+
}
|
|
301
315
|
try {
|
|
302
316
|
const response = await this.postRequest(
|
|
303
317
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -407,6 +421,7 @@ var FirecrawlApp = class {
|
|
|
407
421
|
}
|
|
408
422
|
/**
|
|
409
423
|
* Extracts information from URLs using the Firecrawl API.
|
|
424
|
+
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
410
425
|
* @param url - The URL to extract information from.
|
|
411
426
|
* @param params - Additional parameters for the extract request.
|
|
412
427
|
* @returns The response from the extract operation.
|
|
@@ -430,7 +445,17 @@ var FirecrawlApp = class {
|
|
|
430
445
|
headers
|
|
431
446
|
);
|
|
432
447
|
if (response.status === 200) {
|
|
433
|
-
|
|
448
|
+
const responseData = response.data;
|
|
449
|
+
if (responseData.success) {
|
|
450
|
+
return {
|
|
451
|
+
success: true,
|
|
452
|
+
data: responseData.data,
|
|
453
|
+
warning: responseData.warning,
|
|
454
|
+
error: responseData.error
|
|
455
|
+
};
|
|
456
|
+
} else {
|
|
457
|
+
throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
|
|
458
|
+
}
|
|
434
459
|
} else {
|
|
435
460
|
this.handleError(response, "extract");
|
|
436
461
|
}
|
package/dist/index.d.cts
CHANGED
|
@@ -223,9 +223,9 @@ interface MapResponse {
|
|
|
223
223
|
* Parameters for extracting information from URLs.
|
|
224
224
|
* Defines options for extracting information from URLs.
|
|
225
225
|
*/
|
|
226
|
-
interface ExtractParams {
|
|
226
|
+
interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
|
|
227
227
|
prompt: string;
|
|
228
|
-
schema?:
|
|
228
|
+
schema?: LLMSchema;
|
|
229
229
|
systemPrompt?: string;
|
|
230
230
|
allowExternalLinks?: boolean;
|
|
231
231
|
}
|
|
@@ -233,10 +233,11 @@ interface ExtractParams {
|
|
|
233
233
|
* Response interface for extracting information from URLs.
|
|
234
234
|
* Defines the structure of the response received after extracting information from URLs.
|
|
235
235
|
*/
|
|
236
|
-
interface ExtractResponse {
|
|
237
|
-
success:
|
|
238
|
-
data:
|
|
236
|
+
interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
|
|
237
|
+
success: boolean;
|
|
238
|
+
data: LLMSchema;
|
|
239
239
|
error?: string;
|
|
240
|
+
warning?: string;
|
|
240
241
|
}
|
|
241
242
|
/**
|
|
242
243
|
* Error response interface.
|
|
@@ -346,11 +347,12 @@ declare class FirecrawlApp {
|
|
|
346
347
|
checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
347
348
|
/**
|
|
348
349
|
* Extracts information from URLs using the Firecrawl API.
|
|
350
|
+
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
349
351
|
* @param url - The URL to extract information from.
|
|
350
352
|
* @param params - Additional parameters for the extract request.
|
|
351
353
|
* @returns The response from the extract operation.
|
|
352
354
|
*/
|
|
353
|
-
extract(urls: string[], params?: ExtractParams): Promise<ExtractResponse | ErrorResponse>;
|
|
355
|
+
extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
|
|
354
356
|
/**
|
|
355
357
|
* Prepares the headers for an API request.
|
|
356
358
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
package/dist/index.d.ts
CHANGED
|
@@ -223,9 +223,9 @@ interface MapResponse {
|
|
|
223
223
|
* Parameters for extracting information from URLs.
|
|
224
224
|
* Defines options for extracting information from URLs.
|
|
225
225
|
*/
|
|
226
|
-
interface ExtractParams {
|
|
226
|
+
interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
|
|
227
227
|
prompt: string;
|
|
228
|
-
schema?:
|
|
228
|
+
schema?: LLMSchema;
|
|
229
229
|
systemPrompt?: string;
|
|
230
230
|
allowExternalLinks?: boolean;
|
|
231
231
|
}
|
|
@@ -233,10 +233,11 @@ interface ExtractParams {
|
|
|
233
233
|
* Response interface for extracting information from URLs.
|
|
234
234
|
* Defines the structure of the response received after extracting information from URLs.
|
|
235
235
|
*/
|
|
236
|
-
interface ExtractResponse {
|
|
237
|
-
success:
|
|
238
|
-
data:
|
|
236
|
+
interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
|
|
237
|
+
success: boolean;
|
|
238
|
+
data: LLMSchema;
|
|
239
239
|
error?: string;
|
|
240
|
+
warning?: string;
|
|
240
241
|
}
|
|
241
242
|
/**
|
|
242
243
|
* Error response interface.
|
|
@@ -346,11 +347,12 @@ declare class FirecrawlApp {
|
|
|
346
347
|
checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
347
348
|
/**
|
|
348
349
|
* Extracts information from URLs using the Firecrawl API.
|
|
350
|
+
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
349
351
|
* @param url - The URL to extract information from.
|
|
350
352
|
* @param params - Additional parameters for the extract request.
|
|
351
353
|
* @returns The response from the extract operation.
|
|
352
354
|
*/
|
|
353
|
-
extract(urls: string[], params?: ExtractParams): Promise<ExtractResponse | ErrorResponse>;
|
|
355
|
+
extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
|
|
354
356
|
/**
|
|
355
357
|
* Prepares the headers for an API request.
|
|
356
358
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
package/dist/index.js
CHANGED
|
@@ -261,7 +261,21 @@ var FirecrawlApp = class {
|
|
|
261
261
|
*/
|
|
262
262
|
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
|
|
263
263
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
264
|
-
let jsonData = { urls, ...params
|
|
264
|
+
let jsonData = { urls, ...params };
|
|
265
|
+
if (jsonData?.extract?.schema) {
|
|
266
|
+
let schema = jsonData.extract.schema;
|
|
267
|
+
try {
|
|
268
|
+
schema = zodToJsonSchema(schema);
|
|
269
|
+
} catch (error) {
|
|
270
|
+
}
|
|
271
|
+
jsonData = {
|
|
272
|
+
...jsonData,
|
|
273
|
+
extract: {
|
|
274
|
+
...jsonData.extract,
|
|
275
|
+
schema
|
|
276
|
+
}
|
|
277
|
+
};
|
|
278
|
+
}
|
|
265
279
|
try {
|
|
266
280
|
const response = await this.postRequest(
|
|
267
281
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -371,6 +385,7 @@ var FirecrawlApp = class {
|
|
|
371
385
|
}
|
|
372
386
|
/**
|
|
373
387
|
* Extracts information from URLs using the Firecrawl API.
|
|
388
|
+
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
374
389
|
* @param url - The URL to extract information from.
|
|
375
390
|
* @param params - Additional parameters for the extract request.
|
|
376
391
|
* @returns The response from the extract operation.
|
|
@@ -394,7 +409,17 @@ var FirecrawlApp = class {
|
|
|
394
409
|
headers
|
|
395
410
|
);
|
|
396
411
|
if (response.status === 200) {
|
|
397
|
-
|
|
412
|
+
const responseData = response.data;
|
|
413
|
+
if (responseData.success) {
|
|
414
|
+
return {
|
|
415
|
+
success: true,
|
|
416
|
+
data: responseData.data,
|
|
417
|
+
warning: responseData.warning,
|
|
418
|
+
error: responseData.error
|
|
419
|
+
};
|
|
420
|
+
} else {
|
|
421
|
+
throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
|
|
422
|
+
}
|
|
398
423
|
} else {
|
|
399
424
|
this.handleError(response, "extract");
|
|
400
425
|
}
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -240,9 +240,9 @@ export interface MapResponse {
|
|
|
240
240
|
* Parameters for extracting information from URLs.
|
|
241
241
|
* Defines options for extracting information from URLs.
|
|
242
242
|
*/
|
|
243
|
-
export interface ExtractParams {
|
|
243
|
+
export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
|
|
244
244
|
prompt: string;
|
|
245
|
-
schema?:
|
|
245
|
+
schema?: LLMSchema;
|
|
246
246
|
systemPrompt?: string;
|
|
247
247
|
allowExternalLinks?: boolean;
|
|
248
248
|
}
|
|
@@ -251,10 +251,11 @@ export interface ExtractParams {
|
|
|
251
251
|
* Response interface for extracting information from URLs.
|
|
252
252
|
* Defines the structure of the response received after extracting information from URLs.
|
|
253
253
|
*/
|
|
254
|
-
export interface ExtractResponse {
|
|
255
|
-
success:
|
|
256
|
-
data:
|
|
254
|
+
export interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
|
|
255
|
+
success: boolean;
|
|
256
|
+
data: LLMSchema;
|
|
257
257
|
error?: string;
|
|
258
|
+
warning?: string;
|
|
258
259
|
}
|
|
259
260
|
|
|
260
261
|
/**
|
|
@@ -576,7 +577,24 @@ export default class FirecrawlApp {
|
|
|
576
577
|
webhook?: CrawlParams["webhook"],
|
|
577
578
|
): Promise<BatchScrapeStatusResponse | ErrorResponse> {
|
|
578
579
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
579
|
-
let jsonData: any = { urls, ...
|
|
580
|
+
let jsonData: any = { urls, ...params };
|
|
581
|
+
if (jsonData?.extract?.schema) {
|
|
582
|
+
let schema = jsonData.extract.schema;
|
|
583
|
+
|
|
584
|
+
// Try parsing the schema as a Zod schema
|
|
585
|
+
try {
|
|
586
|
+
schema = zodToJsonSchema(schema);
|
|
587
|
+
} catch (error) {
|
|
588
|
+
|
|
589
|
+
}
|
|
590
|
+
jsonData = {
|
|
591
|
+
...jsonData,
|
|
592
|
+
extract: {
|
|
593
|
+
...jsonData.extract,
|
|
594
|
+
schema: schema,
|
|
595
|
+
},
|
|
596
|
+
};
|
|
597
|
+
}
|
|
580
598
|
try {
|
|
581
599
|
const response: AxiosResponse = await this.postRequest(
|
|
582
600
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -701,18 +719,19 @@ export default class FirecrawlApp {
|
|
|
701
719
|
|
|
702
720
|
/**
|
|
703
721
|
* Extracts information from URLs using the Firecrawl API.
|
|
722
|
+
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
704
723
|
* @param url - The URL to extract information from.
|
|
705
724
|
* @param params - Additional parameters for the extract request.
|
|
706
725
|
* @returns The response from the extract operation.
|
|
707
726
|
*/
|
|
708
|
-
async extract(urls: string[], params?: ExtractParams): Promise<ExtractResponse | ErrorResponse> {
|
|
727
|
+
async extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse> {
|
|
709
728
|
const headers = this.prepareHeaders();
|
|
710
729
|
|
|
711
730
|
if (!params?.prompt) {
|
|
712
731
|
throw new FirecrawlError("Prompt is required", 400);
|
|
713
732
|
}
|
|
714
733
|
|
|
715
|
-
let jsonData: { urls: string[] } & ExtractParams= { urls, ...params };
|
|
734
|
+
let jsonData: { urls: string[] } & ExtractParams<T> = { urls, ...params };
|
|
716
735
|
let jsonSchema: any;
|
|
717
736
|
try {
|
|
718
737
|
jsonSchema = params?.schema ? zodToJsonSchema(params.schema) : undefined;
|
|
@@ -727,7 +746,17 @@ export default class FirecrawlApp {
|
|
|
727
746
|
headers
|
|
728
747
|
);
|
|
729
748
|
if (response.status === 200) {
|
|
730
|
-
|
|
749
|
+
const responseData = response.data as ExtractResponse<T>;
|
|
750
|
+
if (responseData.success) {
|
|
751
|
+
return {
|
|
752
|
+
success: true,
|
|
753
|
+
data: responseData.data,
|
|
754
|
+
warning: responseData.warning,
|
|
755
|
+
error: responseData.error
|
|
756
|
+
};
|
|
757
|
+
} else {
|
|
758
|
+
throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
|
|
759
|
+
}
|
|
731
760
|
} else {
|
|
732
761
|
this.handleError(response, "extract");
|
|
733
762
|
}
|