firecrawl 1.1.0 → 1.2.0
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/build/cjs/index.js +9 -9
- package/build/esm/index.js +9 -9
- package/package.json +1 -1
- package/src/index.ts +18 -10
- package/types/index.d.ts +8 -1
package/build/cjs/index.js
CHANGED
|
@@ -5,7 +5,6 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.CrawlWatcher = void 0;
|
|
7
7
|
const axios_1 = __importDefault(require("axios"));
|
|
8
|
-
const zod_1 = require("zod");
|
|
9
8
|
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
|
10
9
|
const isows_1 = require("isows");
|
|
11
10
|
const typescript_event_target_1 = require("typescript-event-target");
|
|
@@ -34,18 +33,19 @@ class FirecrawlApp {
|
|
|
34
33
|
Authorization: `Bearer ${this.apiKey}`,
|
|
35
34
|
};
|
|
36
35
|
let jsonData = { url, ...params };
|
|
37
|
-
if (jsonData?.
|
|
38
|
-
let schema = jsonData.
|
|
39
|
-
//
|
|
40
|
-
|
|
36
|
+
if (jsonData?.extract?.schema) {
|
|
37
|
+
let schema = jsonData.extract.schema;
|
|
38
|
+
// Try parsing the schema as a Zod schema
|
|
39
|
+
try {
|
|
41
40
|
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
|
42
41
|
}
|
|
42
|
+
catch (error) {
|
|
43
|
+
}
|
|
43
44
|
jsonData = {
|
|
44
45
|
...jsonData,
|
|
45
|
-
|
|
46
|
-
...jsonData.
|
|
47
|
-
|
|
48
|
-
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
46
|
+
extract: {
|
|
47
|
+
...jsonData.extract,
|
|
48
|
+
schema: schema,
|
|
49
49
|
},
|
|
50
50
|
};
|
|
51
51
|
}
|
package/build/esm/index.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import axios from "axios";
|
|
2
|
-
import { z } from "zod";
|
|
3
2
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
3
|
import { WebSocket } from "isows";
|
|
5
4
|
import { TypedEventTarget } from "typescript-event-target";
|
|
@@ -28,18 +27,19 @@ export default class FirecrawlApp {
|
|
|
28
27
|
Authorization: `Bearer ${this.apiKey}`,
|
|
29
28
|
};
|
|
30
29
|
let jsonData = { url, ...params };
|
|
31
|
-
if (jsonData?.
|
|
32
|
-
let schema = jsonData.
|
|
33
|
-
//
|
|
34
|
-
|
|
30
|
+
if (jsonData?.extract?.schema) {
|
|
31
|
+
let schema = jsonData.extract.schema;
|
|
32
|
+
// Try parsing the schema as a Zod schema
|
|
33
|
+
try {
|
|
35
34
|
schema = zodToJsonSchema(schema);
|
|
36
35
|
}
|
|
36
|
+
catch (error) {
|
|
37
|
+
}
|
|
37
38
|
jsonData = {
|
|
38
39
|
...jsonData,
|
|
39
|
-
|
|
40
|
-
...jsonData.
|
|
41
|
-
|
|
42
|
-
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
40
|
+
extract: {
|
|
41
|
+
...jsonData.extract,
|
|
42
|
+
schema: schema,
|
|
43
43
|
},
|
|
44
44
|
};
|
|
45
45
|
}
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -64,6 +64,7 @@ export interface FirecrawlDocument {
|
|
|
64
64
|
html?: string;
|
|
65
65
|
rawHtml?: string;
|
|
66
66
|
links?: string[];
|
|
67
|
+
extract?: Record<any, any>;
|
|
67
68
|
screenshot?: string;
|
|
68
69
|
metadata?: FirecrawlDocumentMetadata;
|
|
69
70
|
}
|
|
@@ -73,12 +74,17 @@ export interface FirecrawlDocument {
|
|
|
73
74
|
* Defines the options and configurations available for scraping web content.
|
|
74
75
|
*/
|
|
75
76
|
export interface ScrapeParams {
|
|
76
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "full@scrennshot")[];
|
|
77
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
|
|
77
78
|
headers?: Record<string, string>;
|
|
78
79
|
includeTags?: string[];
|
|
79
80
|
excludeTags?: string[];
|
|
80
81
|
onlyMainContent?: boolean;
|
|
81
|
-
|
|
82
|
+
extract?: {
|
|
83
|
+
prompt?: string;
|
|
84
|
+
schema?: z.ZodSchema | any;
|
|
85
|
+
systemPrompt?: string;
|
|
86
|
+
};
|
|
87
|
+
waitFor?: number;
|
|
82
88
|
timeout?: number;
|
|
83
89
|
}
|
|
84
90
|
|
|
@@ -196,18 +202,20 @@ export default class FirecrawlApp {
|
|
|
196
202
|
Authorization: `Bearer ${this.apiKey}`,
|
|
197
203
|
} as AxiosRequestHeaders;
|
|
198
204
|
let jsonData: any = { url, ...params };
|
|
199
|
-
if (jsonData?.
|
|
200
|
-
let schema = jsonData.
|
|
201
|
-
|
|
202
|
-
|
|
205
|
+
if (jsonData?.extract?.schema) {
|
|
206
|
+
let schema = jsonData.extract.schema;
|
|
207
|
+
|
|
208
|
+
// Try parsing the schema as a Zod schema
|
|
209
|
+
try {
|
|
203
210
|
schema = zodToJsonSchema(schema);
|
|
211
|
+
} catch (error) {
|
|
212
|
+
|
|
204
213
|
}
|
|
205
214
|
jsonData = {
|
|
206
215
|
...jsonData,
|
|
207
|
-
|
|
208
|
-
...jsonData.
|
|
209
|
-
|
|
210
|
-
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
216
|
+
extract: {
|
|
217
|
+
...jsonData.extract,
|
|
218
|
+
schema: schema,
|
|
211
219
|
},
|
|
212
220
|
};
|
|
213
221
|
}
|
package/types/index.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
|
2
|
+
import { z } from "zod";
|
|
2
3
|
import { TypedEventTarget } from "typescript-event-target";
|
|
3
4
|
/**
|
|
4
5
|
* Configuration interface for FirecrawlApp.
|
|
@@ -58,6 +59,7 @@ export interface FirecrawlDocument {
|
|
|
58
59
|
html?: string;
|
|
59
60
|
rawHtml?: string;
|
|
60
61
|
links?: string[];
|
|
62
|
+
extract?: Record<any, any>;
|
|
61
63
|
screenshot?: string;
|
|
62
64
|
metadata?: FirecrawlDocumentMetadata;
|
|
63
65
|
}
|
|
@@ -66,11 +68,16 @@ export interface FirecrawlDocument {
|
|
|
66
68
|
* Defines the options and configurations available for scraping web content.
|
|
67
69
|
*/
|
|
68
70
|
export interface ScrapeParams {
|
|
69
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "full@scrennshot")[];
|
|
71
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
|
|
70
72
|
headers?: Record<string, string>;
|
|
71
73
|
includeTags?: string[];
|
|
72
74
|
excludeTags?: string[];
|
|
73
75
|
onlyMainContent?: boolean;
|
|
76
|
+
extract?: {
|
|
77
|
+
prompt?: string;
|
|
78
|
+
schema?: z.ZodSchema | any;
|
|
79
|
+
systemPrompt?: string;
|
|
80
|
+
};
|
|
74
81
|
waitFor?: number;
|
|
75
82
|
timeout?: number;
|
|
76
83
|
}
|