@mendable/firecrawl-js 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/cjs/index.js +9 -9
- package/build/esm/index.js +9 -9
- package/package.json +1 -1
- package/src/index.ts +19 -10
- package/types/index.d.ts +9 -1
package/build/cjs/index.js
CHANGED
|
@@ -5,7 +5,6 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.CrawlWatcher = void 0;
|
|
7
7
|
const axios_1 = __importDefault(require("axios"));
|
|
8
|
-
const zod_1 = require("zod");
|
|
9
8
|
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
|
10
9
|
const isows_1 = require("isows");
|
|
11
10
|
const typescript_event_target_1 = require("typescript-event-target");
|
|
@@ -34,18 +33,19 @@ class FirecrawlApp {
|
|
|
34
33
|
Authorization: `Bearer ${this.apiKey}`,
|
|
35
34
|
};
|
|
36
35
|
let jsonData = { url, ...params };
|
|
37
|
-
if (jsonData?.extractorOptions?.extractionSchema) {
|
|
38
|
-
let schema = jsonData.extractorOptions.extractionSchema;
|
|
39
|
-
//
|
|
40
|
-
|
|
36
|
+
if (jsonData?.extract?.schema) {
|
|
37
|
+
let schema = jsonData.extract.schema;
|
|
38
|
+
// Try parsing the schema as a Zod schema
|
|
39
|
+
try {
|
|
41
40
|
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
|
42
41
|
}
|
|
42
|
+
catch (error) {
|
|
43
|
+
}
|
|
43
44
|
jsonData = {
|
|
44
45
|
...jsonData,
|
|
45
|
-
|
|
46
|
-
...jsonData.extractorOptions,
|
|
47
|
-
|
|
48
|
-
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
46
|
+
extract: {
|
|
47
|
+
...jsonData.extract,
|
|
48
|
+
schema: schema,
|
|
49
49
|
},
|
|
50
50
|
};
|
|
51
51
|
}
|
package/build/esm/index.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import axios from "axios";
|
|
2
|
-
import { z } from "zod";
|
|
3
2
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
3
|
import { WebSocket } from "isows";
|
|
5
4
|
import { TypedEventTarget } from "typescript-event-target";
|
|
@@ -28,18 +27,19 @@ export default class FirecrawlApp {
|
|
|
28
27
|
Authorization: `Bearer ${this.apiKey}`,
|
|
29
28
|
};
|
|
30
29
|
let jsonData = { url, ...params };
|
|
31
|
-
if (jsonData?.extractorOptions?.extractionSchema) {
|
|
32
|
-
let schema = jsonData.extractorOptions.extractionSchema;
|
|
33
|
-
//
|
|
34
|
-
|
|
30
|
+
if (jsonData?.extract?.schema) {
|
|
31
|
+
let schema = jsonData.extract.schema;
|
|
32
|
+
// Try parsing the schema as a Zod schema
|
|
33
|
+
try {
|
|
35
34
|
schema = zodToJsonSchema(schema);
|
|
36
35
|
}
|
|
36
|
+
catch (error) {
|
|
37
|
+
}
|
|
37
38
|
jsonData = {
|
|
38
39
|
...jsonData,
|
|
39
|
-
|
|
40
|
-
...jsonData.extractorOptions,
|
|
41
|
-
|
|
42
|
-
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
40
|
+
extract: {
|
|
41
|
+
...jsonData.extract,
|
|
42
|
+
schema: schema,
|
|
43
43
|
},
|
|
44
44
|
};
|
|
45
45
|
}
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -64,6 +64,7 @@ export interface FirecrawlDocument {
|
|
|
64
64
|
html?: string;
|
|
65
65
|
rawHtml?: string;
|
|
66
66
|
links?: string[];
|
|
67
|
+
extract?: Record<any, any>;
|
|
67
68
|
screenshot?: string;
|
|
68
69
|
metadata?: FirecrawlDocumentMetadata;
|
|
69
70
|
}
|
|
@@ -73,12 +74,17 @@ export interface FirecrawlDocument {
|
|
|
73
74
|
* Defines the options and configurations available for scraping web content.
|
|
74
75
|
*/
|
|
75
76
|
export interface ScrapeParams {
|
|
76
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "full@scrennshot")[];
|
|
77
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
|
|
77
78
|
headers?: Record<string, string>;
|
|
78
79
|
includeTags?: string[];
|
|
79
80
|
excludeTags?: string[];
|
|
80
81
|
onlyMainContent?: boolean;
|
|
81
|
-
|
|
82
|
+
extract?: {
|
|
83
|
+
prompt?: string;
|
|
84
|
+
schema?: z.ZodSchema | any;
|
|
85
|
+
systemPrompt?: string;
|
|
86
|
+
};
|
|
87
|
+
waitFor?: number;
|
|
82
88
|
timeout?: number;
|
|
83
89
|
}
|
|
84
90
|
|
|
@@ -105,6 +111,7 @@ export interface CrawlParams {
|
|
|
105
111
|
allowExternalLinks?: boolean;
|
|
106
112
|
ignoreSitemap?: boolean;
|
|
107
113
|
scrapeOptions?: ScrapeParams;
|
|
114
|
+
webhook?: string;
|
|
108
115
|
}
|
|
109
116
|
|
|
110
117
|
/**
|
|
@@ -196,18 +203,20 @@ export default class FirecrawlApp {
|
|
|
196
203
|
Authorization: `Bearer ${this.apiKey}`,
|
|
197
204
|
} as AxiosRequestHeaders;
|
|
198
205
|
let jsonData: any = { url, ...params };
|
|
199
|
-
if (jsonData?.extractorOptions?.extractionSchema) {
|
|
200
|
-
let schema = jsonData.extractorOptions.extractionSchema;
|
|
201
|
-
|
|
202
|
-
|
|
206
|
+
if (jsonData?.extract?.schema) {
|
|
207
|
+
let schema = jsonData.extract.schema;
|
|
208
|
+
|
|
209
|
+
// Try parsing the schema as a Zod schema
|
|
210
|
+
try {
|
|
203
211
|
schema = zodToJsonSchema(schema);
|
|
212
|
+
} catch (error) {
|
|
213
|
+
|
|
204
214
|
}
|
|
205
215
|
jsonData = {
|
|
206
216
|
...jsonData,
|
|
207
|
-
|
|
208
|
-
...jsonData.extractorOptions,
|
|
209
|
-
|
|
210
|
-
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
217
|
+
extract: {
|
|
218
|
+
...jsonData.extract,
|
|
219
|
+
schema: schema,
|
|
211
220
|
},
|
|
212
221
|
};
|
|
213
222
|
}
|
package/types/index.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
|
2
|
+
import { z } from "zod";
|
|
2
3
|
import { TypedEventTarget } from "typescript-event-target";
|
|
3
4
|
/**
|
|
4
5
|
* Configuration interface for FirecrawlApp.
|
|
@@ -58,6 +59,7 @@ export interface FirecrawlDocument {
|
|
|
58
59
|
html?: string;
|
|
59
60
|
rawHtml?: string;
|
|
60
61
|
links?: string[];
|
|
62
|
+
extract?: Record<any, any>;
|
|
61
63
|
screenshot?: string;
|
|
62
64
|
metadata?: FirecrawlDocumentMetadata;
|
|
63
65
|
}
|
|
@@ -66,11 +68,16 @@ export interface FirecrawlDocument {
|
|
|
66
68
|
* Defines the options and configurations available for scraping web content.
|
|
67
69
|
*/
|
|
68
70
|
export interface ScrapeParams {
|
|
69
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "full@scrennshot")[];
|
|
71
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
|
|
70
72
|
headers?: Record<string, string>;
|
|
71
73
|
includeTags?: string[];
|
|
72
74
|
excludeTags?: string[];
|
|
73
75
|
onlyMainContent?: boolean;
|
|
76
|
+
extract?: {
|
|
77
|
+
prompt?: string;
|
|
78
|
+
schema?: z.ZodSchema | any;
|
|
79
|
+
systemPrompt?: string;
|
|
80
|
+
};
|
|
74
81
|
waitFor?: number;
|
|
75
82
|
timeout?: number;
|
|
76
83
|
}
|
|
@@ -96,6 +103,7 @@ export interface CrawlParams {
|
|
|
96
103
|
allowExternalLinks?: boolean;
|
|
97
104
|
ignoreSitemap?: boolean;
|
|
98
105
|
scrapeOptions?: ScrapeParams;
|
|
106
|
+
webhook?: string;
|
|
99
107
|
}
|
|
100
108
|
/**
|
|
101
109
|
* Response interface for crawling operations.
|