firecrawl 1.21.0 → 1.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +12 -4
- package/dist/index.d.cts +15 -1
- package/dist/index.d.ts +15 -1
- package/dist/index.js +11 -3
- package/package.json +8 -6
- package/src/index.ts +15 -2
- package/tsup.config.ts +9 -0
- package/dump.rdb +0 -0
package/dist/index.cjs
CHANGED
@@ -38,8 +38,15 @@ module.exports = __toCommonJS(src_exports);
 var import_axios = __toESM(require("axios"), 1);
 var zt = __toESM(require("zod"), 1);
 var import_zod_to_json_schema = require("zod-to-json-schema");
-
-
+
+// node_modules/typescript-event-target/dist/index.mjs
+var e = class extends EventTarget {
+  dispatchTypedEvent(s, t) {
+    return super.dispatchEvent(t);
+  }
+};
+
+// src/index.ts
 var FirecrawlError = class extends Error {
   statusCode;
   details;
@@ -419,6 +426,7 @@ var FirecrawlApp = class {
    * @param pollInterval - Time in seconds for job status checks.
    * @param idempotencyKey - Optional idempotency key for the request.
    * @param webhook - Optional webhook for the batch scrape.
+   * @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
    * @returns The response from the crawl operation.
    */
   async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
@@ -1191,7 +1199,7 @@ var FirecrawlApp = class {
     return { success: false, error: "Internal server error." };
   }
 };
-var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget {
+var CrawlWatcher = class extends e {
   ws;
   data;
   status;
@@ -1200,7 +1208,7 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
     super();
     this.id = id;
     const wsUrl = app.apiUrl.replace(/^http/, "ws");
-    this.ws = new
+    this.ws = new WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey);
     this.status = "scraping";
     this.data = [];
     const messageHandler = (msg) => {
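
The CJS build now inlines a minimal TypedEventTarget shim (the `var e = class extends EventTarget` block) and constructs the watcher socket with the platform's global `WebSocket` instead of the removed isows dependency. A hedged sketch of driving `CrawlWatcher` on a runtime with a global `WebSocket`, such as Node 22; the `crawlUrlAndWatch` method and the "document"/"done" event names come from the SDK's existing API and are assumed unchanged by this release:

```ts
import FirecrawlApp from "firecrawl";

// Sketch only: assumes a global WebSocket (Node >= 22 or a browser),
// since 1.22.0 no longer bundles the isows polyfill.
const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" });

const watcher = await app.crawlUrlAndWatch("https://example.com", { limit: 5 });

// CrawlWatcher still behaves as a typed EventTarget, now backed by the inlined shim.
watcher.addEventListener("document", (event) => {
  console.log("crawled:", event.detail.metadata?.sourceURL);
});
watcher.addEventListener("done", (event) => {
  console.log("finished with", event.detail.data.length, "documents");
});
```
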
package/dist/index.d.cts
CHANGED
@@ -65,6 +65,11 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
     screenshot?: string;
     metadata?: FirecrawlDocumentMetadata;
     actions: ActionsSchema;
+    compare?: {
+        previousScrapeAt: string | null;
+        changeStatus: "new" | "same" | "changed" | "removed";
+        visibility: "visible" | "hidden";
+    };
     title?: string;
     description?: string;
 }
@@ -73,7 +78,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
  * Defines the options and configurations available for scraping web content.
  */
 interface CrawlScrapeOptions {
-    formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
+    formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "compare")[];
     headers?: Record<string, string>;
     includeTags?: string[];
     excludeTags?: string[];
@@ -132,6 +137,14 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
 }
 interface ActionsResult {
     screenshots: string[];
+    scrapes: ({
+        url: string;
+        html: string;
+    })[];
+    javascriptReturns: {
+        type: string;
+        value: unknown;
+    }[];
 }
 /**
  * Response interface for scraping operations.
@@ -529,6 +542,7 @@ declare class FirecrawlApp {
      * @param pollInterval - Time in seconds for job status checks.
      * @param idempotencyKey - Optional idempotency key for the request.
      * @param webhook - Optional webhook for the batch scrape.
+     * @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
      * @returns The response from the crawl operation.
      */
     batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
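
These declarations add a "compare" entry to `formats` and an optional `compare` object on `FirecrawlDocument`. A minimal sketch of requesting change tracking on a single scrape, assuming the API accepts the new format exactly as the updated union suggests:

```ts
import FirecrawlApp from "firecrawl";

const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" });

// "compare" is now part of CrawlScrapeOptions["formats"].
const result = await app.scrapeUrl("https://example.com", {
  formats: ["markdown", "compare"],
});

if (result.success && result.compare) {
  console.log("previous scrape at:", result.compare.previousScrapeAt); // null if never scraped before
  console.log("change status:", result.compare.changeStatus);          // "new" | "same" | "changed" | "removed"
  console.log("visibility:", result.compare.visibility);               // "visible" | "hidden"
}
```
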
package/dist/index.d.ts
CHANGED
@@ -65,6 +65,11 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
     screenshot?: string;
     metadata?: FirecrawlDocumentMetadata;
     actions: ActionsSchema;
+    compare?: {
+        previousScrapeAt: string | null;
+        changeStatus: "new" | "same" | "changed" | "removed";
+        visibility: "visible" | "hidden";
+    };
     title?: string;
     description?: string;
 }
@@ -73,7 +78,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
  * Defines the options and configurations available for scraping web content.
  */
 interface CrawlScrapeOptions {
-    formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
+    formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "compare")[];
     headers?: Record<string, string>;
     includeTags?: string[];
     excludeTags?: string[];
@@ -132,6 +137,14 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
 }
 interface ActionsResult {
     screenshots: string[];
+    scrapes: ({
+        url: string;
+        html: string;
+    })[];
+    javascriptReturns: {
+        type: string;
+        value: unknown;
+    }[];
 }
 /**
  * Response interface for scraping operations.
@@ -529,6 +542,7 @@ declare class FirecrawlApp {
      * @param pollInterval - Time in seconds for job status checks.
      * @param idempotencyKey - Optional idempotency key for the request.
      * @param webhook - Optional webhook for the batch scrape.
+     * @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
      * @returns The response from the crawl operation.
      */
     batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
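
The same declarations (mirrored here for the ESM typings) extend `ActionsResult` with `scrapes` and `javascriptReturns` alongside `screenshots`. A hedged sketch of reading those results back; the `executeJavascript` and `scrape` action shapes are assumptions about the existing actions API, since only the result fields are defined by this diff:

```ts
import FirecrawlApp, { type ActionsResult } from "firecrawl";

const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" });

const result = await app.scrapeUrl("https://example.com", {
  formats: ["markdown"],
  // Assumed action shapes; the diff only types what comes back.
  actions: [
    { type: "executeJavascript", script: "document.title" },
    { type: "scrape" },
  ],
});

// Narrow manually; this sketch does not thread the actions generic through scrapeUrl.
const actions = result.success ? (result.actions as ActionsResult | undefined) : undefined;
if (actions) {
  for (const scrape of actions.scrapes) {
    console.log("scrape action captured", scrape.url, `${scrape.html.length} chars of HTML`);
  }
  for (const ret of actions.javascriptReturns) {
    console.log("JS return:", ret.type, ret.value);
  }
}
```
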
package/dist/index.js
CHANGED
@@ -2,8 +2,15 @@
 import axios, { AxiosError } from "axios";
 import * as zt from "zod";
 import { zodToJsonSchema } from "zod-to-json-schema";
-
-
+
+// node_modules/typescript-event-target/dist/index.mjs
+var e = class extends EventTarget {
+  dispatchTypedEvent(s, t) {
+    return super.dispatchEvent(t);
+  }
+};
+
+// src/index.ts
 var FirecrawlError = class extends Error {
   statusCode;
   details;
@@ -383,6 +390,7 @@ var FirecrawlApp = class {
    * @param pollInterval - Time in seconds for job status checks.
    * @param idempotencyKey - Optional idempotency key for the request.
    * @param webhook - Optional webhook for the batch scrape.
+   * @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
    * @returns The response from the crawl operation.
    */
   async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
@@ -1155,7 +1163,7 @@ var FirecrawlApp = class {
     return { success: false, error: "Internal server error." };
   }
 };
-var CrawlWatcher = class extends
+var CrawlWatcher = class extends e {
   ws;
   data;
   status;
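
Both compiled builds document the new trailing `ignoreInvalidURLs` argument on `batchScrapeUrls`. A minimal sketch of the call, passing `undefined` for the optional arguments that come before it:

```ts
import FirecrawlApp from "firecrawl";

const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" });

// batchScrapeUrls(urls, params?, pollInterval?, idempotencyKey?, webhook?, ignoreInvalidURLs?)
const result = await app.batchScrapeUrls(
  ["https://example.com", "not-a-valid-url"],
  { formats: ["markdown"] },
  2,         // pollInterval in seconds (same as the default)
  undefined, // idempotencyKey
  undefined, // webhook
  true       // ignoreInvalidURLs (new in 1.22.0)
);

if (result.success) {
  console.log("batch scraped", result.data.length, "documents");
} else {
  console.error("batch scrape failed:", result.error);
}
```
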
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "firecrawl",
-  "version": "1.21.0",
+  "version": "1.22.0",
   "description": "JavaScript SDK for Firecrawl API",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -26,7 +26,6 @@
   "license": "MIT",
   "dependencies": {
     "axios": "^1.6.8",
-    "isows": "^1.0.4",
     "typescript-event-target": "^1.1.1",
     "zod": "^3.23.8",
     "zod-to-json-schema": "^3.23.0"
@@ -36,8 +35,6 @@
   },
   "homepage": "https://github.com/mendableai/firecrawl#readme",
   "devDependencies": {
-    "uuid": "^9.0.1",
-    "dotenv": "^16.4.5",
     "@jest/globals": "^29.7.0",
     "@types/axios": "^0.14.0",
     "@types/dotenv": "^8.2.0",
@@ -45,10 +42,12 @@
     "@types/mocha": "^10.0.6",
     "@types/node": "^20.12.12",
     "@types/uuid": "^9.0.8",
+    "dotenv": "^16.4.5",
     "jest": "^29.7.0",
     "ts-jest": "^29.2.2",
     "tsup": "^8.2.4",
-    "typescript": "^5.4.5"
+    "typescript": "^5.4.5",
+    "uuid": "^9.0.1"
   },
   "keywords": [
     "firecrawl",
@@ -58,5 +57,8 @@
     "scraper",
     "api",
     "sdk"
-  ]
+  ],
+  "engines": {
+    "node": ">=22.0.0"
+  }
 }
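
The manifest drops isows from dependencies, reorders dotenv and uuid within devDependencies, and adds an `engines` requirement of Node >= 22, which provides the global `WebSocket` client that the rebuilt `CrawlWatcher` relies on. A small, purely illustrative guard a consumer could add before using the watcher APIs on older runtimes (the SDK itself performs no such check in this diff):

```ts
// Illustrative runtime check: CrawlWatcher in 1.22.0 constructs its socket
// from the global WebSocket constructor rather than importing one from isows.
if (typeof WebSocket === "undefined") {
  throw new Error(
    "firecrawl 1.22.0 expects a global WebSocket (Node >= 22 or a browser) for CrawlWatcher"
  );
}
```
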
package/src/index.ts
CHANGED
@@ -1,7 +1,6 @@
 import axios, { type AxiosResponse, type AxiosRequestHeaders, AxiosError } from "axios";
 import * as zt from "zod";
 import { zodToJsonSchema } from "zod-to-json-schema";
-import { WebSocket } from "isows";
 import { TypedEventTarget } from "typescript-event-target";
 
 /**
@@ -69,6 +68,11 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
   screenshot?: string;
   metadata?: FirecrawlDocumentMetadata;
   actions: ActionsSchema;
+  compare?: {
+    previousScrapeAt: string | null;
+    changeStatus: "new" | "same" | "changed" | "removed";
+    visibility: "visible" | "hidden";
+  };
   // v1 search only
   title?: string;
   description?: string;
@@ -79,7 +83,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
  * Defines the options and configurations available for scraping web content.
  */
 export interface CrawlScrapeOptions {
-  formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
+  formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "compare")[];
   headers?: Record<string, string>;
   includeTags?: string[];
   excludeTags?: string[];
@@ -141,6 +145,14 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
 
 export interface ActionsResult {
   screenshots: string[];
+  scrapes: ({
+    url: string;
+    html: string;
+  })[];
+  javascriptReturns: {
+    type: string;
+    value: unknown
+  }[];
 }
 
 /**
@@ -914,6 +926,7 @@ export default class FirecrawlApp {
    * @param pollInterval - Time in seconds for job status checks.
    * @param idempotencyKey - Optional idempotency key for the request.
    * @param webhook - Optional webhook for the batch scrape.
+   * @param ignoreInvalidURLs - Optional flag to ignore invalid URLs.
    * @returns The response from the crawl operation.
    */
   async batchScrapeUrls(
package/tsup.config.ts
CHANGED
@@ -6,4 +6,13 @@ export default defineConfig({
   dts: true,
   outDir: "dist",
   clean: true,
+  platform: "node",
+  target: "node22",
+  noExternal: ["typescript-event-target"],
+  esbuildOptions(options) {
+    options.define = {
+      ...options.define,
+      "process.env.NODE_ENV": JSON.stringify(process.env.NODE_ENV || "production"),
+    };
+  },
 });
package/dump.rdb
DELETED
Binary file