firecrawl 1.7.1 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/index.d.cts +16 -2
- package/dist/index.d.ts +16 -2
- package/package.json +1 -1
- package/src/index.ts +16 -2
package/README.md
CHANGED
|
@@ -147,7 +147,7 @@ watch.addEventListener("done", state => {
|
|
|
147
147
|
|
|
148
148
|
### Batch scraping multiple URLs
|
|
149
149
|
|
|
150
|
-
To batch scrape multiple URLs with error handling, use the `batchScrapeUrls` method. It takes the starting URLs and optional parameters as arguments. The `params` argument allows you to specify additional options for the
|
|
150
|
+
To batch scrape multiple URLs with error handling, use the `batchScrapeUrls` method. It takes the starting URLs and optional parameters as arguments. The `params` argument allows you to specify additional options for the batch scrape job, such as the output formats.
|
|
151
151
|
|
|
152
152
|
```js
|
|
153
153
|
const batchScrapeResponse = await app.batchScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], {
|
|
@@ -158,10 +158,10 @@ const batchScrapeResponse = await app.batchScrapeUrls(['https://firecrawl.dev',
|
|
|
158
158
|
|
|
159
159
|
#### Asynchronous batch scrape
|
|
160
160
|
|
|
161
|
-
To initiate an asynchronous batch scrape, utilize the `
|
|
161
|
+
To initiate an asynchronous batch scrape, utilize the `asyncBatchScrapeUrls` method. This method requires the starting URLs and optional parameters as inputs. The params argument enables you to define various settings for the scrape, such as the output formats. Upon successful initiation, this method returns an ID, which is essential for subsequently checking the status of the batch scrape.
|
|
162
162
|
|
|
163
163
|
```js
|
|
164
|
-
const
|
|
164
|
+
const asyncBatchScrapeResult = await app.asyncBatchScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });
|
|
165
165
|
```
|
|
166
166
|
|
|
167
167
|
#### Batch scrape with WebSockets
|
package/dist/index.d.cts
CHANGED
|
@@ -77,10 +77,17 @@ interface CrawlScrapeOptions {
|
|
|
77
77
|
onlyMainContent?: boolean;
|
|
78
78
|
waitFor?: number;
|
|
79
79
|
timeout?: number;
|
|
80
|
+
location?: {
|
|
81
|
+
country?: string;
|
|
82
|
+
languages?: string[];
|
|
83
|
+
};
|
|
84
|
+
skipTlsVerification?: boolean;
|
|
85
|
+
removeBase64Images?: boolean;
|
|
80
86
|
}
|
|
81
87
|
type Action = {
|
|
82
88
|
type: "wait";
|
|
83
|
-
milliseconds: number;
|
|
89
|
+
milliseconds?: number;
|
|
90
|
+
selector?: string;
|
|
84
91
|
} | {
|
|
85
92
|
type: "click";
|
|
86
93
|
selector: string;
|
|
@@ -95,7 +102,13 @@ type Action = {
|
|
|
95
102
|
key: string;
|
|
96
103
|
} | {
|
|
97
104
|
type: "scroll";
|
|
98
|
-
direction: "up" | "down";
|
|
105
|
+
direction?: "up" | "down";
|
|
106
|
+
selector?: string;
|
|
107
|
+
} | {
|
|
108
|
+
type: "scrape";
|
|
109
|
+
} | {
|
|
110
|
+
type: "executeJavascript";
|
|
111
|
+
script: string;
|
|
99
112
|
};
|
|
100
113
|
interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions {
|
|
101
114
|
extract?: {
|
|
@@ -131,6 +144,7 @@ interface CrawlParams {
|
|
|
131
144
|
ignoreSitemap?: boolean;
|
|
132
145
|
scrapeOptions?: CrawlScrapeOptions;
|
|
133
146
|
webhook?: string;
|
|
147
|
+
deduplicateSimilarURLs?: boolean;
|
|
134
148
|
}
|
|
135
149
|
/**
|
|
136
150
|
* Response interface for crawling operations.
|
package/dist/index.d.ts
CHANGED
|
@@ -77,10 +77,17 @@ interface CrawlScrapeOptions {
|
|
|
77
77
|
onlyMainContent?: boolean;
|
|
78
78
|
waitFor?: number;
|
|
79
79
|
timeout?: number;
|
|
80
|
+
location?: {
|
|
81
|
+
country?: string;
|
|
82
|
+
languages?: string[];
|
|
83
|
+
};
|
|
84
|
+
skipTlsVerification?: boolean;
|
|
85
|
+
removeBase64Images?: boolean;
|
|
80
86
|
}
|
|
81
87
|
type Action = {
|
|
82
88
|
type: "wait";
|
|
83
|
-
milliseconds: number;
|
|
89
|
+
milliseconds?: number;
|
|
90
|
+
selector?: string;
|
|
84
91
|
} | {
|
|
85
92
|
type: "click";
|
|
86
93
|
selector: string;
|
|
@@ -95,7 +102,13 @@ type Action = {
|
|
|
95
102
|
key: string;
|
|
96
103
|
} | {
|
|
97
104
|
type: "scroll";
|
|
98
|
-
direction: "up" | "down";
|
|
105
|
+
direction?: "up" | "down";
|
|
106
|
+
selector?: string;
|
|
107
|
+
} | {
|
|
108
|
+
type: "scrape";
|
|
109
|
+
} | {
|
|
110
|
+
type: "executeJavascript";
|
|
111
|
+
script: string;
|
|
99
112
|
};
|
|
100
113
|
interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions {
|
|
101
114
|
extract?: {
|
|
@@ -131,6 +144,7 @@ interface CrawlParams {
|
|
|
131
144
|
ignoreSitemap?: boolean;
|
|
132
145
|
scrapeOptions?: CrawlScrapeOptions;
|
|
133
146
|
webhook?: string;
|
|
147
|
+
deduplicateSimilarURLs?: boolean;
|
|
134
148
|
}
|
|
135
149
|
/**
|
|
136
150
|
* Response interface for crawling operations.
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -82,11 +82,18 @@ export interface CrawlScrapeOptions {
|
|
|
82
82
|
onlyMainContent?: boolean;
|
|
83
83
|
waitFor?: number;
|
|
84
84
|
timeout?: number;
|
|
85
|
+
location?: {
|
|
86
|
+
country?: string;
|
|
87
|
+
languages?: string[];
|
|
88
|
+
};
|
|
89
|
+
skipTlsVerification?: boolean;
|
|
90
|
+
removeBase64Images?: boolean;
|
|
85
91
|
}
|
|
86
92
|
|
|
87
93
|
export type Action = {
|
|
88
94
|
type: "wait",
|
|
89
|
-
milliseconds: number,
|
|
95
|
+
milliseconds?: number,
|
|
96
|
+
selector?: string,
|
|
90
97
|
} | {
|
|
91
98
|
type: "click",
|
|
92
99
|
selector: string,
|
|
@@ -101,7 +108,13 @@ export type Action = {
|
|
|
101
108
|
key: string,
|
|
102
109
|
} | {
|
|
103
110
|
type: "scroll",
|
|
104
|
-
direction: "up" | "down",
|
|
111
|
+
direction?: "up" | "down",
|
|
112
|
+
selector?: string,
|
|
113
|
+
} | {
|
|
114
|
+
type: "scrape",
|
|
115
|
+
} | {
|
|
116
|
+
type: "executeJavascript",
|
|
117
|
+
script: string,
|
|
105
118
|
};
|
|
106
119
|
|
|
107
120
|
export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema extends (Action[] | undefined) = undefined> extends CrawlScrapeOptions {
|
|
@@ -141,6 +154,7 @@ export interface CrawlParams {
|
|
|
141
154
|
ignoreSitemap?: boolean;
|
|
142
155
|
scrapeOptions?: CrawlScrapeOptions;
|
|
143
156
|
webhook?: string;
|
|
157
|
+
deduplicateSimilarURLs?: boolean;
|
|
144
158
|
}
|
|
145
159
|
|
|
146
160
|
/**
|