@mendable/firecrawl-js 4.19.0 → 4.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/audit-ci.jsonc +5 -2
- package/dist/{chunk-JJY4NJXL.js → chunk-OVLAEYRZ.js} +1 -1
- package/dist/index.cjs +166 -70
- package/dist/index.d.cts +6 -2
- package/dist/index.d.ts +6 -2
- package/dist/index.js +167 -71
- package/dist/{package-HMEPZJ3J.js → package-4Q7WA3UI.js} +1 -1
- package/package.json +1 -1
- package/src/__tests__/unit/v2/scrape-browser.unit.test.ts +39 -12
- package/src/v2/methods/batch.ts +89 -28
- package/src/v2/methods/browser.ts +19 -11
- package/src/v2/methods/map.ts +36 -9
- package/src/v2/methods/parse.ts +15 -9
- package/src/v2/methods/scrape.ts +32 -13
- package/src/v2/methods/search.ts +38 -10
- package/src/v2/utils/httpClient.ts +36 -16
- package/src/v2/watcher.ts +44 -24
package/src/v2/methods/batch.ts
CHANGED
|
@@ -11,7 +11,11 @@ import {
|
|
|
11
11
|
import { HttpClient } from "../utils/httpClient";
|
|
12
12
|
import { ensureValidScrapeOptions } from "../utils/validation";
|
|
13
13
|
import { fetchAllPages } from "../utils/pagination";
|
|
14
|
-
import {
|
|
14
|
+
import {
|
|
15
|
+
normalizeAxiosError,
|
|
16
|
+
throwForBadResponse,
|
|
17
|
+
isRetryableError,
|
|
18
|
+
} from "../utils/errorHandler";
|
|
15
19
|
|
|
16
20
|
export async function startBatchScrape(
|
|
17
21
|
http: HttpClient,
|
|
@@ -26,9 +30,10 @@ export async function startBatchScrape(
|
|
|
26
30
|
idempotencyKey,
|
|
27
31
|
integration,
|
|
28
32
|
origin,
|
|
29
|
-
}: BatchScrapeOptions = {}
|
|
33
|
+
}: BatchScrapeOptions = {},
|
|
30
34
|
): Promise<BatchScrapeResponse> {
|
|
31
|
-
if (!Array.isArray(urls) || urls.length === 0)
|
|
35
|
+
if (!Array.isArray(urls) || urls.length === 0)
|
|
36
|
+
throw new Error("URLs list cannot be empty");
|
|
32
37
|
const payload: Record<string, unknown> = { urls };
|
|
33
38
|
if (options) {
|
|
34
39
|
ensureValidScrapeOptions(options);
|
|
@@ -39,16 +44,29 @@ export async function startBatchScrape(
|
|
|
39
44
|
if (ignoreInvalidURLs != null) payload.ignoreInvalidURLs = ignoreInvalidURLs;
|
|
40
45
|
if (maxConcurrency != null) payload.maxConcurrency = maxConcurrency;
|
|
41
46
|
if (zeroDataRetention != null) payload.zeroDataRetention = zeroDataRetention;
|
|
42
|
-
if (integration != null && integration.trim())
|
|
47
|
+
if (integration != null && integration.trim())
|
|
48
|
+
payload.integration = integration.trim();
|
|
43
49
|
if (origin) payload.origin = origin;
|
|
44
50
|
|
|
45
51
|
try {
|
|
46
52
|
const headers = http.prepareHeaders(idempotencyKey);
|
|
47
|
-
const res = await http.post<{
|
|
48
|
-
|
|
49
|
-
|
|
53
|
+
const res = await http.post<{
|
|
54
|
+
success: boolean;
|
|
55
|
+
id: string;
|
|
56
|
+
url: string;
|
|
57
|
+
invalidURLs?: string[];
|
|
58
|
+
error?: string;
|
|
59
|
+
}>("/v2/batch/scrape", payload, { headers });
|
|
60
|
+
if (res.status !== 200 || !res.data?.success)
|
|
61
|
+
throwForBadResponse(res, "start batch scrape");
|
|
62
|
+
return {
|
|
63
|
+
id: res.data.id,
|
|
64
|
+
url: res.data.url,
|
|
65
|
+
invalidURLs: res.data.invalidURLs || undefined,
|
|
66
|
+
};
|
|
50
67
|
} catch (err: any) {
|
|
51
|
-
if (err?.isAxiosError)
|
|
68
|
+
if (err?.isAxiosError)
|
|
69
|
+
return normalizeAxiosError(err, "start batch scrape");
|
|
52
70
|
throw err;
|
|
53
71
|
}
|
|
54
72
|
}
|
|
@@ -56,11 +74,21 @@ export async function startBatchScrape(
|
|
|
56
74
|
export async function getBatchScrapeStatus(
|
|
57
75
|
http: HttpClient,
|
|
58
76
|
jobId: string,
|
|
59
|
-
pagination?: PaginationConfig
|
|
77
|
+
pagination?: PaginationConfig,
|
|
60
78
|
): Promise<BatchScrapeJob> {
|
|
61
79
|
try {
|
|
62
|
-
const res = await http.get<{
|
|
63
|
-
|
|
80
|
+
const res = await http.get<{
|
|
81
|
+
success: boolean;
|
|
82
|
+
status: BatchScrapeJob["status"];
|
|
83
|
+
completed?: number;
|
|
84
|
+
total?: number;
|
|
85
|
+
creditsUsed?: number;
|
|
86
|
+
expiresAt?: string;
|
|
87
|
+
next?: string | null;
|
|
88
|
+
data?: Document[];
|
|
89
|
+
}>(`/v2/batch/scrape/${jobId}`);
|
|
90
|
+
if (res.status !== 200 || !res.data?.success)
|
|
91
|
+
throwForBadResponse(res, "get batch scrape status");
|
|
64
92
|
const body = res.data;
|
|
65
93
|
const initialDocs = (body.data || []) as Document[];
|
|
66
94
|
const auto = pagination?.autoPaginate ?? true;
|
|
@@ -77,7 +105,12 @@ export async function getBatchScrapeStatus(
|
|
|
77
105
|
};
|
|
78
106
|
}
|
|
79
107
|
|
|
80
|
-
const aggregated = await fetchAllPages(
|
|
108
|
+
const aggregated = await fetchAllPages(
|
|
109
|
+
http,
|
|
110
|
+
body.next,
|
|
111
|
+
initialDocs,
|
|
112
|
+
pagination,
|
|
113
|
+
);
|
|
81
114
|
return {
|
|
82
115
|
id: jobId,
|
|
83
116
|
status: body.status,
|
|
@@ -89,35 +122,57 @@ export async function getBatchScrapeStatus(
|
|
|
89
122
|
data: aggregated,
|
|
90
123
|
};
|
|
91
124
|
} catch (err: any) {
|
|
92
|
-
if (err?.isAxiosError)
|
|
125
|
+
if (err?.isAxiosError)
|
|
126
|
+
return normalizeAxiosError(err, "get batch scrape status");
|
|
93
127
|
throw err;
|
|
94
128
|
}
|
|
95
129
|
}
|
|
96
130
|
|
|
97
|
-
export async function cancelBatchScrape(
|
|
131
|
+
export async function cancelBatchScrape(
|
|
132
|
+
http: HttpClient,
|
|
133
|
+
jobId: string,
|
|
134
|
+
): Promise<boolean> {
|
|
98
135
|
try {
|
|
99
|
-
const res = await http.delete<{ status: string }>(
|
|
136
|
+
const res = await http.delete<{ status: string }>(
|
|
137
|
+
`/v2/batch/scrape/${jobId}`,
|
|
138
|
+
);
|
|
100
139
|
if (res.status !== 200) throwForBadResponse(res, "cancel batch scrape");
|
|
101
140
|
return res.data?.status === "cancelled";
|
|
102
141
|
} catch (err: any) {
|
|
103
|
-
if (err?.isAxiosError)
|
|
142
|
+
if (err?.isAxiosError)
|
|
143
|
+
return normalizeAxiosError(err, "cancel batch scrape");
|
|
104
144
|
throw err;
|
|
105
145
|
}
|
|
106
146
|
}
|
|
107
147
|
|
|
108
|
-
export async function getBatchScrapeErrors(
|
|
148
|
+
export async function getBatchScrapeErrors(
|
|
149
|
+
http: HttpClient,
|
|
150
|
+
jobId: string,
|
|
151
|
+
): Promise<CrawlErrorsResponse> {
|
|
109
152
|
try {
|
|
110
|
-
const res = await http.get<{
|
|
153
|
+
const res = await http.get<{
|
|
154
|
+
success?: boolean;
|
|
155
|
+
data?: { errors: Array<Record<string, string>>; robotsBlocked: string[] };
|
|
156
|
+
}>(`/v2/batch/scrape/${jobId}/errors`);
|
|
111
157
|
if (res.status !== 200) throwForBadResponse(res, "get batch scrape errors");
|
|
112
158
|
const payload = res.data?.data ?? (res.data as any);
|
|
113
|
-
return {
|
|
159
|
+
return {
|
|
160
|
+
errors: payload.errors || [],
|
|
161
|
+
robotsBlocked: payload.robotsBlocked || [],
|
|
162
|
+
};
|
|
114
163
|
} catch (err: any) {
|
|
115
|
-
if (err?.isAxiosError)
|
|
164
|
+
if (err?.isAxiosError)
|
|
165
|
+
return normalizeAxiosError(err, "get batch scrape errors");
|
|
116
166
|
throw err;
|
|
117
167
|
}
|
|
118
168
|
}
|
|
119
169
|
|
|
120
|
-
export async function waitForBatchCompletion(
|
|
170
|
+
export async function waitForBatchCompletion(
|
|
171
|
+
http: HttpClient,
|
|
172
|
+
jobId: string,
|
|
173
|
+
pollInterval = 2,
|
|
174
|
+
timeout?: number,
|
|
175
|
+
): Promise<BatchScrapeJob> {
|
|
121
176
|
const start = Date.now();
|
|
122
177
|
|
|
123
178
|
while (true) {
|
|
@@ -137,7 +192,7 @@ export async function waitForBatchCompletion(http: HttpClient, jobId: string, po
|
|
|
137
192
|
err.status,
|
|
138
193
|
err.code,
|
|
139
194
|
err.details,
|
|
140
|
-
jobId
|
|
195
|
+
jobId,
|
|
141
196
|
);
|
|
142
197
|
throw errorWithJobId;
|
|
143
198
|
}
|
|
@@ -147,24 +202,30 @@ export async function waitForBatchCompletion(http: HttpClient, jobId: string, po
|
|
|
147
202
|
}
|
|
148
203
|
|
|
149
204
|
if (timeout != null && Date.now() - start > timeout * 1000) {
|
|
150
|
-
throw new JobTimeoutError(jobId, timeout,
|
|
205
|
+
throw new JobTimeoutError(jobId, timeout, "batch");
|
|
151
206
|
}
|
|
152
|
-
|
|
153
|
-
await new Promise(
|
|
207
|
+
|
|
208
|
+
await new Promise(r => setTimeout(r, Math.max(1000, pollInterval * 1000)));
|
|
154
209
|
}
|
|
155
210
|
}
|
|
156
211
|
|
|
157
212
|
export async function batchScrape(
|
|
158
213
|
http: HttpClient,
|
|
159
214
|
urls: string[],
|
|
160
|
-
opts: BatchScrapeOptions & { pollInterval?: number; timeout?: number } = {}
|
|
215
|
+
opts: BatchScrapeOptions & { pollInterval?: number; timeout?: number } = {},
|
|
161
216
|
): Promise<BatchScrapeJob> {
|
|
162
217
|
const start = await startBatchScrape(http, urls, opts);
|
|
163
|
-
return waitForBatchCompletion(
|
|
218
|
+
return waitForBatchCompletion(
|
|
219
|
+
http,
|
|
220
|
+
start.id,
|
|
221
|
+
opts.pollInterval ?? 2,
|
|
222
|
+
opts.timeout,
|
|
223
|
+
);
|
|
164
224
|
}
|
|
165
225
|
|
|
166
226
|
export function chunkUrls(urls: string[], chunkSize = 100): string[][] {
|
|
167
227
|
const chunks: string[][] = [];
|
|
168
|
-
for (let i = 0; i < urls.length; i += chunkSize)
|
|
228
|
+
for (let i = 0; i < urls.length; i += chunkSize)
|
|
229
|
+
chunks.push(urls.slice(i, i + chunkSize));
|
|
169
230
|
return chunks;
|
|
170
231
|
}
|
package/src/v2/methods/browser.ts
CHANGED
|
@@ -5,7 +5,10 @@ import type {
|
|
|
5
5
|
BrowserListResponse,
|
|
6
6
|
} from "../types";
|
|
7
7
|
import { HttpClient } from "../utils/httpClient";
|
|
8
|
-
import {
|
|
8
|
+
import {
|
|
9
|
+
normalizeAxiosError,
|
|
10
|
+
throwForBadResponse,
|
|
11
|
+
} from "../utils/errorHandler";
|
|
9
12
|
|
|
10
13
|
export async function browser(
|
|
11
14
|
http: HttpClient,
|
|
@@ -19,7 +22,7 @@ export async function browser(
|
|
|
19
22
|
};
|
|
20
23
|
integration?: string;
|
|
21
24
|
origin?: string;
|
|
22
|
-
} = {}
|
|
25
|
+
} = {},
|
|
23
26
|
): Promise<BrowserCreateResponse> {
|
|
24
27
|
const body: Record<string, unknown> = {};
|
|
25
28
|
if (args.ttl != null) body.ttl = args.ttl;
|
|
@@ -34,7 +37,8 @@ export async function browser(
|
|
|
34
37
|
if (res.status !== 200) throwForBadResponse(res, "create browser session");
|
|
35
38
|
return res.data;
|
|
36
39
|
} catch (err: any) {
|
|
37
|
-
if (err?.isAxiosError)
|
|
40
|
+
if (err?.isAxiosError)
|
|
41
|
+
return normalizeAxiosError(err, "create browser session");
|
|
38
42
|
throw err;
|
|
39
43
|
}
|
|
40
44
|
}
|
|
@@ -46,7 +50,7 @@ export async function browserExecute(
|
|
|
46
50
|
code: string;
|
|
47
51
|
language?: "python" | "node" | "bash";
|
|
48
52
|
timeout?: number;
|
|
49
|
-
}
|
|
53
|
+
},
|
|
50
54
|
): Promise<BrowserExecuteResponse> {
|
|
51
55
|
const body: Record<string, unknown> = {
|
|
52
56
|
code: args.code,
|
|
@@ -57,28 +61,31 @@ export async function browserExecute(
|
|
|
57
61
|
try {
|
|
58
62
|
const res = await http.post<BrowserExecuteResponse>(
|
|
59
63
|
`/v2/browser/${sessionId}/execute`,
|
|
60
|
-
body
|
|
64
|
+
body,
|
|
65
|
+
args.timeout != null ? { timeoutMs: args.timeout * 1000 + 5000 } : {},
|
|
61
66
|
);
|
|
62
67
|
if (res.status !== 200) throwForBadResponse(res, "execute browser code");
|
|
63
68
|
return res.data;
|
|
64
69
|
} catch (err: any) {
|
|
65
|
-
if (err?.isAxiosError)
|
|
70
|
+
if (err?.isAxiosError)
|
|
71
|
+
return normalizeAxiosError(err, "execute browser code");
|
|
66
72
|
throw err;
|
|
67
73
|
}
|
|
68
74
|
}
|
|
69
75
|
|
|
70
76
|
export async function deleteBrowser(
|
|
71
77
|
http: HttpClient,
|
|
72
|
-
sessionId: string
|
|
78
|
+
sessionId: string,
|
|
73
79
|
): Promise<BrowserDeleteResponse> {
|
|
74
80
|
try {
|
|
75
81
|
const res = await http.delete<BrowserDeleteResponse>(
|
|
76
|
-
`/v2/browser/${sessionId}
|
|
82
|
+
`/v2/browser/${sessionId}`,
|
|
77
83
|
);
|
|
78
84
|
if (res.status !== 200) throwForBadResponse(res, "delete browser session");
|
|
79
85
|
return res.data;
|
|
80
86
|
} catch (err: any) {
|
|
81
|
-
if (err?.isAxiosError)
|
|
87
|
+
if (err?.isAxiosError)
|
|
88
|
+
return normalizeAxiosError(err, "delete browser session");
|
|
82
89
|
throw err;
|
|
83
90
|
}
|
|
84
91
|
}
|
|
@@ -87,7 +94,7 @@ export async function listBrowsers(
|
|
|
87
94
|
http: HttpClient,
|
|
88
95
|
args: {
|
|
89
96
|
status?: "active" | "destroyed";
|
|
90
|
-
} = {}
|
|
97
|
+
} = {},
|
|
91
98
|
): Promise<BrowserListResponse> {
|
|
92
99
|
let endpoint = "/v2/browser";
|
|
93
100
|
if (args.status) endpoint += `?status=${args.status}`;
|
|
@@ -97,7 +104,8 @@ export async function listBrowsers(
|
|
|
97
104
|
if (res.status !== 200) throwForBadResponse(res, "list browser sessions");
|
|
98
105
|
return res.data;
|
|
99
106
|
} catch (err: any) {
|
|
100
|
-
if (err?.isAxiosError)
|
|
107
|
+
if (err?.isAxiosError)
|
|
108
|
+
return normalizeAxiosError(err, "list browser sessions");
|
|
101
109
|
throw err;
|
|
102
110
|
}
|
|
103
111
|
}
|
package/src/v2/methods/map.ts
CHANGED
|
@@ -1,28 +1,51 @@
|
|
|
1
1
|
import { type MapData, type MapOptions, type SearchResultWeb } from "../types";
|
|
2
2
|
import { HttpClient } from "../utils/httpClient";
|
|
3
|
-
import {
|
|
3
|
+
import {
|
|
4
|
+
throwForBadResponse,
|
|
5
|
+
normalizeAxiosError,
|
|
6
|
+
} from "../utils/errorHandler";
|
|
4
7
|
|
|
5
|
-
function prepareMapPayload(
|
|
8
|
+
function prepareMapPayload(
|
|
9
|
+
url: string,
|
|
10
|
+
options?: MapOptions,
|
|
11
|
+
): Record<string, unknown> {
|
|
6
12
|
if (!url || !url.trim()) throw new Error("URL cannot be empty");
|
|
7
13
|
const payload: Record<string, unknown> = { url: url.trim() };
|
|
8
14
|
if (options) {
|
|
9
15
|
if (options.sitemap != null) payload.sitemap = options.sitemap;
|
|
10
16
|
if (options.search != null) payload.search = options.search;
|
|
11
|
-
if (options.includeSubdomains != null)
|
|
12
|
-
|
|
17
|
+
if (options.includeSubdomains != null)
|
|
18
|
+
payload.includeSubdomains = options.includeSubdomains;
|
|
19
|
+
if (options.ignoreQueryParameters != null)
|
|
20
|
+
payload.ignoreQueryParameters = options.ignoreQueryParameters;
|
|
13
21
|
if (options.limit != null) payload.limit = options.limit;
|
|
14
22
|
if (options.timeout != null) payload.timeout = options.timeout;
|
|
15
|
-
if (options.integration != null && options.integration.trim())
|
|
23
|
+
if (options.integration != null && options.integration.trim())
|
|
24
|
+
payload.integration = options.integration.trim();
|
|
16
25
|
if (options.origin) payload.origin = options.origin;
|
|
17
26
|
if (options.location != null) payload.location = options.location;
|
|
18
27
|
}
|
|
19
28
|
return payload;
|
|
20
29
|
}
|
|
21
30
|
|
|
22
|
-
export async function map(
|
|
31
|
+
export async function map(
|
|
32
|
+
http: HttpClient,
|
|
33
|
+
url: string,
|
|
34
|
+
options?: MapOptions,
|
|
35
|
+
): Promise<MapData> {
|
|
23
36
|
const payload = prepareMapPayload(url, options);
|
|
24
37
|
try {
|
|
25
|
-
const res = await http.post<{
|
|
38
|
+
const res = await http.post<{
|
|
39
|
+
success: boolean;
|
|
40
|
+
error?: string;
|
|
41
|
+
links?: Array<string | SearchResultWeb>;
|
|
42
|
+
}>(
|
|
43
|
+
"/v2/map",
|
|
44
|
+
payload,
|
|
45
|
+
typeof options?.timeout === "number"
|
|
46
|
+
? { timeoutMs: options.timeout + 5000 }
|
|
47
|
+
: {},
|
|
48
|
+
);
|
|
26
49
|
if (res.status !== 200 || !res.data?.success) {
|
|
27
50
|
throwForBadResponse(res, "map");
|
|
28
51
|
}
|
|
@@ -30,7 +53,12 @@ export async function map(http: HttpClient, url: string, options?: MapOptions):
|
|
|
30
53
|
const links: SearchResultWeb[] = [];
|
|
31
54
|
for (const item of linksIn) {
|
|
32
55
|
if (typeof item === "string") links.push({ url: item });
|
|
33
|
-
else if (item && typeof item === "object")
|
|
56
|
+
else if (item && typeof item === "object")
|
|
57
|
+
links.push({
|
|
58
|
+
url: item.url,
|
|
59
|
+
title: (item as any).title,
|
|
60
|
+
description: (item as any).description,
|
|
61
|
+
});
|
|
34
62
|
}
|
|
35
63
|
return { links };
|
|
36
64
|
} catch (err: any) {
|
|
@@ -38,4 +66,3 @@ export async function map(http: HttpClient, url: string, options?: MapOptions):
|
|
|
38
66
|
throw err;
|
|
39
67
|
}
|
|
40
68
|
}
|
|
41
|
-
|
package/src/v2/methods/parse.ts
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import { type Document, type ParseFile, type ParseOptions } from "../types";
|
|
2
2
|
import { HttpClient } from "../utils/httpClient";
|
|
3
3
|
import { ensureValidParseOptions } from "../utils/validation";
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
throwForBadResponse,
|
|
6
|
+
normalizeAxiosError,
|
|
7
|
+
} from "../utils/errorHandler";
|
|
5
8
|
import { getVersion } from "../utils/getVersion";
|
|
6
9
|
|
|
7
10
|
function toUploadBlob(input: ParseFile["data"], contentType?: string): Blob {
|
|
@@ -25,7 +28,9 @@ function toUploadBlob(input: ParseFile["data"], contentType?: string): Blob {
|
|
|
25
28
|
}
|
|
26
29
|
|
|
27
30
|
if (typeof input === "string") {
|
|
28
|
-
return new Blob([input], {
|
|
31
|
+
return new Blob([input], {
|
|
32
|
+
type: contentType ?? "text/plain; charset=utf-8",
|
|
33
|
+
});
|
|
29
34
|
}
|
|
30
35
|
|
|
31
36
|
throw new Error("Unsupported parse file data type");
|
|
@@ -57,7 +62,7 @@ export async function parse(
|
|
|
57
62
|
origin:
|
|
58
63
|
typeof options?.origin === "string" && options.origin.includes("mcp")
|
|
59
64
|
? options.origin
|
|
60
|
-
: options?.origin ?? `js-sdk@${version}
|
|
65
|
+
: (options?.origin ?? `js-sdk@${version}`),
|
|
61
66
|
};
|
|
62
67
|
|
|
63
68
|
const formData = new FormData();
|
|
@@ -68,17 +73,18 @@ export async function parse(
|
|
|
68
73
|
file.filename.trim(),
|
|
69
74
|
);
|
|
70
75
|
|
|
71
|
-
const requestTimeoutMs =
|
|
72
|
-
typeof normalizedOptions.timeout === "number"
|
|
73
|
-
? normalizedOptions.timeout + 5000
|
|
74
|
-
: undefined;
|
|
75
|
-
|
|
76
76
|
try {
|
|
77
77
|
const res = await http.postMultipart<{
|
|
78
78
|
success: boolean;
|
|
79
79
|
data?: Document;
|
|
80
80
|
error?: string;
|
|
81
|
-
}>(
|
|
81
|
+
}>(
|
|
82
|
+
"/v2/parse",
|
|
83
|
+
formData,
|
|
84
|
+
typeof normalizedOptions.timeout === "number"
|
|
85
|
+
? { timeoutMs: normalizedOptions.timeout + 5000 }
|
|
86
|
+
: {},
|
|
87
|
+
);
|
|
82
88
|
if (res.status !== 200 || !res.data?.success) {
|
|
83
89
|
throwForBadResponse(res, "parse");
|
|
84
90
|
}
|
package/src/v2/methods/scrape.ts
CHANGED
|
@@ -7,9 +7,16 @@ import {
|
|
|
7
7
|
} from "../types";
|
|
8
8
|
import { HttpClient } from "../utils/httpClient";
|
|
9
9
|
import { ensureValidScrapeOptions } from "../utils/validation";
|
|
10
|
-
import {
|
|
10
|
+
import {
|
|
11
|
+
throwForBadResponse,
|
|
12
|
+
normalizeAxiosError,
|
|
13
|
+
} from "../utils/errorHandler";
|
|
11
14
|
|
|
12
|
-
export async function scrape(
|
|
15
|
+
export async function scrape(
|
|
16
|
+
http: HttpClient,
|
|
17
|
+
url: string,
|
|
18
|
+
options?: ScrapeOptions,
|
|
19
|
+
): Promise<Document> {
|
|
13
20
|
if (!url || !url.trim()) {
|
|
14
21
|
throw new Error("URL cannot be empty");
|
|
15
22
|
}
|
|
@@ -19,7 +26,17 @@ export async function scrape(http: HttpClient, url: string, options?: ScrapeOpti
|
|
|
19
26
|
if (options) Object.assign(payload, options);
|
|
20
27
|
|
|
21
28
|
try {
|
|
22
|
-
const res = await http.post<{
|
|
29
|
+
const res = await http.post<{
|
|
30
|
+
success: boolean;
|
|
31
|
+
data?: Document;
|
|
32
|
+
error?: string;
|
|
33
|
+
}>(
|
|
34
|
+
"/v2/scrape",
|
|
35
|
+
payload,
|
|
36
|
+
typeof options?.timeout === "number"
|
|
37
|
+
? { timeoutMs: options.timeout + 5000 }
|
|
38
|
+
: {},
|
|
39
|
+
);
|
|
23
40
|
if (res.status !== 200 || !res.data?.success) {
|
|
24
41
|
throwForBadResponse(res, "scrape");
|
|
25
42
|
}
|
|
@@ -33,7 +50,7 @@ export async function scrape(http: HttpClient, url: string, options?: ScrapeOpti
|
|
|
33
50
|
export async function interact(
|
|
34
51
|
http: HttpClient,
|
|
35
52
|
jobId: string,
|
|
36
|
-
args: ScrapeExecuteRequest
|
|
53
|
+
args: ScrapeExecuteRequest,
|
|
37
54
|
): Promise<ScrapeExecuteResponse> {
|
|
38
55
|
if (!jobId || !jobId.trim()) {
|
|
39
56
|
throw new Error("Job ID cannot be empty");
|
|
@@ -54,19 +71,22 @@ export async function interact(
|
|
|
54
71
|
try {
|
|
55
72
|
const res = await http.post<ScrapeExecuteResponse>(
|
|
56
73
|
`/v2/scrape/${jobId}/interact`,
|
|
57
|
-
body
|
|
74
|
+
body,
|
|
75
|
+
args.timeout != null ? { timeoutMs: args.timeout * 1000 + 5000 } : {},
|
|
58
76
|
);
|
|
59
|
-
if (res.status !== 200)
|
|
77
|
+
if (res.status !== 200)
|
|
78
|
+
throwForBadResponse(res, "interact with scrape browser");
|
|
60
79
|
return res.data;
|
|
61
80
|
} catch (err: any) {
|
|
62
|
-
if (err?.isAxiosError)
|
|
81
|
+
if (err?.isAxiosError)
|
|
82
|
+
return normalizeAxiosError(err, "interact with scrape browser");
|
|
63
83
|
throw err;
|
|
64
84
|
}
|
|
65
85
|
}
|
|
66
86
|
|
|
67
87
|
export async function stopInteraction(
|
|
68
88
|
http: HttpClient,
|
|
69
|
-
jobId: string
|
|
89
|
+
jobId: string,
|
|
70
90
|
): Promise<ScrapeBrowserDeleteResponse> {
|
|
71
91
|
if (!jobId || !jobId.trim()) {
|
|
72
92
|
throw new Error("Job ID cannot be empty");
|
|
@@ -74,7 +94,7 @@ export async function stopInteraction(
|
|
|
74
94
|
|
|
75
95
|
try {
|
|
76
96
|
const res = await http.delete<ScrapeBrowserDeleteResponse>(
|
|
77
|
-
`/v2/scrape/${jobId}/interact
|
|
97
|
+
`/v2/scrape/${jobId}/interact`,
|
|
78
98
|
);
|
|
79
99
|
if (res.status !== 200) throwForBadResponse(res, "stop interaction");
|
|
80
100
|
return res.data;
|
|
@@ -88,7 +108,7 @@ export async function stopInteraction(
|
|
|
88
108
|
export async function scrapeExecute(
|
|
89
109
|
http: HttpClient,
|
|
90
110
|
jobId: string,
|
|
91
|
-
args: ScrapeExecuteRequest
|
|
111
|
+
args: ScrapeExecuteRequest,
|
|
92
112
|
): Promise<ScrapeExecuteResponse> {
|
|
93
113
|
return interact(http, jobId, args);
|
|
94
114
|
}
|
|
@@ -96,7 +116,7 @@ export async function scrapeExecute(
|
|
|
96
116
|
/** @deprecated Use stopInteraction(). */
|
|
97
117
|
export async function stopInteractiveBrowser(
|
|
98
118
|
http: HttpClient,
|
|
99
|
-
jobId: string
|
|
119
|
+
jobId: string,
|
|
100
120
|
): Promise<ScrapeBrowserDeleteResponse> {
|
|
101
121
|
return stopInteraction(http, jobId);
|
|
102
122
|
}
|
|
@@ -104,8 +124,7 @@ export async function stopInteractiveBrowser(
|
|
|
104
124
|
/** @deprecated Use stopInteraction(). */
|
|
105
125
|
export async function deleteScrapeBrowser(
|
|
106
126
|
http: HttpClient,
|
|
107
|
-
jobId: string
|
|
127
|
+
jobId: string,
|
|
108
128
|
): Promise<ScrapeBrowserDeleteResponse> {
|
|
109
129
|
return stopInteraction(http, jobId);
|
|
110
130
|
}
|
|
111
|
-
|
package/src/v2/methods/search.ts
CHANGED
|
@@ -1,12 +1,25 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
type Document,
|
|
3
|
+
type SearchData,
|
|
4
|
+
type SearchRequest,
|
|
5
|
+
type SearchResultWeb,
|
|
6
|
+
type ScrapeOptions,
|
|
7
|
+
type SearchResultNews,
|
|
8
|
+
type SearchResultImages,
|
|
9
|
+
} from "../types";
|
|
2
10
|
import { HttpClient } from "../utils/httpClient";
|
|
3
11
|
import { ensureValidScrapeOptions } from "../utils/validation";
|
|
4
|
-
import {
|
|
12
|
+
import {
|
|
13
|
+
throwForBadResponse,
|
|
14
|
+
normalizeAxiosError,
|
|
15
|
+
} from "../utils/errorHandler";
|
|
5
16
|
|
|
6
17
|
function prepareSearchPayload(req: SearchRequest): Record<string, unknown> {
|
|
7
18
|
if (!req.query || !req.query.trim()) throw new Error("Query cannot be empty");
|
|
8
|
-
if (req.limit != null && req.limit <= 0)
|
|
9
|
-
|
|
19
|
+
if (req.limit != null && req.limit <= 0)
|
|
20
|
+
throw new Error("limit must be positive");
|
|
21
|
+
if (req.timeout != null && req.timeout <= 0)
|
|
22
|
+
throw new Error("timeout must be positive");
|
|
10
23
|
const payload: Record<string, unknown> = {
|
|
11
24
|
query: req.query,
|
|
12
25
|
};
|
|
@@ -15,9 +28,11 @@ function prepareSearchPayload(req: SearchRequest): Record<string, unknown> {
|
|
|
15
28
|
if (req.limit != null) payload.limit = req.limit;
|
|
16
29
|
if (req.tbs != null) payload.tbs = req.tbs;
|
|
17
30
|
if (req.location != null) payload.location = req.location;
|
|
18
|
-
if (req.ignoreInvalidURLs != null)
|
|
31
|
+
if (req.ignoreInvalidURLs != null)
|
|
32
|
+
payload.ignoreInvalidURLs = req.ignoreInvalidURLs;
|
|
19
33
|
if (req.timeout != null) payload.timeout = req.timeout;
|
|
20
|
-
if (req.integration && req.integration.trim())
|
|
34
|
+
if (req.integration && req.integration.trim())
|
|
35
|
+
payload.integration = req.integration.trim();
|
|
21
36
|
if (req.origin) payload.origin = req.origin;
|
|
22
37
|
if (req.scrapeOptions) {
|
|
23
38
|
ensureValidScrapeOptions(req.scrapeOptions as ScrapeOptions);
|
|
@@ -51,10 +66,23 @@ function transformArray<ResultType>(arr: any[]): Array<ResultType | Document> {
|
|
|
51
66
|
return results;
|
|
52
67
|
}
|
|
53
68
|
|
|
54
|
-
export async function search(
|
|
69
|
+
export async function search(
|
|
70
|
+
http: HttpClient,
|
|
71
|
+
request: SearchRequest,
|
|
72
|
+
): Promise<SearchData> {
|
|
55
73
|
const payload = prepareSearchPayload(request);
|
|
56
74
|
try {
|
|
57
|
-
const res = await http.post<{
|
|
75
|
+
const res = await http.post<{
|
|
76
|
+
success: boolean;
|
|
77
|
+
data?: Record<string, unknown>;
|
|
78
|
+
error?: string;
|
|
79
|
+
}>(
|
|
80
|
+
"/v2/search",
|
|
81
|
+
payload,
|
|
82
|
+
typeof request.timeout === "number"
|
|
83
|
+
? { timeoutMs: request.timeout + 5000 }
|
|
84
|
+
: {},
|
|
85
|
+
);
|
|
58
86
|
if (res.status !== 200 || !res.data?.success) {
|
|
59
87
|
throwForBadResponse(res, "search");
|
|
60
88
|
}
|
|
@@ -62,11 +90,11 @@ export async function search(http: HttpClient, request: SearchRequest): Promise<
|
|
|
62
90
|
const out: SearchData = {};
|
|
63
91
|
if (data.web) out.web = transformArray<SearchResultWeb>(data.web);
|
|
64
92
|
if (data.news) out.news = transformArray<SearchResultNews>(data.news);
|
|
65
|
-
if (data.images)
|
|
93
|
+
if (data.images)
|
|
94
|
+
out.images = transformArray<SearchResultImages>(data.images);
|
|
66
95
|
return out;
|
|
67
96
|
} catch (err: any) {
|
|
68
97
|
if (err?.isAxiosError) return normalizeAxiosError(err, "search");
|
|
69
98
|
throw err;
|
|
70
99
|
}
|
|
71
100
|
}
|
|
72
|
-
|