firecrawl 1.29.3 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +4 -2
- package/LICENSE +0 -0
- package/README.md +85 -78
- package/audit-ci.jsonc +4 -0
- package/dist/chunk-JFWW4BWA.js +85 -0
- package/dist/index.cjs +964 -39
- package/dist/index.d.cts +529 -11
- package/dist/index.d.ts +529 -11
- package/dist/index.js +952 -27
- package/dist/package-KYZ3HXR5.js +4 -0
- package/dump.rdb +0 -0
- package/jest.config.js +0 -0
- package/package.json +6 -6
- package/src/__tests__/e2e/v2/batch.test.ts +74 -0
- package/src/__tests__/e2e/v2/crawl.test.ts +182 -0
- package/src/__tests__/e2e/v2/extract.test.ts +70 -0
- package/src/__tests__/e2e/v2/map.test.ts +55 -0
- package/src/__tests__/e2e/v2/scrape.test.ts +130 -0
- package/src/__tests__/e2e/v2/search.test.ts +247 -0
- package/src/__tests__/e2e/v2/usage.test.ts +36 -0
- package/src/__tests__/e2e/v2/utils/idmux.ts +58 -0
- package/src/__tests__/e2e/v2/watcher.test.ts +96 -0
- package/src/__tests__/unit/v2/errorHandler.test.ts +19 -0
- package/src/__tests__/unit/v2/scrape.unit.test.ts +11 -0
- package/src/__tests__/unit/v2/validation.test.ts +59 -0
- package/src/index.backup.ts +2146 -0
- package/src/index.ts +27 -2134
- package/src/v1/index.ts +2158 -0
- package/src/v2/client.ts +281 -0
- package/src/v2/methods/batch.ts +131 -0
- package/src/v2/methods/crawl.ts +160 -0
- package/src/v2/methods/extract.ts +86 -0
- package/src/v2/methods/map.ts +37 -0
- package/src/v2/methods/scrape.ts +26 -0
- package/src/v2/methods/search.ts +69 -0
- package/src/v2/methods/usage.ts +39 -0
- package/src/v2/types.ts +308 -0
- package/src/v2/utils/errorHandler.ts +18 -0
- package/src/v2/utils/getVersion.ts +14 -0
- package/src/v2/utils/httpClient.ts +99 -0
- package/src/v2/utils/validation.ts +50 -0
- package/src/v2/watcher.ts +159 -0
- package/tsconfig.json +2 -1
- package/tsup.config.ts +0 -0
- package/dist/package-Z6F7JDXI.js +0 -111
- /package/src/__tests__/{v1/e2e_withAuth → e2e/v1}/index.test.ts +0 -0
- /package/src/__tests__/{v1/unit → unit/v1}/monitor-job-status-retry.test.ts +0 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { type Document, type ScrapeOptions } from "../types";
|
|
2
|
+
import { HttpClient } from "../utils/httpClient";
|
|
3
|
+
import { ensureValidScrapeOptions } from "../utils/validation";
|
|
4
|
+
import { throwForBadResponse, normalizeAxiosError } from "../utils/errorHandler";
|
|
5
|
+
|
|
6
|
+
/**
 * Scrape a single URL through the `/v2/scrape` endpoint.
 *
 * @param http - Client used to send the request.
 * @param url - Target URL; surrounding whitespace is trimmed before sending.
 * @param options - Optional scrape options; validated, then merged into the request body.
 * @returns The `data` field of the response, or an empty object when it is absent.
 * @throws Error when `url` is empty or whitespace-only.
 * @throws Whatever `throwForBadResponse` / `normalizeAxiosError` raise for failed requests.
 */
export async function scrape(http: HttpClient, url: string, options?: ScrapeOptions): Promise<Document> {
  const trimmedUrl = url ? url.trim() : "";
  if (!trimmedUrl) {
    throw new Error("URL cannot be empty");
  }
  if (options) {
    ensureValidScrapeOptions(options);
  }

  // Option keys are spread after `url`, exactly like the Object.assign they replace.
  const body: Record<string, unknown> = { url: trimmedUrl, ...(options || {}) };

  try {
    const response = await http.post<{ success: boolean; data?: Document; error?: string }>("/v2/scrape", body);
    const succeeded = response.status === 200 && Boolean(response.data?.success);
    if (!succeeded) {
      throwForBadResponse(response, "scrape");
    }
    return (response.data.data || {}) as Document;
  } catch (error: any) {
    // Only axios failures are normalized; everything else propagates untouched.
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "scrape");
  }
}
|
|
26
|
+
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { type Document, type SearchData, type SearchRequest, type SearchResult, type ScrapeOptions } from "../types";
|
|
2
|
+
import { HttpClient } from "../utils/httpClient";
|
|
3
|
+
import { ensureValidScrapeOptions } from "../utils/validation";
|
|
4
|
+
import { throwForBadResponse, normalizeAxiosError } from "../utils/errorHandler";
|
|
5
|
+
|
|
6
|
+
/**
 * Validate a search request and build the JSON body for `/v2/search`.
 *
 * Only fields that are set on the request are copied into the payload.
 *
 * @param req - The caller-supplied search request.
 * @returns A plain object ready to be sent as the request body.
 * @throws Error when the query is empty or `limit` / `timeout` are not positive.
 */
function prepareSearchPayload(req: SearchRequest): Record<string, unknown> {
  const hasQuery = Boolean(req.query && req.query.trim());
  if (!hasQuery) throw new Error("Query cannot be empty");

  const positiveChecks: Array<[number | undefined, string]> = [
    [req.limit, "limit must be positive"],
    [req.timeout, "timeout must be positive"],
  ];
  for (const [value, message] of positiveChecks) {
    if (value != null && value <= 0) throw new Error(message);
  }

  const body: Record<string, unknown> = { query: req.query };
  if (req.sources) body.sources = req.sources;

  // Copied in this fixed order so the serialized key order stays the same.
  const passthroughKeys = ["limit", "tbs", "location", "ignoreInvalidURLs", "timeout"] as const;
  for (const key of passthroughKeys) {
    const value = req[key];
    if (value != null) body[key] = value;
  }

  if (req.scrapeOptions) {
    ensureValidScrapeOptions(req.scrapeOptions as ScrapeOptions);
    body.scrapeOptions = req.scrapeOptions;
  }
  return body;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Run a search through `/v2/search` and normalize the grouped results.
 *
 * Every array-valued key of the response `data` is copied into the output.
 * Object items carrying any scraped-page field are kept whole as `Document`;
 * other objects are reduced to `{ url, title, description }`; string items
 * become `{ url }`; anything else is dropped.
 *
 * @param http - Client used to send the request.
 * @param request - Search parameters; validated before the request is sent.
 * @returns The normalized search data.
 */
export async function search(http: HttpClient, request: SearchRequest): Promise<SearchData> {
  const body = prepareSearchPayload(request);

  // Presence of any of these keys marks an item as a scraped document.
  const documentKeys = ["markdown", "html", "rawHtml", "links", "screenshot", "changeTracking", "summary", "json"];

  const toEntry = (item: any): SearchResult | Document | undefined => {
    if (typeof item === "string") return { url: item } as SearchResult;
    if (!item || typeof item !== "object") return undefined;
    if (documentKeys.some((key) => key in item)) return item as Document;
    return { url: item.url, title: item.title, description: item.description } as SearchResult;
  };

  try {
    const response = await http.post<{ success: boolean; data?: Record<string, unknown>; error?: string }>("/v2/search", body);
    if (!(response.status === 200 && response.data?.success)) {
      throwForBadResponse(response, "search");
    }

    const grouped = (response.data.data || {}) as Record<string, any>;
    const normalized: SearchData = {};
    for (const [source, entries] of Object.entries(grouped)) {
      if (!Array.isArray(entries)) continue;
      const collected: Array<SearchResult | Document> = [];
      for (const item of entries) {
        const entry = toEntry(item);
        if (entry !== undefined) collected.push(entry);
      }
      (normalized as any)[source] = collected;
    }
    return normalized;
  } catch (error: any) {
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "search");
  }
}
|
|
69
|
+
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { ConcurrencyCheck, CreditUsage, TokenUsage } from "../types";
|
|
2
|
+
import { HttpClient } from "../utils/httpClient";
|
|
3
|
+
import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler";
|
|
4
|
+
|
|
5
|
+
/**
 * Fetch the concurrency figures from `/v2/concurrency-check`.
 *
 * Values are read from the response's `data` field, falling back to the
 * top-level body when `data` is absent. `max_concurrency` is accepted as an
 * alternative spelling of `maxConcurrency`.
 */
export async function getConcurrency(http: HttpClient): Promise<ConcurrencyCheck> {
  try {
    const response = await http.get<{ success: boolean; data?: { concurrency: number; maxConcurrency: number } }>("/v2/concurrency-check");
    const succeeded = response.status === 200 && Boolean(response.data?.success);
    if (!succeeded) throwForBadResponse(response, "get concurrency");

    const source = response.data.data || (response.data as any);
    const maxConcurrency = source.maxConcurrency ?? source.max_concurrency;
    return { concurrency: source.concurrency, maxConcurrency };
  } catch (error: any) {
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "get concurrency");
  }
}
|
|
16
|
+
|
|
17
|
+
/**
 * Fetch the remaining credits from `/v2/team/credit-usage`.
 *
 * Accepts `remainingCredits` or `remaining_credits`, read from `data` or from
 * the top-level body when `data` is absent; reports 0 when neither is present.
 */
export async function getCreditUsage(http: HttpClient): Promise<CreditUsage> {
  try {
    const response = await http.get<{ success: boolean; data?: { remainingCredits?: number; remaining_credits?: number } }>("/v2/team/credit-usage");
    const succeeded = response.status === 200 && Boolean(response.data?.success);
    if (!succeeded) throwForBadResponse(response, "get credit usage");

    const source = response.data.data || (response.data as any);
    const remainingCredits = source.remainingCredits ?? source.remaining_credits ?? 0;
    return { remainingCredits };
  } catch (error: any) {
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "get credit usage");
  }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Fetch token usage from `/v2/team/token-usage`.
 *
 * Returns the response's `data` field as-is, or the top-level body when
 * `data` is absent; no field-level normalization is applied.
 */
export async function getTokenUsage(http: HttpClient): Promise<TokenUsage> {
  try {
    const response = await http.get<{ success: boolean; data?: TokenUsage }>("/v2/team/token-usage");
    const succeeded = response.status === 200 && Boolean(response.data?.success);
    if (!succeeded) throwForBadResponse(response, "get token usage");

    const usage = response.data.data || (response.data as any);
    return usage as TokenUsage;
  } catch (error: any) {
    if (!error?.isAxiosError) throw error;
    return normalizeAxiosError(error, "get token usage");
  }
}
|
|
39
|
+
|
package/src/v2/types.ts
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
import type { ZodTypeAny } from "zod";
|
|
2
|
+
// Public types for Firecrawl JS/TS SDK v2 (camelCase only)
|
|
3
|
+
|
|
4
|
+
/** Output formats that can be requested by name. */
export type FormatString =
  | "markdown"
  | "html"
  | "rawHtml"
  | "links"
  | "screenshot"
  | "summary"
  | "changeTracking"
  | "json";

/** A width/height pair. */
export interface Viewport {
  width: number;
  height: number;
}

/** Object form of a format request; `type` is the discriminator. */
export interface Format {
  type: FormatString;
}

/** Object form of the "json" format. */
export interface JsonFormat extends Format {
  type: "json";
  prompt?: string;
  /** Either a plain object or a Zod schema. */
  schema?: ZodTypeAny | Record<string, unknown>;
}

/** Object form of the "screenshot" format. */
export interface ScreenshotFormat extends Format {
  type: "screenshot";
  fullPage?: boolean;
  quality?: number;
  viewport?: Viewport;
}

/** Object form of the "changeTracking" format. */
export interface ChangeTrackingFormat extends Format {
  type: "changeTracking";
  modes: Array<"git-diff" | "json">;
  schema?: Record<string, unknown>;
  prompt?: string;
  tag?: string;
}

/** Anything accepted as an entry of `ScrapeOptions.formats`. */
export type FormatOption =
  | FormatString
  | Format
  | JsonFormat
  | ChangeTrackingFormat
  | ScreenshotFormat;
|
|
50
|
+
|
|
51
|
+
/** Location settings: an optional country and list of languages. */
export interface LocationConfig {
  country?: string;
  languages?: string[];
}

/** "wait" action; carries an optional duration in milliseconds and/or a selector. */
export interface WaitAction {
  type: "wait";
  milliseconds?: number;
  selector?: string;
}

/** "screenshot" action. */
export interface ScreenshotAction {
  type: "screenshot";
  fullPage?: boolean;
  quality?: number;
  viewport?: Viewport;
}

/** "click" action targeting a selector. */
export interface ClickAction {
  type: "click";
  selector: string;
}

/** "write" action carrying the text to write. */
export interface WriteAction {
  type: "write";
  text: string;
}

/** "press" action carrying a key name. */
export interface PressAction {
  type: "press";
  key: string;
}

/** "scroll" action with a direction and an optional selector. */
export interface ScrollAction {
  type: "scroll";
  direction: "up" | "down";
  selector?: string;
}

/** "scrape" action; has no parameters. */
export interface ScrapeAction {
  type: "scrape";
}

/** "executeJavascript" action carrying the script source. */
export interface ExecuteJavascriptAction {
  type: "executeJavascript";
  script: string;
}

/** "pdf" action with optional page format, orientation and scale. */
export interface PDFAction {
  type: "pdf";
  format?: "A0" | "A1" | "A2" | "A3" | "A4" | "A5" | "A6" | "Letter" | "Legal" | "Tabloid" | "Ledger";
  landscape?: boolean;
  scale?: number;
}

/** Any action accepted in `ScrapeOptions.actions`, discriminated by `type`. */
export type ActionOption =
  | WaitAction
  | ScreenshotAction
  | ClickAction
  | WriteAction
  | PressAction
  | ScrollAction
  | ScrapeAction
  | ExecuteJavascriptAction
  | PDFAction;
|
|
116
|
+
|
|
117
|
+
/**
 * Options accepted by scrape-style requests. Every field is optional.
 * `timeout`, `waitFor` and `formats` are the fields checked by
 * `ensureValidScrapeOptions`.
 */
export interface ScrapeOptions {
  formats?: FormatOption[];
  headers?: Record<string, string>;
  includeTags?: string[];
  excludeTags?: string[];
  onlyMainContent?: boolean;
  /** Must be positive when set. */
  timeout?: number;
  /** Must be non-negative when set. */
  waitFor?: number;
  mobile?: boolean;
  parsers?: string[];
  actions?: ActionOption[];
  location?: LocationConfig;
  skipTlsVerification?: boolean;
  removeBase64Images?: boolean;
  fastMode?: boolean;
  useMock?: string;
  blockAds?: boolean;
  /** Named presets are listed for documentation; any string is accepted. */
  proxy?: "basic" | "stealth" | "auto" | string;
  maxAge?: number;
  storeInCache?: boolean;
}

/** Webhook settings: a target URL plus optional headers, metadata and event filter. */
export interface WebhookConfig {
  url: string;
  headers?: Record<string, string>;
  metadata?: Record<string, string>;
  events?: ("completed" | "failed" | "page" | "started")[];
}
|
|
145
|
+
|
|
146
|
+
/**
 * Metadata attached to a document. The named fields are all optional and
 * arbitrary extra keys are allowed through the index signature.
 */
export interface DocumentMetadata {
  title?: string;
  description?: string;
  language?: string;
  /** A single string or a list of strings. */
  keywords?: string | string[];
  robots?: string;
  ogTitle?: string;
  ogDescription?: string;
  ogUrl?: string;
  ogImage?: string;
  sourceURL?: string;
  statusCode?: number;
  error?: string;
  [key: string]: unknown;
}

/** A document as returned by the API; every field is optional. */
export interface Document {
  markdown?: string;
  html?: string;
  rawHtml?: string;
  json?: unknown;
  summary?: string;
  metadata?: DocumentMetadata;
  links?: string[];
  screenshot?: string;
  actions?: Record<string, unknown>;
  warning?: string;
  changeTracking?: Record<string, unknown>;
}
|
|
175
|
+
|
|
176
|
+
/** A search hit that was not scraped: a URL with optional title and description. */
export interface SearchResult {
  url: string;
  title?: string;
  description?: string;
}

/** Search results grouped by source; each entry is a plain hit or a document. */
export interface SearchData {
  web?: (SearchResult | Document)[];
  news?: (SearchResult | Document)[];
  images?: (SearchResult | Document)[];
}

/** Parameters of a search request. Only `query` is required. */
export interface SearchRequest {
  query: string;
  /** Each source is given by name or as `{ type }`. */
  sources?: ("web" | "news" | "images" | { type: "web" | "news" | "images" })[];
  limit?: number;
  tbs?: string;
  location?: string;
  ignoreInvalidURLs?: boolean;
  /** Milliseconds. */
  timeout?: number;
  scrapeOptions?: ScrapeOptions;
}
|
|
198
|
+
|
|
199
|
+
/** Response identifying a crawl job by `id` and `url`. */
export interface CrawlResponse {
  id: string;
  url: string;
}

/** Snapshot of a crawl job: status, progress counters and the documents so far. */
export interface CrawlJob {
  status: "scraping" | "completed" | "failed" | "cancelled";
  total: number;
  completed: number;
  creditsUsed?: number;
  expiresAt?: string;
  /** May be a string, `null`, or absent. */
  next?: string | null;
  data: Document[];
}

/** Response identifying a batch scrape job, optionally listing invalid URLs. */
export interface BatchScrapeResponse {
  id: string;
  url: string;
  invalidURLs?: string[];
}

/** Snapshot of a batch scrape job; same fields as `CrawlJob`. */
export interface BatchScrapeJob {
  status: "scraping" | "completed" | "failed" | "cancelled";
  completed: number;
  total: number;
  creditsUsed?: number;
  expiresAt?: string;
  /** May be a string, `null`, or absent. */
  next?: string | null;
  data: Document[];
}

/** Result of a map request: a list of links. */
export interface MapData {
  links: SearchResult[];
}

/** Options of a map request; every field is optional. */
export interface MapOptions {
  search?: string;
  sitemap?: "only" | "include" | "skip";
  includeSubdomains?: boolean;
  limit?: number;
  timeout?: number;
}
|
|
241
|
+
|
|
242
|
+
/** Response of an extract request; every field is optional. */
export interface ExtractResponse {
  success?: boolean;
  id?: string;
  status?: "processing" | "completed" | "failed" | "cancelled";
  data?: unknown;
  error?: string;
  warning?: string;
  sources?: Record<string, unknown>;
  expiresAt?: string;
}

/** Concurrency figures as returned by `getConcurrency`. */
export interface ConcurrencyCheck {
  concurrency: number;
  maxConcurrency: number;
}

/** Remaining credits as returned by `getCreditUsage`. */
export interface CreditUsage {
  remainingCredits: number;
}

/** Remaining tokens as returned by `getTokenUsage`. */
export interface TokenUsage {
  remainingTokens: number;
}
|
|
265
|
+
|
|
266
|
+
/** Errors recorded for a crawl, plus the list of robots-blocked entries. */
export interface CrawlErrorsResponse {
  errors: Array<{
    id: string;
    timestamp?: string;
    url: string;
    code?: string;
    error: string;
  }>;
  robotsBlocked: string[];
}

/** One entry of the active-crawls listing. */
export interface ActiveCrawl {
  id: string;
  teamId: string;
  url: string;
  /** May be an object, `null`, or absent. */
  options?: Record<string, unknown> | null;
}

/** Response listing active crawls. */
export interface ActiveCrawlsResponse {
  success: boolean;
  crawls: ActiveCrawl[];
}

/** Structured error description; only `message` is required. */
export interface ErrorDetails {
  code?: string;
  message: string;
  details?: Record<string, unknown>;
  status?: number;
}
|
|
295
|
+
|
|
296
|
+
/**
 * Error type thrown by the SDK for failed requests.
 *
 * Carries the HTTP status, an optional error code and any extra details
 * alongside the message. `name` is always "FirecrawlSdkError".
 */
export class SdkError extends Error {
  status?: number;
  code?: string;
  details?: unknown;

  /**
   * @param message - Human-readable description of the failure.
   * @param status - HTTP status code, when one is known.
   * @param code - Error code, when one is known.
   * @param details - Any additional payload describing the failure.
   */
  constructor(message: string, status?: number, code?: string, details?: unknown) {
    super(message);
    // Subclasses of built-ins lose their prototype when compiled to ES5;
    // restoring it keeps `instanceof SdkError` working on every target.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = "FirecrawlSdkError";
    this.status = status;
    this.code = code;
    this.details = details;
  }
}
|
|
308
|
+
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { type AxiosError, type AxiosResponse } from "axios";
|
|
2
|
+
import { SdkError } from "../types";
|
|
3
|
+
|
|
4
|
+
/**
 * Turn an unsuccessful response into an `SdkError` and throw it.
 *
 * The message is taken from the body's `error`, then `message`, and finally a
 * generic text naming the status and the attempted action.
 *
 * @param resp - The response that was judged unsuccessful.
 * @param action - Short description of the operation, used in the fallback message.
 * @throws SdkError always.
 */
export function throwForBadResponse(resp: AxiosResponse, action: string): never {
  const { status, data } = resp;
  const payload = data || {};
  const fallback = `Request failed (${status}) while trying to ${action}`;
  const message = payload?.error || payload?.message || fallback;
  throw new SdkError(message, status, undefined, payload?.details);
}
|
|
10
|
+
|
|
11
|
+
/**
 * Convert an axios error into an `SdkError` and throw it.
 *
 * The message prefers the response body's `error`, then the axios message,
 * then a generic text. The code prefers the body's `code` over the axios
 * code. Details are the body's `details`, or the whole body when absent.
 *
 * @param err - The axios error to convert.
 * @param action - Short description of the operation, used in the fallback message.
 * @throws SdkError always.
 */
export function normalizeAxiosError(err: AxiosError, action: string): never {
  const response = err.response;
  const status = response?.status;
  const payload: any = response?.data;

  const statusSuffix = status ? ` (${status})` : "";
  const fallback = `Request failed${statusSuffix} while trying to ${action}`;
  const message = payload?.error || err.message || fallback;
  const code = (payload?.code as string) || err.code;

  throw new SdkError(message, status, code, payload?.details ?? payload);
}
|
|
18
|
+
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Resolve the SDK version string.
 *
 * The SDK's own package.json is consulted first. `npm_package_version` is only
 * a fallback because npm sets it to the version of the package whose script is
 * running, which is the host application when the SDK is used as a dependency.
 *
 * @returns The resolved version, or "3.x.x" when nothing can be determined.
 */
export function getVersion(): string {
  const fallback = "3.x.x";

  try {
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    const pkg = require("../../../package.json");
    if (typeof pkg?.version === "string" && pkg.version) {
      return pkg.version;
    }
  } catch {
    // package.json (or `require` itself) is unavailable here; try the environment.
  }

  try {
    if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
      return process.env.npm_package_version as string;
    }
  } catch {
    // Environment access is best-effort; fall through to the placeholder.
  }

  return fallback;
}
|
|
14
|
+
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import axios, { type AxiosInstance, type AxiosRequestConfig, type AxiosResponse } from "axios";
|
|
2
|
+
import { getVersion } from "./getVersion";
|
|
3
|
+
|
|
4
|
+
/** Construction options for `HttpClient`. */
export interface HttpClientOptions {
  /** Sent as a Bearer token in the Authorization header. */
  apiKey: string;
  /** Base URL of the API; one trailing slash is removed by the client. */
  apiUrl: string;
  /** Request timeout in milliseconds; the client defaults to 60000. */
  timeoutMs?: number;
  /** Upper bound on attempts per request; the client defaults to 3. */
  maxRetries?: number;
  /**
   * Backoff base in seconds; the client defaults to 0.5. The wait before the
   * next attempt is `backoffFactor * 2^attempt` seconds (0.5, 1, 2, ...).
   */
  backoffFactor?: number;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Small axios wrapper used by the v2 SDK methods.
 *
 * It sets the base URL, bearer authorization and timeout once, stamps an
 * `origin` field onto JSON bodies of POST/PUT/PATCH requests, and retries
 * requests that come back with HTTP 502 using exponential backoff.
 */
export class HttpClient {
  private instance: AxiosInstance;
  private readonly apiKey: string;
  private readonly apiUrl: string;
  private readonly maxRetries: number;
  private readonly backoffFactor: number;

  /**
   * @param options - API key, base URL and optional timeout/retry tuning.
   */
  constructor(options: HttpClientOptions) {
    this.apiKey = options.apiKey;
    // Drop one trailing slash so endpoints starting with "/" join cleanly.
    this.apiUrl = options.apiUrl.replace(/\/$/, "");
    this.maxRetries = options.maxRetries ?? 3;
    this.backoffFactor = options.backoffFactor ?? 0.5;
    this.instance = axios.create({
      baseURL: this.apiUrl,
      timeout: options.timeoutMs ?? 60000,
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`,
      },
      transitional: { clarifyTimeoutError: true },
    });
  }

  /** Base URL with the trailing slash removed. */
  getApiUrl(): string {
    return this.apiUrl;
  }

  /** API key this client was constructed with. */
  getApiKey(): string {
    return this.apiKey;
  }

  /**
   * Send a request, retrying on HTTP 502 with exponential backoff.
   *
   * At least one attempt is always made: a `maxRetries` of 0, a negative
   * number or NaN is treated as a single attempt instead of skipping the
   * request altogether.
   *
   * @param config - Axios request config; it is copied, never mutated.
   * @returns The axios response of the first attempt that is not retried.
   * @throws The error of the last failed attempt.
   */
  private async request<T = any>(config: AxiosRequestConfig): Promise<AxiosResponse<T>> {
    const version = getVersion();
    const baseConfig: AxiosRequestConfig = {
      ...config,
      headers: { ...(config.headers || {}) },
    };

    // `>=` is false for NaN, so NaN also collapses to a single attempt.
    const maxAttempts = this.maxRetries >= 1 ? this.maxRetries : 1;

    let lastError: any;
    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      const canRetry = attempt < maxAttempts - 1;
      try {
        const cfg: AxiosRequestConfig = { ...baseConfig };
        // For POST/PUT/PATCH, ensure origin is present in the JSON body too.
        if (cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
          const data = (cfg.data ?? {}) as Record<string, unknown>;
          cfg.data = { ...data, origin: `js-sdk@${version}` };
        }
        const res = await this.instance.request<T>(cfg);
        if (res.status === 502 && canRetry) {
          await this.sleep(this.backoffFactor * Math.pow(2, attempt));
          continue;
        }
        return res;
      } catch (err: any) {
        lastError = err;
        const status = err?.response?.status;
        if (status === 502 && canRetry) {
          await this.sleep(this.backoffFactor * Math.pow(2, attempt));
          continue;
        }
        throw err;
      }
    }
    // Not reachable while maxAttempts >= 1; kept so the function always returns or throws.
    throw lastError ?? new Error("Unexpected HTTP client error");
  }

  /** Resolve after the given number of seconds. */
  private sleep(seconds: number): Promise<void> {
    return new Promise((r) => setTimeout(r, seconds * 1000));
  }

  /** POST `body` as JSON to `endpoint`, with optional extra headers. */
  post<T = any>(endpoint: string, body: Record<string, unknown>, headers?: Record<string, string>) {
    return this.request<T>({ method: "post", url: endpoint, data: body, headers });
  }

  /** GET `endpoint`, with optional extra headers. */
  get<T = any>(endpoint: string, headers?: Record<string, string>) {
    return this.request<T>({ method: "get", url: endpoint, headers });
  }

  /** DELETE `endpoint`, with optional extra headers. */
  delete<T = any>(endpoint: string, headers?: Record<string, string>) {
    return this.request<T>({ method: "delete", url: endpoint, headers });
  }

  /**
   * Build per-request headers.
   *
   * @param idempotencyKey - When given, sent as `x-idempotency-key`.
   * @returns A fresh header map, empty when no key is supplied.
   */
  prepareHeaders(idempotencyKey?: string): Record<string, string> {
    const headers: Record<string, string> = {};
    if (idempotencyKey) headers["x-idempotency-key"] = idempotencyKey;
    return headers;
  }
}
|
|
99
|
+
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { type FormatOption, type JsonFormat, type ScrapeOptions, type ScreenshotFormat } from "../types";
|
|
2
|
+
import zodToJsonSchema from "zod-to-json-schema";
|
|
3
|
+
|
|
4
|
+
/**
 * Validate a list of requested formats.
 *
 * - The bare string "json" is rejected; the json format must be an object.
 * - A json format object needs a `prompt`, a `schema`, or both. A Zod schema
 *   is replaced in place by its JSON-schema conversion.
 * - A screenshot format's `quality`, when set, must be a non-negative number.
 *
 * @param formats - Formats to check; a missing list is accepted.
 * @throws Error describing the first invalid entry.
 */
export function ensureValidFormats(formats?: FormatOption[]): void {
  if (!formats) return;

  for (const format of formats) {
    if (typeof format === "string") {
      if (format === "json") {
        throw new Error("json format must be an object with { type: 'json', prompt, schema }");
      }
      continue;
    }

    switch (format.type) {
      case "json": {
        const jsonFormat = format as JsonFormat;
        if (!(jsonFormat.prompt || jsonFormat.schema)) {
          throw new Error("json format requires either 'prompt' or 'schema' (or both)");
        }
        // Flexibility: a Zod schema may be passed; it is converted to JSON schema here.
        const candidate: any = jsonFormat.schema as any;
        const hasParser = typeof candidate?.safeParse === "function" || typeof candidate?.parse === "function";
        const looksLikeZod = Boolean(candidate) && hasParser && Boolean(candidate._def);
        if (looksLikeZod) {
          try {
            (jsonFormat as any).schema = zodToJsonSchema(candidate);
          } catch {
            // If conversion fails, leave as-is; server-side may still handle, or request will fail explicitly
          }
        }
        break;
      }
      case "screenshot": {
        const { quality } = format as ScreenshotFormat;
        if (quality != null && (typeof quality !== "number" || quality < 0)) {
          throw new Error("screenshot.quality must be a non-negative number");
        }
        break;
      }
      default:
        break;
    }
  }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Validate scrape options before they are sent.
 *
 * @param options - Options to check; a missing value is accepted.
 * @throws Error when `timeout` is not positive or `waitFor` is negative;
 *   also propagates any error raised by `ensureValidFormats`.
 */
export function ensureValidScrapeOptions(options?: ScrapeOptions): void {
  if (!options) return;

  const { timeout, waitFor } = options;
  if (timeout != null && timeout <= 0) throw new Error("timeout must be positive");
  if (waitFor != null && waitFor < 0) throw new Error("waitFor must be non-negative");

  ensureValidFormats(options.formats);
}
|
|
50
|
+
|