@upcrawl/sdk 1.3.1 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -30,7 +30,28 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
30
30
  // src/index.ts
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
+ BatchScrapeOptionsSchema: () => BatchScrapeOptionsSchema,
34
+ BatchScrapeResponseSchema: () => BatchScrapeResponseSchema,
35
+ Browser: () => Browser,
36
+ BrowserSessionSchema: () => BrowserSessionSchema,
37
+ CreateBrowserSessionOptionsSchema: () => CreateBrowserSessionOptionsSchema,
38
+ ExecuteCodeOptionsSchema: () => ExecuteCodeOptionsSchema,
39
+ ExecuteCodeResponseSchema: () => ExecuteCodeResponseSchema,
40
+ GeneratePdfFromUrlOptionsSchema: () => GeneratePdfFromUrlOptionsSchema,
41
+ GeneratePdfOptionsSchema: () => GeneratePdfOptionsSchema,
42
+ PdfMarginSchema: () => PdfMarginSchema,
43
+ PdfResponseSchema: () => PdfResponseSchema,
44
+ ScrapeMetadataSchema: () => ScrapeMetadataSchema,
45
+ ScrapeOptionsSchema: () => ScrapeOptionsSchema,
46
+ ScrapeResponseSchema: () => ScrapeResponseSchema,
47
+ SearchOptionsSchema: () => SearchOptionsSchema,
48
+ SearchResponseSchema: () => SearchResponseSchema,
49
+ SearchResultItemSchema: () => SearchResultItemSchema,
50
+ SearchResultWebSchema: () => SearchResultWebSchema,
51
+ SummaryQuerySchema: () => SummaryQuerySchema,
52
+ UpcrawlConfigSchema: () => UpcrawlConfigSchema,
33
53
  UpcrawlError: () => UpcrawlError,
54
+ UpcrawlErrorResponseSchema: () => UpcrawlErrorResponseSchema,
34
55
  batchScrape: () => batchScrape,
35
56
  configure: () => configure,
36
57
  default: () => index_default,
@@ -48,6 +69,146 @@ __export(index_exports, {
48
69
  module.exports = __toCommonJS(index_exports);
49
70
 
50
71
  // src/types.ts
72
+ var import_zod = require("zod");
73
// Zod schema for the SDK configuration object. All three fields (apiKey, baseUrl, timeout) are optional.
+ var UpcrawlConfigSchema = import_zod.z.object({
74
+ apiKey: import_zod.z.string().optional().describe("Your Upcrawl API key"),
75
+ baseUrl: import_zod.z.string().optional().describe("Custom API base URL"),
76
+ timeout: import_zod.z.number().optional().describe("Request timeout in milliseconds")
77
+ });
78
// Summarization request: a single required `query` string.
+ var SummaryQuerySchema = import_zod.z.object({
79
+ query: import_zod.z.string().describe("Query/instruction for content summarization")
80
+ });
81
// Options for a single scrape. Only `url` is required and it must pass zod's URL check.
// NOTE: the (1000-120000) range for timeoutMs appears only in the description text;
// the schema itself accepts any number.
+ var ScrapeOptionsSchema = import_zod.z.object({
82
+ url: import_zod.z.string().url().describe("URL to scrape (required)"),
83
+ type: import_zod.z.enum(["html", "markdown"]).optional().describe('Output format: html or markdown. Defaults to "html"'),
84
+ onlyMainContent: import_zod.z.boolean().optional().describe("Extract only main content (removes nav, ads, footers). Defaults to true"),
85
+ extractMetadata: import_zod.z.boolean().optional().describe("Whether to extract page metadata"),
86
+ summary: SummaryQuerySchema.optional().describe("Summary query for LLM summarization"),
87
+ timeoutMs: import_zod.z.number().optional().describe("Custom timeout in milliseconds (1000-120000)"),
88
+ waitUntil: import_zod.z.enum(["load", "domcontentloaded", "networkidle"]).optional().describe("Wait strategy for page load")
89
+ });
90
// Page metadata attached to a scrape result; every field is optional.
+ var ScrapeMetadataSchema = import_zod.z.object({
91
+ title: import_zod.z.string().optional(),
92
+ description: import_zod.z.string().optional(),
93
+ canonicalUrl: import_zod.z.string().optional(),
94
+ finalUrl: import_zod.z.string().optional(),
95
+ contentType: import_zod.z.string().optional(),
96
+ contentLength: import_zod.z.number().optional()
97
+ });
98
// Result of one scrape. html/markdown/content may be absent or null; statusCode is
// required but may be null; url, success, timestamp, loadTimeMs and retryCount are required.
+ var ScrapeResponseSchema = import_zod.z.object({
99
+ url: import_zod.z.string().describe("Original URL that was scraped"),
100
+ html: import_zod.z.string().nullable().optional().describe("Rendered HTML content (when type is html)"),
101
+ markdown: import_zod.z.string().nullable().optional().describe("Content converted to Markdown (when type is markdown)"),
102
+ statusCode: import_zod.z.number().nullable().describe("HTTP status code"),
103
+ success: import_zod.z.boolean().describe("Whether scraping was successful"),
104
+ error: import_zod.z.string().optional().describe("Error message if scraping failed"),
105
+ timestamp: import_zod.z.string().describe("ISO timestamp when scraping completed"),
106
+ loadTimeMs: import_zod.z.number().describe("Time taken to load and render the page in milliseconds"),
107
+ metadata: ScrapeMetadataSchema.optional().describe("Additional page metadata"),
108
+ retryCount: import_zod.z.number().describe("Number of retry attempts made"),
109
+ cost: import_zod.z.number().optional().describe("Cost in USD for this scrape operation"),
110
+ content: import_zod.z.string().nullable().optional().describe("Content after summarization (when summary query provided)")
111
+ });
112
// Options for a batch scrape. Each `urls` entry is either a plain string (not URL-validated
// by this schema) or a full ScrapeOptionsSchema object. The batchTimeoutMs range is descriptive only.
+ var BatchScrapeOptionsSchema = import_zod.z.object({
113
+ urls: import_zod.z.array(import_zod.z.union([import_zod.z.string(), ScrapeOptionsSchema])).describe("Array of URLs to scrape (strings or detailed request objects)"),
114
+ type: import_zod.z.enum(["html", "markdown"]).optional().describe("Output format: html or markdown"),
115
+ onlyMainContent: import_zod.z.boolean().optional().describe("Extract only main content (removes nav, ads, footers)"),
116
+ summary: SummaryQuerySchema.optional().describe("Summary query for LLM summarization"),
117
+ batchTimeoutMs: import_zod.z.number().optional().describe("Global timeout for entire batch operation in milliseconds (10000-600000)"),
118
+ failFast: import_zod.z.boolean().optional().describe("Whether to stop on first error")
119
+ });
120
// Aggregate result of a batch scrape: per-URL ScrapeResponseSchema entries plus counters; only `cost` is optional.
+ var BatchScrapeResponseSchema = import_zod.z.object({
121
+ results: import_zod.z.array(ScrapeResponseSchema).describe("Array of scrape results"),
122
+ total: import_zod.z.number().describe("Total number of URLs processed"),
123
+ successful: import_zod.z.number().describe("Number of successful scrapes"),
124
+ failed: import_zod.z.number().describe("Number of failed scrapes"),
125
+ totalTimeMs: import_zod.z.number().describe("Total time taken for batch operation in milliseconds"),
126
+ timestamp: import_zod.z.string().describe("Timestamp when batch operation completed"),
127
+ cost: import_zod.z.number().optional().describe("Total cost in USD for all scrape operations")
128
+ });
129
// Options for a search call. Only `queries` is required.
// NOTE: the (1-20) and (1-100) limits appear only in the description text; the schema does not enforce them.
+ var SearchOptionsSchema = import_zod.z.object({
130
+ queries: import_zod.z.array(import_zod.z.string()).describe("Array of search queries to execute (1-20)"),
131
+ limit: import_zod.z.number().optional().describe("Number of results per query (1-100). Defaults to 10"),
132
+ location: import_zod.z.string().optional().describe('Location for search (e.g., "IN", "US")'),
133
+ includeDomains: import_zod.z.array(import_zod.z.string()).optional().describe("Domains to include (will add site: to query)"),
134
+ excludeDomains: import_zod.z.array(import_zod.z.string()).optional().describe("Domains to exclude (will add -site: to query)")
135
+ });
136
// One web hit: url, title and description are all required strings.
+ var SearchResultWebSchema = import_zod.z.object({
137
+ url: import_zod.z.string().describe("URL of the search result"),
138
+ title: import_zod.z.string().describe("Title of the search result"),
139
+ description: import_zod.z.string().describe("Description/snippet of the search result")
140
+ });
141
// Result for a single query: required query/success/results, optional error, loadTimeMs and cost.
+ var SearchResultItemSchema = import_zod.z.object({
142
+ query: import_zod.z.string().describe("The search query"),
143
+ success: import_zod.z.boolean().describe("Whether the search was successful"),
144
+ results: import_zod.z.array(SearchResultWebSchema).describe("Parsed search result links"),
145
+ error: import_zod.z.string().optional().describe("Error message if failed"),
146
+ loadTimeMs: import_zod.z.number().optional().describe("Time taken in milliseconds"),
147
+ cost: import_zod.z.number().optional().describe("Cost in USD for this query")
148
+ });
149
// Aggregate search response: one SearchResultItemSchema per query plus counters; only `cost` is optional.
+ var SearchResponseSchema = import_zod.z.object({
150
+ results: import_zod.z.array(SearchResultItemSchema).describe("Array of search results per query"),
151
+ total: import_zod.z.number().describe("Total number of queries"),
152
+ successful: import_zod.z.number().describe("Number of successful searches"),
153
+ failed: import_zod.z.number().describe("Number of failed searches"),
154
+ totalTimeMs: import_zod.z.number().describe("Total time in milliseconds"),
155
+ timestamp: import_zod.z.string().describe("ISO timestamp"),
156
+ cost: import_zod.z.number().optional().describe("Total cost in USD")
157
+ });
158
// Page margins as optional strings, one per side.
+ var PdfMarginSchema = import_zod.z.object({
159
+ top: import_zod.z.string().optional(),
160
+ right: import_zod.z.string().optional(),
161
+ bottom: import_zod.z.string().optional(),
162
+ left: import_zod.z.string().optional()
163
+ });
164
// Options for rendering a PDF from an HTML string. Only `html` is required.
// The timeoutMs range in the description is not enforced by the schema.
+ var GeneratePdfOptionsSchema = import_zod.z.object({
165
+ html: import_zod.z.string().describe("Complete HTML content to convert to PDF (required)"),
166
+ title: import_zod.z.string().optional().describe("Title used for the exported filename"),
167
+ pageSize: import_zod.z.enum(["A4", "Letter", "Legal"]).optional().describe('Page size. Defaults to "A4"'),
168
+ landscape: import_zod.z.boolean().optional().describe("Landscape orientation. Defaults to false"),
169
+ margin: PdfMarginSchema.optional().describe('Page margins (e.g., { top: "20mm", right: "20mm", bottom: "20mm", left: "20mm" })'),
170
+ printBackground: import_zod.z.boolean().optional().describe("Print background graphics and colors. Defaults to true"),
171
+ skipChartWait: import_zod.z.boolean().optional().describe("Skip waiting for chart rendering signal. Defaults to false"),
172
+ timeoutMs: import_zod.z.number().optional().describe("Timeout in milliseconds (5000-120000). Defaults to 30000")
173
+ });
174
// Options for rendering a PDF from a URL. Same fields as the HTML variant except that
// `url` (URL-validated) replaces `html` and there is no `skipChartWait`.
+ var GeneratePdfFromUrlOptionsSchema = import_zod.z.object({
175
+ url: import_zod.z.string().url().describe("URL to navigate to and convert to PDF (required)"),
176
+ title: import_zod.z.string().optional().describe("Title used for the exported filename"),
177
+ pageSize: import_zod.z.enum(["A4", "Letter", "Legal"]).optional().describe('Page size. Defaults to "A4"'),
178
+ landscape: import_zod.z.boolean().optional().describe("Landscape orientation. Defaults to false"),
179
+ margin: PdfMarginSchema.optional().describe("Page margins"),
180
+ printBackground: import_zod.z.boolean().optional().describe("Print background graphics and colors. Defaults to true"),
181
+ timeoutMs: import_zod.z.number().optional().describe("Timeout in milliseconds (5000-120000). Defaults to 30000")
182
+ });
183
// PDF generation result: `success` and `durationMs` are required; url, filename, blobName and error are optional.
+ var PdfResponseSchema = import_zod.z.object({
184
+ success: import_zod.z.boolean().describe("Whether PDF generation succeeded"),
185
+ url: import_zod.z.string().optional().describe("Public URL of the generated PDF"),
186
+ filename: import_zod.z.string().optional().describe("Generated filename"),
187
+ blobName: import_zod.z.string().optional().describe("Blob storage path"),
188
+ error: import_zod.z.string().optional().describe("Error message on failure"),
189
+ durationMs: import_zod.z.number().describe("Total time taken in milliseconds")
190
+ });
191
// Options for code execution: required `code`; `language` is optional and "python" is the only accepted value.
+ var ExecuteCodeOptionsSchema = import_zod.z.object({
192
+ code: import_zod.z.string().describe("Code to execute (required)"),
193
+ language: import_zod.z.enum(["python"]).optional().describe('Language runtime. Defaults to "python"')
194
+ });
195
// Execution result: stdout, stderr, exitCode, executionTimeMs and timedOut are required;
// memoryUsageMb, error and cost are optional.
+ var ExecuteCodeResponseSchema = import_zod.z.object({
196
+ stdout: import_zod.z.string().describe("Standard output from the executed code"),
197
+ stderr: import_zod.z.string().describe("Standard error from the executed code"),
198
+ exitCode: import_zod.z.number().describe("Process exit code (0 = success, 124 = timeout)"),
199
+ executionTimeMs: import_zod.z.number().describe("Execution time in milliseconds"),
200
+ timedOut: import_zod.z.boolean().describe("Whether execution was killed due to timeout"),
201
+ memoryUsageMb: import_zod.z.number().optional().describe("Peak memory usage in megabytes"),
202
+ error: import_zod.z.string().optional().describe("Error message if execution infrastructure failed"),
203
+ cost: import_zod.z.number().optional().describe("Cost in USD for this execution")
204
+ });
205
// Shape of an API error payload: a required `error` object with string `code` and `message`,
// plus an optional numeric `statusCode`.
+ var UpcrawlErrorResponseSchema = import_zod.z.object({
206
+ error: import_zod.z.object({
207
+ code: import_zod.z.string(),
208
+ message: import_zod.z.string()
209
+ }),
210
+ statusCode: import_zod.z.number().optional()
211
+ });
51
212
  var UpcrawlError = class extends Error {
52
213
  constructor(message, status, code = "UNKNOWN_ERROR") {
53
214
  super(message);
@@ -56,6 +217,118 @@ var UpcrawlError = class extends Error {
56
217
  this.code = code;
57
218
  }
58
219
  };
220
// Options for creating a browser session; all fields are optional.
// NOTE: the viewport ranges appear only in the description text; the schema accepts any number.
+ var CreateBrowserSessionOptionsSchema = import_zod.z.object({
221
+ width: import_zod.z.number().optional().describe("Browser viewport width (800-3840). Defaults to 1280"),
222
+ height: import_zod.z.number().optional().describe("Browser viewport height (600-2160). Defaults to 720"),
223
+ headless: import_zod.z.boolean().optional().describe("Run browser in headless mode. Defaults to true")
224
+ });
225
// A created browser session. `vncUrl` is required but may be null, `affinityCookie` is optional,
// and `createdAt` is validated as a Date instance (z.date()), not as a string.
+ var BrowserSessionSchema = import_zod.z.object({
226
+ sessionId: import_zod.z.string().describe("Unique session identifier"),
227
+ wsEndpoint: import_zod.z.string().describe("WebSocket URL for connecting with Playwright/Puppeteer"),
228
+ vncUrl: import_zod.z.string().nullable().describe("VNC URL for viewing the browser (if available)"),
229
+ affinityCookie: import_zod.z.string().optional().describe("Affinity cookie for sticky session routing (format: SCRAPER_AFFINITY=xxx) - extracted from response headers"),
230
+ createdAt: import_zod.z.date().describe("Session creation timestamp"),
231
+ width: import_zod.z.number().describe("Browser viewport width"),
232
+ height: import_zod.z.number().describe("Browser viewport height")
233
+ });
234
+
235
+ // src/browser.ts
236
var Browser = class {
  /**
   * @param {Function} createClient2 - Factory called before every request to obtain
   *   the HTTP client; the returned client must expose `post` and `delete`.
   */
  constructor(createClient2) {
    this.createClient = createClient2;
  }
  /**
   * Create a new browser session for remote control.
   *
   * Posts to `/browser/session` (defaults: 1280x720, headless) and maps the response
   * body to a session object. The sticky-routing cookie is read from the response's
   * `set-cookie` header when one named `SCRAPER_AFFINITY` is present.
   *
   * @param {object} [options] - Session options (`width`, `height`, `headless`).
   * @returns {Promise<object>} Session details: `sessionId`, `wsEndpoint`, `vncUrl`,
   *   `affinityCookie` (or undefined), `createdAt` (Date), `width`, `height`.
   * @throws {UpcrawlError} When the request fails.
   *
   * @example
   * ```typescript
   * const session = await Upcrawl.browser.create({
   *   width: 1280,
   *   height: 720,
   *   headless: true
   * });
   *
   * console.log(session.wsEndpoint); // WebSocket URL for Playwright
   * console.log(session.vncUrl); // VNC URL for viewing (if available)
   * ```
   */
  async create(options) {
    try {
      const client = this.createClient();
      const response = await client.post("/browser/session", {
        width: options?.width ?? 1280,
        height: options?.height ?? 720,
        headless: options?.headless ?? true
      });
      // `set-cookie` can be an array, a single string, or missing depending on the
      // HTTP adapter; normalise to an array so `.map` never throws.
      const rawSetCookie = response.headers?.["set-cookie"];
      let setCookieHeaders = [];
      if (Array.isArray(rawSetCookie)) {
        setCookieHeaders = rawSetCookie;
      } else if (typeof rawSetCookie === "string" && rawSetCookie !== "") {
        setCookieHeaders = [rawSetCookie];
      }
      // Keep only the leading `name=value` pair of each cookie (drop Path, HttpOnly, ...).
      const affinityCookie = setCookieHeaders
        .map((cookie) => String(cookie).split(";")[0].trim())
        .find((pair) => pair.startsWith("SCRAPER_AFFINITY="));
      return {
        sessionId: response.data.sessionId,
        wsEndpoint: response.data.wsUrl,
        vncUrl: response.data.vncUrl,
        affinityCookie: affinityCookie || void 0,
        createdAt: new Date(response.data.createdAt),
        width: response.data.width,
        height: response.data.height
      };
    } catch (error) {
      this.handleError(error);
    }
  }
  /**
   * Close a browser session.
   *
   * @param {string} sessionId - The session ID to close; must be a non-empty string.
   * @returns {Promise<void>} Resolves when the DELETE request succeeds.
   * @throws {UpcrawlError} With status 400 / code "INVALID_SESSION_ID" when `sessionId`
   *   is empty or not a string, or with the mapped API error when the request fails.
   *
   * @example
   * ```typescript
   * await Upcrawl.browser.close(session.sessionId);
   * ```
   */
  async close(sessionId) {
    // Reject unusable ids up front: an empty id would otherwise hit the collection URL.
    if (typeof sessionId !== "string" || sessionId.trim() === "") {
      throw new UpcrawlError("sessionId must be a non-empty string", 400, "INVALID_SESSION_ID");
    }
    try {
      const client = this.createClient();
      // Encode the id so characters such as "/", "?" or "#" cannot alter the request path.
      await client.delete(`/browser/session/${encodeURIComponent(sessionId)}`);
    } catch (error) {
      this.handleError(error);
    }
  }
  /**
   * Convert any failure into an UpcrawlError and throw it. This method never returns.
   *
   * Errors carrying a `response` property are mapped from the response payload
   * (`data.error` as an object or a plain string) or, failing that, from the HTTP
   * status. The original error is attached as `cause` on the thrown UpcrawlError.
   *
   * @param {unknown} error - The caught value.
   * @throws {UpcrawlError} Always.
   */
  handleError(error) {
    if (error instanceof UpcrawlError) {
      throw error;
    }
    // Attach the original failure so callers can still inspect it.
    const withCause = (upcrawlError) => {
      upcrawlError.cause = error;
      return upcrawlError;
    };
    if (error && typeof error === "object" && "response" in error) {
      const axiosError = error;
      const status = axiosError.response?.status || 500;
      const apiError = axiosError.response?.data?.error;
      if (typeof apiError === "string" && apiError !== "") {
        // Some payloads carry the message directly as a string rather than { code, message }.
        throw withCause(new UpcrawlError(apiError, status, "UNKNOWN_ERROR"));
      }
      if (apiError) {
        throw withCause(new UpcrawlError(apiError.message || "Unknown error", status, apiError.code || "UNKNOWN_ERROR"));
      }
      switch (status) {
        case 401:
          throw withCause(new UpcrawlError("Invalid or missing API key", 401, "UNAUTHORIZED"));
        case 404:
          throw withCause(new UpcrawlError("Session not found", 404, "NOT_FOUND"));
        case 429:
          throw withCause(new UpcrawlError("Rate limit exceeded", 429, "RATE_LIMIT_EXCEEDED"));
        case 503:
          throw withCause(new UpcrawlError("No browser slots available", 503, "SERVICE_UNAVAILABLE"));
        default:
          throw withCause(new UpcrawlError(axiosError.message || "An unknown error occurred", status, "UNKNOWN_ERROR"));
      }
    }
    throw withCause(new UpcrawlError(
      error instanceof Error ? error.message : "An unknown error occurred",
      500,
      "UNKNOWN_ERROR"
    ));
  }
};
59
332
 
60
333
  // src/client.ts
61
334
  var import_axios = __toESM(require("axios"));
@@ -279,6 +552,23 @@ var Upcrawl = {
279
552
  * @returns Promise with execution response (stdout, stderr, exit code, memory usage)
280
553
  */
281
554
  executeCode,
555
+ /**
556
+ * Browser session management
557
+ * Create and manage browser sessions for remote control with Playwright/Puppeteer
558
+ *
559
+ * @example
560
+ * ```typescript
561
+ * const session = await Upcrawl.browser.create({
562
+ * width: 1280,
563
+ * height: 720,
564
+ * headless: true
565
+ * });
566
+ *
567
+ * const browser = await playwright.connect(session.wsEndpoint);
568
+ * await Upcrawl.browser.close(session.sessionId);
569
+ * ```
570
+ */
571
+ browser: new Browser(createClient),
282
572
  /**
283
573
  * Error class for Upcrawl API errors
284
574
  */
@@ -287,7 +577,28 @@ var Upcrawl = {
287
577
  var index_default = Upcrawl;
288
578
  // Annotate the CommonJS export names for ESM import in node:
289
579
  0 && (module.exports = {
580
+ BatchScrapeOptionsSchema,
581
+ BatchScrapeResponseSchema,
582
+ Browser,
583
+ BrowserSessionSchema,
584
+ CreateBrowserSessionOptionsSchema,
585
+ ExecuteCodeOptionsSchema,
586
+ ExecuteCodeResponseSchema,
587
+ GeneratePdfFromUrlOptionsSchema,
588
+ GeneratePdfOptionsSchema,
589
+ PdfMarginSchema,
590
+ PdfResponseSchema,
591
+ ScrapeMetadataSchema,
592
+ ScrapeOptionsSchema,
593
+ ScrapeResponseSchema,
594
+ SearchOptionsSchema,
595
+ SearchResponseSchema,
596
+ SearchResultItemSchema,
597
+ SearchResultWebSchema,
598
+ SummaryQuerySchema,
599
+ UpcrawlConfigSchema,
290
600
  UpcrawlError,
601
+ UpcrawlErrorResponseSchema,
291
602
  batchScrape,
292
603
  configure,
293
604
  executeCode,
package/dist/index.mjs CHANGED
@@ -1,4 +1,144 @@
1
1
  // src/types.ts
2
+ import { z } from "zod";
3
// Zod schema for the SDK configuration object. All three fields (apiKey, baseUrl, timeout) are optional.
+ var UpcrawlConfigSchema = z.object({
4
+ apiKey: z.string().optional().describe("Your Upcrawl API key"),
5
+ baseUrl: z.string().optional().describe("Custom API base URL"),
6
+ timeout: z.number().optional().describe("Request timeout in milliseconds")
7
+ });
8
// Summarization request: a single required `query` string.
+ var SummaryQuerySchema = z.object({
9
+ query: z.string().describe("Query/instruction for content summarization")
10
+ });
11
// Options for a single scrape. Only `url` is required and it must pass zod's URL check.
// NOTE: the (1000-120000) range for timeoutMs appears only in the description text;
// the schema itself accepts any number.
+ var ScrapeOptionsSchema = z.object({
12
+ url: z.string().url().describe("URL to scrape (required)"),
13
+ type: z.enum(["html", "markdown"]).optional().describe('Output format: html or markdown. Defaults to "html"'),
14
+ onlyMainContent: z.boolean().optional().describe("Extract only main content (removes nav, ads, footers). Defaults to true"),
15
+ extractMetadata: z.boolean().optional().describe("Whether to extract page metadata"),
16
+ summary: SummaryQuerySchema.optional().describe("Summary query for LLM summarization"),
17
+ timeoutMs: z.number().optional().describe("Custom timeout in milliseconds (1000-120000)"),
18
+ waitUntil: z.enum(["load", "domcontentloaded", "networkidle"]).optional().describe("Wait strategy for page load")
19
+ });
20
// Page metadata attached to a scrape result; every field is optional.
+ var ScrapeMetadataSchema = z.object({
21
+ title: z.string().optional(),
22
+ description: z.string().optional(),
23
+ canonicalUrl: z.string().optional(),
24
+ finalUrl: z.string().optional(),
25
+ contentType: z.string().optional(),
26
+ contentLength: z.number().optional()
27
+ });
28
// Result of one scrape. html/markdown/content may be absent or null; statusCode is
// required but may be null; url, success, timestamp, loadTimeMs and retryCount are required.
+ var ScrapeResponseSchema = z.object({
29
+ url: z.string().describe("Original URL that was scraped"),
30
+ html: z.string().nullable().optional().describe("Rendered HTML content (when type is html)"),
31
+ markdown: z.string().nullable().optional().describe("Content converted to Markdown (when type is markdown)"),
32
+ statusCode: z.number().nullable().describe("HTTP status code"),
33
+ success: z.boolean().describe("Whether scraping was successful"),
34
+ error: z.string().optional().describe("Error message if scraping failed"),
35
+ timestamp: z.string().describe("ISO timestamp when scraping completed"),
36
+ loadTimeMs: z.number().describe("Time taken to load and render the page in milliseconds"),
37
+ metadata: ScrapeMetadataSchema.optional().describe("Additional page metadata"),
38
+ retryCount: z.number().describe("Number of retry attempts made"),
39
+ cost: z.number().optional().describe("Cost in USD for this scrape operation"),
40
+ content: z.string().nullable().optional().describe("Content after summarization (when summary query provided)")
41
+ });
42
// Options for a batch scrape. Each `urls` entry is either a plain string (not URL-validated
// by this schema) or a full ScrapeOptionsSchema object. The batchTimeoutMs range is descriptive only.
+ var BatchScrapeOptionsSchema = z.object({
43
+ urls: z.array(z.union([z.string(), ScrapeOptionsSchema])).describe("Array of URLs to scrape (strings or detailed request objects)"),
44
+ type: z.enum(["html", "markdown"]).optional().describe("Output format: html or markdown"),
45
+ onlyMainContent: z.boolean().optional().describe("Extract only main content (removes nav, ads, footers)"),
46
+ summary: SummaryQuerySchema.optional().describe("Summary query for LLM summarization"),
47
+ batchTimeoutMs: z.number().optional().describe("Global timeout for entire batch operation in milliseconds (10000-600000)"),
48
+ failFast: z.boolean().optional().describe("Whether to stop on first error")
49
+ });
50
// Aggregate result of a batch scrape: per-URL ScrapeResponseSchema entries plus counters; only `cost` is optional.
+ var BatchScrapeResponseSchema = z.object({
51
+ results: z.array(ScrapeResponseSchema).describe("Array of scrape results"),
52
+ total: z.number().describe("Total number of URLs processed"),
53
+ successful: z.number().describe("Number of successful scrapes"),
54
+ failed: z.number().describe("Number of failed scrapes"),
55
+ totalTimeMs: z.number().describe("Total time taken for batch operation in milliseconds"),
56
+ timestamp: z.string().describe("Timestamp when batch operation completed"),
57
+ cost: z.number().optional().describe("Total cost in USD for all scrape operations")
58
+ });
59
// Options for a search call. Only `queries` is required.
// NOTE: the (1-20) and (1-100) limits appear only in the description text; the schema does not enforce them.
+ var SearchOptionsSchema = z.object({
60
+ queries: z.array(z.string()).describe("Array of search queries to execute (1-20)"),
61
+ limit: z.number().optional().describe("Number of results per query (1-100). Defaults to 10"),
62
+ location: z.string().optional().describe('Location for search (e.g., "IN", "US")'),
63
+ includeDomains: z.array(z.string()).optional().describe("Domains to include (will add site: to query)"),
64
+ excludeDomains: z.array(z.string()).optional().describe("Domains to exclude (will add -site: to query)")
65
+ });
66
// One web hit: url, title and description are all required strings.
+ var SearchResultWebSchema = z.object({
67
+ url: z.string().describe("URL of the search result"),
68
+ title: z.string().describe("Title of the search result"),
69
+ description: z.string().describe("Description/snippet of the search result")
70
+ });
71
// Result for a single query: required query/success/results, optional error, loadTimeMs and cost.
+ var SearchResultItemSchema = z.object({
72
+ query: z.string().describe("The search query"),
73
+ success: z.boolean().describe("Whether the search was successful"),
74
+ results: z.array(SearchResultWebSchema).describe("Parsed search result links"),
75
+ error: z.string().optional().describe("Error message if failed"),
76
+ loadTimeMs: z.number().optional().describe("Time taken in milliseconds"),
77
+ cost: z.number().optional().describe("Cost in USD for this query")
78
+ });
79
// Aggregate search response: one SearchResultItemSchema per query plus counters; only `cost` is optional.
+ var SearchResponseSchema = z.object({
80
+ results: z.array(SearchResultItemSchema).describe("Array of search results per query"),
81
+ total: z.number().describe("Total number of queries"),
82
+ successful: z.number().describe("Number of successful searches"),
83
+ failed: z.number().describe("Number of failed searches"),
84
+ totalTimeMs: z.number().describe("Total time in milliseconds"),
85
+ timestamp: z.string().describe("ISO timestamp"),
86
+ cost: z.number().optional().describe("Total cost in USD")
87
+ });
88
// Page margins as optional strings, one per side.
+ var PdfMarginSchema = z.object({
89
+ top: z.string().optional(),
90
+ right: z.string().optional(),
91
+ bottom: z.string().optional(),
92
+ left: z.string().optional()
93
+ });
94
// Options for rendering a PDF from an HTML string. Only `html` is required.
// The timeoutMs range in the description is not enforced by the schema.
+ var GeneratePdfOptionsSchema = z.object({
95
+ html: z.string().describe("Complete HTML content to convert to PDF (required)"),
96
+ title: z.string().optional().describe("Title used for the exported filename"),
97
+ pageSize: z.enum(["A4", "Letter", "Legal"]).optional().describe('Page size. Defaults to "A4"'),
98
+ landscape: z.boolean().optional().describe("Landscape orientation. Defaults to false"),
99
+ margin: PdfMarginSchema.optional().describe('Page margins (e.g., { top: "20mm", right: "20mm", bottom: "20mm", left: "20mm" })'),
100
+ printBackground: z.boolean().optional().describe("Print background graphics and colors. Defaults to true"),
101
+ skipChartWait: z.boolean().optional().describe("Skip waiting for chart rendering signal. Defaults to false"),
102
+ timeoutMs: z.number().optional().describe("Timeout in milliseconds (5000-120000). Defaults to 30000")
103
+ });
104
// Options for rendering a PDF from a URL. Same fields as the HTML variant except that
// `url` (URL-validated) replaces `html` and there is no `skipChartWait`.
+ var GeneratePdfFromUrlOptionsSchema = z.object({
105
+ url: z.string().url().describe("URL to navigate to and convert to PDF (required)"),
106
+ title: z.string().optional().describe("Title used for the exported filename"),
107
+ pageSize: z.enum(["A4", "Letter", "Legal"]).optional().describe('Page size. Defaults to "A4"'),
108
+ landscape: z.boolean().optional().describe("Landscape orientation. Defaults to false"),
109
+ margin: PdfMarginSchema.optional().describe("Page margins"),
110
+ printBackground: z.boolean().optional().describe("Print background graphics and colors. Defaults to true"),
111
+ timeoutMs: z.number().optional().describe("Timeout in milliseconds (5000-120000). Defaults to 30000")
112
+ });
113
// PDF generation result: `success` and `durationMs` are required; url, filename, blobName and error are optional.
+ var PdfResponseSchema = z.object({
114
+ success: z.boolean().describe("Whether PDF generation succeeded"),
115
+ url: z.string().optional().describe("Public URL of the generated PDF"),
116
+ filename: z.string().optional().describe("Generated filename"),
117
+ blobName: z.string().optional().describe("Blob storage path"),
118
+ error: z.string().optional().describe("Error message on failure"),
119
+ durationMs: z.number().describe("Total time taken in milliseconds")
120
+ });
121
// Options for code execution: required `code`; `language` is optional and "python" is the only accepted value.
+ var ExecuteCodeOptionsSchema = z.object({
122
+ code: z.string().describe("Code to execute (required)"),
123
+ language: z.enum(["python"]).optional().describe('Language runtime. Defaults to "python"')
124
+ });
125
// Execution result: stdout, stderr, exitCode, executionTimeMs and timedOut are required;
// memoryUsageMb, error and cost are optional.
+ var ExecuteCodeResponseSchema = z.object({
126
+ stdout: z.string().describe("Standard output from the executed code"),
127
+ stderr: z.string().describe("Standard error from the executed code"),
128
+ exitCode: z.number().describe("Process exit code (0 = success, 124 = timeout)"),
129
+ executionTimeMs: z.number().describe("Execution time in milliseconds"),
130
+ timedOut: z.boolean().describe("Whether execution was killed due to timeout"),
131
+ memoryUsageMb: z.number().optional().describe("Peak memory usage in megabytes"),
132
+ error: z.string().optional().describe("Error message if execution infrastructure failed"),
133
+ cost: z.number().optional().describe("Cost in USD for this execution")
134
+ });
135
// Shape of an API error payload: a required `error` object with string `code` and `message`,
// plus an optional numeric `statusCode`.
+ var UpcrawlErrorResponseSchema = z.object({
136
+ error: z.object({
137
+ code: z.string(),
138
+ message: z.string()
139
+ }),
140
+ statusCode: z.number().optional()
141
+ });
2
142
  var UpcrawlError = class extends Error {
3
143
  constructor(message, status, code = "UNKNOWN_ERROR") {
4
144
  super(message);
@@ -7,6 +147,118 @@ var UpcrawlError = class extends Error {
7
147
  this.code = code;
8
148
  }
9
149
  };
150
// Options for creating a browser session; all fields are optional.
// NOTE: the viewport ranges appear only in the description text; the schema accepts any number.
+ var CreateBrowserSessionOptionsSchema = z.object({
151
+ width: z.number().optional().describe("Browser viewport width (800-3840). Defaults to 1280"),
152
+ height: z.number().optional().describe("Browser viewport height (600-2160). Defaults to 720"),
153
+ headless: z.boolean().optional().describe("Run browser in headless mode. Defaults to true")
154
+ });
155
// A created browser session. `vncUrl` is required but may be null, `affinityCookie` is optional,
// and `createdAt` is validated as a Date instance (z.date()), not as a string.
+ var BrowserSessionSchema = z.object({
156
+ sessionId: z.string().describe("Unique session identifier"),
157
+ wsEndpoint: z.string().describe("WebSocket URL for connecting with Playwright/Puppeteer"),
158
+ vncUrl: z.string().nullable().describe("VNC URL for viewing the browser (if available)"),
159
+ affinityCookie: z.string().optional().describe("Affinity cookie for sticky session routing (format: SCRAPER_AFFINITY=xxx) - extracted from response headers"),
160
+ createdAt: z.date().describe("Session creation timestamp"),
161
+ width: z.number().describe("Browser viewport width"),
162
+ height: z.number().describe("Browser viewport height")
163
+ });
164
+
165
+ // src/browser.ts
166
var Browser = class {
  /**
   * @param {Function} createClient2 - Factory called before every request to obtain
   *   the HTTP client; the returned client must expose `post` and `delete`.
   */
  constructor(createClient2) {
    this.createClient = createClient2;
  }
  /**
   * Create a new browser session for remote control.
   *
   * Posts to `/browser/session` (defaults: 1280x720, headless) and maps the response
   * body to a session object. The sticky-routing cookie is read from the response's
   * `set-cookie` header when one named `SCRAPER_AFFINITY` is present.
   *
   * @param {object} [options] - Session options (`width`, `height`, `headless`).
   * @returns {Promise<object>} Session details: `sessionId`, `wsEndpoint`, `vncUrl`,
   *   `affinityCookie` (or undefined), `createdAt` (Date), `width`, `height`.
   * @throws {UpcrawlError} When the request fails.
   *
   * @example
   * ```typescript
   * const session = await Upcrawl.browser.create({
   *   width: 1280,
   *   height: 720,
   *   headless: true
   * });
   *
   * console.log(session.wsEndpoint); // WebSocket URL for Playwright
   * console.log(session.vncUrl); // VNC URL for viewing (if available)
   * ```
   */
  async create(options) {
    try {
      const client = this.createClient();
      const response = await client.post("/browser/session", {
        width: options?.width ?? 1280,
        height: options?.height ?? 720,
        headless: options?.headless ?? true
      });
      // `set-cookie` can be an array, a single string, or missing depending on the
      // HTTP adapter; normalise to an array so `.map` never throws.
      const rawSetCookie = response.headers?.["set-cookie"];
      let setCookieHeaders = [];
      if (Array.isArray(rawSetCookie)) {
        setCookieHeaders = rawSetCookie;
      } else if (typeof rawSetCookie === "string" && rawSetCookie !== "") {
        setCookieHeaders = [rawSetCookie];
      }
      // Keep only the leading `name=value` pair of each cookie (drop Path, HttpOnly, ...).
      const affinityCookie = setCookieHeaders
        .map((cookie) => String(cookie).split(";")[0].trim())
        .find((pair) => pair.startsWith("SCRAPER_AFFINITY="));
      return {
        sessionId: response.data.sessionId,
        wsEndpoint: response.data.wsUrl,
        vncUrl: response.data.vncUrl,
        affinityCookie: affinityCookie || void 0,
        createdAt: new Date(response.data.createdAt),
        width: response.data.width,
        height: response.data.height
      };
    } catch (error) {
      this.handleError(error);
    }
  }
  /**
   * Close a browser session.
   *
   * @param {string} sessionId - The session ID to close; must be a non-empty string.
   * @returns {Promise<void>} Resolves when the DELETE request succeeds.
   * @throws {UpcrawlError} With status 400 / code "INVALID_SESSION_ID" when `sessionId`
   *   is empty or not a string, or with the mapped API error when the request fails.
   *
   * @example
   * ```typescript
   * await Upcrawl.browser.close(session.sessionId);
   * ```
   */
  async close(sessionId) {
    // Reject unusable ids up front: an empty id would otherwise hit the collection URL.
    if (typeof sessionId !== "string" || sessionId.trim() === "") {
      throw new UpcrawlError("sessionId must be a non-empty string", 400, "INVALID_SESSION_ID");
    }
    try {
      const client = this.createClient();
      // Encode the id so characters such as "/", "?" or "#" cannot alter the request path.
      await client.delete(`/browser/session/${encodeURIComponent(sessionId)}`);
    } catch (error) {
      this.handleError(error);
    }
  }
  /**
   * Convert any failure into an UpcrawlError and throw it. This method never returns.
   *
   * Errors carrying a `response` property are mapped from the response payload
   * (`data.error` as an object or a plain string) or, failing that, from the HTTP
   * status. The original error is attached as `cause` on the thrown UpcrawlError.
   *
   * @param {unknown} error - The caught value.
   * @throws {UpcrawlError} Always.
   */
  handleError(error) {
    if (error instanceof UpcrawlError) {
      throw error;
    }
    // Attach the original failure so callers can still inspect it.
    const withCause = (upcrawlError) => {
      upcrawlError.cause = error;
      return upcrawlError;
    };
    if (error && typeof error === "object" && "response" in error) {
      const axiosError = error;
      const status = axiosError.response?.status || 500;
      const apiError = axiosError.response?.data?.error;
      if (typeof apiError === "string" && apiError !== "") {
        // Some payloads carry the message directly as a string rather than { code, message }.
        throw withCause(new UpcrawlError(apiError, status, "UNKNOWN_ERROR"));
      }
      if (apiError) {
        throw withCause(new UpcrawlError(apiError.message || "Unknown error", status, apiError.code || "UNKNOWN_ERROR"));
      }
      switch (status) {
        case 401:
          throw withCause(new UpcrawlError("Invalid or missing API key", 401, "UNAUTHORIZED"));
        case 404:
          throw withCause(new UpcrawlError("Session not found", 404, "NOT_FOUND"));
        case 429:
          throw withCause(new UpcrawlError("Rate limit exceeded", 429, "RATE_LIMIT_EXCEEDED"));
        case 503:
          throw withCause(new UpcrawlError("No browser slots available", 503, "SERVICE_UNAVAILABLE"));
        default:
          throw withCause(new UpcrawlError(axiosError.message || "An unknown error occurred", status, "UNKNOWN_ERROR"));
      }
    }
    throw withCause(new UpcrawlError(
      error instanceof Error ? error.message : "An unknown error occurred",
      500,
      "UNKNOWN_ERROR"
    ));
  }
};
10
262
 
11
263
  // src/client.ts
12
264
  import axios from "axios";
@@ -230,6 +482,23 @@ var Upcrawl = {
230
482
  * @returns Promise with execution response (stdout, stderr, exit code, memory usage)
231
483
  */
232
484
  executeCode,
485
+ /**
486
+ * Browser session management
487
+ * Create and manage browser sessions for remote control with Playwright/Puppeteer
488
+ *
489
+ * @example
490
+ * ```typescript
491
+ * const session = await Upcrawl.browser.create({
492
+ * width: 1280,
493
+ * height: 720,
494
+ * headless: true
495
+ * });
496
+ *
497
+ * const browser = await playwright.connect(session.wsEndpoint);
498
+ * await Upcrawl.browser.close(session.sessionId);
499
+ * ```
500
+ */
501
+ browser: new Browser(createClient),
233
502
  /**
234
503
  * Error class for Upcrawl API errors
235
504
  */
@@ -237,7 +506,28 @@ var Upcrawl = {
237
506
  };
238
507
  var index_default = Upcrawl;
239
508
  export {
509
+ BatchScrapeOptionsSchema,
510
+ BatchScrapeResponseSchema,
511
+ Browser,
512
+ BrowserSessionSchema,
513
+ CreateBrowserSessionOptionsSchema,
514
+ ExecuteCodeOptionsSchema,
515
+ ExecuteCodeResponseSchema,
516
+ GeneratePdfFromUrlOptionsSchema,
517
+ GeneratePdfOptionsSchema,
518
+ PdfMarginSchema,
519
+ PdfResponseSchema,
520
+ ScrapeMetadataSchema,
521
+ ScrapeOptionsSchema,
522
+ ScrapeResponseSchema,
523
+ SearchOptionsSchema,
524
+ SearchResponseSchema,
525
+ SearchResultItemSchema,
526
+ SearchResultWebSchema,
527
+ SummaryQuerySchema,
528
+ UpcrawlConfigSchema,
240
529
  UpcrawlError,
530
+ UpcrawlErrorResponseSchema,
241
531
  batchScrape,
242
532
  configure,
243
533
  index_default as default,