crawl4ai 1.0.0 → 1.1.0

This diff shows the changes between publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
package/README.md CHANGED
@@ -10,8 +10,10 @@ A type-safe TypeScript SDK for the Crawl4AI REST API. Built for modern JavaScrip
10
10
  - **Comprehensive Coverage** - All Crawl4AI endpoints including specialized features
11
11
  - **Smart Error Handling** - Custom error classes with retry logic and timeouts
12
12
  - **Batch Processing** - Efficiently crawl multiple URLs in a single request
13
+ - **Streaming Results** - NDJSON streaming for long-running crawls
13
14
  - **Input Validation** - Built-in URL validation and parameter checking
14
15
  - **Debug Mode** - Optional request/response logging for development
16
+ - **MCP Endpoints** - Model Context Protocol schema + SSE support
15
17
  - **Zero Dependencies** - Uses only native fetch API
16
18
 
17
19
  ## 📦 Installation
@@ -37,6 +39,8 @@ yarn add crawl4ai
37
39
  - **Official Project**: [https://github.com/unclecode/crawl4ai](https://github.com/unclecode/crawl4ai)
38
40
  - **Official Documentation**: [https://docs.crawl4ai.com/](https://docs.crawl4ai.com/)
39
41
 
42
+ **Compatibility note:** This SDK has been updated against Crawl4AI **v0.8.0** (January 18, 2026).
43
+
40
44
 
41
45
  ## 🏗️ Prerequisites
42
46
 
@@ -90,7 +94,9 @@ const results = await client.crawl({
90
94
 
91
95
  const result = results[0]; // API returns array of results
92
96
  console.log('Title:', result.metadata?.title);
93
- console.log('Content:', result.markdown?.slice(0, 200));
97
+ const markdown =
98
+ typeof result.markdown === 'string' ? result.markdown : result.markdown?.raw_markdown;
99
+ console.log('Content:', markdown?.slice(0, 200));
94
100
  ```
95
101
 
96
102
  ### Configuration Options
@@ -135,6 +141,30 @@ const results = await client.crawl({
135
141
  });
136
142
  ```
137
143
 
144
+ Need server metadata (processing time, memory stats, etc.)? Use `crawlWithMetadata`:
145
+
146
+ ```typescript
147
+ const response = await client.crawlWithMetadata({
148
+ urls: ['https://example.com']
149
+ });
150
+
151
+ console.log(response.success);
152
+ console.log(response.server_processing_time_s);
153
+ ```
154
+
155
+ #### `crawlStream(request)` - Stream Crawl Results (NDJSON)
156
+ Stream results as they arrive:
157
+
158
+ ```typescript
159
+ for await (const chunk of client.crawlStream({ urls: ['https://example.com'] })) {
160
+ if ('status' in chunk) {
161
+ console.log('Stream status:', chunk.status);
162
+ } else {
163
+ console.log('Crawled:', chunk.url);
164
+ }
165
+ }
166
+ ```
167
+
138
168
 
139
169
  ### Content Generation
140
170
 
@@ -144,11 +174,21 @@ Extract markdown with various filters:
144
174
  ```typescript
145
175
  const markdown = await client.markdown({
146
176
  url: 'https://example.com',
147
- f: 'fit', // 'raw' | 'fit' | 'bm25' | 'llm'
148
- q: 'search query for bm25/llm filters'
177
+ filter: 'fit', // 'raw' | 'fit' | 'bm25' | 'llm'
178
+ query: 'search query for bm25/llm filters'
149
179
  });
150
180
  ```
151
181
 
182
+ Want the full response metadata? Use `markdownResult`:
183
+
184
+ ```typescript
185
+ const markdownResult = await client.markdownResult({
186
+ url: 'https://example.com',
187
+ filter: 'fit'
188
+ });
189
+ console.log(markdownResult.success, markdownResult.cache);
190
+ ```
191
+
152
192
  #### `html(request)` - Get Processed HTML
153
193
  Get sanitized HTML for schema extraction:
154
194
 
@@ -158,6 +198,13 @@ const html = await client.html({
158
198
  });
159
199
  ```
160
200
 
201
+ For the full response metadata, use `htmlResult`:
202
+
203
+ ```typescript
204
+ const htmlResult = await client.htmlResult({ url: 'https://example.com' });
205
+ console.log(htmlResult.success);
206
+ ```
207
+
161
208
  #### `screenshot(request)` - Capture Screenshot
162
209
  Capture full-page screenshots:
163
210
 
@@ -169,6 +216,8 @@ const screenshotBase64 = await client.screenshot({
169
216
  });
170
217
  ```
171
218
 
219
+ For response metadata, use `screenshotResult()`, as sketched below.
220
+
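For illustration, a minimal sketch of `screenshotResult()` based on the `ScreenshotResponse` type added in this release (`screenshot` plus an optional `success` flag); the URL is a placeholder:

```typescript
// Sketch: screenshotResult() returns the full ScreenshotResponse payload
const shot = await client.screenshotResult({ url: 'https://example.com' });
if (shot.success !== false) {
  console.log('Screenshot (base64, first 50 chars):', shot.screenshot.slice(0, 50));
}
```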
172
221
  #### `pdf(request)` - Generate PDF
173
222
  Generate PDF documents:
174
223
 
@@ -179,6 +228,8 @@ const pdfData = await client.pdf({
179
228
  });
180
229
  ```
181
230
 
231
+ For response metadata, use `pdfResult()`, as sketched below.
232
+
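A matching sketch for `pdfResult()`, based on the `PdfResponse` type (`pdf` plus an optional `success` flag):

```typescript
// Sketch: pdfResult() returns the full PdfResponse payload
const pdfResponse = await client.pdfResult({ url: 'https://example.com' });
console.log('Success:', pdfResponse.success);
console.log('PDF (base64 length):', pdfResponse.pdf.length);
```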
182
233
  ### JavaScript Execution
183
234
 
184
235
  #### `executeJs(request)` - Run JavaScript
@@ -239,6 +290,16 @@ const version = await client.version({ throwOnError: true });
239
290
  // Get Prometheus metrics
240
291
  const metrics = await client.metrics();
241
292
 
293
+ // Request an API token
294
+ const token = await client.token({ email: 'user@example.com' });
295
+
296
+ // Dump server configuration (advanced)
297
+ const configDump = await client.configDump({ code: 'CrawlerRunConfig()' });
298
+
299
+ // MCP schema + SSE (advanced integrations)
300
+ const mcpSchema = await client.mcpSchema();
301
+ const mcpStream = await client.mcpSse();
302
+
242
303
  // Update configuration
243
304
  client.setApiToken('new_token');
244
305
  client.setBaseUrl('https://new-url.com');
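Per the type declarations below, `mcpSse()` resolves to the raw `fetch` `Response` for the MCP Server-Sent Events stream. A minimal consumption sketch, assuming the endpoint emits standard `data:` lines:

```typescript
// Sketch: read the raw SSE Response returned by mcpSse()
// (assumes standard `data: ...` event lines; chunk boundaries are not handled)
const sse = await client.mcpSse();
if (!sse.body) throw new Error('No SSE body available');
const reader = sse.body.getReader();
const decoder = new TextDecoder();
while (true) {
  const { value, done } = await reader.read();
  if (done || !value) break;
  for (const line of decoder.decode(value, { stream: true }).split('\n')) {
    if (line.startsWith('data:')) console.log('MCP event:', line.slice(5).trim());
  }
}
```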
@@ -345,7 +406,7 @@ const results = await client.crawl({
345
406
  The SDK provides custom error handling with detailed information:
346
407
 
347
408
  ```typescript
348
- import { Crawl4AIError } from 'crawl4ai-sdk';
409
+ import { Crawl4AIError } from 'crawl4ai';
349
410
 
350
411
  try {
351
412
  const results = await client.crawl({ urls: 'https://example.com' });
@@ -426,4 +487,4 @@ This SDK is released under the MIT License.
426
487
 
427
488
  ## 🙏 Acknowledgments
428
489
 
429
- Built for the amazing [Crawl4AI](https://github.com/unclecode/crawl4ai) project by [@unclecode](https://github.com/unclecode) and the Crawl4AI community.
490
+ Built for the amazing [Crawl4AI](https://github.com/unclecode/crawl4ai) project by [@unclecode](https://github.com/unclecode) and the Crawl4AI community.
package/dist/index.js CHANGED
@@ -233,6 +233,21 @@ class Crawl4AI {
233
233
  }
234
234
  return [response];
235
235
  }
236
+ normalizeCrawlResponse(response) {
237
+ if (Array.isArray(response)) {
238
+ return { results: response };
239
+ }
240
+ if (typeof response === "object" && response !== null) {
241
+ const responseObject = response;
242
+ if (Array.isArray(responseObject.results)) {
243
+ return responseObject;
244
+ }
245
+ if (Array.isArray(responseObject.result)) {
246
+ return { results: responseObject.result };
247
+ }
248
+ }
249
+ return { results: [response] };
250
+ }
236
251
  buildQueryParams(params) {
237
252
  const searchParams = new URLSearchParams;
238
253
  for (const [key, value] of Object.entries(params)) {
@@ -242,6 +257,19 @@ class Crawl4AI {
242
257
  }
243
258
  return searchParams.toString();
244
259
  }
260
+ async parseResponseBody(response) {
261
+ const contentType = response.headers.get("content-type") || "";
262
+ if (contentType.includes("application/json")) {
263
+ return response.json();
264
+ }
265
+ if (contentType.includes("text/html") || contentType.includes("text/plain")) {
266
+ return response.text();
267
+ }
268
+ if (contentType.includes("text/event-stream") || contentType.includes("application/x-ndjson")) {
269
+ return response;
270
+ }
271
+ return response.text();
272
+ }
245
273
  async request(endpoint, options = {}) {
246
274
  const url = `${this.config.baseUrl}${endpoint}`;
247
275
  const { timeout = this.config.timeout, signal, headers, ...fetchOptions } = options;
@@ -260,17 +288,7 @@ class Crawl4AI {
260
288
  signal: requestSignal
261
289
  });
262
290
  clearTimeout(timeoutId);
263
- const contentType = response.headers.get("content-type") || "";
264
- let responseData;
265
- if (contentType.includes("application/json")) {
266
- responseData = await response.json();
267
- } else if (contentType.includes("text/html") || contentType.includes("text/plain")) {
268
- responseData = await response.text();
269
- } else if (contentType.includes("text/event-stream")) {
270
- return response;
271
- } else {
272
- responseData = await response.text();
273
- }
291
+ const responseData = await this.parseResponseBody(response);
274
292
  this.log(`Response: ${response.status}`, responseData);
275
293
  if (!this.config.validateStatus(response.status)) {
276
294
  const headers2 = {};
@@ -300,6 +318,53 @@ class Crawl4AI {
300
318
  throw error;
301
319
  }
302
320
  }
321
+ async requestRaw(endpoint, options = {}) {
322
+ const url = `${this.config.baseUrl}${endpoint}`;
323
+ const { timeout = this.config.timeout, signal, headers, ...fetchOptions } = options;
324
+ this.log(`Request: ${fetchOptions.method || "GET"} ${url}`, fetchOptions.body);
325
+ const requestHeaders = {
326
+ ...this.config.defaultHeaders,
327
+ ...headers
328
+ };
329
+ const controller = new AbortController;
330
+ const timeoutId = setTimeout(() => controller.abort(), timeout);
331
+ const requestSignal = signal || controller.signal;
332
+ try {
333
+ const response = await fetch(url, {
334
+ ...fetchOptions,
335
+ headers: requestHeaders,
336
+ signal: requestSignal
337
+ });
338
+ clearTimeout(timeoutId);
339
+ if (!this.config.validateStatus(response.status)) {
340
+ const responseData = await this.parseResponseBody(response);
341
+ const headers2 = {};
342
+ response.headers.forEach((value, key) => {
343
+ headers2[key] = value;
344
+ });
345
+ const error = createHttpError(response.status, response.statusText, undefined, responseData, headers2);
346
+ error.request = {
347
+ url,
348
+ method: fetchOptions.method || "GET",
349
+ headers: requestHeaders,
350
+ body: fetchOptions.body
351
+ };
352
+ if (this.config.throwOnError) {
353
+ throw error;
354
+ }
355
+ }
356
+ return response;
357
+ } catch (error) {
358
+ clearTimeout(timeoutId);
359
+ if (error instanceof Error && error.name === "AbortError") {
360
+ throw new TimeoutError(timeout, url);
361
+ }
362
+ if (error instanceof TypeError && error.message.includes("fetch")) {
363
+ throw new NetworkError(`Network request failed: ${error.message}`, error);
364
+ }
365
+ throw error;
366
+ }
367
+ }
303
368
  async requestWithRetry(endpoint, options = {}) {
304
369
  let lastError = new Error("No attempts made");
305
370
  for (let attempt = 0;attempt <= this.config.retries; attempt++) {
@@ -324,6 +389,30 @@ class Crawl4AI {
324
389
  }
325
390
  throw lastError;
326
391
  }
392
+ async requestRawWithRetry(endpoint, options = {}) {
393
+ let lastError = new Error("No attempts made");
394
+ for (let attempt = 0;attempt <= this.config.retries; attempt++) {
395
+ try {
396
+ return await this.requestRaw(endpoint, options);
397
+ } catch (error) {
398
+ lastError = error;
399
+ if (error instanceof Crawl4AIError && error.status && error.status >= CLIENT_ERROR_MIN && error.status < CLIENT_ERROR_MAX && error.status !== RATE_LIMIT_STATUS) {
400
+ throw error;
401
+ }
402
+ if (attempt < this.config.retries) {
403
+ let delay = this.config.retryDelay * RETRY_BACKOFF_MULTIPLIER ** attempt;
404
+ if (error instanceof RateLimitError && error.retryAfter) {
405
+ delay = error.retryAfter * 1000;
406
+ this.log(`Rate limited. Waiting ${error.retryAfter}s before retry (attempt ${attempt + 1}/${this.config.retries})`);
407
+ } else {
408
+ this.log(`Retry attempt ${attempt + 1}/${this.config.retries} after ${delay}ms`);
409
+ }
410
+ await new Promise((resolve) => setTimeout(resolve, delay));
411
+ }
412
+ }
413
+ }
414
+ throw lastError;
415
+ }
327
416
  async crawl(request, config) {
328
417
  const urls = Array.isArray(request.urls) ? request.urls : [request.urls];
329
418
  for (const url of urls) {
@@ -340,23 +429,131 @@ class Crawl4AI {
340
429
  });
341
430
  return this.normalizeArrayResponse(response);
342
431
  }
432
+ async crawlWithMetadata(request, config) {
433
+ const urls = Array.isArray(request.urls) ? request.urls : [request.urls];
434
+ for (const url of urls) {
435
+ this.validateUrl(url);
436
+ }
437
+ const normalizedRequest = {
438
+ ...request,
439
+ urls
440
+ };
441
+ const response = await this.requestWithRetry("/crawl", {
442
+ method: "POST",
443
+ body: JSON.stringify(normalizedRequest),
444
+ ...config
445
+ });
446
+ return this.normalizeCrawlResponse(response);
447
+ }
448
+ async* crawlStream(request, config) {
449
+ const urls = Array.isArray(request.urls) ? request.urls : [request.urls];
450
+ for (const url of urls) {
451
+ this.validateUrl(url);
452
+ }
453
+ const normalizedRequest = {
454
+ ...request,
455
+ urls
456
+ };
457
+ const response = await this.requestRawWithRetry("/crawl/stream", {
458
+ method: "POST",
459
+ body: JSON.stringify(normalizedRequest),
460
+ ...config
461
+ });
462
+ if (!response.body) {
463
+ throw new NetworkError("Streaming response body is not available");
464
+ }
465
+ const reader = response.body.getReader();
466
+ const decoder = new TextDecoder;
467
+ let buffer = "";
468
+ while (true) {
469
+ const { value, done } = await reader.read();
470
+ if (done) {
471
+ break;
472
+ }
473
+ buffer += decoder.decode(value, { stream: true });
474
+ const lines = buffer.split(`
475
+ `);
476
+ buffer = lines.pop() || "";
477
+ for (const line of lines) {
478
+ const trimmed = line.trim();
479
+ if (!trimmed) {
480
+ continue;
481
+ }
482
+ const parsed = JSON.parse(trimmed);
483
+ yield parsed;
484
+ }
485
+ }
486
+ const remaining = buffer.trim();
487
+ if (remaining) {
488
+ yield JSON.parse(remaining);
489
+ }
490
+ }
343
491
  async markdown(request, config) {
492
+ const response = await this.markdownResult(request, config);
493
+ return response.markdown;
494
+ }
495
+ async markdownResult(request, config) {
344
496
  this.validateUrl(request.url);
497
+ const apiRequest = {
498
+ url: request.url,
499
+ ...request.f !== undefined || request.filter !== undefined ? { f: request.f ?? request.filter } : {},
500
+ ...request.q !== undefined || request.query !== undefined ? { q: request.q ?? request.query } : {},
501
+ ...request.c !== undefined || request.cache !== undefined ? { c: request.c ?? request.cache } : {}
502
+ };
345
503
  const response = await this.requestWithRetry("/md", {
346
504
  method: "POST",
347
- body: JSON.stringify(request),
505
+ body: JSON.stringify(apiRequest),
348
506
  ...config
349
507
  });
350
- return typeof response === "string" ? response : response.markdown;
508
+ if (typeof response === "string") {
509
+ return {
510
+ url: request.url,
511
+ markdown: response
512
+ };
513
+ }
514
+ return response;
351
515
  }
352
516
  async html(request, config) {
517
+ const response = await this.htmlResult(request, config);
518
+ return response.html;
519
+ }
520
+ async htmlResult(request, config) {
353
521
  this.validateUrl(request.url);
354
522
  const response = await this.requestWithRetry("/html", {
355
523
  method: "POST",
356
524
  body: JSON.stringify(request),
357
525
  ...config
358
526
  });
359
- return typeof response === "string" ? response : response.html;
527
+ if (typeof response === "string") {
528
+ return { html: response, url: request.url };
529
+ }
530
+ return response;
531
+ }
532
+ async screenshot(request, config) {
533
+ const response = await this.screenshotResult(request, config);
534
+ return response.screenshot;
535
+ }
536
+ async screenshotResult(request, config) {
537
+ this.validateUrl(request.url);
538
+ const response = await this.requestWithRetry("/screenshot", {
539
+ method: "POST",
540
+ body: JSON.stringify(request),
541
+ ...config
542
+ });
543
+ return response;
544
+ }
545
+ async pdf(request, config) {
546
+ const response = await this.pdfResult(request, config);
547
+ return response.pdf;
548
+ }
549
+ async pdfResult(request, config) {
550
+ this.validateUrl(request.url);
551
+ const response = await this.requestWithRetry("/pdf", {
552
+ method: "POST",
553
+ body: JSON.stringify(request),
554
+ ...config
555
+ });
556
+ return response;
360
557
  }
361
558
  async executeJs(request, config) {
362
559
  this.validateUrl(request.url);
@@ -419,12 +616,38 @@ class Crawl4AI {
419
616
  ...config
420
617
  });
421
618
  }
619
+ async mcpSchema(config) {
620
+ return this.request("/mcp/schema", {
621
+ method: "GET",
622
+ ...config
623
+ });
624
+ }
625
+ async mcpSse(config) {
626
+ return this.requestRaw("/mcp/sse", {
627
+ method: "GET",
628
+ ...config
629
+ });
630
+ }
422
631
  async getRoot(config) {
423
632
  return this.request("/", {
424
633
  method: "GET",
425
634
  ...config
426
635
  });
427
636
  }
637
+ async token(request, config) {
638
+ return this.request("/token", {
639
+ method: "POST",
640
+ body: JSON.stringify(request),
641
+ ...config
642
+ });
643
+ }
644
+ async configDump(request, config) {
645
+ return this.request("/config/dump", {
646
+ method: "POST",
647
+ body: JSON.stringify(request),
648
+ ...config
649
+ });
650
+ }
428
651
  async testConnection(options) {
429
652
  try {
430
653
  await this.health({ timeout: HEALTH_CHECK_TIMEOUT });
package/dist/sdk.d.ts CHANGED
@@ -2,7 +2,7 @@
2
2
  * Crawl4AI TypeScript SDK
3
3
  * A comprehensive SDK for interacting with Crawl4AI REST API
4
4
  */
5
- import type { AskRequest, AskResponse, Crawl4AIConfig, CrawlRequest, CrawlResult, ExecuteJsRequest, HealthResponse, HtmlRequest, MarkdownRequest, RequestConfig } from './types';
5
+ import type { AskRequest, AskResponse, ConfigDumpRequest, ConfigDumpResponse, Crawl4AIConfig, CrawlRequest, CrawlResponse, CrawlResult, CrawlStreamChunk, ExecuteJsRequest, HealthResponse, HtmlRequest, HtmlResponse, MarkdownRequest, MarkdownResponse, McpSchemaResponse, PdfRequest, PdfResponse, RequestConfig, ScreenshotRequest, ScreenshotResponse, TokenRequest, TokenResponse } from './types';
6
6
  /**
7
7
  * Crawl4AI SDK Client - Main class for interacting with Crawl4AI REST API
8
8
  *
@@ -59,12 +59,22 @@ export declare class Crawl4AI {
59
59
  * Normalize different API response formats to a consistent array
60
60
  */
61
61
  private normalizeArrayResponse;
62
+ /**
63
+ * Normalize crawl response to include metadata when available
64
+ */
65
+ private normalizeCrawlResponse;
62
66
  /**
63
67
  * Build query parameters from an object, filtering out undefined values
64
68
  */
65
69
  private buildQueryParams;
70
+ /**
71
+ * Parse response body based on content type
72
+ */
73
+ private parseResponseBody;
66
74
  private request;
75
+ private requestRaw;
67
76
  private requestWithRetry;
77
+ private requestRawWithRetry;
68
78
  /**
69
79
  * Main crawl endpoint - Extract content from one or more URLs
70
80
  *
@@ -86,13 +96,22 @@ export declare class Crawl4AI {
86
96
  * @throws {TimeoutError} If request times out
87
97
  */
88
98
  crawl(request: CrawlRequest, config?: RequestConfig): Promise<CrawlResult[]>;
99
+ /**
100
+ * Crawl endpoint returning full metadata payload
101
+ */
102
+ crawlWithMetadata(request: CrawlRequest, config?: RequestConfig): Promise<CrawlResponse>;
103
+ /**
104
+ * Stream crawl results via NDJSON
105
+ */
106
+ crawlStream(request: CrawlRequest, config?: RequestConfig): AsyncIterable<CrawlStreamChunk>;
89
107
  /**
90
108
  * Get markdown content from URL with optional filtering
91
109
  *
92
110
  * @param request - Markdown extraction configuration
93
111
  * @param request.url - URL to extract markdown from
94
- * @param request.f - Content filter: 'raw' | 'fit' | 'bm25' | 'llm'
95
- * @param request.q - Query for BM25/LLM filtering
112
+ * @param request.filter - Content filter: 'raw' | 'fit' | 'bm25' | 'llm'
113
+ * @param request.query - Query for BM25/LLM filtering
114
+ * @param request.cache - Cache mode (e.g., 'bypass')
96
115
  * @param config - Optional request configuration
97
116
  * @returns Promise resolving to markdown string
98
117
  *
@@ -100,16 +119,40 @@ export declare class Crawl4AI {
100
119
  * ```typescript
101
120
  * const markdown = await client.markdown({
102
121
  * url: 'https://example.com',
103
- * f: 'fit'
122
+ * filter: 'fit'
104
123
  * });
105
124
  * ```
106
125
  */
107
126
  markdown(request: MarkdownRequest, config?: RequestConfig): Promise<string>;
127
+ /**
128
+ * Get markdown content with response metadata
129
+ */
130
+ markdownResult(request: MarkdownRequest, config?: RequestConfig): Promise<MarkdownResponse>;
108
131
  /**
109
132
  * Get HTML content from URL
110
133
  * @param request HTML extraction options
111
134
  */
112
135
  html(request: HtmlRequest, config?: RequestConfig): Promise<string>;
136
+ /**
137
+ * Get HTML content with response metadata
138
+ */
139
+ htmlResult(request: HtmlRequest, config?: RequestConfig): Promise<HtmlResponse>;
140
+ /**
141
+ * Capture screenshot and return base64 or file path
142
+ */
143
+ screenshot(request: ScreenshotRequest, config?: RequestConfig): Promise<string>;
144
+ /**
145
+ * Capture screenshot with response metadata
146
+ */
147
+ screenshotResult(request: ScreenshotRequest, config?: RequestConfig): Promise<ScreenshotResponse>;
148
+ /**
149
+ * Generate PDF and return base64 or file path
150
+ */
151
+ pdf(request: PdfRequest, config?: RequestConfig): Promise<string>;
152
+ /**
153
+ * Generate PDF with response metadata
154
+ */
155
+ pdfResult(request: PdfRequest, config?: RequestConfig): Promise<PdfResponse>;
113
156
  /**
114
157
  * Execute JavaScript on webpage and return results
115
158
  *
@@ -166,10 +209,26 @@ export declare class Crawl4AI {
166
209
  * Get API schema
167
210
  */
168
211
  schema(config?: RequestConfig): Promise<unknown>;
212
+ /**
213
+ * Get MCP schema
214
+ */
215
+ mcpSchema(config?: RequestConfig): Promise<McpSchemaResponse>;
216
+ /**
217
+ * Connect to MCP Server-Sent Events stream
218
+ */
219
+ mcpSse(config?: RequestConfig): Promise<Response>;
169
220
  /**
170
221
  * Get root endpoint information
171
222
  */
172
223
  getRoot(config?: RequestConfig): Promise<string>;
224
+ /**
225
+ * Request an API token
226
+ */
227
+ token(request: TokenRequest, config?: RequestConfig): Promise<TokenResponse>;
228
+ /**
229
+ * Dump server configuration
230
+ */
231
+ configDump(request: ConfigDumpRequest, config?: RequestConfig): Promise<ConfigDumpResponse>;
173
232
  /**
174
233
  * Test connection to the Crawl4AI API server
175
234
  *
package/dist/types.d.ts CHANGED
@@ -14,7 +14,14 @@ export interface Viewport {
14
14
  export interface BrowserConfig {
15
15
  headless?: boolean;
16
16
  browser_type?: BrowserType;
17
+ browser_mode?: 'dedicated' | 'builtin' | 'custom' | 'docker';
18
+ use_managed_browser?: boolean;
19
+ cdp_url?: string;
20
+ debugging_port?: number;
21
+ host?: string;
22
+ proxy_config?: ProxyConfig;
17
23
  user_agent?: string;
24
+ user_agent_mode?: string;
18
25
  proxy?: string;
19
26
  page_timeout?: number;
20
27
  verbose?: boolean;
@@ -22,8 +29,13 @@ export interface BrowserConfig {
22
29
  magic?: boolean;
23
30
  override_navigator?: boolean;
24
31
  user_data_dir?: string;
25
- use_managed_browser?: boolean;
32
+ use_persistent_context?: boolean;
33
+ text_mode?: boolean;
34
+ light_mode?: boolean;
35
+ enable_stealth?: boolean;
26
36
  viewport?: Viewport;
37
+ viewport_width?: number;
38
+ viewport_height?: number;
27
39
  headers?: Record<string, string>;
28
40
  cookies?: Cookie[];
29
41
  extra_args?: string[];
@@ -31,6 +43,7 @@ export interface BrowserConfig {
31
43
  java_script_enabled?: boolean;
32
44
  accept_downloads?: boolean;
33
45
  downloads_path?: string;
46
+ extra?: Record<string, unknown>;
34
47
  }
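For illustration, a `BrowserConfig` literal exercising some of the newly typed fields; values are placeholders and the type import assumes the interfaces are re-exported from the package entry (as other names such as `Crawl4AIError` are):

```typescript
import type { BrowserConfig } from 'crawl4ai'; // assumed re-export

// Sketch: new BrowserConfig options added in this release
const browserConfig: BrowserConfig = {
  headless: true,
  browser_mode: 'dedicated',
  enable_stealth: true,
  viewport_width: 1280,
  viewport_height: 720,
  proxy_config: { server: 'http://proxy.example.com:8080' },
};
```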
35
48
  export interface Cookie {
36
49
  name: string;
@@ -77,6 +90,16 @@ export interface CosineExtractionParams {
77
90
  top_k?: number;
78
91
  model_name?: string;
79
92
  }
93
+ export interface ProxyConfig {
94
+ server: string;
95
+ username?: string;
96
+ password?: string;
97
+ }
98
+ export interface GeolocationConfig {
99
+ latitude: number;
100
+ longitude: number;
101
+ accuracy?: number;
102
+ }
80
103
  export type ExtractionStrategy = {
81
104
  type: 'json_css';
82
105
  params: JsonCssExtractionParams;
@@ -91,20 +114,28 @@ export interface CrawlerRunConfig {
91
114
  word_count_threshold?: number;
92
115
  extraction_strategy?: ExtractionStrategy;
93
116
  chunking_strategy?: ChunkingStrategy;
117
+ markdown_generator?: Record<string, unknown>;
94
118
  css_selector?: string;
95
119
  screenshot?: boolean;
96
120
  pdf?: boolean;
121
+ capture_mhtml?: boolean;
97
122
  cache_mode?: CacheMode;
98
123
  bypass_cache?: boolean;
99
124
  disable_cache?: boolean;
100
125
  no_cache_read?: boolean;
101
126
  no_cache_write?: boolean;
127
+ capture_network_requests?: boolean;
128
+ capture_console_messages?: boolean;
102
129
  log_console?: boolean;
103
130
  stream?: boolean;
104
131
  warmup?: boolean;
105
- js_code?: string[];
132
+ js_code?: string | string[];
133
+ c4a_script?: string | string[];
106
134
  js_only?: boolean;
107
135
  wait_for?: string;
136
+ wait_until?: string;
137
+ scan_full_page?: boolean;
138
+ scroll_delay?: number;
108
139
  page_timeout?: number;
109
140
  delay_before_return_html?: number;
110
141
  remove_overlay_elements?: boolean;
@@ -116,6 +147,15 @@ export interface CrawlerRunConfig {
116
147
  ignore_robots_txt?: boolean;
117
148
  anti_bot?: boolean;
118
149
  light_mode?: boolean;
150
+ locale?: string;
151
+ timezone_id?: string;
152
+ geolocation?: GeolocationConfig;
153
+ proxy_config?: ProxyConfig;
154
+ proxy_rotation_strategy?: string | Record<string, unknown>;
155
+ url_matcher?: string | string[] | Record<string, unknown>;
156
+ match_mode?: 'or' | 'and' | 'OR' | 'AND' | string;
157
+ scraping_strategy?: Record<string, unknown>;
158
+ verbose?: boolean;
119
159
  extra?: Record<string, unknown>;
120
160
  }
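Similarly, a sketch of a `CrawlerRunConfig` using some of the newly added fields (placeholder values; type import assumed to be re-exported from the package entry):

```typescript
import type { CrawlerRunConfig } from 'crawl4ai'; // assumed re-export

// Sketch: new CrawlerRunConfig options added in this release
const crawlerConfig: CrawlerRunConfig = {
  locale: 'en-US',
  timezone_id: 'America/New_York',
  geolocation: { latitude: 40.71, longitude: -74.0 },
  scan_full_page: true,
  scroll_delay: 0.2,
  js_code: 'window.scrollTo(0, document.body.scrollHeight);',
};
```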
121
161
  export interface ChunkingStrategy {
@@ -133,9 +173,12 @@ export interface CrawlRequest {
133
173
  }
134
174
  export interface MarkdownRequest {
135
175
  url: string;
176
+ filter?: ContentFilter;
177
+ query?: string;
178
+ cache?: string;
136
179
  f?: ContentFilter;
137
- q?: string;
138
- c?: string;
180
+ q?: string | null;
181
+ c?: string | null;
139
182
  }
140
183
  export interface HtmlRequest {
141
184
  url: string;
@@ -165,6 +208,46 @@ export interface AskRequest {
165
208
  score_ratio?: number;
166
209
  max_results?: number;
167
210
  }
211
+ export interface MarkdownGenerationResult {
212
+ raw_markdown: string;
213
+ markdown_with_citations: string;
214
+ references_markdown: string;
215
+ fit_markdown?: string;
216
+ fit_html?: string;
217
+ }
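Elsewhere in this diff, `CrawlResult.markdown` widens to `string | MarkdownGenerationResult`, so a small narrowing helper keeps call sites simple. A sketch, assuming the type is re-exported from the package entry:

```typescript
import type { MarkdownGenerationResult } from 'crawl4ai'; // assumed re-export

// Sketch: narrow the markdown union back to a plain string
function rawMarkdown(markdown?: string | MarkdownGenerationResult): string | undefined {
  return typeof markdown === 'string' ? markdown : markdown?.raw_markdown;
}
```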
218
+ export interface CrawlResponse {
219
+ success?: boolean;
220
+ results: CrawlResult[];
221
+ server_processing_time_s?: number;
222
+ server_memory_delta_mb?: number;
223
+ server_peak_memory_mb?: number;
224
+ }
225
+ export interface CrawlStreamStatus {
226
+ status: string;
227
+ [key: string]: unknown;
228
+ }
229
+ export type CrawlStreamChunk = CrawlResult | CrawlStreamStatus;
230
+ export interface MarkdownResponse {
231
+ url: string;
232
+ filter?: string | null;
233
+ query?: string | null;
234
+ cache?: string | null;
235
+ markdown: string;
236
+ success?: boolean;
237
+ }
238
+ export interface HtmlResponse {
239
+ html: string;
240
+ url?: string;
241
+ success?: boolean;
242
+ }
243
+ export interface ScreenshotResponse {
244
+ screenshot: string;
245
+ success?: boolean;
246
+ }
247
+ export interface PdfResponse {
248
+ pdf: string;
249
+ success?: boolean;
250
+ }
168
251
  export interface CrawlResult {
169
252
  url: string;
170
253
  html: string;
@@ -177,7 +260,7 @@ export interface CrawlResult {
177
260
  screenshot?: string;
178
261
  pdf?: string;
179
262
  mhtml?: string;
180
- markdown?: string;
263
+ markdown?: string | MarkdownGenerationResult;
181
264
  fit_markdown?: string;
182
265
  raw_markdown?: string;
183
266
  markdown_with_citations?: string;
@@ -197,6 +280,8 @@ export interface CrawlResult {
197
280
  crawl_depth?: number;
198
281
  text?: string;
199
282
  cookies?: Cookie[];
283
+ tables?: Array<Record<string, unknown>>;
284
+ server_memory_mb?: number;
200
285
  }
201
286
  export interface MediaInfo {
202
287
  images: MediaItem[];
@@ -207,8 +292,10 @@ export interface MediaItem {
207
292
  src: string;
208
293
  alt?: string;
209
294
  desc?: string;
295
+ description?: string;
210
296
  score?: number;
211
297
  type?: string;
298
+ mime_type?: string;
212
299
  }
213
300
  export interface LinksInfo {
214
301
  internal: LinkItem[];
@@ -218,6 +305,9 @@ export interface LinkItem {
218
305
  href: string;
219
306
  text?: string;
220
307
  title?: string;
308
+ base_domain?: string;
309
+ relevance_score?: number;
310
+ type?: string;
221
311
  }
222
312
  export interface PageMetadata {
223
313
  title?: string;
@@ -246,8 +336,12 @@ export interface HealthResponse {
246
336
  timestamp: number;
247
337
  version: string;
248
338
  }
339
+ export type ConfigDumpResponse = Record<string, unknown> | string;
340
+ export type McpSchemaResponse = Record<string, unknown>;
249
341
  export interface TokenResponse {
250
- token: string;
342
+ email: string;
343
+ access_token: string;
344
+ token_type: string;
251
345
  }
252
346
  export interface AskResponse {
253
347
  context: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawl4ai",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "TypeScript SDK for Crawl4AI REST API - Bun & Node.js compatible",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -9,6 +9,7 @@
9
9
  "build": "bun build ./src/index.ts --outdir ./dist --target node",
10
10
  "build:types": "tsc --declaration --emitDeclarationOnly --outDir dist",
11
11
  "test": "bun test",
12
+ "smoke": "bun run smoke-tests/run.ts",
12
13
  "lint": "biome check src --assist-enabled=true",
13
14
  "fix": "biome check --write src --assist-enabled=true",
14
15
  "format": "biome format --write src",