@crawlgate/sdk 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,1299 @@
+ "use strict";
+ var __create = Object.create;
+ var __defProp = Object.defineProperty;
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+ var __getOwnPropNames = Object.getOwnPropertyNames;
+ var __getProtoOf = Object.getPrototypeOf;
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
+ var __export = (target, all) => {
+   for (var name in all)
+     __defProp(target, name, { get: all[name], enumerable: true });
+ };
+ var __copyProps = (to, from, except, desc) => {
+   if (from && typeof from === "object" || typeof from === "function") {
+     for (let key of __getOwnPropNames(from))
+       if (!__hasOwnProp.call(to, key) && key !== except)
+         __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+   }
+   return to;
+ };
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+   // If the importer is in node compatibility mode or this is not an ESM
+   // file that has been converted to a CommonJS file using a Babel-
+   // compatible transform (i.e. "__esModule" has not been set), then set
+   // "default" to the CommonJS "module.exports" for node compatibility.
+   isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+   mod
+ ));
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+ // src/index.ts
+ var index_exports = {};
+ __export(index_exports, {
+   AuthenticationError: () => AuthenticationError,
+   CrawlGateClient: () => CrawlGateClient,
+   CrawlGateError: () => CrawlGateError,
+   ExtractionError: () => ExtractionError,
+   JobTimeoutError: () => JobTimeoutError,
+   RateLimitError: () => RateLimitError,
+   ServiceUnavailableError: () => ServiceUnavailableError,
+   ValidationError: () => ValidationError,
+   default: () => CrawlGateClient
+ });
+ module.exports = __toCommonJS(index_exports);
+
+ // src/utils/httpClient.ts
+ var import_axios = __toESM(require("axios"), 1);
+
+ // src/errors.ts
+ var CrawlGateError = class _CrawlGateError extends Error {
+   /**
+    * HTTP status code (if applicable)
+    */
+   statusCode;
+   /**
+    * Error code for programmatic handling
+    */
+   code;
+   /**
+    * Additional error details
+    */
+   details;
+   constructor(message, statusCode, code, details) {
+     super(message);
+     this.name = "CrawlGateError";
+     this.statusCode = statusCode;
+     this.code = code;
+     this.details = details;
+     if (Error.captureStackTrace) {
+       Error.captureStackTrace(this, _CrawlGateError);
+     }
+   }
+ };
+ var AuthenticationError = class extends CrawlGateError {
+   constructor(message = "Invalid API Key") {
+     super(message, 401, "AUTHENTICATION_ERROR");
+     this.name = "AuthenticationError";
+   }
+ };
+ var ValidationError = class extends CrawlGateError {
+   constructor(message, details) {
+     super(message, 400, "VALIDATION_ERROR", details);
+     this.name = "ValidationError";
+   }
+ };
+ var JobTimeoutError = class extends CrawlGateError {
+   /**
+    * Job ID that timed out
+    */
+   jobId;
+   /**
+    * Timeout duration in seconds
+    */
+   timeoutSeconds;
+   constructor(jobId, timeoutSeconds) {
+     super(
+       `Crawl job ${jobId} did not complete within ${timeoutSeconds} seconds`,
+       void 0,
+       "JOB_TIMEOUT"
+     );
+     this.name = "JobTimeoutError";
+     this.jobId = jobId;
+     this.timeoutSeconds = timeoutSeconds;
+   }
+ };
+ var ServiceUnavailableError = class extends CrawlGateError {
+   constructor(message = "Service temporarily unavailable") {
+     super(message, 503, "SERVICE_UNAVAILABLE");
+     this.name = "ServiceUnavailableError";
+   }
+ };
+ var RateLimitError = class extends CrawlGateError {
+   /**
+    * Time to wait before retrying (in seconds)
+    */
+   retryAfter;
+   constructor(message = "Rate limit exceeded", retryAfter) {
+     super(message, 429, "RATE_LIMIT_EXCEEDED");
+     this.name = "RateLimitError";
+     this.retryAfter = retryAfter;
+   }
+ };
+ var ExtractionError = class extends CrawlGateError {
+   /**
+    * Provider that failed
+    */
+   provider;
+   constructor(message, provider) {
+     super(message, void 0, "EXTRACTION_ERROR");
+     this.name = "ExtractionError";
+     this.provider = provider;
+   }
+ };
+ function parseApiError(status, data) {
+   const message = data.error || data.message || "Unknown error";
+   switch (status) {
+     case 400:
+       throw new ValidationError(message, data.details);
+     case 401:
+       throw new AuthenticationError(message);
+     case 429:
+       throw new RateLimitError(message);
+     case 502:
+     case 503:
+       throw new ServiceUnavailableError(message);
+     default:
+       throw new CrawlGateError(message, status, void 0, data.details);
+   }
+ }
+
+ // src/utils/httpClient.ts
+ var HttpClient = class {
+   instance;
+   apiKey;
+   apiUrl;
+   maxRetries;
+   backoffFactor;
+   constructor(options) {
+     this.apiKey = options.apiKey;
+     this.apiUrl = options.apiUrl.replace(/\/$/, "");
+     this.maxRetries = options.maxRetries ?? 3;
+     this.backoffFactor = options.backoffFactor ?? 0.5;
+     this.instance = import_axios.default.create({
+       baseURL: this.apiUrl,
+       timeout: options.timeoutMs ?? 9e4,
+       headers: {
+         "Content-Type": "application/json",
+         "x-api-key": this.apiKey
+       }
+     });
+   }
+   /**
+    * Get the configured API URL
+    */
+   getApiUrl() {
+     return this.apiUrl;
+   }
+   /**
+    * Get the configured API key
+    */
+   getApiKey() {
+     return this.apiKey;
+   }
+   /**
+    * Sleep for specified seconds
+    */
+   sleep(seconds) {
+     return new Promise((resolve) => setTimeout(resolve, seconds * 1e3));
+   }
+   /**
+    * Check if error is retryable
+    */
+   isRetryableError(status) {
+     return status === 502 || status === 503 || status === 429;
+   }
+   /**
+    * Make HTTP request with retry logic
+    */
+   async request(config) {
+     let lastError;
+     for (let attempt = 0; attempt < this.maxRetries; attempt++) {
+       try {
+         if (config.method && ["post", "put", "patch"].includes(config.method.toLowerCase())) {
+           const data = config.data ?? {};
+           config.data = { ...data, origin: "crawlgate-sdk" };
+           if (typeof data.timeout === "number") {
+             config.timeout = data.timeout + 5e3;
+           }
+         }
+         const response = await this.instance.request(config);
+         if (this.isRetryableError(response.status) && attempt < this.maxRetries - 1) {
+           await this.sleep(this.backoffFactor * Math.pow(2, attempt));
+           continue;
+         }
+         return response;
+       } catch (err) {
+         const axiosError = err;
+         lastError = err instanceof Error ? err : new Error(String(axiosError?.message));
+         const status = axiosError?.response?.status;
+         if (this.isRetryableError(status) && attempt < this.maxRetries - 1) {
+           await this.sleep(this.backoffFactor * Math.pow(2, attempt));
+           continue;
+         }
+         if (axiosError?.response) {
+           parseApiError(
+             axiosError.response.status,
+             axiosError.response.data
+           );
+         }
+         throw lastError;
+       }
+     }
+     throw lastError ?? new CrawlGateError("Unexpected HTTP client error");
+   }
+   /**
+    * Make POST request
+    */
+   async post(endpoint, body, headers) {
+     return this.request({
+       method: "post",
+       url: endpoint,
+       data: body,
+       headers
+     });
+   }
+   /**
+    * Make GET request
+    */
+   async get(endpoint, headers) {
+     return this.request({
+       method: "get",
+       url: endpoint,
+       headers
+     });
+   }
+   /**
+    * Make DELETE request
+    */
+   async delete(endpoint, headers) {
+     return this.request({
+       method: "delete",
+       url: endpoint,
+       headers
+     });
+   }
+ };
+
+ // src/methods/scrape.ts
+ var import_zod_to_json_schema = require("zod-to-json-schema");
+ function convertSchema(schema) {
+   if (schema && typeof schema === "object" && "_def" in schema) {
+     return (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
+   }
+   return schema;
+ }
+ function buildScrapeBody(url, options) {
+   const body = { url };
+   if (options?.engine) {
+     body.engine = options.engine;
+   }
+   if (options?.formats) {
+     body.formats = options.formats;
+   }
+   if (options?.onlyMainContent !== void 0) {
+     body.onlyMainContent = options.onlyMainContent;
+   }
+   if (options?.excludeTags) {
+     body.excludeTags = options.excludeTags;
+   }
+   if (options?.waitFor !== void 0) {
+     body.waitFor = options.waitFor;
+   }
+   if (options?.timeout !== void 0) {
+     body.timeout = options.timeout;
+   }
+   if (options?.proxy) {
+     body.proxy = options.proxy;
+   }
+   if (options?.projectId) {
+     body.project_id = options.projectId;
+   }
+   if (options?.extract) {
+     body.extract = {
+       schema: convertSchema(options.extract.schema),
+       systemPrompt: options.extract.systemPrompt,
+       provider: options.extract.provider,
+       enableFallback: options.extract.enableFallback
+     };
+     Object.keys(body.extract).forEach((key) => {
+       if (body.extract[key] === void 0) {
+         delete body.extract[key];
+       }
+     });
+   }
+   return body;
+ }
+ async function scrape(http, url, options) {
+   const body = buildScrapeBody(url, options);
+   const response = await http.post("/v1/scrape", body);
+   if (!response.data.success) {
+     throw new CrawlGateError(
+       response.data.error || "Scrape failed",
+       void 0,
+       "SCRAPE_ERROR"
+     );
+   }
+   if (!response.data.data) {
+     throw new CrawlGateError("No data returned from scrape", void 0, "NO_DATA");
+   }
+   const document = {
+     ...response.data.data
+   };
+   return document;
+ }
+
+ // src/methods/crawl.ts
+ function buildCrawlBody(url, options) {
+   const body = { url };
+   if (options?.engine) {
+     body.engine = options.engine;
+   }
+   if (options?.limit !== void 0) {
+     body.limit = options.limit;
+   }
+   if (options?.formats) {
+     body.formats = options.formats;
+   }
+   if (options?.onlyMainContent !== void 0) {
+     body.onlyMainContent = options.onlyMainContent;
+   }
+   if (options?.excludeTags) {
+     body.excludeTags = options.excludeTags;
+   }
+   if (options?.proxy) {
+     body.proxy = options.proxy;
+   }
+   if (options?.projectId) {
+     body.project_id = options.projectId;
+   }
+   return body;
+ }
+ async function startCrawl(http, url, options) {
+   const body = buildCrawlBody(url, options);
+   const response = await http.post("/v1/crawl", body);
+   if (!response.data.success && !response.data.id) {
+     throw new CrawlGateError(
+       "Failed to start crawl job",
+       void 0,
+       "CRAWL_START_ERROR"
+     );
+   }
+   return {
+     success: true,
+     id: response.data.id,
+     jobId: response.data.id,
+     status: response.data.status || "scraping",
+     engine: response.data.engine
+   };
+ }
+ async function getCrawlStatus(http, jobId) {
+   const response = await http.get(`/v1/crawl/${jobId}`);
+   return {
+     id: response.data.id || jobId,
+     status: response.data.status,
+     total: response.data.total || 0,
+     completed: response.data.completed || 0,
+     data: response.data.data || [],
+     engine: response.data.engine,
+     error: response.data.error
+   };
+ }
+ async function cancelCrawl(http, jobId) {
+   const response = await http.delete(
+     `/v1/crawl/${jobId}`
+   );
+   return response.data.success !== false;
+ }
+ function sleep(ms) {
+   return new Promise((resolve) => setTimeout(resolve, ms));
+ }
+ async function crawl(http, url, options) {
+   const pollInterval = options?.pollInterval ?? 2e3;
+   const timeout = options?.timeout ?? 300;
+   const { id: jobId } = await startCrawl(http, url, options);
+   const startTime = Date.now();
+   const timeoutMs = timeout * 1e3;
+   while (true) {
+     const status = await getCrawlStatus(http, jobId);
+     if (status.status === "completed") {
+       return status;
+     }
+     if (status.status === "failed") {
+       throw new CrawlGateError(
+         status.error || "Crawl job failed",
+         void 0,
+         "CRAWL_FAILED"
+       );
+     }
+     if (status.status === "cancelled") {
+       throw new CrawlGateError(
+         "Crawl job was cancelled",
+         void 0,
+         "CRAWL_CANCELLED"
+       );
+     }
+     if (Date.now() - startTime > timeoutMs) {
+       throw new JobTimeoutError(jobId, timeout);
+     }
+     await sleep(pollInterval);
+   }
+ }
+ async function getCrawlErrors(http, jobId) {
+   const response = await http.get(`/v1/crawl/${jobId}/errors`);
+   const payload = response.data.data ?? response.data;
+   return {
+     errors: (payload.errors || []).map((e) => ({
+       id: e.id || "",
+       timestamp: e.timestamp,
+       url: e.url || "",
+       code: e.code,
+       error: e.error || e.message || "Unknown error"
+     })),
+     robotsBlocked: payload.robotsBlocked || []
+   };
+ }
+
+ // src/methods/map.ts
+ function buildMapBody(url, options) {
+   const body = { url };
+   if (options?.engine) {
+     body.engine = options.engine;
+   }
+   if (options?.proxy) {
+     body.proxy = options.proxy;
+   }
+   if (options?.projectId) {
+     body.project_id = options.projectId;
+   }
+   return body;
+ }
+ async function map(http, url, options) {
+   const body = buildMapBody(url, options);
+   const response = await http.post("/v1/map", body);
+   if (!response.data.success) {
+     throw new CrawlGateError(
+       response.data.error || "Map failed",
+       void 0,
+       "MAP_ERROR"
+     );
+   }
+   return {
+     success: true,
+     links: response.data.links || [],
+     count: response.data.count || response.data.links?.length || 0,
+     engine: response.data.engine
+   };
+ }
+
+ // src/methods/search.ts
+ var import_zod_to_json_schema2 = require("zod-to-json-schema");
+ function convertSchema2(schema) {
+   if (schema && typeof schema === "object" && "_def" in schema) {
+     return (0, import_zod_to_json_schema2.zodToJsonSchema)(schema);
+   }
+   return schema;
+ }
+ function buildSearchBody(query, options) {
+   const body = { query };
+   if (options?.limit !== void 0) {
+     body.limit = options.limit;
+   }
+   if (options?.lang) {
+     body.lang = options.lang;
+   }
+   if (options?.country) {
+     body.country = options.country;
+   }
+   if (options?.engines) {
+     body.engines = options.engines;
+   }
+   if (options?.scrapeOptions) {
+     body.scrapeOptions = options.scrapeOptions;
+   }
+   if (options?.engine) {
+     body.engine = options.engine;
+   }
+   if (options?.projectId) {
+     body.project_id = options.projectId;
+   }
+   if (options?.extract) {
+     body.extract = {
+       schema: convertSchema2(options.extract.schema),
+       systemPrompt: options.extract.systemPrompt,
+       provider: options.extract.provider,
+       enableFallback: options.extract.enableFallback
+     };
+     Object.keys(body.extract).forEach((key) => {
+       if (body.extract[key] === void 0) {
+         delete body.extract[key];
+       }
+     });
+   }
+   return body;
+ }
+ async function search(http, query, options) {
+   const body = buildSearchBody(query, options);
+   const response = await http.post("/v1/search", body);
+   if (!response.data.success) {
+     throw new CrawlGateError(
+       response.data.error || "Search failed",
+       void 0,
+       "SEARCH_ERROR"
+     );
+   }
+   return {
+     success: true,
+     data: response.data.data || [],
+     query: response.data.query || query,
+     totalResults: response.data.totalResults,
+     searchTime: response.data.searchTime,
+     extract: response.data.extract
+   };
+ }
+
+ // src/methods/batch.ts
+ function buildBatchBody(urls, options) {
+   const body = { urls };
+   if (options?.options) {
+     const scrapeOpts = options.options;
+     if (scrapeOpts.engine) body.engine = scrapeOpts.engine;
+     if (scrapeOpts.formats) body.formats = scrapeOpts.formats;
+     if (scrapeOpts.onlyMainContent !== void 0) body.onlyMainContent = scrapeOpts.onlyMainContent;
+     if (scrapeOpts.excludeTags) body.excludeTags = scrapeOpts.excludeTags;
+     if (scrapeOpts.waitFor !== void 0) body.waitFor = scrapeOpts.waitFor;
+     if (scrapeOpts.timeout !== void 0) body.timeout = scrapeOpts.timeout;
+     if (scrapeOpts.proxy) body.proxy = scrapeOpts.proxy;
+   }
+   if (options?.webhook != null) {
+     body.webhook = options.webhook;
+   }
+   if (options?.appendToId != null) {
+     body.appendToId = options.appendToId;
+   }
+   if (options?.ignoreInvalidURLs != null) {
+     body.ignoreInvalidURLs = options.ignoreInvalidURLs;
+   }
+   if (options?.maxConcurrency != null) {
+     body.maxConcurrency = options.maxConcurrency;
+   }
+   if (options?.projectId) {
+     body.project_id = options.projectId;
+   }
+   return body;
+ }
+ async function startBatchScrape(http, urls, options) {
+   if (!Array.isArray(urls) || urls.length === 0) {
+     throw new CrawlGateError("URLs array cannot be empty", 400, "VALIDATION_ERROR");
+   }
+   const body = buildBatchBody(urls, options);
+   const headers = {};
+   if (options?.idempotencyKey) {
+     headers["Idempotency-Key"] = options.idempotencyKey;
+   }
+   const response = await http.post(
+     "/v1/batch/scrape",
+     body,
+     Object.keys(headers).length > 0 ? headers : void 0
+   );
+   if (!response.data.success && !response.data.id) {
+     throw new CrawlGateError(
+       response.data.error || "Failed to start batch scrape job",
+       void 0,
+       "BATCH_START_ERROR"
+     );
+   }
+   return {
+     success: true,
+     id: response.data.id,
+     url: response.data.url,
+     invalidURLs: response.data.invalidURLs
+   };
+ }
+ async function getBatchScrapeStatus(http, jobId) {
+   const response = await http.get(
+     `/v1/batch/scrape/${jobId}`
+   );
+   return {
+     id: response.data.id || jobId,
+     status: response.data.status,
+     total: response.data.total || 0,
+     completed: response.data.completed || 0,
+     creditsUsed: response.data.creditsUsed,
+     expiresAt: response.data.expiresAt,
+     next: response.data.next ?? null,
+     data: response.data.data || [],
+     error: response.data.error
+   };
+ }
+ async function cancelBatchScrape(http, jobId) {
+   const response = await http.delete(
+     `/v1/batch/scrape/${jobId}`
+   );
+   return response.data.status === "cancelled" || response.data.success !== false;
+ }
+ async function getBatchScrapeErrors(http, jobId) {
+   const response = await http.get(`/v1/batch/scrape/${jobId}/errors`);
+   const payload = response.data.data ?? response.data;
+   return {
+     errors: (payload.errors || []).map((e) => ({
+       id: e.id || "",
+       timestamp: e.timestamp,
+       url: e.url || "",
+       code: e.code,
+       error: e.error || e.message || "Unknown error"
+     })),
+     robotsBlocked: payload.robotsBlocked || []
+   };
+ }
+ function sleep2(ms) {
+   return new Promise((resolve) => setTimeout(resolve, ms));
+ }
+ async function waitForBatchCompletion(http, jobId, pollInterval = 2e3, timeout) {
+   const startTime = Date.now();
+   const timeoutMs = timeout ? timeout * 1e3 : void 0;
+   while (true) {
+     const status = await getBatchScrapeStatus(http, jobId);
+     if (status.status === "completed") {
+       return status;
+     }
+     if (status.status === "failed") {
+       throw new CrawlGateError(
+         status.error || "Batch scrape job failed",
+         void 0,
+         "BATCH_FAILED"
+       );
+     }
+     if (status.status === "cancelled") {
+       throw new CrawlGateError(
+         "Batch scrape job was cancelled",
+         void 0,
+         "BATCH_CANCELLED"
+       );
+     }
+     if (timeoutMs && Date.now() - startTime > timeoutMs) {
+       throw new JobTimeoutError(jobId, timeout);
+     }
+     await sleep2(Math.max(1e3, pollInterval));
+   }
+ }
+ async function batchScrape(http, urls, options) {
+   const pollInterval = options?.pollInterval ?? 2e3;
+   const timeout = options?.timeout;
+   const { id: jobId } = await startBatchScrape(http, urls, options);
+   return waitForBatchCompletion(http, jobId, pollInterval, timeout);
+ }
+
+ // src/methods/extract.ts
+ var import_zod_to_json_schema3 = require("zod-to-json-schema");
+ function isZodSchema(value) {
+   return value !== null && typeof value === "object" && "_def" in value && (typeof value.safeParse === "function" || typeof value.parse === "function");
+ }
+ function convertSchema3(schema) {
+   if (isZodSchema(schema)) {
+     return (0, import_zod_to_json_schema3.zodToJsonSchema)(schema);
+   }
+   return schema;
+ }
+ function buildExtractBody(options) {
+   const body = {};
+   if (options.urls) {
+     body.urls = options.urls;
+   }
+   if (options.prompt != null) {
+     body.prompt = options.prompt;
+   }
+   if (options.schema != null) {
+     body.schema = convertSchema3(options.schema);
+   }
+   if (options.systemPrompt != null) {
+     body.systemPrompt = options.systemPrompt;
+   }
+   if (options.allowExternalLinks != null) {
+     body.allowExternalLinks = options.allowExternalLinks;
+   }
+   if (options.enableWebSearch != null) {
+     body.enableWebSearch = options.enableWebSearch;
+   }
+   if (options.showSources != null) {
+     body.showSources = options.showSources;
+   }
+   if (options.ignoreInvalidURLs != null) {
+     body.ignoreInvalidURLs = options.ignoreInvalidURLs;
+   }
+   if (options.provider) {
+     body.provider = options.provider;
+   }
+   if (options.projectId) {
+     body.project_id = options.projectId;
+   }
+   if (options.scrapeOptions) {
+     body.scrapeOptions = options.scrapeOptions;
+   }
+   return body;
+ }
+ async function startExtract(http, options) {
+   const body = buildExtractBody(options);
+   const response = await http.post("/v1/extract", body);
+   if (response.data.success === false && response.data.error) {
+     throw new CrawlGateError(
+       response.data.error,
+       void 0,
+       "EXTRACT_ERROR"
+     );
+   }
+   return response.data;
+ }
+ async function getExtractStatus(http, jobId) {
+   const response = await http.get(`/v1/extract/${jobId}`);
+   if (response.data.success === false && response.data.error) {
+     throw new CrawlGateError(
+       response.data.error,
+       void 0,
+       "EXTRACT_STATUS_ERROR"
+     );
+   }
+   return response.data;
+ }
+ function sleep3(ms) {
+   return new Promise((resolve) => setTimeout(resolve, ms));
+ }
+ async function waitForExtractCompletion(http, jobId, pollInterval = 2e3, timeout) {
+   const startTime = Date.now();
+   const timeoutMs = timeout ? timeout * 1e3 : void 0;
+   while (true) {
+     const status = await getExtractStatus(http, jobId);
+     if (status.status === "completed") {
+       return status;
+     }
+     if (status.status === "failed") {
+       throw new CrawlGateError(
+         status.error || "Extract job failed",
+         void 0,
+         "EXTRACT_FAILED"
+       );
+     }
+     if (status.status === "cancelled") {
+       throw new CrawlGateError(
+         "Extract job was cancelled",
+         void 0,
+         "EXTRACT_CANCELLED"
+       );
+     }
+     if (timeoutMs && Date.now() - startTime > timeoutMs) {
+       throw new JobTimeoutError(jobId, timeout);
+     }
+     await sleep3(Math.max(1e3, pollInterval));
+   }
+ }
+ async function extract(http, options) {
+   const pollInterval = options.pollInterval ?? 2e3;
+   const timeout = options.timeout;
+   const started = await startExtract(http, options);
+   if (!started.id) {
+     return started;
+   }
+   if (started.status === "completed") {
+     return started;
+   }
+   return waitForExtractCompletion(http, started.id, pollInterval, timeout);
+ }
+
+ // src/methods/usage.ts
+ async function getConcurrency(http) {
+   const response = await http.get("/v1/concurrency");
+   return {
+     concurrency: response.data.concurrency ?? 0,
+     maxConcurrency: response.data.maxConcurrency ?? 0
+   };
+ }
+ async function getCreditUsage(http) {
+   const response = await http.get("/v1/credits");
+   return {
+     remainingCredits: response.data.remainingCredits ?? 0,
+     planCredits: response.data.planCredits,
+     billingPeriodStart: response.data.billingPeriodStart,
+     billingPeriodEnd: response.data.billingPeriodEnd
+   };
+ }
+ async function getTokenUsage(http) {
+   const response = await http.get("/v1/tokens");
+   return {
+     remainingTokens: response.data.remainingTokens ?? 0,
+     planTokens: response.data.planTokens,
+     billingPeriodStart: response.data.billingPeriodStart,
+     billingPeriodEnd: response.data.billingPeriodEnd
+   };
+ }
+ async function getQueueStatus(http) {
+   const response = await http.get("/v1/queue");
+   return {
+     success: response.data.success ?? true,
+     jobsInQueue: response.data.jobsInQueue ?? 0,
+     activeJobsInQueue: response.data.activeJobsInQueue ?? 0,
+     waitingJobsInQueue: response.data.waitingJobsInQueue ?? 0,
+     maxConcurrency: response.data.maxConcurrency ?? 0,
+     mostRecentSuccess: response.data.mostRecentSuccess
+   };
+ }
+
+ // src/client.ts
+ var CrawlGateClient = class {
+   http;
+   /**
+    * Create a new CrawlGate client
+    *
+    * @param options - Client configuration options
+    * @throws {CrawlGateError} If API key is not provided
+    */
+   constructor(options = {}) {
+     const apiKey = options.apiKey ?? process.env.CRAWLGATE_API_KEY ?? "";
+     const apiUrl = (options.apiUrl ?? process.env.CRAWLGATE_API_URL ?? "https://api.crawlgate.io").replace(/\/$/, "");
+     if (!apiKey) {
+       throw new CrawlGateError(
+         "API key is required. Set CRAWLGATE_API_KEY env variable or pass apiKey option.",
+         void 0,
+         "MISSING_API_KEY"
+       );
+     }
+     this.http = new HttpClient({
+       apiKey,
+       apiUrl,
+       timeoutMs: options.timeoutMs,
+       maxRetries: options.maxRetries,
+       backoffFactor: options.backoffFactor
+     });
+   }
+   // ==========================================================================
+   // Scrape Methods
+   // ==========================================================================
+   /**
+    * Scrape a single URL
+    *
+    * @param url - URL to scrape
+    * @param options - Scrape options
+    * @returns Scraped document with requested formats
+    *
+    * @example
+    * ```typescript
+    * const doc = await client.scrape('https://example.com', {
+    *   engine: 'smart',
+    *   formats: ['markdown', 'html'],
+    *   onlyMainContent: true
+    * });
+    * console.log(doc.markdown);
+    * ```
+    *
+    * @example With LLM extraction
+    * ```typescript
+    * import { z } from 'zod';
+    *
+    * const schema = z.object({
+    *   title: z.string(),
+    *   price: z.number(),
+    *   inStock: z.boolean()
+    * });
+    *
+    * const doc = await client.scrape('https://example.com/product', {
+    *   engine: 'smart',
+    *   extract: {
+    *     schema,
+    *     systemPrompt: 'Extract product details',
+    *     provider: 'openai'
+    *   }
+    * });
+    * console.log(doc.extract?.data);
+    * ```
+    */
+   async scrape(url, options) {
+     return scrape(this.http, url, options);
+   }
+   // ==========================================================================
+   // Batch Scrape Methods
+   // ==========================================================================
+   /**
+    * Start a batch scrape job (async)
+    *
+    * @param urls - Array of URLs to scrape
+    * @param options - Batch scrape options
+    * @returns Batch job ID and initial status
+    *
+    * @example
+    * ```typescript
+    * const { id } = await client.startBatchScrape(
+    *   ['https://a.com', 'https://b.com', 'https://c.com'],
+    *   { options: { formats: ['markdown'] } }
+    * );
+    *
+    * // Poll manually
+    * let status = await client.getBatchScrapeStatus(id);
+    * while (status.status === 'scraping') {
+    *   await new Promise(r => setTimeout(r, 2000));
+    *   status = await client.getBatchScrapeStatus(id);
+    * }
+    * ```
+    */
+   async startBatchScrape(urls, options) {
+     return startBatchScrape(this.http, urls, options);
+   }
+   /**
+    * Get batch scrape job status and data
+    *
+    * @param jobId - Batch job ID
+    * @returns Current job status and scraped data
+    */
+   async getBatchScrapeStatus(jobId) {
+     return getBatchScrapeStatus(this.http, jobId);
+   }
+   /**
+    * Cancel a batch scrape job
+    *
+    * @param jobId - Batch job ID
+    * @returns True if cancelled successfully
+    */
+   async cancelBatchScrape(jobId) {
+     return cancelBatchScrape(this.http, jobId);
+   }
+   /**
+    * Get batch scrape job errors
+    *
+    * @param jobId - Batch job ID
+    * @returns Errors and robots.txt blocked URLs
+    */
+   async getBatchScrapeErrors(jobId) {
+     return getBatchScrapeErrors(this.http, jobId);
+   }
+   /**
+    * Batch scrape multiple URLs and wait for completion
+    *
+    * @param urls - Array of URLs to scrape
+    * @param options - Batch options including pollInterval and timeout
+    * @returns Final job with all scraped data
+    *
+    * @example
+    * ```typescript
+    * const job = await client.batchScrape(
+    *   ['https://a.com', 'https://b.com', 'https://c.com'],
+    *   {
+    *     options: { formats: ['markdown'], engine: 'smart' },
+    *     pollInterval: 2000,
+    *     timeout: 300
+    *   }
+    * );
+    *
+    * console.log(`Scraped ${job.completed} URLs`);
+    * job.data.forEach(doc => console.log(doc.url, doc.markdown?.length));
+    * ```
+    */
+   async batchScrape(urls, options) {
+     return batchScrape(this.http, urls, options);
+   }
+   // ==========================================================================
+   // Crawl Methods
+   // ==========================================================================
+   /**
+    * Start a crawl job (async)
+    *
+    * Use this method when you want to start a crawl and manage polling yourself.
+    * For automatic polling, use the `crawl()` method instead.
+    *
+    * @param url - Root URL to crawl
+    * @param options - Crawl options
+    * @returns Crawl job ID and initial status
+    *
+    * @example
+    * ```typescript
+    * const { id } = await client.startCrawl('https://example.com', {
+    *   limit: 10,
+    *   engine: 'dynamic'
+    * });
+    *
+    * // Poll for status manually
+    * let status = await client.getCrawlStatus(id);
+    * while (status.status === 'scraping') {
+    *   await new Promise(r => setTimeout(r, 2000));
+    *   status = await client.getCrawlStatus(id);
+    * }
+    * ```
+    */
+   async startCrawl(url, options) {
+     return startCrawl(this.http, url, options);
+   }
+   /**
+    * Get crawl job status and data
+    *
+    * @param jobId - Crawl job ID
+    * @returns Current job status and scraped data
+    */
+   async getCrawlStatus(jobId) {
+     return getCrawlStatus(this.http, jobId);
+   }
+   /**
+    * Cancel a crawl job
+    *
+    * @param jobId - Crawl job ID
+    * @returns True if cancelled successfully
+    */
+   async cancelCrawl(jobId) {
+     return cancelCrawl(this.http, jobId);
+   }
+   /**
+    * Get crawl job errors and robots.txt blocks
+    *
+    * @param jobId - Crawl job ID
+    * @returns Errors and robots.txt blocked URLs
+    */
+   async getCrawlErrors(jobId) {
+     return getCrawlErrors(this.http, jobId);
+   }
+   /**
+    * Crawl a website and wait for completion
+    *
+    * This method starts a crawl job and automatically polls until completion.
+    *
+    * @param url - Root URL to crawl
+    * @param options - Crawl options including pollInterval and timeout
+    * @returns Final crawl job with all scraped data
+    *
+    * @example
+    * ```typescript
+    * const job = await client.crawl('https://example.com', {
+    *   limit: 10,
+    *   engine: 'dynamic',
+    *   formats: ['markdown'],
+    *   pollInterval: 2000, // Poll every 2 seconds
+    *   timeout: 300 // 5 minute timeout
+    * });
+    *
+    * console.log(`Crawled ${job.completed} pages`);
+    * job.data.forEach(doc => console.log(doc.url));
+    * ```
+    */
+   async crawl(url, options) {
+     return crawl(this.http, url, options);
+   }
+   // ==========================================================================
+   // Extract Methods (Standalone LLM Extraction)
+   // ==========================================================================
+   /**
+    * Start an extract job (async)
+    *
+    * @param options - Extract request options
+    * @returns Extract job ID or immediate result
+    *
+    * @example
+    * ```typescript
+    * const { id } = await client.startExtract({
+    *   urls: ['https://example.com/product'],
+    *   schema: { name: 'string', price: 'number' },
+    *   provider: 'openai'
+    * });
+    *
+    * // Poll manually
+    * let status = await client.getExtractStatus(id);
+    * while (status.status === 'processing') {
+    *   await new Promise(r => setTimeout(r, 2000));
+    *   status = await client.getExtractStatus(id);
+    * }
+    * console.log(status.data);
+    * ```
+    */
+   async startExtract(options) {
+     return startExtract(this.http, options);
+   }
+   /**
+    * Get extract job status and data
+    *
+    * @param jobId - Extract job ID
+    * @returns Current job status and extracted data
+    */
+   async getExtractStatus(jobId) {
+     return getExtractStatus(this.http, jobId);
+   }
+   /**
+    * Extract structured data from URLs using LLM and wait for completion
+    *
+    * @param options - Extract options including schema, prompt, and timeout
+    * @returns Final extract result with structured data
+    *
+    * @example With Zod schema
+    * ```typescript
+    * import { z } from 'zod';
+    *
+    * const result = await client.extract({
+    *   urls: ['https://example.com/product'],
+    *   schema: z.object({
+    *     name: z.string(),
+    *     price: z.number(),
+    *     inStock: z.boolean(),
+    *     features: z.array(z.string())
+    *   }),
+    *   systemPrompt: 'Extract product information from the page',
+    *   provider: 'openai',
+    *   timeout: 60
+    * });
+    *
+    * console.log(result.data);
+    * ```
+    *
+    * @example With natural language prompt
+    * ```typescript
+    * const result = await client.extract({
+    *   urls: ['https://example.com/about'],
+    *   prompt: 'Extract the company name, founding year, and list of team members',
+    *   enableWebSearch: true
+    * });
+    *
+    * console.log(result.data);
+    * ```
+    */
+   async extract(options) {
+     return extract(this.http, options);
+   }
+   // ==========================================================================
+   // Map Methods
+   // ==========================================================================
+   /**
+    * Map a website to discover all URLs
+    *
+    * @param url - Root URL to map
+    * @param options - Map options
+    * @returns List of discovered URLs
+    *
+    * @example
+    * ```typescript
+    * const result = await client.map('https://example.com', {
+    *   engine: 'dynamic'
+    * });
+    *
+    * console.log(`Found ${result.count} URLs:`);
+    * result.links.forEach(url => console.log(url));
+    * ```
+    */
+   async map(url, options) {
+     return map(this.http, url, options);
+   }
+   // ==========================================================================
+   // Search Methods
+   // ==========================================================================
+   /**
+    * Search the web and optionally scrape results
+    *
+    * @param query - Search query
+    * @param options - Search options
+    * @returns Search results with optional scraped content
+    *
+    * @example Basic search
+    * ```typescript
+    * const results = await client.search('best restaurants in NYC', {
+    *   limit: 10,
+    *   lang: 'en',
+    *   country: 'us'
+    * });
+    *
+    * results.data.forEach(r => {
+    *   console.log(`${r.title}: ${r.url}`);
+    * });
+    * ```
+    *
+    * @example Search with scraping
+    * ```typescript
+    * const results = await client.search('best laptops 2024', {
+    *   limit: 5,
+    *   scrapeOptions: {
+    *     formats: ['markdown']
+    *   },
+    *   engine: 'smart'
+    * });
+    *
+    * results.data.forEach(r => {
+    *   console.log(r.title);
+    *   console.log(r.markdown?.substring(0, 200));
+    * });
+    * ```
+    *
+    * @example Search with LLM extraction
+    * ```typescript
+    * import { z } from 'zod';
+    *
+    * const results = await client.search('iPhone 15 Pro reviews', {
+    *   limit: 5,
+    *   scrapeOptions: { formats: ['markdown'] },
+    *   extract: {
+    *     schema: z.object({
+    *       pros: z.array(z.string()),
+    *       cons: z.array(z.string()),
+    *       rating: z.number()
+    *     }),
+    *     systemPrompt: 'Extract review summary from the content'
+    *   }
+    * });
+    *
+    * console.log(results.extract?.data);
+    * ```
+    */
+   async search(query, options) {
+     return search(this.http, query, options);
+   }
+   // ==========================================================================
+   // Usage & Monitoring Methods
+   // ==========================================================================
+   /**
+    * Get current concurrency usage
+    *
+    * @returns Current and max concurrency
+    *
+    * @example
+    * ```typescript
+    * const { concurrency, maxConcurrency } = await client.getConcurrency();
+    * console.log(`Using ${concurrency}/${maxConcurrency} concurrent requests`);
+    * ```
+    */
+   async getConcurrency() {
+     return getConcurrency(this.http);
+   }
+   /**
+    * Get current credit usage
+    *
+    * @returns Credit usage information
+    *
+    * @example
+    * ```typescript
+    * const credits = await client.getCreditUsage();
+    * console.log(`Remaining credits: ${credits.remainingCredits}`);
+    * ```
+    */
+   async getCreditUsage() {
+     return getCreditUsage(this.http);
+   }
+   /**
+    * Get current token usage (for LLM extraction)
+    *
+    * @returns Token usage information
+    *
+    * @example
+    * ```typescript
+    * const tokens = await client.getTokenUsage();
+    * console.log(`Remaining tokens: ${tokens.remainingTokens}`);
+    * ```
+    */
+   async getTokenUsage() {
+     return getTokenUsage(this.http);
+   }
+   /**
+    * Get queue status information
+    *
+    * @returns Queue status metrics
+    *
+    * @example
+    * ```typescript
+    * const queue = await client.getQueueStatus();
+    * console.log(`Jobs in queue: ${queue.jobsInQueue}`);
+    * console.log(`Active: ${queue.activeJobsInQueue}, Waiting: ${queue.waitingJobsInQueue}`);
+    * ```
+    */
+   async getQueueStatus() {
+     return getQueueStatus(this.http);
+   }
+ };
+ // Annotate the CommonJS export names for ESM import in node:
+ 0 && (module.exports = {
+   AuthenticationError,
+   CrawlGateClient,
+   CrawlGateError,
+   ExtractionError,
+   JobTimeoutError,
+   RateLimitError,
+   ServiceUnavailableError,
+   ValidationError
+ });
+ //# sourceMappingURL=index.cjs.map
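
For orientation, below is a minimal usage sketch of the client bundled above. It is not part of the published package content: the default import assumes the package entry point resolves to this dist/index.cjs build (e.g. with esModuleInterop), while the option names, environment variables, endpoints, and error classes are taken directly from the code shown. Treat it as an illustration under those assumptions, not official documentation.

```typescript
// Usage sketch for @crawlgate/sdk 1.0.0, based on the bundle above.
// Assumes CRAWLGATE_API_KEY is set in the environment (see the
// CrawlGateClient constructor) and that a default import of this CJS
// build works in your setup.
import CrawlGateClient, { CrawlGateError, RateLimitError } from '@crawlgate/sdk';

const client = new CrawlGateClient({
  timeoutMs: 90_000,  // per-request axios timeout; the bundle defaults to 9e4 ms
  maxRetries: 3,      // 429/502/503 responses are retried
  backoffFactor: 0.5  // retry delay is backoffFactor * 2^attempt seconds
});

async function main(): Promise<void> {
  try {
    // POST /v1/scrape; the HTTP client also adds origin: "crawlgate-sdk"
    const doc = await client.scrape('https://example.com', {
      formats: ['markdown'],
      onlyMainContent: true
    });
    console.log(doc.markdown);
  } catch (err) {
    if (err instanceof RateLimitError) {
      // retryAfter is optional; parseApiError above does not populate it
      console.error(`Rate limited (retry after ${err.retryAfter ?? 'unknown'}s)`);
    } else if (err instanceof CrawlGateError) {
      console.error(`${err.code}: ${err.message}`);
    } else {
      throw err;
    }
  }
}

main().catch(console.error);
```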