@crawlgate/sdk 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1255 @@
+ // src/utils/httpClient.ts
+ import axios from "axios";
+
+ // src/errors.ts
+ var CrawlGateError = class _CrawlGateError extends Error {
+   /**
+    * HTTP status code (if applicable)
+    */
+   statusCode;
+   /**
+    * Error code for programmatic handling
+    */
+   code;
+   /**
+    * Additional error details
+    */
+   details;
+   constructor(message, statusCode, code, details) {
+     super(message);
+     this.name = "CrawlGateError";
+     this.statusCode = statusCode;
+     this.code = code;
+     this.details = details;
+     if (Error.captureStackTrace) {
+       Error.captureStackTrace(this, _CrawlGateError);
+     }
+   }
+ };
+ var AuthenticationError = class extends CrawlGateError {
+   constructor(message = "Invalid API Key") {
+     super(message, 401, "AUTHENTICATION_ERROR");
+     this.name = "AuthenticationError";
+   }
+ };
+ var ValidationError = class extends CrawlGateError {
+   constructor(message, details) {
+     super(message, 400, "VALIDATION_ERROR", details);
+     this.name = "ValidationError";
+   }
+ };
+ var JobTimeoutError = class extends CrawlGateError {
+   /**
+    * Job ID that timed out
+    */
+   jobId;
+   /**
+    * Timeout duration in seconds
+    */
+   timeoutSeconds;
+   constructor(jobId, timeoutSeconds) {
+     super(
+       `Crawl job ${jobId} did not complete within ${timeoutSeconds} seconds`,
+       void 0,
+       "JOB_TIMEOUT"
+     );
+     this.name = "JobTimeoutError";
+     this.jobId = jobId;
+     this.timeoutSeconds = timeoutSeconds;
+   }
+ };
+ var ServiceUnavailableError = class extends CrawlGateError {
+   constructor(message = "Service temporarily unavailable") {
+     super(message, 503, "SERVICE_UNAVAILABLE");
+     this.name = "ServiceUnavailableError";
+   }
+ };
+ var RateLimitError = class extends CrawlGateError {
+   /**
+    * Time to wait before retrying (in seconds)
+    */
+   retryAfter;
+   constructor(message = "Rate limit exceeded", retryAfter) {
+     super(message, 429, "RATE_LIMIT_EXCEEDED");
+     this.name = "RateLimitError";
+     this.retryAfter = retryAfter;
+   }
+ };
+ var ExtractionError = class extends CrawlGateError {
+   /**
+    * Provider that failed
+    */
+   provider;
+   constructor(message, provider) {
+     super(message, void 0, "EXTRACTION_ERROR");
+     this.name = "ExtractionError";
+     this.provider = provider;
+   }
+ };
+ function parseApiError(status, data) {
+   const message = data.error || data.message || "Unknown error";
+   switch (status) {
+     case 400:
+       throw new ValidationError(message, data.details);
+     case 401:
+       throw new AuthenticationError(message);
+     case 429:
+       throw new RateLimitError(message);
+     case 502:
+     case 503:
+       throw new ServiceUnavailableError(message);
+     default:
+       throw new CrawlGateError(message, status, void 0, data.details);
+   }
+ }
+
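The error classes above are exported from the bundle (see the `export` block at the end of the file), so callers can branch on error type rather than match message strings. A minimal sketch under that assumption; the URL and environment handling are placeholders:

```typescript
import CrawlGateClient, {
  AuthenticationError,
  RateLimitError,
  ValidationError,
} from "@crawlgate/sdk";

async function safeScrape(url: string) {
  const client = new CrawlGateClient({ apiKey: process.env.CRAWLGATE_API_KEY });
  try {
    return await client.scrape(url);
  } catch (err) {
    if (err instanceof RateLimitError) {
      // 429; retryAfter is in seconds and may be undefined (parseApiError does not set it)
      console.warn(`Rate limited, retry after ${err.retryAfter ?? "?"}s`);
    } else if (err instanceof ValidationError) {
      console.error("Bad request:", err.details); // details from the 400 response body
    } else if (err instanceof AuthenticationError) {
      console.error("Invalid or missing API key");
    }
    throw err;
  }
}
```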
+ // src/utils/httpClient.ts
+ var HttpClient = class {
+   instance;
+   apiKey;
+   apiUrl;
+   maxRetries;
+   backoffFactor;
+   constructor(options) {
+     this.apiKey = options.apiKey;
+     this.apiUrl = options.apiUrl.replace(/\/$/, "");
+     this.maxRetries = options.maxRetries ?? 3;
+     this.backoffFactor = options.backoffFactor ?? 0.5;
+     this.instance = axios.create({
+       baseURL: this.apiUrl,
+       timeout: options.timeoutMs ?? 9e4,
+       headers: {
+         "Content-Type": "application/json",
+         "x-api-key": this.apiKey
+       }
+     });
+   }
+   /**
+    * Get the configured API URL
+    */
+   getApiUrl() {
+     return this.apiUrl;
+   }
+   /**
+    * Get the configured API key
+    */
+   getApiKey() {
+     return this.apiKey;
+   }
+   /**
+    * Sleep for specified seconds
+    */
+   sleep(seconds) {
+     return new Promise((resolve) => setTimeout(resolve, seconds * 1e3));
+   }
+   /**
+    * Check if error is retryable
+    */
+   isRetryableError(status) {
+     return status === 502 || status === 503 || status === 429;
+   }
+   /**
+    * Make HTTP request with retry logic
+    */
+   async request(config) {
+     let lastError;
+     for (let attempt = 0; attempt < this.maxRetries; attempt++) {
+       try {
+         if (config.method && ["post", "put", "patch"].includes(config.method.toLowerCase())) {
+           const data = config.data ?? {};
+           config.data = { ...data, origin: "crawlgate-sdk" };
+           if (typeof data.timeout === "number") {
+             config.timeout = data.timeout + 5e3;
+           }
+         }
+         const response = await this.instance.request(config);
+         if (this.isRetryableError(response.status) && attempt < this.maxRetries - 1) {
+           await this.sleep(this.backoffFactor * Math.pow(2, attempt));
+           continue;
+         }
+         return response;
+       } catch (err) {
+         const axiosError = err;
+         lastError = err instanceof Error ? err : new Error(String(axiosError?.message));
+         const status = axiosError?.response?.status;
+         if (this.isRetryableError(status) && attempt < this.maxRetries - 1) {
+           await this.sleep(this.backoffFactor * Math.pow(2, attempt));
+           continue;
+         }
+         if (axiosError?.response) {
+           parseApiError(
+             axiosError.response.status,
+             axiosError.response.data
+           );
+         }
+         throw lastError;
+       }
+     }
+     throw lastError ?? new CrawlGateError("Unexpected HTTP client error");
+   }
+   /**
+    * Make POST request
+    */
+   async post(endpoint, body, headers) {
+     return this.request({
+       method: "post",
+       url: endpoint,
+       data: body,
+       headers
+     });
+   }
+   /**
+    * Make GET request
+    */
+   async get(endpoint, headers) {
+     return this.request({
+       method: "get",
+       url: endpoint,
+       headers
+     });
+   }
+   /**
+    * Make DELETE request
+    */
+   async delete(endpoint, headers) {
+     return this.request({
+       method: "delete",
+       url: endpoint,
+       headers
+     });
+   }
+ };
+
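`HttpClient.request` retries 429/502/503 responses for up to `maxRetries` attempts, sleeping `backoffFactor * 2^attempt` seconds between tries. With the defaults above (`maxRetries: 3`, `backoffFactor: 0.5`) that is 0.5 s after the first failure and 1 s after the second, with no sleep after the final attempt. A small sketch of that schedule (illustrative only, not part of the SDK):

```typescript
const maxRetries = 3;      // HttpClient default
const backoffFactor = 0.5; // HttpClient default

// Delays slept between retryable failures; the loop never sleeps after the last attempt.
for (let attempt = 0; attempt < maxRetries - 1; attempt++) {
  console.log(`after failed attempt ${attempt + 1}: sleep ${backoffFactor * 2 ** attempt}s`);
}
// after failed attempt 1: sleep 0.5s
// after failed attempt 2: sleep 1s
```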
+ // src/methods/scrape.ts
+ import { zodToJsonSchema } from "zod-to-json-schema";
+ function convertSchema(schema) {
+   if (schema && typeof schema === "object" && "_def" in schema) {
+     return zodToJsonSchema(schema);
+   }
+   return schema;
+ }
+ function buildScrapeBody(url, options) {
+   const body = { url };
+   if (options?.engine) {
+     body.engine = options.engine;
+   }
+   if (options?.formats) {
+     body.formats = options.formats;
+   }
+   if (options?.onlyMainContent !== void 0) {
+     body.onlyMainContent = options.onlyMainContent;
+   }
+   if (options?.excludeTags) {
+     body.excludeTags = options.excludeTags;
+   }
+   if (options?.waitFor !== void 0) {
+     body.waitFor = options.waitFor;
+   }
+   if (options?.timeout !== void 0) {
+     body.timeout = options.timeout;
+   }
+   if (options?.proxy) {
+     body.proxy = options.proxy;
+   }
+   if (options?.projectId) {
+     body.project_id = options.projectId;
+   }
+   if (options?.extract) {
+     body.extract = {
+       schema: convertSchema(options.extract.schema),
+       systemPrompt: options.extract.systemPrompt,
+       provider: options.extract.provider,
+       enableFallback: options.extract.enableFallback
+     };
+     Object.keys(body.extract).forEach((key) => {
+       if (body.extract[key] === void 0) {
+         delete body.extract[key];
+       }
+     });
+   }
+   return body;
+ }
+ async function scrape(http, url, options) {
+   const body = buildScrapeBody(url, options);
+   const response = await http.post("/v1/scrape", body);
+   if (!response.data.success) {
+     throw new CrawlGateError(
+       response.data.error || "Scrape failed",
+       void 0,
+       "SCRAPE_ERROR"
+     );
+   }
+   if (!response.data.data) {
+     throw new CrawlGateError("No data returned from scrape", void 0, "NO_DATA");
+   }
+   const document = {
+     ...response.data.data
+   };
+   return document;
+ }
+
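`buildScrapeBody` copies only the options a caller actually sets, converts a Zod `extract.schema` to JSON Schema via `zod-to-json-schema`, renames `projectId` to the wire field `project_id`, and strips `undefined` keys from `extract`. A sketch of the resulting `/v1/scrape` payload; the schema and project ID are assumed example values:

```typescript
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";

const schema = z.object({ title: z.string(), price: z.number() });

// Approximate body posted for scrape(url, { onlyMainContent: true, projectId: "proj_123", extract: { schema } }).
// Note: HttpClient additionally merges origin: "crawlgate-sdk" into every POST body.
const body = {
  url: "https://example.com/product",
  onlyMainContent: true,
  project_id: "proj_123",
  extract: { schema: zodToJsonSchema(schema) }, // systemPrompt/provider/enableFallback omitted when unset
};
console.log(JSON.stringify(body, null, 2));
```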
+ // src/methods/crawl.ts
+ function buildCrawlBody(url, options) {
+   const body = { url };
+   if (options?.engine) {
+     body.engine = options.engine;
+   }
+   if (options?.limit !== void 0) {
+     body.limit = options.limit;
+   }
+   if (options?.formats) {
+     body.formats = options.formats;
+   }
+   if (options?.onlyMainContent !== void 0) {
+     body.onlyMainContent = options.onlyMainContent;
+   }
+   if (options?.excludeTags) {
+     body.excludeTags = options.excludeTags;
+   }
+   if (options?.proxy) {
+     body.proxy = options.proxy;
+   }
+   if (options?.projectId) {
+     body.project_id = options.projectId;
+   }
+   return body;
+ }
+ async function startCrawl(http, url, options) {
+   const body = buildCrawlBody(url, options);
+   const response = await http.post("/v1/crawl", body);
+   if (!response.data.success && !response.data.id) {
+     throw new CrawlGateError(
+       "Failed to start crawl job",
+       void 0,
+       "CRAWL_START_ERROR"
+     );
+   }
+   return {
+     success: true,
+     id: response.data.id,
+     jobId: response.data.id,
+     status: response.data.status || "scraping",
+     engine: response.data.engine
+   };
+ }
+ async function getCrawlStatus(http, jobId) {
+   const response = await http.get(`/v1/crawl/${jobId}`);
+   return {
+     id: response.data.id || jobId,
+     status: response.data.status,
+     total: response.data.total || 0,
+     completed: response.data.completed || 0,
+     data: response.data.data || [],
+     engine: response.data.engine,
+     error: response.data.error
+   };
+ }
+ async function cancelCrawl(http, jobId) {
+   const response = await http.delete(
+     `/v1/crawl/${jobId}`
+   );
+   return response.data.success !== false;
+ }
+ function sleep(ms) {
+   return new Promise((resolve) => setTimeout(resolve, ms));
+ }
+ async function crawl(http, url, options) {
+   const pollInterval = options?.pollInterval ?? 2e3;
+   const timeout = options?.timeout ?? 300;
+   const { id: jobId } = await startCrawl(http, url, options);
+   const startTime = Date.now();
+   const timeoutMs = timeout * 1e3;
+   while (true) {
+     const status = await getCrawlStatus(http, jobId);
+     if (status.status === "completed") {
+       return status;
+     }
+     if (status.status === "failed") {
+       throw new CrawlGateError(
+         status.error || "Crawl job failed",
+         void 0,
+         "CRAWL_FAILED"
+       );
+     }
+     if (status.status === "cancelled") {
+       throw new CrawlGateError(
+         "Crawl job was cancelled",
+         void 0,
+         "CRAWL_CANCELLED"
+       );
+     }
+     if (Date.now() - startTime > timeoutMs) {
+       throw new JobTimeoutError(jobId, timeout);
+     }
+     await sleep(pollInterval);
+   }
+ }
+ async function getCrawlErrors(http, jobId) {
+   const response = await http.get(`/v1/crawl/${jobId}/errors`);
+   const payload = response.data.data ?? response.data;
+   return {
+     errors: (payload.errors || []).map((e) => ({
+       id: e.id || "",
+       timestamp: e.timestamp,
+       url: e.url || "",
+       code: e.code,
+       error: e.error || e.message || "Unknown error"
+     })),
+     robotsBlocked: payload.robotsBlocked || []
+   };
+ }
+
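`getCrawlErrors` normalizes per-URL failures and robots.txt blocks, tolerating both wrapped (`data.data`) and flat response shapes. A minimal sketch of inspecting a finished job, assuming the package ships type declarations and `jobId` came from `startCrawl`:

```typescript
import type { CrawlGateClient } from "@crawlgate/sdk";

async function reportCrawlIssues(client: CrawlGateClient, jobId: string) {
  const { errors, robotsBlocked } = await client.getCrawlErrors(jobId);
  for (const e of errors) {
    console.log(`${e.url || "(unknown url)"}: ${e.error}`);
  }
  console.log(`${robotsBlocked.length} URL(s) blocked by robots.txt`);
}
```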
+ // src/methods/map.ts
+ function buildMapBody(url, options) {
+   const body = { url };
+   if (options?.engine) {
+     body.engine = options.engine;
+   }
+   if (options?.proxy) {
+     body.proxy = options.proxy;
+   }
+   if (options?.projectId) {
+     body.project_id = options.projectId;
+   }
+   return body;
+ }
+ async function map(http, url, options) {
+   const body = buildMapBody(url, options);
+   const response = await http.post("/v1/map", body);
+   if (!response.data.success) {
+     throw new CrawlGateError(
+       response.data.error || "Map failed",
+       void 0,
+       "MAP_ERROR"
+     );
+   }
+   return {
+     success: true,
+     links: response.data.links || [],
+     count: response.data.count || response.data.links?.length || 0,
+     engine: response.data.engine
+   };
+ }
+
+ // src/methods/search.ts
+ import { zodToJsonSchema as zodToJsonSchema2 } from "zod-to-json-schema";
+ function convertSchema2(schema) {
+   if (schema && typeof schema === "object" && "_def" in schema) {
+     return zodToJsonSchema2(schema);
+   }
+   return schema;
+ }
+ function buildSearchBody(query, options) {
+   const body = { query };
+   if (options?.limit !== void 0) {
+     body.limit = options.limit;
+   }
+   if (options?.lang) {
+     body.lang = options.lang;
+   }
+   if (options?.country) {
+     body.country = options.country;
+   }
+   if (options?.engines) {
+     body.engines = options.engines;
+   }
+   if (options?.scrapeOptions) {
+     body.scrapeOptions = options.scrapeOptions;
+   }
+   if (options?.engine) {
+     body.engine = options.engine;
+   }
+   if (options?.projectId) {
+     body.project_id = options.projectId;
+   }
+   if (options?.extract) {
+     body.extract = {
+       schema: convertSchema2(options.extract.schema),
+       systemPrompt: options.extract.systemPrompt,
+       provider: options.extract.provider,
+       enableFallback: options.extract.enableFallback
+     };
+     Object.keys(body.extract).forEach((key) => {
+       if (body.extract[key] === void 0) {
+         delete body.extract[key];
+       }
+     });
+   }
+   return body;
+ }
+ async function search(http, query, options) {
+   const body = buildSearchBody(query, options);
+   const response = await http.post("/v1/search", body);
+   if (!response.data.success) {
+     throw new CrawlGateError(
+       response.data.error || "Search failed",
+       void 0,
+       "SEARCH_ERROR"
+     );
+   }
+   return {
+     success: true,
+     data: response.data.data || [],
+     query: response.data.query || query,
+     totalResults: response.data.totalResults,
+     searchTime: response.data.searchTime,
+     extract: response.data.extract
+   };
+ }
+
+ // src/methods/batch.ts
+ function buildBatchBody(urls, options) {
+   const body = { urls };
+   if (options?.options) {
+     const scrapeOpts = options.options;
+     if (scrapeOpts.engine) body.engine = scrapeOpts.engine;
+     if (scrapeOpts.formats) body.formats = scrapeOpts.formats;
+     if (scrapeOpts.onlyMainContent !== void 0) body.onlyMainContent = scrapeOpts.onlyMainContent;
+     if (scrapeOpts.excludeTags) body.excludeTags = scrapeOpts.excludeTags;
+     if (scrapeOpts.waitFor !== void 0) body.waitFor = scrapeOpts.waitFor;
+     if (scrapeOpts.timeout !== void 0) body.timeout = scrapeOpts.timeout;
+     if (scrapeOpts.proxy) body.proxy = scrapeOpts.proxy;
+   }
+   if (options?.webhook != null) {
+     body.webhook = options.webhook;
+   }
+   if (options?.appendToId != null) {
+     body.appendToId = options.appendToId;
+   }
+   if (options?.ignoreInvalidURLs != null) {
+     body.ignoreInvalidURLs = options.ignoreInvalidURLs;
+   }
+   if (options?.maxConcurrency != null) {
+     body.maxConcurrency = options.maxConcurrency;
+   }
+   if (options?.projectId) {
+     body.project_id = options.projectId;
+   }
+   return body;
+ }
+ async function startBatchScrape(http, urls, options) {
+   if (!Array.isArray(urls) || urls.length === 0) {
+     throw new CrawlGateError("URLs array cannot be empty", 400, "VALIDATION_ERROR");
+   }
+   const body = buildBatchBody(urls, options);
+   const headers = {};
+   if (options?.idempotencyKey) {
+     headers["Idempotency-Key"] = options.idempotencyKey;
+   }
+   const response = await http.post(
+     "/v1/batch/scrape",
+     body,
+     Object.keys(headers).length > 0 ? headers : void 0
+   );
+   if (!response.data.success && !response.data.id) {
+     throw new CrawlGateError(
+       response.data.error || "Failed to start batch scrape job",
+       void 0,
+       "BATCH_START_ERROR"
+     );
+   }
+   return {
+     success: true,
+     id: response.data.id,
+     url: response.data.url,
+     invalidURLs: response.data.invalidURLs
+   };
+ }
+ async function getBatchScrapeStatus(http, jobId) {
+   const response = await http.get(
+     `/v1/batch/scrape/${jobId}`
+   );
+   return {
+     id: response.data.id || jobId,
+     status: response.data.status,
+     total: response.data.total || 0,
+     completed: response.data.completed || 0,
+     creditsUsed: response.data.creditsUsed,
+     expiresAt: response.data.expiresAt,
+     next: response.data.next ?? null,
+     data: response.data.data || [],
+     error: response.data.error
+   };
+ }
+ async function cancelBatchScrape(http, jobId) {
+   const response = await http.delete(
+     `/v1/batch/scrape/${jobId}`
+   );
+   return response.data.status === "cancelled" || response.data.success !== false;
+ }
+ async function getBatchScrapeErrors(http, jobId) {
+   const response = await http.get(`/v1/batch/scrape/${jobId}/errors`);
+   const payload = response.data.data ?? response.data;
+   return {
+     errors: (payload.errors || []).map((e) => ({
+       id: e.id || "",
+       timestamp: e.timestamp,
+       url: e.url || "",
+       code: e.code,
+       error: e.error || e.message || "Unknown error"
+     })),
+     robotsBlocked: payload.robotsBlocked || []
+   };
+ }
+ function sleep2(ms) {
+   return new Promise((resolve) => setTimeout(resolve, ms));
+ }
+ async function waitForBatchCompletion(http, jobId, pollInterval = 2e3, timeout) {
+   const startTime = Date.now();
+   const timeoutMs = timeout ? timeout * 1e3 : void 0;
+   while (true) {
+     const status = await getBatchScrapeStatus(http, jobId);
+     if (status.status === "completed") {
+       return status;
+     }
+     if (status.status === "failed") {
+       throw new CrawlGateError(
+         status.error || "Batch scrape job failed",
+         void 0,
+         "BATCH_FAILED"
+       );
+     }
+     if (status.status === "cancelled") {
+       throw new CrawlGateError(
+         "Batch scrape job was cancelled",
+         void 0,
+         "BATCH_CANCELLED"
+       );
+     }
+     if (timeoutMs && Date.now() - startTime > timeoutMs) {
+       throw new JobTimeoutError(jobId, timeout);
+     }
+     await sleep2(Math.max(1e3, pollInterval));
+   }
+ }
+ async function batchScrape(http, urls, options) {
+   const pollInterval = options?.pollInterval ?? 2e3;
+   const timeout = options?.timeout;
+   const { id: jobId } = await startBatchScrape(http, urls, options);
+   return waitForBatchCompletion(http, jobId, pollInterval, timeout);
+ }
+
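`startBatchScrape` forwards `options.idempotencyKey` as an `Idempotency-Key` header, the usual pattern for making job submission safe to retry; whether duplicate submissions are actually deduplicated is the server's contract and not visible in this bundle. A sketch, assuming `client` is a configured `CrawlGateClient`:

```typescript
import { randomUUID } from "node:crypto";

const key = randomUUID(); // any stable unique string works
const { id } = await client.startBatchScrape(
  ["https://a.com", "https://b.com"],
  { idempotencyKey: key, options: { formats: ["markdown"] } }
);
// If this call times out client-side, re-submitting with the same `key`
// lets the server recognize the repeat submission.
console.log("batch job:", id);
```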
+ // src/methods/extract.ts
+ import { zodToJsonSchema as zodToJsonSchema3 } from "zod-to-json-schema";
+ function isZodSchema(value) {
+   return value !== null && typeof value === "object" && "_def" in value && (typeof value.safeParse === "function" || typeof value.parse === "function");
+ }
+ function convertSchema3(schema) {
+   if (isZodSchema(schema)) {
+     return zodToJsonSchema3(schema);
+   }
+   return schema;
+ }
+ function buildExtractBody(options) {
+   const body = {};
+   if (options.urls) {
+     body.urls = options.urls;
+   }
+   if (options.prompt != null) {
+     body.prompt = options.prompt;
+   }
+   if (options.schema != null) {
+     body.schema = convertSchema3(options.schema);
+   }
+   if (options.systemPrompt != null) {
+     body.systemPrompt = options.systemPrompt;
+   }
+   if (options.allowExternalLinks != null) {
+     body.allowExternalLinks = options.allowExternalLinks;
+   }
+   if (options.enableWebSearch != null) {
+     body.enableWebSearch = options.enableWebSearch;
+   }
+   if (options.showSources != null) {
+     body.showSources = options.showSources;
+   }
+   if (options.ignoreInvalidURLs != null) {
+     body.ignoreInvalidURLs = options.ignoreInvalidURLs;
+   }
+   if (options.provider) {
+     body.provider = options.provider;
+   }
+   if (options.projectId) {
+     body.project_id = options.projectId;
+   }
+   if (options.scrapeOptions) {
+     body.scrapeOptions = options.scrapeOptions;
+   }
+   return body;
+ }
+ async function startExtract(http, options) {
+   const body = buildExtractBody(options);
+   const response = await http.post("/v1/extract", body);
+   if (response.data.success === false && response.data.error) {
+     throw new CrawlGateError(
+       response.data.error,
+       void 0,
+       "EXTRACT_ERROR"
+     );
+   }
+   return response.data;
+ }
+ async function getExtractStatus(http, jobId) {
+   const response = await http.get(`/v1/extract/${jobId}`);
+   if (response.data.success === false && response.data.error) {
+     throw new CrawlGateError(
+       response.data.error,
+       void 0,
+       "EXTRACT_STATUS_ERROR"
+     );
+   }
+   return response.data;
+ }
+ function sleep3(ms) {
+   return new Promise((resolve) => setTimeout(resolve, ms));
+ }
+ async function waitForExtractCompletion(http, jobId, pollInterval = 2e3, timeout) {
+   const startTime = Date.now();
+   const timeoutMs = timeout ? timeout * 1e3 : void 0;
+   while (true) {
+     const status = await getExtractStatus(http, jobId);
+     if (status.status === "completed") {
+       return status;
+     }
+     if (status.status === "failed") {
+       throw new CrawlGateError(
+         status.error || "Extract job failed",
+         void 0,
+         "EXTRACT_FAILED"
+       );
+     }
+     if (status.status === "cancelled") {
+       throw new CrawlGateError(
+         "Extract job was cancelled",
+         void 0,
+         "EXTRACT_CANCELLED"
+       );
+     }
+     if (timeoutMs && Date.now() - startTime > timeoutMs) {
+       throw new JobTimeoutError(jobId, timeout);
+     }
+     await sleep3(Math.max(1e3, pollInterval));
+   }
+ }
+ async function extract(http, options) {
+   const pollInterval = options.pollInterval ?? 2e3;
+   const timeout = options.timeout;
+   const started = await startExtract(http, options);
+   if (!started.id) {
+     return started;
+   }
+   if (started.status === "completed") {
+     return started;
+   }
+   return waitForExtractCompletion(http, started.id, pollInterval, timeout);
+ }
+
+ // src/methods/usage.ts
+ async function getConcurrency(http) {
+   const response = await http.get("/v1/concurrency");
+   return {
+     concurrency: response.data.concurrency ?? 0,
+     maxConcurrency: response.data.maxConcurrency ?? 0
+   };
+ }
+ async function getCreditUsage(http) {
+   const response = await http.get("/v1/credits");
+   return {
+     remainingCredits: response.data.remainingCredits ?? 0,
+     planCredits: response.data.planCredits,
+     billingPeriodStart: response.data.billingPeriodStart,
+     billingPeriodEnd: response.data.billingPeriodEnd
+   };
+ }
+ async function getTokenUsage(http) {
+   const response = await http.get("/v1/tokens");
+   return {
+     remainingTokens: response.data.remainingTokens ?? 0,
+     planTokens: response.data.planTokens,
+     billingPeriodStart: response.data.billingPeriodStart,
+     billingPeriodEnd: response.data.billingPeriodEnd
+   };
+ }
+ async function getQueueStatus(http) {
+   const response = await http.get("/v1/queue");
+   return {
+     success: response.data.success ?? true,
+     jobsInQueue: response.data.jobsInQueue ?? 0,
+     activeJobsInQueue: response.data.activeJobsInQueue ?? 0,
+     waitingJobsInQueue: response.data.waitingJobsInQueue ?? 0,
+     maxConcurrency: response.data.maxConcurrency ?? 0,
+     mostRecentSuccess: response.data.mostRecentSuccess
+   };
+ }
+
+ // src/client.ts
+ var CrawlGateClient = class {
+   http;
+   /**
+    * Create a new CrawlGate client
+    *
+    * @param options - Client configuration options
+    * @throws {CrawlGateError} If API key is not provided
+    */
+   constructor(options = {}) {
+     const apiKey = options.apiKey ?? process.env.CRAWLGATE_API_KEY ?? "";
+     const apiUrl = (options.apiUrl ?? process.env.CRAWLGATE_API_URL ?? "https://api.crawlgate.io").replace(/\/$/, "");
+     if (!apiKey) {
+       throw new CrawlGateError(
+         "API key is required. Set CRAWLGATE_API_KEY env variable or pass apiKey option.",
+         void 0,
+         "MISSING_API_KEY"
+       );
+     }
+     this.http = new HttpClient({
+       apiKey,
+       apiUrl,
+       timeoutMs: options.timeoutMs,
+       maxRetries: options.maxRetries,
+       backoffFactor: options.backoffFactor
+     });
+   }
+   // ==========================================================================
+   // Scrape Methods
+   // ==========================================================================
+   /**
+    * Scrape a single URL
+    *
+    * @param url - URL to scrape
+    * @param options - Scrape options
+    * @returns Scraped document with requested formats
+    *
+    * @example
+    * ```typescript
+    * const doc = await client.scrape('https://example.com', {
+    *   engine: 'smart',
+    *   formats: ['markdown', 'html'],
+    *   onlyMainContent: true
+    * });
+    * console.log(doc.markdown);
+    * ```
+    *
+    * @example With LLM extraction
+    * ```typescript
+    * import { z } from 'zod';
+    *
+    * const schema = z.object({
+    *   title: z.string(),
+    *   price: z.number(),
+    *   inStock: z.boolean()
+    * });
+    *
+    * const doc = await client.scrape('https://example.com/product', {
+    *   engine: 'smart',
+    *   extract: {
+    *     schema,
+    *     systemPrompt: 'Extract product details',
+    *     provider: 'openai'
+    *   }
+    * });
+    * console.log(doc.extract?.data);
+    * ```
+    */
+   async scrape(url, options) {
+     return scrape(this.http, url, options);
+   }
+   // ==========================================================================
+   // Batch Scrape Methods
+   // ==========================================================================
+   /**
+    * Start a batch scrape job (async)
+    *
+    * @param urls - Array of URLs to scrape
+    * @param options - Batch scrape options
+    * @returns Batch job ID and initial status
+    *
+    * @example
+    * ```typescript
+    * const { id } = await client.startBatchScrape(
+    *   ['https://a.com', 'https://b.com', 'https://c.com'],
+    *   { options: { formats: ['markdown'] } }
+    * );
+    *
+    * // Poll manually
+    * let status = await client.getBatchScrapeStatus(id);
+    * while (status.status === 'scraping') {
+    *   await new Promise(r => setTimeout(r, 2000));
+    *   status = await client.getBatchScrapeStatus(id);
+    * }
+    * ```
+    */
+   async startBatchScrape(urls, options) {
+     return startBatchScrape(this.http, urls, options);
+   }
+   /**
+    * Get batch scrape job status and data
+    *
+    * @param jobId - Batch job ID
+    * @returns Current job status and scraped data
+    */
+   async getBatchScrapeStatus(jobId) {
+     return getBatchScrapeStatus(this.http, jobId);
+   }
+   /**
+    * Cancel a batch scrape job
+    *
+    * @param jobId - Batch job ID
+    * @returns True if cancelled successfully
+    */
+   async cancelBatchScrape(jobId) {
+     return cancelBatchScrape(this.http, jobId);
+   }
+   /**
+    * Get batch scrape job errors
+    *
+    * @param jobId - Batch job ID
+    * @returns Errors and robots.txt blocked URLs
+    */
+   async getBatchScrapeErrors(jobId) {
+     return getBatchScrapeErrors(this.http, jobId);
+   }
+   /**
+    * Batch scrape multiple URLs and wait for completion
+    *
+    * @param urls - Array of URLs to scrape
+    * @param options - Batch options including pollInterval and timeout
+    * @returns Final job with all scraped data
+    *
+    * @example
+    * ```typescript
+    * const job = await client.batchScrape(
+    *   ['https://a.com', 'https://b.com', 'https://c.com'],
+    *   {
+    *     options: { formats: ['markdown'], engine: 'smart' },
+    *     pollInterval: 2000,
+    *     timeout: 300
+    *   }
+    * );
+    *
+    * console.log(`Scraped ${job.completed} URLs`);
+    * job.data.forEach(doc => console.log(doc.url, doc.markdown?.length));
+    * ```
+    */
+   async batchScrape(urls, options) {
+     return batchScrape(this.http, urls, options);
+   }
+   // ==========================================================================
+   // Crawl Methods
+   // ==========================================================================
+   /**
+    * Start a crawl job (async)
+    *
+    * Use this method when you want to start a crawl and manage polling yourself.
+    * For automatic polling, use the `crawl()` method instead.
+    *
+    * @param url - Root URL to crawl
+    * @param options - Crawl options
+    * @returns Crawl job ID and initial status
+    *
+    * @example
+    * ```typescript
+    * const { id } = await client.startCrawl('https://example.com', {
+    *   limit: 10,
+    *   engine: 'dynamic'
+    * });
+    *
+    * // Poll for status manually
+    * let status = await client.getCrawlStatus(id);
+    * while (status.status === 'scraping') {
+    *   await new Promise(r => setTimeout(r, 2000));
+    *   status = await client.getCrawlStatus(id);
+    * }
+    * ```
+    */
+   async startCrawl(url, options) {
+     return startCrawl(this.http, url, options);
+   }
+   /**
+    * Get crawl job status and data
+    *
+    * @param jobId - Crawl job ID
+    * @returns Current job status and scraped data
+    */
+   async getCrawlStatus(jobId) {
+     return getCrawlStatus(this.http, jobId);
+   }
+   /**
+    * Cancel a crawl job
+    *
+    * @param jobId - Crawl job ID
+    * @returns True if cancelled successfully
+    */
+   async cancelCrawl(jobId) {
+     return cancelCrawl(this.http, jobId);
+   }
+   /**
+    * Get crawl job errors and robots.txt blocks
+    *
+    * @param jobId - Crawl job ID
+    * @returns Errors and robots.txt blocked URLs
+    */
+   async getCrawlErrors(jobId) {
+     return getCrawlErrors(this.http, jobId);
+   }
+   /**
+    * Crawl a website and wait for completion
+    *
+    * This method starts a crawl job and automatically polls until completion.
+    *
+    * @param url - Root URL to crawl
+    * @param options - Crawl options including pollInterval and timeout
+    * @returns Final crawl job with all scraped data
+    *
+    * @example
+    * ```typescript
+    * const job = await client.crawl('https://example.com', {
+    *   limit: 10,
+    *   engine: 'dynamic',
+    *   formats: ['markdown'],
+    *   pollInterval: 2000, // Poll every 2 seconds
+    *   timeout: 300 // 5 minute timeout
+    * });
+    *
+    * console.log(`Crawled ${job.completed} pages`);
+    * job.data.forEach(doc => console.log(doc.url));
+    * ```
+    */
+   async crawl(url, options) {
+     return crawl(this.http, url, options);
+   }
+   // ==========================================================================
+   // Extract Methods (Standalone LLM Extraction)
+   // ==========================================================================
+   /**
+    * Start an extract job (async)
+    *
+    * @param options - Extract request options
+    * @returns Extract job ID or immediate result
+    *
+    * @example
+    * ```typescript
+    * const { id } = await client.startExtract({
+    *   urls: ['https://example.com/product'],
+    *   schema: { name: 'string', price: 'number' },
+    *   provider: 'openai'
+    * });
+    *
+    * // Poll manually
+    * let status = await client.getExtractStatus(id);
+    * while (status.status === 'processing') {
+    *   await new Promise(r => setTimeout(r, 2000));
+    *   status = await client.getExtractStatus(id);
+    * }
+    * console.log(status.data);
+    * ```
+    */
+   async startExtract(options) {
+     return startExtract(this.http, options);
+   }
+   /**
+    * Get extract job status and data
+    *
+    * @param jobId - Extract job ID
+    * @returns Current job status and extracted data
+    */
+   async getExtractStatus(jobId) {
+     return getExtractStatus(this.http, jobId);
+   }
+   /**
+    * Extract structured data from URLs using LLM and wait for completion
+    *
+    * @param options - Extract options including schema, prompt, and timeout
+    * @returns Final extract result with structured data
+    *
+    * @example With Zod schema
+    * ```typescript
+    * import { z } from 'zod';
+    *
+    * const result = await client.extract({
+    *   urls: ['https://example.com/product'],
+    *   schema: z.object({
+    *     name: z.string(),
+    *     price: z.number(),
+    *     inStock: z.boolean(),
+    *     features: z.array(z.string())
+    *   }),
+    *   systemPrompt: 'Extract product information from the page',
+    *   provider: 'openai',
+    *   timeout: 60
+    * });
+    *
+    * console.log(result.data);
+    * ```
+    *
+    * @example With natural language prompt
+    * ```typescript
+    * const result = await client.extract({
+    *   urls: ['https://example.com/about'],
+    *   prompt: 'Extract the company name, founding year, and list of team members',
+    *   enableWebSearch: true
+    * });
+    *
+    * console.log(result.data);
+    * ```
+    */
+   async extract(options) {
+     return extract(this.http, options);
+   }
+   // ==========================================================================
+   // Map Methods
+   // ==========================================================================
+   /**
+    * Map a website to discover all URLs
+    *
+    * @param url - Root URL to map
+    * @param options - Map options
+    * @returns List of discovered URLs
+    *
+    * @example
+    * ```typescript
+    * const result = await client.map('https://example.com', {
+    *   engine: 'dynamic'
+    * });
+    *
+    * console.log(`Found ${result.count} URLs:`);
+    * result.links.forEach(url => console.log(url));
+    * ```
+    */
+   async map(url, options) {
+     return map(this.http, url, options);
+   }
+   // ==========================================================================
+   // Search Methods
+   // ==========================================================================
+   /**
+    * Search the web and optionally scrape results
+    *
+    * @param query - Search query
+    * @param options - Search options
+    * @returns Search results with optional scraped content
+    *
+    * @example Basic search
+    * ```typescript
+    * const results = await client.search('best restaurants in NYC', {
+    *   limit: 10,
+    *   lang: 'en',
+    *   country: 'us'
+    * });
+    *
+    * results.data.forEach(r => {
+    *   console.log(`${r.title}: ${r.url}`);
+    * });
+    * ```
+    *
+    * @example Search with scraping
+    * ```typescript
+    * const results = await client.search('best laptops 2024', {
+    *   limit: 5,
+    *   scrapeOptions: {
+    *     formats: ['markdown']
+    *   },
+    *   engine: 'smart'
+    * });
+    *
+    * results.data.forEach(r => {
+    *   console.log(r.title);
+    *   console.log(r.markdown?.substring(0, 200));
+    * });
+    * ```
+    *
+    * @example Search with LLM extraction
+    * ```typescript
+    * import { z } from 'zod';
+    *
+    * const results = await client.search('iPhone 15 Pro reviews', {
+    *   limit: 5,
+    *   scrapeOptions: { formats: ['markdown'] },
+    *   extract: {
+    *     schema: z.object({
+    *       pros: z.array(z.string()),
+    *       cons: z.array(z.string()),
+    *       rating: z.number()
+    *     }),
+    *     systemPrompt: 'Extract review summary from the content'
+    *   }
+    * });
+    *
+    * console.log(results.extract?.data);
+    * ```
+    */
+   async search(query, options) {
+     return search(this.http, query, options);
+   }
+   // ==========================================================================
+   // Usage & Monitoring Methods
+   // ==========================================================================
+   /**
+    * Get current concurrency usage
+    *
+    * @returns Current and max concurrency
+    *
+    * @example
+    * ```typescript
+    * const { concurrency, maxConcurrency } = await client.getConcurrency();
+    * console.log(`Using ${concurrency}/${maxConcurrency} concurrent requests`);
+    * ```
+    */
+   async getConcurrency() {
+     return getConcurrency(this.http);
+   }
+   /**
+    * Get current credit usage
+    *
+    * @returns Credit usage information
+    *
+    * @example
+    * ```typescript
+    * const credits = await client.getCreditUsage();
+    * console.log(`Remaining credits: ${credits.remainingCredits}`);
+    * ```
+    */
+   async getCreditUsage() {
+     return getCreditUsage(this.http);
+   }
+   /**
+    * Get current token usage (for LLM extraction)
+    *
+    * @returns Token usage information
+    *
+    * @example
+    * ```typescript
+    * const tokens = await client.getTokenUsage();
+    * console.log(`Remaining tokens: ${tokens.remainingTokens}`);
+    * ```
+    */
+   async getTokenUsage() {
+     return getTokenUsage(this.http);
+   }
+   /**
+    * Get queue status information
+    *
+    * @returns Queue status metrics
+    *
+    * @example
+    * ```typescript
+    * const queue = await client.getQueueStatus();
+    * console.log(`Jobs in queue: ${queue.jobsInQueue}`);
+    * console.log(`Active: ${queue.activeJobsInQueue}, Waiting: ${queue.waitingJobsInQueue}`);
+    * ```
+    */
+   async getQueueStatus() {
+     return getQueueStatus(this.http);
+   }
+ };
+ export {
+   AuthenticationError,
+   CrawlGateClient,
+   CrawlGateError,
+   ExtractionError,
+   JobTimeoutError,
+   RateLimitError,
+   ServiceUnavailableError,
+   ValidationError,
+   CrawlGateClient as default
+ };
+ //# sourceMappingURL=index.js.map
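Taken together, the bundle default-exports `CrawlGateClient` and re-exports the error classes. A minimal end-to-end sketch of the published surface; the target URL is a placeholder, and `CRAWLGATE_API_KEY` must be set or the constructor throws with code `MISSING_API_KEY`:

```typescript
import CrawlGateClient from "@crawlgate/sdk";

const client = new CrawlGateClient(); // reads CRAWLGATE_API_KEY / CRAWLGATE_API_URL from the environment

async function main() {
  const doc = await client.scrape("https://example.com", {
    formats: ["markdown"],
    onlyMainContent: true,
  });
  console.log(doc.markdown?.slice(0, 200));

  // crawl() starts the job and polls until completion (default timeout 300 s)
  const job = await client.crawl("https://example.com", { limit: 5, timeout: 300 });
  console.log(`Crawled ${job.completed}/${job.total} pages`);
}

main().catch(console.error);
```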