scrapex 1.0.0-alpha.1 → 1.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +164 -5
  2. package/dist/embeddings/index.cjs +52 -0
  3. package/dist/embeddings/index.d.cts +3 -0
  4. package/dist/embeddings/index.d.mts +3 -0
  5. package/dist/embeddings/index.mjs +4 -0
  6. package/dist/embeddings-BjNTQSG9.cjs +1455 -0
  7. package/dist/embeddings-BjNTQSG9.cjs.map +1 -0
  8. package/dist/embeddings-Bsymy_jA.mjs +1215 -0
  9. package/dist/embeddings-Bsymy_jA.mjs.map +1 -0
  10. package/dist/{enhancer-oM4BhYYS.cjs → enhancer-Cs_WyWtJ.cjs} +2 -51
  11. package/dist/enhancer-Cs_WyWtJ.cjs.map +1 -0
  12. package/dist/{enhancer-Q6CSc1gA.mjs → enhancer-INx5NlgO.mjs} +2 -45
  13. package/dist/enhancer-INx5NlgO.mjs.map +1 -0
  14. package/dist/http-base-CHLf-Tco.cjs +684 -0
  15. package/dist/http-base-CHLf-Tco.cjs.map +1 -0
  16. package/dist/http-base-DM7YNo6X.mjs +618 -0
  17. package/dist/http-base-DM7YNo6X.mjs.map +1 -0
  18. package/dist/index-Bvseqli-.d.cts +268 -0
  19. package/dist/index-Bvseqli-.d.cts.map +1 -0
  20. package/dist/index-CIFjNySr.d.mts +268 -0
  21. package/dist/index-CIFjNySr.d.mts.map +1 -0
  22. package/dist/index-D6qfjmZQ.d.mts +401 -0
  23. package/dist/index-D6qfjmZQ.d.mts.map +1 -0
  24. package/dist/index-RFSpP5g8.d.cts +401 -0
  25. package/dist/index-RFSpP5g8.d.cts.map +1 -0
  26. package/dist/index.cjs +171 -51
  27. package/dist/index.cjs.map +1 -1
  28. package/dist/index.d.cts +61 -2
  29. package/dist/index.d.cts.map +1 -1
  30. package/dist/index.d.mts +61 -2
  31. package/dist/index.d.mts.map +1 -1
  32. package/dist/index.mjs +129 -6
  33. package/dist/index.mjs.map +1 -1
  34. package/dist/llm/index.cjs +252 -233
  35. package/dist/llm/index.cjs.map +1 -1
  36. package/dist/llm/index.d.cts +132 -85
  37. package/dist/llm/index.d.cts.map +1 -1
  38. package/dist/llm/index.d.mts +132 -85
  39. package/dist/llm/index.d.mts.map +1 -1
  40. package/dist/llm/index.mjs +244 -236
  41. package/dist/llm/index.mjs.map +1 -1
  42. package/dist/parsers/index.cjs +10 -199
  43. package/dist/parsers/index.d.cts +2 -133
  44. package/dist/parsers/index.d.mts +2 -133
  45. package/dist/parsers/index.mjs +2 -191
  46. package/dist/parsers-Bneuws8x.cjs +569 -0
  47. package/dist/parsers-Bneuws8x.cjs.map +1 -0
  48. package/dist/parsers-DsawHeo0.mjs +482 -0
  49. package/dist/parsers-DsawHeo0.mjs.map +1 -0
  50. package/dist/types-BOcHQU9s.d.mts +831 -0
  51. package/dist/types-BOcHQU9s.d.mts.map +1 -0
  52. package/dist/types-DutdBpqd.d.cts +831 -0
  53. package/dist/types-DutdBpqd.d.cts.map +1 -0
  54. package/package.json +15 -16
  55. package/dist/enhancer-Q6CSc1gA.mjs.map +0 -1
  56. package/dist/enhancer-oM4BhYYS.cjs.map +0 -1
  57. package/dist/parsers/index.cjs.map +0 -1
  58. package/dist/parsers/index.d.cts.map +0 -1
  59. package/dist/parsers/index.d.mts.map +0 -1
  60. package/dist/parsers/index.mjs.map +0 -1
  61. package/dist/types-CNQZVW36.d.mts +0 -150
  62. package/dist/types-CNQZVW36.d.mts.map +0 -1
  63. package/dist/types-D0HYR95H.d.cts +0 -150
  64. package/dist/types-D0HYR95H.d.cts.map +0 -1
@@ -0,0 +1,618 @@
1
+ import { promises } from "node:dns";
2
+ import { isIP } from "node:net";
3
+
4
+ //#region src/core/errors.ts
5
/**
 * Error type used for scraping failures.
 *
 * Carries a machine-readable `code` and, when the failure came from an HTTP
 * response, the numeric `statusCode`. The optional `cause` is forwarded to the
 * built-in Error constructor.
 */
var ScrapeError = class ScrapeError extends Error {
  code;
  statusCode;

  /**
   * @param message - Human-readable description.
   * @param code - Structured error code (e.g. "FETCH_FAILED", "TIMEOUT").
   * @param statusCode - HTTP status related to the failure, if any.
   * @param cause - Underlying error, exposed as `error.cause`.
   */
  constructor(message, code, statusCode, cause) {
    super(message, { cause });
    this.name = "ScrapeError";
    this.code = code;
    this.statusCode = statusCode;
    if (Error.captureStackTrace) {
      // Drop the constructor frame from the captured stack.
      Error.captureStackTrace(this, ScrapeError);
    }
  }

  /**
   * Wrap an arbitrary thrown value in a ScrapeError.
   * An existing ScrapeError is returned unchanged; an Error keeps its message
   * and becomes the `cause`; anything else is stringified.
   */
  static from(error, code = "FETCH_FAILED") {
    if (error instanceof ScrapeError) return error;
    return error instanceof Error
      ? new ScrapeError(error.message, code, void 0, error)
      : new ScrapeError(String(error), code);
  }

  /**
   * Whether this error's code denotes a failure worth retrying
   * ("FETCH_FAILED" or "TIMEOUT").
   */
  isRetryable() {
    return ["FETCH_FAILED", "TIMEOUT"].includes(this.code);
  }

  /**
   * Plain-object snapshot suitable for JSON serialization.
   */
  toJSON() {
    const { name, message, code, statusCode, stack } = this;
    return { name, message, code, statusCode, stack };
  }
};
45
+
46
+ //#endregion
47
+ //#region src/common/errors.ts
48
+ /**
49
+ * Error normalization utilities for HTTP providers.
50
+ * Maps HTTP status codes to consistent ScrapeError codes.
51
+ */
52
/**
 * Translate an HTTP status code into a ScrapeError code.
 *
 * 401/403/429 -> "BLOCKED", 404 -> "NOT_FOUND", 408 -> "TIMEOUT",
 * any status >= 500 -> "LLM_ERROR", everything else -> "FETCH_FAILED".
 */
function getErrorCodeFromStatus(status) {
  switch (status) {
    case 401:
    case 403:
    case 429:
      return "BLOCKED";
    case 404:
      return "NOT_FOUND";
    case 408:
      return "TIMEOUT";
    default:
      return status >= 500 ? "LLM_ERROR" : "FETCH_FAILED";
  }
}
63
/**
 * Extract a human-readable error message from an API response body.
 *
 * Lookup order for JSON bodies: `error.message` / `error.msg` (or the
 * stringified `error` object), then string-valued `error`, `message`,
 * `detail`. A JSON body with none of these yields the raw text. A non-JSON
 * body yields the raw text, or "HTTP <status> <statusText>" when the text is
 * empty or cannot be read.
 */
async function parseErrorBody(response) {
  const statusLine = () => `HTTP ${response.status} ${response.statusText}`;

  let raw;
  try {
    raw = await response.text();
  } catch {
    return statusLine();
  }

  try {
    // Destructuring a `null` payload throws and lands in the catch below,
    // the same path as unparseable text.
    const { error, message, detail } = JSON.parse(raw);
    if (typeof error === "object" && error !== null) {
      return String(error.message ?? error.msg ?? JSON.stringify(error));
    }
    for (const candidate of [error, message, detail]) {
      if (typeof candidate === "string") return candidate;
    }
    return raw;
  } catch {
    return raw || statusLine();
  }
}
86
/**
 * Create a ScrapeError from a non-OK HTTP response.
 *
 * @param response - Fetch-style response; `status`, `json()`, `text()` and
 *   (when available) `clone()` are used.
 * @param providerName - Label placed at the start of the error message.
 * @param errorMapper - Optional function that turns the parsed JSON body into
 *   a message. If it throws, or the body is not JSON, the generic body parser
 *   is used instead.
 * @returns A ScrapeError whose code is derived from the status and whose
 *   `statusCode` is `response.status`.
 */
async function createHttpError(response, providerName, errorMapper) {
  const code = getErrorCodeFromStatus(response.status);
  let message;
  if (errorMapper) {
    // A response body can be read only once. Keep an unread clone so the
    // fallback parser can still see the body after `response.json()` has
    // consumed (and possibly failed to parse) it.
    const fallbackSource = !response.bodyUsed && typeof response.clone === "function"
      ? response.clone()
      : response;
    try {
      message = errorMapper(await response.json());
    } catch {
      message = await parseErrorBody(fallbackSource);
    }
  } else {
    message = await parseErrorBody(response);
  }
  return new ScrapeError(`${providerName} API error (${response.status}): ${message}`, code, response.status);
}
100
+
101
+ //#endregion
102
+ //#region src/common/resilience.ts
103
/**
 * Retry settings applied when the caller does not override them:
 * three attempts, 1 s initial backoff doubled on each retry, and the HTTP
 * statuses that are treated as transient.
 */
const DEFAULT_RETRY = {
  maxAttempts: 3,
  backoffMs: 1000,
  backoffMultiplier: 2,
  retryableStatuses: [408, 429, 500, 502, 503, 504]
};
119
/**
 * System error codes that indicate a transient network failure.
 */
const RETRYABLE_ERROR_CODES = [
  "ECONNRESET",
  "ETIMEDOUT",
  "ECONNREFUSED",
  "EPIPE",
  "ENOTFOUND",
  "ENETUNREACH",
  "EAI_AGAIN"
];

/**
 * Decide whether a thrown value represents a transient failure worth retrying.
 *
 * Checks, in order:
 *  1. `error.code` is one of RETRYABLE_ERROR_CODES;
 *  2. a numeric `statusCode` (then `status`) — the answer is whether it is in
 *     `retryableStatuses`, and no further checks run;
 *  3. `error.code` is "TIMEOUT" or "FETCH_FAILED";
 *  4. the message mentions a timeout / rate limit / temporary unavailability;
 *  5. `error.cause.code` is one of RETRYABLE_ERROR_CODES.
 *
 * @param error - Any thrown value; non-Error values are never retryable.
 * @param retryableStatuses - HTTP statuses considered transient.
 * @returns {boolean}
 */
function isRetryableError(error, retryableStatuses = DEFAULT_RETRY.retryableStatuses) {
  if (!(error instanceof Error)) return false;

  const code = error.code;
  if (code && RETRYABLE_ERROR_CODES.includes(code)) return true;

  if ("statusCode" in error && typeof error.statusCode === "number") return retryableStatuses.includes(error.statusCode);
  if ("status" in error && typeof error.status === "number") return retryableStatuses.includes(error.status);

  if (code === "TIMEOUT" || code === "FETCH_FAILED") return true;

  const message = error.message.toLowerCase();
  if (
    message.includes("timeout") ||
    message.includes("rate limit") ||
    message.includes("too many requests") ||
    message.includes("temporarily unavailable")
  ) return true;

  // Node's fetch reports network failures as `TypeError: fetch failed` and
  // puts the system error (ECONNRESET, ENOTFOUND, ...) on `error.cause`.
  // This check runs last so it can only turn a previous "not retryable"
  // verdict into "retryable", never the other way round.
  const causeCode = error.cause?.code;
  return typeof causeCode === "string" && RETRYABLE_ERROR_CODES.includes(causeCode);
}
149
/**
 * Return a promise that resolves (with no value) after `ms` milliseconds.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
155
/**
 * Run `fn`, retrying transient failures with exponential backoff.
 *
 * @param fn - Async operation to attempt.
 * @param config - Optional overrides for `maxAttempts`, `backoffMs`,
 *   `backoffMultiplier` and `retryableStatuses`; missing values fall back to
 *   DEFAULT_RETRY.
 * @param onRetry - Optional callback `(attempt, error, delayMs)` invoked
 *   before each wait.
 * @returns `{ result, attempts }` from the first successful attempt.
 * @throws The last error (wrapped in an Error if a non-Error was thrown) once
 *   attempts are exhausted or the failure is not retryable.
 */
async function withRetry(fn, config, onRetry) {
  const maxAttempts = config?.maxAttempts ?? DEFAULT_RETRY.maxAttempts;
  const baseDelayMs = config?.backoffMs ?? DEFAULT_RETRY.backoffMs;
  const growth = config?.backoffMultiplier ?? DEFAULT_RETRY.backoffMultiplier;
  const retryableStatuses = config?.retryableStatuses ?? DEFAULT_RETRY.retryableStatuses;

  let lastError;
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    try {
      const result = await fn();
      return { result, attempts: attempt };
    } catch (thrown) {
      lastError = thrown instanceof Error ? thrown : new Error(String(thrown));
      const exhausted = attempt === maxAttempts;
      if (exhausted || !isRetryableError(thrown, retryableStatuses)) throw lastError;

      // Exponential backoff with +/-10% random jitter.
      const delayMs = baseDelayMs * growth ** (attempt - 1) * (0.9 + Math.random() * 0.2);
      onRetry?.(attempt, lastError, delayMs);
      await sleep(delayMs);
    }
  }
  // Reached only when the loop body never ran (e.g. maxAttempts < 1).
  throw lastError ?? /* @__PURE__ */ new Error("Retry failed");
}
178
/**
 * Run `fn` with an AbortSignal that is aborted after `timeoutMs`.
 *
 * The timer is always cleared once `fn` settles. `fn` is responsible for
 * honoring the signal; this helper does not reject on its own.
 *
 * @param fn - Receives the AbortSignal and returns the result (or a promise).
 * @param timeoutMs - Milliseconds before the signal is aborted.
 */
async function withTimeout(fn, timeoutMs) {
  const abortController = new AbortController();
  const timer = setTimeout(() => {
    abortController.abort();
  }, timeoutMs);
  try {
    const outcome = await fn(abortController.signal);
    return outcome;
  } finally {
    clearTimeout(timer);
  }
}
190
/**
 * Create an AbortSignal that aborts after `timeoutMs`, or as soon as
 * `parentSignal` aborts (propagating the parent's reason).
 *
 * The timer is unref'd where supported, so it does not keep the process
 * alive. Once the returned signal aborts for any reason, the timer is cleared
 * and the listener on the parent is removed, so repeated calls against a
 * long-lived parent signal do not accumulate listeners.
 *
 * @param timeoutMs - Milliseconds until the signal aborts on its own.
 * @param parentSignal - Optional signal whose abort is forwarded.
 * @returns {AbortSignal}
 */
function createTimeoutSignal(timeoutMs, parentSignal) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
  timeoutId.unref?.();

  if (parentSignal?.aborted) {
    // Parent already cancelled: no need to wait or listen.
    clearTimeout(timeoutId);
    controller.abort(parentSignal.reason);
    return controller.signal;
  }

  const onParentAbort = () => controller.abort(parentSignal.reason);
  parentSignal?.addEventListener("abort", onParentAbort, { once: true });

  // Single cleanup point for both abort paths (timeout and parent abort).
  controller.signal.addEventListener("abort", () => {
    clearTimeout(timeoutId);
    parentSignal?.removeEventListener("abort", onParentAbort);
  }, { once: true });

  return controller.signal;
}
213
/**
 * Circuit breaker settings used when the caller supplies none:
 * open after 5 recorded failures, allow a new attempt after 30 seconds.
 */
const DEFAULT_CIRCUIT_BREAKER = { failureThreshold: 5, resetTimeoutMs: 30000 };
220
/**
 * Raised when a call is refused because the circuit breaker is open.
 * Instances carry `isCircuitOpen === true` so callers can detect them
 * without an `instanceof` check.
 */
var CircuitOpenError = class extends Error {
  isCircuitOpen = true;
  name = "CircuitOpenError";

  constructor(message) {
    super(message);
  }
};
230
/**
 * Circuit breaker: counts failures recorded since the last success and, once
 * the threshold is reached, reports "open" until the reset timeout elapses,
 * after which it reports "half-open".
 */
var CircuitBreaker = class {
  state;
  failureThreshold;
  resetTimeoutMs;

  /**
   * @param config - Optional `{ failureThreshold, resetTimeoutMs }`; missing
   *   values fall back to DEFAULT_CIRCUIT_BREAKER.
   */
  constructor(config) {
    this.failureThreshold = config?.failureThreshold ?? DEFAULT_CIRCUIT_BREAKER.failureThreshold;
    this.resetTimeoutMs = config?.resetTimeoutMs ?? DEFAULT_CIRCUIT_BREAKER.resetTimeoutMs;
    this.state = { state: "closed", failures: 0 };
  }

  /**
   * True while requests should be rejected (state is "open").
   */
  isOpen() {
    return this.getState() === "open";
  }

  /**
   * Current state ("closed", "open" or "half-open") after applying any
   * time-based transition.
   */
  getState() {
    this.updateState();
    return this.state.state;
  }

  /**
   * Record a success: clears the failure count and closes the circuit.
   */
  recordSuccess() {
    Object.assign(this.state, {
      failures: 0,
      state: "closed",
      lastFailureTime: void 0,
      nextAttemptTime: void 0
    });
  }

  /**
   * Record a failure; opens the circuit once the threshold is reached.
   */
  recordFailure() {
    const current = this.state;
    current.failures++;
    current.lastFailureTime = Date.now();
    if (current.failures >= this.failureThreshold) {
      current.state = "open";
      current.nextAttemptTime = Date.now() + this.resetTimeoutMs;
    }
  }

  /**
   * Run `fn` under circuit breaker protection.
   *
   * @throws CircuitOpenError when the circuit is open; otherwise rethrows
   *   whatever `fn` throws after recording the failure.
   */
  async execute(fn) {
    if (this.isOpen()) {
      const { nextAttemptTime } = this.state;
      const when = nextAttemptTime ? new Date(nextAttemptTime).toISOString() : "unknown";
      throw new CircuitOpenError(`Circuit breaker is open. Next attempt at ${when}`);
    }
    try {
      const value = await fn();
      this.recordSuccess();
      return value;
    } catch (failure) {
      this.recordFailure();
      throw failure;
    }
  }

  /**
   * Return to the initial closed state with no recorded failures.
   */
  reset() {
    this.state = { state: "closed", failures: 0 };
  }

  /**
   * Apply the time-based open -> half-open transition.
   */
  updateState() {
    const { state, nextAttemptTime } = this.state;
    if (state !== "open" || !nextAttemptTime) return;
    if (Date.now() >= nextAttemptTime) this.state.state = "half-open";
  }
};
310
/**
 * Token bucket rate limiter.
 *
 * The bucket refills continuously at `requestsPerMinute / 60` tokens per
 * second and holds at most ten seconds' worth of tokens (minimum 1).
 */
var RateLimiter = class {
  tokens;
  lastRefill;
  maxTokens;
  refillRate;

  /**
   * @param config - `{ requestsPerMinute }`; defaults to 60 when omitted.
   */
  constructor(config) {
    const requestsPerSecond = (config?.requestsPerMinute ?? 60) / 60;
    this.maxTokens = Math.max(1, Math.ceil(requestsPerSecond * 10));
    this.refillRate = requestsPerSecond;
    this.tokens = this.maxTokens;
    this.lastRefill = Date.now();
  }

  /**
   * Check if a request is allowed without consuming tokens.
   */
  canProceed() {
    this.refill();
    return this.tokens >= 1;
  }

  /**
   * Attempt to acquire tokens for a request.
   * Returns true if allowed, false if rate limited.
   */
  tryAcquire(tokens = 1) {
    this.refill();
    if (this.tokens >= tokens) {
      this.tokens -= tokens;
      return true;
    }
    return false;
  }

  /**
   * Wait until tokens are available, then acquire them.
   *
   * @throws RangeError when the request can never be satisfied: `tokens` is
   *   not a number within the bucket capacity, or the refill rate is not
   *   positive. Without this guard the wait loop below would never finish.
   */
  async acquire(tokens = 1) {
    if (this.tryAcquire(tokens)) return;
    if (!(tokens <= this.maxTokens)) {
      throw new RangeError(`Cannot acquire ${tokens} tokens: bucket capacity is ${this.maxTokens}`);
    }
    if (!(this.refillRate > 0)) {
      throw new RangeError("Cannot wait for tokens: refill rate is not positive");
    }
    // tryAcquire() has just refilled, so this.tokens is current.
    const tokensNeeded = tokens - this.tokens;
    const waitMs = Math.ceil(tokensNeeded / this.refillRate * 1e3);
    if (waitMs > 0) await sleep(waitMs);
    // Another caller may have taken the tokens meanwhile; poll once per
    // single-token interval until the acquisition succeeds.
    while (!this.tryAcquire(tokens)) await sleep(Math.ceil(1 / this.refillRate * 1e3));
  }

  /**
   * Milliseconds to wait before retrying: 0 when a token is available now,
   * otherwise the time it takes to refill one whole token.
   */
  getWaitTime() {
    this.refill();
    if (this.tokens >= 1) return 0;
    return Math.ceil(1 / this.refillRate * 1e3);
  }

  /**
   * Refill tokens based on elapsed time, capped at the bucket capacity.
   */
  refill() {
    const now = Date.now();
    const newTokens = (now - this.lastRefill) / 1e3 * this.refillRate;
    this.tokens = Math.min(this.maxTokens, this.tokens + newTokens);
    this.lastRefill = now;
  }
};
373
/**
 * Counting semaphore that bounds the number of concurrent operations.
 * Waiters are resumed in FIFO order.
 */
var Semaphore = class {
  permits;
  waiting = [];

  /**
   * @param permits - Number of operations allowed to run at once.
   */
  constructor(permits) {
    this.permits = permits;
  }

  /**
   * Take a permit, or queue until one is handed over by `release()`.
   */
  async acquire() {
    if (!(this.permits > 0)) {
      return new Promise((wake) => {
        this.waiting.push(wake);
      });
    }
    this.permits--;
  }

  /**
   * Give a permit back. If someone is queued the permit is passed straight
   * to the oldest waiter; otherwise the free count goes up by one.
   */
  release() {
    const wake = this.waiting.shift();
    if (!wake) {
      this.permits++;
      return;
    }
    wake();
  }

  /**
   * Run `fn` while holding a permit; the permit is released even if `fn`
   * throws.
   */
  async execute(fn) {
    await this.acquire();
    try {
      const value = await fn();
      return value;
    } finally {
      this.release();
    }
  }
};
414
/**
 * Run `fn` with timeout and retry, plus whichever stateful guards are
 * supplied in `state`.
 *
 * @param fn - Async function to run; it receives the timeout AbortSignal.
 * @param config - `timeoutMs` (default 30000) and `retry` settings. Only
 *   these two are read from `config`.
 * @param state - Optional pre-built `circuitBreaker`, `rateLimiter` and
 *   `semaphore`. They must be created by the caller so their state can be
 *   shared across calls; a guard that is absent is simply skipped.
 * @param callbacks - Optional `{ onRetry }` forwarded to the retry helper.
 * @returns The `{ result, attempts }` object produced by the retry helper.
 * @throws CircuitOpenError when the supplied circuit breaker is open.
 */
async function withResilience(fn, config, state, callbacks) {
  const timeoutMs = config?.timeoutMs ?? 3e4;
  const breaker = state?.circuitBreaker;

  if (breaker?.isOpen()) throw new CircuitOpenError("Circuit breaker is open");
  if (state?.rateLimiter) await state.rateLimiter.acquire();

  const runGuarded = async () => {
    const attemptOnce = () => withTimeout(fn, timeoutMs);
    try {
      const outcome = await withRetry(attemptOnce, config?.retry, callbacks?.onRetry);
      breaker?.recordSuccess();
      return outcome;
    } catch (failure) {
      // Only the final outcome (after retries) counts against the breaker.
      breaker?.recordFailure();
      throw failure;
    }
  };

  return state?.semaphore ? state.semaphore.execute(runGuarded) : runGuarded();
}
444
+
445
+ //#endregion
446
+ //#region src/common/http-base.ts
447
+ /**
448
+ * Shared HTTP provider infrastructure for LLM and Embedding providers.
449
+ * Provides SSRF protection, resilience, and error normalization.
450
+ */
451
/**
 * Host patterns blocked for SSRF protection: RFC 1918 private ranges,
 * loopback, "this network" (0.x), link-local, carrier-grade NAT
 * (100.64.0.0/10), IPv6 loopback / unspecified / unique-local / link-local /
 * site-local, IPv4-mapped IPv6 in dotted form, and the name "localhost".
 * Patterns are matched against a normalized host (see isPrivateHost).
 */
const PRIVATE_IP_PATTERNS = [
  /^10\./,
  /^172\.(1[6-9]|2\d|3[01])\./,
  /^192\.168\./,
  /^127\./,
  /^0\./,
  /^169\.254\./,
  /^100\.(6[4-9]|[7-9]\d|1[01]\d|12[0-7])\./,
  /^::1$/,
  /^::$/,
  /^(fc|fd)[0-9a-f]{2}:/i,
  /^fe80:/i,
  /^fec0:/i,
  /^::ffff:(10\.|172\.(1[6-9]|2\d|3[01])\.|192\.168\.|127\.|0\.)/i,
  /^localhost$/i
];

/**
 * Check whether a hostname or IP literal refers to a private/internal address.
 *
 * The input is normalized before matching so that equivalent spellings cannot
 * slip past the patterns:
 *  - surrounding brackets are removed (`URL.hostname` returns `[::1]`);
 *  - a single trailing dot is removed (`localhost.`);
 *  - IPv4-mapped IPv6 addresses, in either dotted (`::ffff:10.0.0.1`) or hex
 *    (`::ffff:a00:1`, the form the URL parser emits) notation, are reduced to
 *    the embedded IPv4 address so every IPv4 rule applies to them.
 *
 * @param hostname - Host name or IP literal.
 * @returns {boolean} true when the host matches a blocked pattern.
 */
function isPrivateHost(hostname) {
  let host = String(hostname).trim().toLowerCase();
  if (host.startsWith("[") && host.endsWith("]")) host = host.slice(1, -1);
  host = host.replace(/\.$/, "");

  const mappedDotted = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/.exec(host);
  const mappedHex = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(host);
  if (mappedDotted) {
    host = mappedDotted[1];
  } else if (mappedHex) {
    const high = Number.parseInt(mappedHex[1], 16);
    const low = Number.parseInt(mappedHex[2], 16);
    host = `${high >> 8}.${high & 255}.${low >> 8}.${low & 255}`;
  }

  return PRIVATE_IP_PATTERNS.some((pattern) => pattern.test(host));
}
475
/**
 * Parse a URL and apply the static security checks.
 *
 * @param url - URL string to check.
 * @param options - `requireHttps` (default true) rejects any non-https
 *   protocol; `allowPrivate` (default false) disables the private-host check.
 * @returns The parsed URL object.
 * @throws ScrapeError "INVALID_URL" when the string cannot be parsed,
 *   "VALIDATION_ERROR" when a check fails.
 */
function validateUrl(url, options = {}) {
  const mustBeHttps = options.requireHttps ?? true;
  const privateAllowed = options.allowPrivate ?? false;

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    throw new ScrapeError(`Invalid URL: ${url}`, "INVALID_URL");
  }

  const { protocol, hostname } = parsed;
  if (mustBeHttps && protocol !== "https:") {
    throw new ScrapeError(`HTTPS required. Got: ${protocol}`, "VALIDATION_ERROR");
  }
  if (!privateAllowed && isPrivateHost(hostname)) {
    throw new ScrapeError(`Private/internal addresses not allowed: ${hostname}`, "VALIDATION_ERROR");
  }
  return parsed;
}
491
/**
 * Validate a URL, then resolve its hostname and reject it if any resolved
 * address is private.
 *
 * The DNS step is skipped when `options.resolveDns` is false, when
 * `options.allowPrivate` is true, or when the hostname is already an IP
 * literal according to `isIP`.
 *
 * @returns Nothing; resolves when the URL is acceptable.
 * @throws ScrapeError "VALIDATION_ERROR" for a private resolution,
 *   "FETCH_FAILED" when the lookup itself fails, plus anything thrown by
 *   validateUrl.
 */
async function validateUrlWithDns(url, options = {}) {
  const { hostname: host } = validateUrl(url, options);
  const shouldResolve = options.resolveDns ?? true;
  const privateAllowed = options.allowPrivate ?? false;
  if (!shouldResolve || privateAllowed) return;
  if (isIP(host)) return;

  try {
    const records = await promises.lookup(host, { all: true });
    const offending = records.find((record) => isPrivateHost(record.address));
    if (offending) {
      throw new ScrapeError(`DNS resolved to private address: ${host} -> ${offending.address}`, "VALIDATION_ERROR");
    }
  } catch (failure) {
    // Our own validation error passes through; anything else is a lookup failure.
    if (failure instanceof ScrapeError) throw failure;
    const reason = failure instanceof Error ? failure.message : String(failure);
    throw new ScrapeError(`Failed to resolve hostname: ${host} (${reason})`, "FETCH_FAILED");
  }
}
509
/**
 * Base class for HTTP-backed providers. Bundles URL security checks,
 * timeout, optional retry, circuit breaking, rate limiting and a concurrency
 * limit around a JSON request.
 */
var BaseHttpProvider = class {
  baseUrl;
  model;
  headers;
  errorMapper;
  requireHttps;
  allowPrivate;
  resolveDns;
  allowRedirects;
  timeoutMs;
  retryConfig;
  concurrency;
  circuitBreaker;
  rateLimiter;
  semaphore;

  /**
   * @param config - Provider configuration. Reads `baseUrl` (one trailing
   *   slash is stripped), `model`, `headers`, `errorMapper`, the security
   *   flags `requireHttps` (default true), `allowPrivate` (default false),
   *   `resolveDns` (default true), `allowRedirects` (default false), and
   *   `resilience` (`timeoutMs`, `retry`, `concurrency`, `circuitBreaker`,
   *   `rateLimit`, and `state` holding pre-built shared primitives).
   * @throws ScrapeError when `baseUrl` fails the static URL checks.
   */
  constructor(config) {
    this.baseUrl = config.baseUrl.replace(/\/$/, "");
    this.model = config.model;
    this.headers = { "Content-Type": "application/json", ...config.headers };
    this.errorMapper = config.errorMapper;

    this.requireHttps = config.requireHttps ?? true;
    this.allowPrivate = config.allowPrivate ?? false;
    this.resolveDns = config.resolveDns ?? true;
    this.allowRedirects = config.allowRedirects ?? false;

    const resilience = config.resilience;
    this.timeoutMs = resilience?.timeoutMs ?? 3e4;
    this.retryConfig = resilience?.retry;
    this.concurrency = resilience?.concurrency ?? 1;

    // Shared primitives win over per-instance ones built from config.
    const shared = resilience?.state;
    const ownBreaker = resilience?.circuitBreaker ? new CircuitBreaker(resilience.circuitBreaker) : void 0;
    const ownLimiter = resilience?.rateLimit ? new RateLimiter(resilience.rateLimit) : void 0;
    this.circuitBreaker = shared?.circuitBreaker ?? ownBreaker;
    this.rateLimiter = shared?.rateLimiter ?? ownLimiter;
    this.semaphore = shared?.semaphore ?? new Semaphore(this.concurrency);

    validateUrl(this.baseUrl, {
      requireHttps: this.requireHttps,
      allowPrivate: this.allowPrivate
    });
  }

  /**
   * Expose the resilience primitives so they can be shared with another
   * provider instance or reused across calls.
   */
  getResilienceState() {
    const { circuitBreaker, rateLimiter, semaphore } = this;
    return { circuitBreaker, rateLimiter, semaphore };
  }

  /**
   * Send a JSON request with security checks and resilience applied.
   *
   * @param url - Absolute URL to request; validated (including DNS) first.
   * @param options - `method` (default "POST"), extra `headers`, `body`
   *   (JSON-stringified when truthy) and an optional caller `signal`.
   * @returns `{ data, status, headers }` where `data` is the parsed JSON body.
   * @throws CircuitOpenError when the breaker is open; ScrapeError for URL
   *   validation failures and non-OK responses; otherwise whatever the
   *   underlying fetch throws.
   */
  async fetch(url, options = {}) {
    const securityOptions = {
      requireHttps: this.requireHttps,
      allowPrivate: this.allowPrivate,
      resolveDns: this.resolveDns,
      allowRedirects: this.allowRedirects
    };
    await validateUrlWithDns(url, securityOptions);

    if (this.circuitBreaker?.isOpen()) {
      throw new CircuitOpenError("Circuit breaker is open. Too many recent failures.");
    }
    if (this.rateLimiter) await this.rateLimiter.acquire();

    // One HTTP round trip, bound to the timeout signal (and the caller's, if any).
    const performRequest = async (timeoutSignal) => {
      const signal = options.signal ? AbortSignal.any([options.signal, timeoutSignal]) : timeoutSignal;
      const init = {
        method: options.method ?? "POST",
        headers: { ...this.headers, ...options.headers },
        body: options.body ? JSON.stringify(options.body) : void 0,
        signal,
        redirect: this.allowRedirects ? "follow" : "error"
      };
      const response = await fetch(url, init);

      // The final URL after redirects must pass the same checks as the original.
      if (this.allowRedirects && response.redirected) {
        await validateUrlWithDns(response.url, securityOptions);
      }
      if (!response.ok) {
        throw await createHttpError(response, this.constructor.name, this.errorMapper);
      }
      const data = await response.json();
      return { data, status: response.status, headers: response.headers };
    };

    if (!this.semaphore) throw new ScrapeError("Semaphore not initialized", "VALIDATION_ERROR");

    return this.semaphore.execute(async () => {
      const attemptOnce = () => withTimeout((signal) => performRequest(signal), this.timeoutMs);
      try {
        // Retries happen only when a retry config was provided.
        const result = this.retryConfig
          ? (await withRetry(attemptOnce, this.retryConfig)).result
          : await attemptOnce();
        this.circuitBreaker?.recordSuccess();
        return result;
      } catch (failure) {
        this.circuitBreaker?.recordFailure();
        throw failure;
      }
    });
  }
};
615
+
616
+ //#endregion
617
+ export { Semaphore as a, withResilience as c, ScrapeError as d, RateLimiter as i, withRetry as l, CircuitBreaker as n, createTimeoutSignal as o, CircuitOpenError as r, isRetryableError as s, BaseHttpProvider as t, withTimeout as u };
618
+ //# sourceMappingURL=http-base-DM7YNo6X.mjs.map