recker 1.0.26 → 1.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/browser/cache.d.ts +40 -0
- package/dist/browser/browser/cache.js +199 -0
- package/dist/browser/browser/crypto.d.ts +24 -0
- package/dist/browser/browser/crypto.js +80 -0
- package/dist/browser/browser/index.d.ts +31 -0
- package/dist/browser/browser/index.js +31 -0
- package/dist/browser/browser/recker.d.ts +26 -0
- package/dist/browser/browser/recker.js +61 -0
- package/dist/browser/cache/basic-file-storage.d.ts +12 -0
- package/dist/browser/cache/basic-file-storage.js +50 -0
- package/dist/browser/cache/memory-limits.d.ts +20 -0
- package/dist/browser/cache/memory-limits.js +96 -0
- package/dist/browser/cache/memory-storage.d.ts +132 -0
- package/dist/browser/cache/memory-storage.js +454 -0
- package/dist/browser/cache.d.ts +40 -0
- package/dist/browser/cache.js +199 -0
- package/dist/browser/constants/http-status.d.ts +73 -0
- package/dist/browser/constants/http-status.js +156 -0
- package/dist/browser/cookies/memory-cookie-jar.d.ts +30 -0
- package/dist/browser/cookies/memory-cookie-jar.js +210 -0
- package/dist/browser/core/client.d.ts +118 -0
- package/dist/browser/core/client.js +667 -0
- package/dist/browser/core/errors.d.ts +142 -0
- package/dist/browser/core/errors.js +308 -0
- package/dist/browser/core/index.d.ts +5 -0
- package/dist/browser/core/index.js +5 -0
- package/dist/browser/core/request-promise.d.ts +23 -0
- package/dist/browser/core/request-promise.js +82 -0
- package/dist/browser/core/request.d.ts +20 -0
- package/dist/browser/core/request.js +76 -0
- package/dist/browser/core/response.d.ts +34 -0
- package/dist/browser/core/response.js +178 -0
- package/dist/browser/crypto.d.ts +24 -0
- package/dist/browser/crypto.js +80 -0
- package/dist/browser/index.d.ts +31 -0
- package/dist/browser/index.js +31 -0
- package/dist/browser/plugins/auth/api-key.d.ts +8 -0
- package/dist/browser/plugins/auth/api-key.js +27 -0
- package/dist/browser/plugins/auth/auth0.d.ts +33 -0
- package/dist/browser/plugins/auth/auth0.js +94 -0
- package/dist/browser/plugins/auth/aws-sigv4.d.ts +10 -0
- package/dist/browser/plugins/auth/aws-sigv4.js +88 -0
- package/dist/browser/plugins/auth/azure-ad.d.ts +48 -0
- package/dist/browser/plugins/auth/azure-ad.js +152 -0
- package/dist/browser/plugins/auth/basic.d.ts +7 -0
- package/dist/browser/plugins/auth/basic.js +13 -0
- package/dist/browser/plugins/auth/bearer.d.ts +8 -0
- package/dist/browser/plugins/auth/bearer.js +17 -0
- package/dist/browser/plugins/auth/cognito.d.ts +45 -0
- package/dist/browser/plugins/auth/cognito.js +208 -0
- package/dist/browser/plugins/auth/digest.d.ts +8 -0
- package/dist/browser/plugins/auth/digest.js +100 -0
- package/dist/browser/plugins/auth/firebase.d.ts +32 -0
- package/dist/browser/plugins/auth/firebase.js +195 -0
- package/dist/browser/plugins/auth/github-app.d.ts +36 -0
- package/dist/browser/plugins/auth/github-app.js +170 -0
- package/dist/browser/plugins/auth/google-service-account.d.ts +49 -0
- package/dist/browser/plugins/auth/google-service-account.js +172 -0
- package/dist/browser/plugins/auth/index.d.ts +15 -0
- package/dist/browser/plugins/auth/index.js +15 -0
- package/dist/browser/plugins/auth/mtls.d.ts +37 -0
- package/dist/browser/plugins/auth/mtls.js +140 -0
- package/dist/browser/plugins/auth/oauth2.d.ts +8 -0
- package/dist/browser/plugins/auth/oauth2.js +26 -0
- package/dist/browser/plugins/auth/oidc.d.ts +55 -0
- package/dist/browser/plugins/auth/oidc.js +222 -0
- package/dist/browser/plugins/auth/okta.d.ts +47 -0
- package/dist/browser/plugins/auth/okta.js +157 -0
- package/dist/browser/plugins/auth.d.ts +1 -0
- package/dist/browser/plugins/auth.js +1 -0
- package/dist/browser/plugins/cache.d.ts +15 -0
- package/dist/browser/plugins/cache.js +486 -0
- package/dist/browser/plugins/circuit-breaker.d.ts +13 -0
- package/dist/browser/plugins/circuit-breaker.js +100 -0
- package/dist/browser/plugins/compression.d.ts +4 -0
- package/dist/browser/plugins/compression.js +130 -0
- package/dist/browser/plugins/cookie-jar.d.ts +5 -0
- package/dist/browser/plugins/cookie-jar.js +72 -0
- package/dist/browser/plugins/dedup.d.ts +5 -0
- package/dist/browser/plugins/dedup.js +35 -0
- package/dist/browser/plugins/graphql.d.ts +13 -0
- package/dist/browser/plugins/graphql.js +58 -0
- package/dist/browser/plugins/grpc-web.d.ts +79 -0
- package/dist/browser/plugins/grpc-web.js +261 -0
- package/dist/browser/plugins/hls.d.ts +105 -0
- package/dist/browser/plugins/hls.js +395 -0
- package/dist/browser/plugins/jsonrpc.d.ts +75 -0
- package/dist/browser/plugins/jsonrpc.js +143 -0
- package/dist/browser/plugins/logger.d.ts +13 -0
- package/dist/browser/plugins/logger.js +108 -0
- package/dist/browser/plugins/odata.d.ts +181 -0
- package/dist/browser/plugins/odata.js +564 -0
- package/dist/browser/plugins/pagination.d.ts +16 -0
- package/dist/browser/plugins/pagination.js +105 -0
- package/dist/browser/plugins/rate-limit.d.ts +15 -0
- package/dist/browser/plugins/rate-limit.js +162 -0
- package/dist/browser/plugins/retry.d.ts +14 -0
- package/dist/browser/plugins/retry.js +116 -0
- package/dist/browser/plugins/scrape.d.ts +21 -0
- package/dist/browser/plugins/scrape.js +82 -0
- package/dist/browser/plugins/server-timing.d.ts +7 -0
- package/dist/browser/plugins/server-timing.js +24 -0
- package/dist/browser/plugins/soap.d.ts +72 -0
- package/dist/browser/plugins/soap.js +347 -0
- package/dist/browser/plugins/xml.d.ts +9 -0
- package/dist/browser/plugins/xml.js +194 -0
- package/dist/browser/plugins/xsrf.d.ts +9 -0
- package/dist/browser/plugins/xsrf.js +48 -0
- package/dist/browser/recker.d.ts +26 -0
- package/dist/browser/recker.js +61 -0
- package/dist/browser/runner/request-runner.d.ts +46 -0
- package/dist/browser/runner/request-runner.js +89 -0
- package/dist/browser/scrape/document.d.ts +44 -0
- package/dist/browser/scrape/document.js +210 -0
- package/dist/browser/scrape/element.d.ts +49 -0
- package/dist/browser/scrape/element.js +176 -0
- package/dist/browser/scrape/extractors.d.ts +16 -0
- package/dist/browser/scrape/extractors.js +356 -0
- package/dist/browser/scrape/types.d.ts +107 -0
- package/dist/browser/scrape/types.js +1 -0
- package/dist/browser/transport/fetch.d.ts +11 -0
- package/dist/browser/transport/fetch.js +143 -0
- package/dist/browser/transport/undici.d.ts +38 -0
- package/dist/browser/transport/undici.js +897 -0
- package/dist/browser/types/ai.d.ts +267 -0
- package/dist/browser/types/ai.js +1 -0
- package/dist/browser/types/index.d.ts +351 -0
- package/dist/browser/types/index.js +1 -0
- package/dist/browser/types/logger.d.ts +16 -0
- package/dist/browser/types/logger.js +66 -0
- package/dist/browser/types/udp.d.ts +138 -0
- package/dist/browser/types/udp.js +1 -0
- package/dist/browser/utils/agent-manager.d.ts +29 -0
- package/dist/browser/utils/agent-manager.js +160 -0
- package/dist/browser/utils/body.d.ts +10 -0
- package/dist/browser/utils/body.js +148 -0
- package/dist/browser/utils/charset.d.ts +15 -0
- package/dist/browser/utils/charset.js +169 -0
- package/dist/browser/utils/concurrency.d.ts +20 -0
- package/dist/browser/utils/concurrency.js +120 -0
- package/dist/browser/utils/dns.d.ts +6 -0
- package/dist/browser/utils/dns.js +26 -0
- package/dist/browser/utils/header-parser.d.ts +94 -0
- package/dist/browser/utils/header-parser.js +617 -0
- package/dist/browser/utils/html-cleaner.d.ts +1 -0
- package/dist/browser/utils/html-cleaner.js +21 -0
- package/dist/browser/utils/link-header.d.ts +69 -0
- package/dist/browser/utils/link-header.js +190 -0
- package/dist/browser/utils/optional-require.d.ts +19 -0
- package/dist/browser/utils/optional-require.js +105 -0
- package/dist/browser/utils/progress.d.ts +8 -0
- package/dist/browser/utils/progress.js +82 -0
- package/dist/browser/utils/request-pool.d.ts +22 -0
- package/dist/browser/utils/request-pool.js +101 -0
- package/dist/browser/utils/sse.d.ts +7 -0
- package/dist/browser/utils/sse.js +67 -0
- package/dist/browser/utils/streaming.d.ts +17 -0
- package/dist/browser/utils/streaming.js +84 -0
- package/dist/browser/utils/try-fn.d.ts +3 -0
- package/dist/browser/utils/try-fn.js +59 -0
- package/dist/browser/utils/user-agent.d.ts +44 -0
- package/dist/browser/utils/user-agent.js +100 -0
- package/dist/browser/utils/whois.d.ts +32 -0
- package/dist/browser/utils/whois.js +246 -0
- package/dist/browser/websocket/client.d.ts +65 -0
- package/dist/browser/websocket/client.js +313 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +1 -0
- package/dist/transport/fetch.d.ts +7 -1
- package/dist/transport/fetch.js +58 -76
- package/package.json +34 -2
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
export class RateLimitExceededError extends Error {
|
|
2
|
+
limit;
|
|
3
|
+
window;
|
|
4
|
+
key;
|
|
5
|
+
constructor(limit, window, key) {
|
|
6
|
+
super(`Rate limit exceeded for ${key}: ${limit} requests per ${window}ms`);
|
|
7
|
+
this.limit = limit;
|
|
8
|
+
this.window = window;
|
|
9
|
+
this.key = key;
|
|
10
|
+
this.name = 'RateLimitExceededError';
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
function parseRateLimitHeaders(headers) {
|
|
14
|
+
const result = {};
|
|
15
|
+
const remaining = headers.get('x-ratelimit-remaining') || headers.get('ratelimit-remaining');
|
|
16
|
+
if (remaining !== null) {
|
|
17
|
+
result.remaining = parseInt(remaining, 10);
|
|
18
|
+
}
|
|
19
|
+
const reset = headers.get('x-ratelimit-reset') || headers.get('ratelimit-reset');
|
|
20
|
+
if (reset !== null) {
|
|
21
|
+
const val = parseFloat(reset);
|
|
22
|
+
if (val < 1000000000) {
|
|
23
|
+
result.reset = Date.now() + (val * 1000);
|
|
24
|
+
}
|
|
25
|
+
else {
|
|
26
|
+
result.reset = val * 1000;
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
const retryAfter = headers.get('retry-after');
|
|
30
|
+
if (retryAfter !== null) {
|
|
31
|
+
if (/^\d+$/.test(retryAfter)) {
|
|
32
|
+
result.retryAfter = parseInt(retryAfter, 10) * 1000;
|
|
33
|
+
}
|
|
34
|
+
else {
|
|
35
|
+
const date = Date.parse(retryAfter);
|
|
36
|
+
if (!isNaN(date)) {
|
|
37
|
+
result.retryAfter = Math.max(0, date - Date.now());
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
return result;
|
|
42
|
+
}
|
|
43
|
+
/**
 * Creates a client plugin that limits outgoing requests with one bucket of
 * tokens per key.
 *
 * Options read from `options`:
 * - `limit`: tokens granted to a bucket on each refill;
 * - `window`: refill interval in milliseconds (falls back to 1000);
 * - `strategy`: what to do when a bucket is empty — `'queue'` (fallback)
 *   holds the request until a token is available, `'throw'` rejects with
 *   `RateLimitExceededError`, `'drop'` rejects with a plain `Error`;
 * - `adaptive`: when truthy, response headers parsed by
 *   `parseRateLimitHeaders` can block a bucket until the server's
 *   retry/reset time;
 * - `keyGenerator`: maps a request to its bucket key; by default the URL
 *   hostname, or `'global'` when the URL cannot be parsed.
 *
 * @param options Plugin configuration as described above.
 * @returns A function that registers the middleware via `client.use`.
 */
export function rateLimitPlugin(options) {
    const limit = options.limit;
    const windowMs = options.window || 1000;
    const strategy = options.strategy || 'queue';
    const adaptive = options.adaptive || false;
    const buckets = new Map();
    const getKey = options.keyGenerator || ((req) => {
        try {
            return new URL(req.url).hostname;
        }
        catch {
            return 'global';
        }
    });

    // Milliseconds until the bucket's current window ends (never negative).
    const msUntilRefill = (bucket) => Math.max(0, windowMs - (Date.now() - bucket.lastRefill));

    // Restores the bucket to `limit` tokens once a full window has elapsed,
    // unless the bucket is still blocked by an adaptive back-off.
    const refillBucket = (bucket) => {
        const now = Date.now();
        if (now < bucket.blockedUntil) {
            return;
        }
        // `>=` so that a timer firing exactly on the window edge refills
        // immediately instead of scheduling another zero-delay pass.
        if (now - bucket.lastRefill >= windowMs) {
            bucket.tokens = limit;
            bucket.lastRefill = now;
        }
    };

    // Releases queued requests while tokens are available and re-arms a
    // timer for the next refill (or the end of a back-off) if any remain.
    const processQueue = (bucket) => {
        const now = Date.now();
        if (now < bucket.blockedUntil) {
            setTimeout(() => processQueue(bucket), bucket.blockedUntil - now);
            return;
        }
        refillBucket(bucket);
        while (bucket.queue.length > 0 && bucket.tokens > 0) {
            bucket.tokens--;
            const release = bucket.queue.shift();
            release();
        }
        if (bucket.queue.length > 0) {
            setTimeout(() => processQueue(bucket), msUntilRefill(bucket));
        }
    };

    // Applies server-provided hints: `retryAfter` wins; otherwise an
    // exhausted `remaining` together with `reset` blocks until the reset.
    const adaptLimits = (bucket, response) => {
        const limits = parseRateLimitHeaders(response.headers);
        let backoffTime = 0;
        if (limits.retryAfter) {
            backoffTime = limits.retryAfter;
        }
        else if (limits.remaining !== undefined && limits.remaining <= 0 && limits.reset) {
            backoffTime = Math.max(0, limits.reset - Date.now());
        }
        if (backoffTime > 0) {
            bucket.blockedUntil = Date.now() + backoffTime;
            bucket.tokens = 0;
            if (bucket.queue.length > 0) {
                setTimeout(() => processQueue(bucket), 0);
            }
        }
    };

    return (client) => {
        const middleware = async (req, next) => {
            const key = getKey(req);
            let bucket = buckets.get(key);
            if (!bucket) {
                bucket = {
                    tokens: limit,
                    lastRefill: Date.now(),
                    queue: [],
                    blockedUntil: 0
                };
                buckets.set(key, bucket);
            }

            // Sends the request and feeds the response back into the bucket.
            const dispatch = async () => {
                const response = await next(req);
                if (adaptive) {
                    adaptLimits(bucket, response);
                }
                return response;
            };

            if (Date.now() < bucket.blockedUntil) {
                bucket.tokens = 0;
            }
            else {
                refillBucket(bucket);
            }

            if (bucket.tokens > 0) {
                bucket.tokens--;
                return dispatch();
            }
            if (strategy === 'throw') {
                throw new RateLimitExceededError(limit, windowMs, key);
            }
            if (strategy === 'drop') {
                throw new Error(`Request dropped due to rate limit (${key})`);
            }

            // 'queue': park the request until processQueue hands it a token.
            return new Promise((resolve, reject) => {
                bucket.queue.push(() => {
                    dispatch().then(resolve, reject);
                });
                // Only the first queued request arms the timer; processQueue
                // keeps re-arming it while the queue is non-empty.
                if (bucket.queue.length === 1) {
                    setTimeout(() => processQueue(bucket), msUntilRefill(bucket));
                }
            });
        };
        client.use(middleware);
    };
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { Plugin } from '../types/index.js';
|
|
2
|
+
/** Names of the delay-growth strategies accepted by {@link RetryOptions.backoff}. */
export type BackoffStrategy =
    | 'linear'
    | 'exponential'
    | 'decorrelated';

/** Configuration accepted by {@link retryPlugin}. Every field is optional. */
export interface RetryOptions {
    /** Total number of attempts, counting the first request. The plugin falls back to 3. */
    maxAttempts?: number;
    /** Base delay in milliseconds fed to the backoff calculation. The plugin falls back to 1000. */
    delay?: number;
    /** Cap in milliseconds applied to the computed backoff and to `Retry-After` delays. The plugin falls back to 30000. */
    maxDelay?: number;
    /** How the delay grows between attempts. The plugin falls back to `'exponential'`. */
    backoff?: BackoffStrategy;
    /** Random jitter on the computed delay; enabled unless set to `false`. */
    jitter?: boolean;
    /** Response status codes that trigger a retry. The plugin falls back to 408, 429, 500, 502, 503 and 504. */
    statusCodes?: number[];
    /** Decides whether a thrown error is retried; replaces the plugin's built-in check. */
    shouldRetry?: (error: unknown) => boolean;
    /** Called before each wait with the attempt number, the triggering error and the delay in milliseconds. */
    onRetry?: (attempt: number, error: unknown, delay: number) => void;
    /** Use a response's `Retry-After` header as the delay when present; enabled unless set to `false`. */
    respectRetryAfter?: boolean;
}

/** Builds the retry plugin from the given options. */
export declare function retryPlugin(options?: RetryOptions): Plugin;
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { HttpError, NetworkError, TimeoutError } from '../core/errors.js';
|
|
2
|
+
/**
 * Computes the wait before the next retry.
 *
 * @param attempt   1-based number of the attempt that just failed.
 * @param baseDelay Base delay in milliseconds.
 * @param maxDelay  Hard upper bound in milliseconds for the returned delay.
 * @param strategy  `'linear'`, `'exponential'` or `'decorrelated'`; any
 *                  other value behaves like `'linear'`.
 * @param useJitter When truthy, shifts the delay randomly by up to ±25%.
 * @returns A non-negative integer number of milliseconds, never above `maxDelay`.
 */
function calculateDelay(attempt, baseDelay, maxDelay, strategy, useJitter) {
    let delay;
    switch (strategy) {
        case 'exponential':
            delay = Math.pow(2, attempt - 1) * baseDelay;
            break;
        case 'decorrelated': {
            // Random point between baseDelay and three times the delay the
            // exponential strategy would have used for the previous attempt.
            const prevDelay = attempt === 1 ? baseDelay : Math.pow(2, attempt - 2) * baseDelay;
            delay = Math.random() * (prevDelay * 3 - baseDelay) + baseDelay;
            break;
        }
        case 'linear':
        default:
            delay = baseDelay * attempt;
    }
    delay = Math.min(delay, maxDelay);
    if (useJitter) {
        const jitterRange = delay * 0.25;
        delay += (Math.random() * jitterRange * 2) - jitterRange;
        // Clamp again: positive jitter must not push the delay past maxDelay.
        delay = Math.min(delay, maxDelay);
    }
    return Math.max(0, Math.floor(delay));
}
|
|
26
|
+
function parseRetryAfter(headerValue) {
|
|
27
|
+
if (!headerValue)
|
|
28
|
+
return undefined;
|
|
29
|
+
const seconds = parseInt(headerValue, 10);
|
|
30
|
+
if (!isNaN(seconds) && seconds >= 0) {
|
|
31
|
+
return seconds * 1000;
|
|
32
|
+
}
|
|
33
|
+
const date = Date.parse(headerValue);
|
|
34
|
+
if (!isNaN(date)) {
|
|
35
|
+
const delay = date - Date.now();
|
|
36
|
+
return delay > 0 ? delay : undefined;
|
|
37
|
+
}
|
|
38
|
+
return undefined;
|
|
39
|
+
}
|
|
40
|
+
/**
 * Creates a client plugin that re-issues failed requests.
 *
 * A request is retried when the response is not `ok` and its status is in
 * `statusCodes`, or when the request throws and `shouldRetry(error)` is
 * truthy — in both cases only while fewer than `maxAttempts` attempts have
 * been made. Once attempts are exhausted, the last response is returned or
 * the last error is rethrown.
 *
 * @param options Retry settings; missing fields fall back to 3 attempts,
 *                1000 ms base delay, 30000 ms cap, `'exponential'` backoff,
 *                jitter on, `Retry-After` respected.
 * @returns A function that registers the middleware via `client.use`.
 */
export function retryPlugin(options = {}) {
    const maxAttempts = options.maxAttempts || 3;
    const baseDelay = options.delay || 1000;
    const maxDelay = options.maxDelay || 30000;
    const backoffStrategy = options.backoff || 'exponential';
    const statusCodes = options.statusCodes || [408, 429, 500, 502, 503, 504];
    const useJitter = options.jitter !== false;
    const respectRetryAfter = options.respectRetryAfter !== false;
    const onRetry = options.onRetry;

    // Low-level error codes the built-in check treats as transient.
    const transientCodes = ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND'];

    // Built-in decision for thrown errors, used when no `shouldRetry` is given.
    const defaultShouldRetry = (error) => {
        if (error instanceof NetworkError || error instanceof TimeoutError) {
            return true;
        }
        if (error instanceof HttpError) {
            return statusCodes.includes(error.status);
        }
        if (error && typeof error === 'object' && 'code' in error) {
            return transientCodes.includes(error.code);
        }
        return false;
    };
    const shouldRetry = options.shouldRetry || defaultShouldRetry;

    const backoffFor = (attempt) => calculateDelay(attempt, baseDelay, maxDelay, backoffStrategy, useJitter);

    return (client) => {
        // Notifies the `onRetry` option and the client's `onRetry` hooks,
        // then sleeps for `delayMs`.
        const notifyAndSleep = async (error, attempt, delayMs, req) => {
            if (onRetry) {
                onRetry(attempt, error, delayMs);
            }
            if (client.hooks?.onRetry) {
                for (const hook of client.hooks.onRetry) {
                    await hook(error, attempt, delayMs, req);
                }
            }
            await new Promise((wake) => setTimeout(wake, delayMs));
        };

        const middleware = async (req, next) => {
            for (let attempt = 1;; attempt++) {
                try {
                    const res = await next(req);
                    const retryable = attempt < maxAttempts && !res.ok && statusCodes.includes(res.status);
                    if (!retryable) {
                        return res;
                    }
                    // A usable Retry-After value wins (capped at maxDelay);
                    // otherwise fall back to the configured backoff.
                    const retryAfterDelay = respectRetryAfter
                        ? parseRetryAfter(res.headers.get('Retry-After'))
                        : undefined;
                    const delayMs = retryAfterDelay !== undefined
                        ? Math.min(retryAfterDelay, maxDelay)
                        : backoffFor(attempt);
                    await notifyAndSleep(new HttpError(res, req), attempt, delayMs, req);
                }
                catch (error) {
                    if (!(attempt < maxAttempts && shouldRetry(error))) {
                        throw error;
                    }
                    await notifyAndSleep(error, attempt, backoffFor(attempt), req);
                }
            }
        };
        client.use(middleware);
    };
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { ReckerResponse } from '../types/index.js';
|
|
2
|
+
import type { RequestPromise } from '../core/request-promise.js';
|
|
3
|
+
import type { ScrapeOptions, ExtractionSchema, ExtractedLink, ExtractedImage, ExtractedMeta, OpenGraphData, TwitterCardData, JsonLdData, ExtractedForm, ExtractedTable, ExtractedScript, ExtractedStyle, LinkExtractionOptions, ImageExtractionOptions } from '../scrape/types.js';
|
|
4
|
+
/**
 * A promise augmented with scraping helpers. Each helper waits for the
 * response, reads its text, builds a `ScrapeDocument` from it and then
 * delegates to the document method of the same name.
 */
export interface ScrapePromise<T> extends Promise<T> {
    /** Resolves to the parsed document itself. */
    scrape(options?: ScrapeOptions): Promise<import('../scrape/document.js').ScrapeDocument>;

    /** Resolves to the document's `links(options)` result. */
    links(options?: LinkExtractionOptions): Promise<ExtractedLink[]>;

    /** Resolves to the document's `images(options)` result. */
    images(options?: ImageExtractionOptions): Promise<ExtractedImage[]>;

    /** Resolves to the document's `meta()` result. */
    meta(): Promise<ExtractedMeta>;

    /** Resolves to the document's `openGraph()` result. */
    openGraph(): Promise<OpenGraphData>;

    /** Resolves to the document's `twitterCard()` result. */
    twitterCard(): Promise<TwitterCardData>;

    /** Resolves to the document's `jsonLd()` result. */
    jsonLd(): Promise<JsonLdData[]>;

    /** Resolves to the document's `forms(selector)` result. */
    forms(selector?: string): Promise<ExtractedForm[]>;

    /** Resolves to the document's `tables(selector)` result. */
    tables(selector?: string): Promise<ExtractedTable[]>;

    /** Resolves to the document's `scripts()` result. */
    scripts(): Promise<ExtractedScript[]>;

    /** Resolves to the document's `styles()` result. */
    styles(): Promise<ExtractedStyle[]>;

    /** Resolves to the document's `extract(schema)` result. */
    extract<R extends Record<string, unknown>>(schema: ExtractionSchema): Promise<R>;
}

/** Attaches the {@link ScrapePromise} helpers to a response promise. */
export declare function scrape<T extends ReckerResponse>(promise: RequestPromise<T> | Promise<T>): ScrapePromise<T>;

/** Builds a `ScrapeDocument` from an HTML string. */
export declare function parseHtml(html: string, options?: ScrapeOptions): Promise<import('../scrape/document.js').ScrapeDocument>;

/** Awaits a response and builds a `ScrapeDocument` from its text. */
export declare function scrapeResponse(promise: Promise<ReckerResponse>, options?: ScrapeOptions): Promise<import('../scrape/document.js').ScrapeDocument>;

export type {
    ScrapeOptions,
    ExtractionSchema,
    ExtractedLink,
    ExtractedImage,
    ExtractedMeta,
    OpenGraphData,
    TwitterCardData,
    JsonLdData,
    ExtractedForm,
    ExtractedFormField,
    ExtractedTable,
    ExtractedScript,
    ExtractedStyle,
    LinkExtractionOptions,
    ImageExtractionOptions,
} from '../scrape/types.js';
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
// Cached `ScrapeDocument` class; filled in on first use.
let ScrapeDocumentClass = null;

/**
 * Returns the `ScrapeDocument` class, importing `../scrape/document.js`
 * on the first call and reusing the cached class afterwards.
 */
async function getScrapeDocumentClass() {
    if (ScrapeDocumentClass) {
        return ScrapeDocumentClass;
    }
    const { ScrapeDocument } = await import('../scrape/document.js');
    ScrapeDocumentClass = ScrapeDocument;
    return ScrapeDocumentClass;
}
|
|
9
|
+
/**
 * Adds scraping helpers (`scrape`, `links`, `images`, `meta`, `openGraph`,
 * `twitterCard`, `jsonLd`, `forms`, `tables`, `scripts`, `styles`,
 * `extract`) to a response promise.
 *
 * The helpers are attached to the promise returned by
 * `Promise.resolve(promise)`. Every helper call reads `response.text()`
 * and builds a new document.
 * NOTE(review): this assumes the response text can be read more than once
 * when several helpers are used on the same promise — confirm.
 *
 * @param promise Promise (or thenable) resolving to a response exposing
 *                `text()` and `url`.
 * @returns The promise with the helper methods attached.
 */
export function scrape(promise) {
    const basePromise = Promise.resolve(promise);

    // Builds a document from the response body. `baseUrl` is written after
    // the spread so that an `options.baseUrl` of `undefined` or `''` cannot
    // overwrite the fallback to the response URL.
    const getDocument = async (options) => {
        const ScrapeDoc = await getScrapeDocumentClass();
        const response = await basePromise;
        const html = await response.text();
        return ScrapeDoc.create(html, {
            ...options,
            baseUrl: options?.baseUrl || response.url,
        });
    };

    // Wraps a document operation so it runs against a freshly built document.
    const withDocument = (operation) => async (...args) => operation(await getDocument(), ...args);

    return Object.assign(basePromise, {
        scrape: async (options) => getDocument(options),
        links: withDocument((doc, options) => doc.links(options)),
        images: withDocument((doc, options) => doc.images(options)),
        meta: withDocument((doc) => doc.meta()),
        openGraph: withDocument((doc) => doc.openGraph()),
        twitterCard: withDocument((doc) => doc.twitterCard()),
        jsonLd: withDocument((doc) => doc.jsonLd()),
        forms: withDocument((doc, selector) => doc.forms(selector)),
        tables: withDocument((doc, selector) => doc.tables(selector)),
        scripts: withDocument((doc) => doc.scripts()),
        styles: withDocument((doc) => doc.styles()),
        extract: withDocument((doc, schema) => doc.extract(schema)),
    });
}
|
|
70
|
+
/**
 * Builds a scrape document directly from an HTML string.
 *
 * @param html    Markup to parse.
 * @param options Passed through unchanged to `ScrapeDocument.create`.
 * @returns Whatever `ScrapeDocument.create(html, options)` produces.
 */
export async function parseHtml(html, options) {
    const DocumentClass = await getScrapeDocumentClass();
    const document = DocumentClass.create(html, options);
    return document;
}
|
|
74
|
+
/**
 * Awaits a response and builds a scrape document from its text.
 *
 * @param promise Promise resolving to a response exposing `text()` and `url`.
 * @param options Options forwarded to `ScrapeDocument.create`; when
 *                `baseUrl` is missing or falsy, the response URL is used.
 * @returns Whatever `ScrapeDocument.create` produces for the response body.
 */
export async function scrapeResponse(promise, options) {
    const ScrapeDoc = await getScrapeDocumentClass();
    const response = await promise;
    const html = await response.text();
    return ScrapeDoc.create(html, {
        ...options,
        // Written after the spread so that an `options.baseUrl` of
        // `undefined` or `''` cannot overwrite the fallback.
        baseUrl: options?.baseUrl || response.url,
    });
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
export function serverTimingPlugin() {
|
|
2
|
+
return (client) => {
|
|
3
|
+
client.afterResponse((req, res) => {
|
|
4
|
+
const header = res.headers.get('server-timing');
|
|
5
|
+
if (!header)
|
|
6
|
+
return;
|
|
7
|
+
const timings = header.split(',').map(entry => {
|
|
8
|
+
const parts = entry.split(';');
|
|
9
|
+
const name = parts[0].trim();
|
|
10
|
+
let duration;
|
|
11
|
+
let description;
|
|
12
|
+
for (let i = 1; i < parts.length; i++) {
|
|
13
|
+
const [key, val] = parts[i].split('=').map(s => s.trim());
|
|
14
|
+
if (key === 'dur')
|
|
15
|
+
duration = parseFloat(val);
|
|
16
|
+
if (key === 'desc')
|
|
17
|
+
description = val?.replace(/"/g, '');
|
|
18
|
+
}
|
|
19
|
+
return { name, duration, description };
|
|
20
|
+
});
|
|
21
|
+
res.serverTimings = timings;
|
|
22
|
+
});
|
|
23
|
+
};
|
|
24
|
+
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import type { Client } from '../core/client.js';
|
|
2
|
+
import type { RequestOptions } from '../types/index.js';
|
|
3
|
+
/** A value tagged with the name of its XML-RPC data type. */
export interface XmlRpcValue {
    /** Data type tag. */
    type:
        | 'int'
        | 'i4'
        | 'i8'
        | 'boolean'
        | 'string'
        | 'double'
        | 'dateTime.iso8601'
        | 'base64'
        | 'array'
        | 'struct'
        | 'nil';
    /** Payload carried under that tag. */
    value: unknown;
}

/**
 * Outcome of an XML-RPC call.
 * NOTE(review): presumably `result` is set on success and `fault` on
 * failure — confirm against the implementation.
 */
export interface XmlRpcResponse<T = unknown> {
    success: boolean;
    result?: T;
    fault?: {
        faultCode: number;
        faultString: string;
    };
}

/** XML-RPC client bound to a `Client` and an endpoint. */
export declare class XmlRpcClient {
    private client;
    private endpoint;
    private requestOptions;

    /**
     * @param client  Client used to send requests.
     * @param options Endpoint URL and optional per-request options.
     */
    constructor(client: Client, options: {
        endpoint: string;
        requestOptions?: RequestOptions;
    });

    /** Invokes `method` with the given positional parameters. */
    call<T = unknown>(method: string, params?: unknown[]): Promise<XmlRpcResponse<T>>;

    private parseResponse;
}
|
|
26
|
+
/** SOAP protocol versions accepted by {@link SoapOptions.version}. */
export type SoapVersion =
    | '1.1'
    | '1.2';

/** Configuration for {@link SoapClient}; only `endpoint` is required. */
export interface SoapOptions {
    /** Service endpoint URL. */
    endpoint: string;
    /** SOAP protocol version. */
    version?: SoapVersion;
    /** XML namespace (presumably for the operation elements — confirm). */
    namespace?: string;
    /** Prefix used for that namespace. */
    namespacePrefix?: string;
    /** WSDL location (presumably what `getWsdl()` retrieves — confirm). */
    wsdl?: string;
    /** SOAP header entries. */
    soapHeaders?: Record<string, unknown>;
    /** Request options for the underlying client. */
    requestOptions?: RequestOptions;
}

/** Fault details carried by a {@link SoapResponse}. */
export interface SoapFault {
    code: string;
    string: string;
    actor?: string;
    detail?: unknown;
}

/** Outcome of a SOAP call, together with the raw XML that was received. */
export interface SoapResponse<T = unknown> {
    success: boolean;
    result?: T;
    fault?: SoapFault;
    rawXml: string;
}

/** SOAP client bound to a `Client` and a set of {@link SoapOptions}. */
export declare class SoapClient {
    private client;
    private options;

    constructor(client: Client, options: SoapOptions);

    /**
     * Invokes a SOAP operation.
     *
     * @param method  Operation name.
     * @param params  Named parameters for the operation.
     * @param options Per-call SOAP headers and SOAPAction value.
     */
    call<T = unknown>(method: string, params?: Record<string, unknown>, options?: {
        soapHeaders?: Record<string, unknown>;
        soapAction?: string;
    }): Promise<SoapResponse<T>>;

    /** Resolves to the WSDL text, or `null`. */
    getWsdl(): Promise<string | null>;

    private objectToXml;
    private parseResponse;
    private parseXmlToObject;
}
|
|
61
|
+
/** Creates an {@link XmlRpcClient} for the given client and endpoint options. */
export declare function createXmlRpcClient(client: Client, options: {
    endpoint: string;
    requestOptions?: RequestOptions;
}): XmlRpcClient;

/** Creates a {@link SoapClient} for the given client and options. */
export declare function createSoapClient(client: Client, options: SoapOptions): SoapClient;

/**
 * Plugin factory; the returned function receives a `Client`.
 * NOTE(review): presumably this installs the `soap` and `xmlrpc` methods
 * declared in the augmentation below — confirm against the implementation.
 */
export declare function soap(): (client: Client) => void;

// Type augmentation: declares `soap` and `xmlrpc` methods on `Client`.
declare module '../core/client.js' {
    interface Client {
        /** Returns a SOAP client configured with `options`. */
        soap(options: SoapOptions): SoapClient;
        /** Returns an XML-RPC client for `endpoint`. */
        xmlrpc(endpoint: string, requestOptions?: RequestOptions): XmlRpcClient;
    }
}
|