recker 1.0.26 → 1.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171):
  1. package/dist/browser/browser/cache.d.ts +40 -0
  2. package/dist/browser/browser/cache.js +199 -0
  3. package/dist/browser/browser/crypto.d.ts +24 -0
  4. package/dist/browser/browser/crypto.js +80 -0
  5. package/dist/browser/browser/index.d.ts +31 -0
  6. package/dist/browser/browser/index.js +31 -0
  7. package/dist/browser/browser/recker.d.ts +26 -0
  8. package/dist/browser/browser/recker.js +61 -0
  9. package/dist/browser/cache/basic-file-storage.d.ts +12 -0
  10. package/dist/browser/cache/basic-file-storage.js +50 -0
  11. package/dist/browser/cache/memory-limits.d.ts +20 -0
  12. package/dist/browser/cache/memory-limits.js +96 -0
  13. package/dist/browser/cache/memory-storage.d.ts +132 -0
  14. package/dist/browser/cache/memory-storage.js +454 -0
  15. package/dist/browser/cache.d.ts +40 -0
  16. package/dist/browser/cache.js +199 -0
  17. package/dist/browser/constants/http-status.d.ts +73 -0
  18. package/dist/browser/constants/http-status.js +156 -0
  19. package/dist/browser/cookies/memory-cookie-jar.d.ts +30 -0
  20. package/dist/browser/cookies/memory-cookie-jar.js +210 -0
  21. package/dist/browser/core/client.d.ts +118 -0
  22. package/dist/browser/core/client.js +667 -0
  23. package/dist/browser/core/errors.d.ts +142 -0
  24. package/dist/browser/core/errors.js +308 -0
  25. package/dist/browser/core/index.d.ts +5 -0
  26. package/dist/browser/core/index.js +5 -0
  27. package/dist/browser/core/request-promise.d.ts +23 -0
  28. package/dist/browser/core/request-promise.js +82 -0
  29. package/dist/browser/core/request.d.ts +20 -0
  30. package/dist/browser/core/request.js +76 -0
  31. package/dist/browser/core/response.d.ts +34 -0
  32. package/dist/browser/core/response.js +178 -0
  33. package/dist/browser/crypto.d.ts +24 -0
  34. package/dist/browser/crypto.js +80 -0
  35. package/dist/browser/index.d.ts +31 -0
  36. package/dist/browser/index.js +31 -0
  37. package/dist/browser/plugins/auth/api-key.d.ts +8 -0
  38. package/dist/browser/plugins/auth/api-key.js +27 -0
  39. package/dist/browser/plugins/auth/auth0.d.ts +33 -0
  40. package/dist/browser/plugins/auth/auth0.js +94 -0
  41. package/dist/browser/plugins/auth/aws-sigv4.d.ts +10 -0
  42. package/dist/browser/plugins/auth/aws-sigv4.js +88 -0
  43. package/dist/browser/plugins/auth/azure-ad.d.ts +48 -0
  44. package/dist/browser/plugins/auth/azure-ad.js +152 -0
  45. package/dist/browser/plugins/auth/basic.d.ts +7 -0
  46. package/dist/browser/plugins/auth/basic.js +13 -0
  47. package/dist/browser/plugins/auth/bearer.d.ts +8 -0
  48. package/dist/browser/plugins/auth/bearer.js +17 -0
  49. package/dist/browser/plugins/auth/cognito.d.ts +45 -0
  50. package/dist/browser/plugins/auth/cognito.js +208 -0
  51. package/dist/browser/plugins/auth/digest.d.ts +8 -0
  52. package/dist/browser/plugins/auth/digest.js +100 -0
  53. package/dist/browser/plugins/auth/firebase.d.ts +32 -0
  54. package/dist/browser/plugins/auth/firebase.js +195 -0
  55. package/dist/browser/plugins/auth/github-app.d.ts +36 -0
  56. package/dist/browser/plugins/auth/github-app.js +170 -0
  57. package/dist/browser/plugins/auth/google-service-account.d.ts +49 -0
  58. package/dist/browser/plugins/auth/google-service-account.js +172 -0
  59. package/dist/browser/plugins/auth/index.d.ts +15 -0
  60. package/dist/browser/plugins/auth/index.js +15 -0
  61. package/dist/browser/plugins/auth/mtls.d.ts +37 -0
  62. package/dist/browser/plugins/auth/mtls.js +140 -0
  63. package/dist/browser/plugins/auth/oauth2.d.ts +8 -0
  64. package/dist/browser/plugins/auth/oauth2.js +26 -0
  65. package/dist/browser/plugins/auth/oidc.d.ts +55 -0
  66. package/dist/browser/plugins/auth/oidc.js +222 -0
  67. package/dist/browser/plugins/auth/okta.d.ts +47 -0
  68. package/dist/browser/plugins/auth/okta.js +157 -0
  69. package/dist/browser/plugins/auth.d.ts +1 -0
  70. package/dist/browser/plugins/auth.js +1 -0
  71. package/dist/browser/plugins/cache.d.ts +15 -0
  72. package/dist/browser/plugins/cache.js +486 -0
  73. package/dist/browser/plugins/circuit-breaker.d.ts +13 -0
  74. package/dist/browser/plugins/circuit-breaker.js +100 -0
  75. package/dist/browser/plugins/compression.d.ts +4 -0
  76. package/dist/browser/plugins/compression.js +130 -0
  77. package/dist/browser/plugins/cookie-jar.d.ts +5 -0
  78. package/dist/browser/plugins/cookie-jar.js +72 -0
  79. package/dist/browser/plugins/dedup.d.ts +5 -0
  80. package/dist/browser/plugins/dedup.js +35 -0
  81. package/dist/browser/plugins/graphql.d.ts +13 -0
  82. package/dist/browser/plugins/graphql.js +58 -0
  83. package/dist/browser/plugins/grpc-web.d.ts +79 -0
  84. package/dist/browser/plugins/grpc-web.js +261 -0
  85. package/dist/browser/plugins/hls.d.ts +105 -0
  86. package/dist/browser/plugins/hls.js +395 -0
  87. package/dist/browser/plugins/jsonrpc.d.ts +75 -0
  88. package/dist/browser/plugins/jsonrpc.js +143 -0
  89. package/dist/browser/plugins/logger.d.ts +13 -0
  90. package/dist/browser/plugins/logger.js +108 -0
  91. package/dist/browser/plugins/odata.d.ts +181 -0
  92. package/dist/browser/plugins/odata.js +564 -0
  93. package/dist/browser/plugins/pagination.d.ts +16 -0
  94. package/dist/browser/plugins/pagination.js +105 -0
  95. package/dist/browser/plugins/rate-limit.d.ts +15 -0
  96. package/dist/browser/plugins/rate-limit.js +162 -0
  97. package/dist/browser/plugins/retry.d.ts +14 -0
  98. package/dist/browser/plugins/retry.js +116 -0
  99. package/dist/browser/plugins/scrape.d.ts +21 -0
  100. package/dist/browser/plugins/scrape.js +82 -0
  101. package/dist/browser/plugins/server-timing.d.ts +7 -0
  102. package/dist/browser/plugins/server-timing.js +24 -0
  103. package/dist/browser/plugins/soap.d.ts +72 -0
  104. package/dist/browser/plugins/soap.js +347 -0
  105. package/dist/browser/plugins/xml.d.ts +9 -0
  106. package/dist/browser/plugins/xml.js +194 -0
  107. package/dist/browser/plugins/xsrf.d.ts +9 -0
  108. package/dist/browser/plugins/xsrf.js +48 -0
  109. package/dist/browser/recker.d.ts +26 -0
  110. package/dist/browser/recker.js +61 -0
  111. package/dist/browser/runner/request-runner.d.ts +46 -0
  112. package/dist/browser/runner/request-runner.js +89 -0
  113. package/dist/browser/scrape/document.d.ts +44 -0
  114. package/dist/browser/scrape/document.js +210 -0
  115. package/dist/browser/scrape/element.d.ts +49 -0
  116. package/dist/browser/scrape/element.js +176 -0
  117. package/dist/browser/scrape/extractors.d.ts +16 -0
  118. package/dist/browser/scrape/extractors.js +356 -0
  119. package/dist/browser/scrape/types.d.ts +107 -0
  120. package/dist/browser/scrape/types.js +1 -0
  121. package/dist/browser/transport/fetch.d.ts +11 -0
  122. package/dist/browser/transport/fetch.js +143 -0
  123. package/dist/browser/transport/undici.d.ts +38 -0
  124. package/dist/browser/transport/undici.js +897 -0
  125. package/dist/browser/types/ai.d.ts +267 -0
  126. package/dist/browser/types/ai.js +1 -0
  127. package/dist/browser/types/index.d.ts +351 -0
  128. package/dist/browser/types/index.js +1 -0
  129. package/dist/browser/types/logger.d.ts +16 -0
  130. package/dist/browser/types/logger.js +66 -0
  131. package/dist/browser/types/udp.d.ts +138 -0
  132. package/dist/browser/types/udp.js +1 -0
  133. package/dist/browser/utils/agent-manager.d.ts +29 -0
  134. package/dist/browser/utils/agent-manager.js +160 -0
  135. package/dist/browser/utils/body.d.ts +10 -0
  136. package/dist/browser/utils/body.js +148 -0
  137. package/dist/browser/utils/charset.d.ts +15 -0
  138. package/dist/browser/utils/charset.js +169 -0
  139. package/dist/browser/utils/concurrency.d.ts +20 -0
  140. package/dist/browser/utils/concurrency.js +120 -0
  141. package/dist/browser/utils/dns.d.ts +6 -0
  142. package/dist/browser/utils/dns.js +26 -0
  143. package/dist/browser/utils/header-parser.d.ts +94 -0
  144. package/dist/browser/utils/header-parser.js +617 -0
  145. package/dist/browser/utils/html-cleaner.d.ts +1 -0
  146. package/dist/browser/utils/html-cleaner.js +21 -0
  147. package/dist/browser/utils/link-header.d.ts +69 -0
  148. package/dist/browser/utils/link-header.js +190 -0
  149. package/dist/browser/utils/optional-require.d.ts +19 -0
  150. package/dist/browser/utils/optional-require.js +105 -0
  151. package/dist/browser/utils/progress.d.ts +8 -0
  152. package/dist/browser/utils/progress.js +82 -0
  153. package/dist/browser/utils/request-pool.d.ts +22 -0
  154. package/dist/browser/utils/request-pool.js +101 -0
  155. package/dist/browser/utils/sse.d.ts +7 -0
  156. package/dist/browser/utils/sse.js +67 -0
  157. package/dist/browser/utils/streaming.d.ts +17 -0
  158. package/dist/browser/utils/streaming.js +84 -0
  159. package/dist/browser/utils/try-fn.d.ts +3 -0
  160. package/dist/browser/utils/try-fn.js +59 -0
  161. package/dist/browser/utils/user-agent.d.ts +44 -0
  162. package/dist/browser/utils/user-agent.js +100 -0
  163. package/dist/browser/utils/whois.d.ts +32 -0
  164. package/dist/browser/utils/whois.js +246 -0
  165. package/dist/browser/websocket/client.d.ts +65 -0
  166. package/dist/browser/websocket/client.js +313 -0
  167. package/dist/cli/index.d.ts +1 -0
  168. package/dist/cli/index.js +1 -0
  169. package/dist/transport/fetch.d.ts +7 -1
  170. package/dist/transport/fetch.js +58 -76
  171. package/package.json +34 -2
@@ -0,0 +1,617 @@
1
/**
 * Summarises CDN / proxy cache state from response headers.
 *
 * Sources are consulted in a fixed order and later ones override earlier
 * ones: `cf-cache-status`, `x-cache`, `x-akamai-cache-status`, `x-amz-cf-id`.
 *
 * @param headers Object exposing `get(name)`; a falsy result means "header absent".
 * @returns Object that always carries `hit`, plus `provider`, `status`,
 *          `maxAge` and `age` when they could be determined.
 */
export function parseCacheInfo(headers) {
    let hit = false;
    let provider;
    let status;
    let maxAge;
    let age;

    const cfCacheStatus = headers.get('cf-cache-status');
    if (cfCacheStatus) {
        provider = 'cloudflare';
        status = cfCacheStatus.toUpperCase();
        hit = status === 'HIT';
    }

    const xCache = headers.get('x-cache');
    if (xCache) {
        const upper = xCache.toUpperCase();
        hit = upper.includes('HIT');
        if (upper.includes('FASTLY')) {
            provider = 'fastly';
        }
        else if (upper.includes('VARNISH')) {
            provider = 'varnish';
        }
        // First keyword found wins; an unrecognised value keeps any earlier status.
        const keyword = ['HIT', 'MISS', 'EXPIRED', 'STALE'].find((candidate) => upper.includes(candidate));
        if (keyword) {
            status = keyword;
        }
    }

    const akamaiCache = headers.get('x-akamai-cache-status');
    if (akamaiCache) {
        provider = 'akamai';
        hit = akamaiCache.toUpperCase().includes('HIT');
    }

    if (headers.get('x-amz-cf-id')) {
        provider = 'cloudfront';
    }

    const cacheControl = headers.get('cache-control');
    if (cacheControl) {
        // Directive names are case-insensitive; the leading boundary keeps
        // the match on a standalone `max-age` directive.
        const maxAgeMatch = cacheControl.match(/(?:^|[\s,])max-age=(\d+)/i);
        if (maxAgeMatch) {
            maxAge = parseInt(maxAgeMatch[1], 10);
        }
    }

    const ageHeader = headers.get('age');
    if (ageHeader) {
        const seconds = parseInt(ageHeader, 10);
        // A malformed Age header is ignored instead of surfacing NaN.
        if (!Number.isNaN(seconds)) {
            age = seconds;
        }
    }

    // Drop fields that were never determined so callers can test for presence.
    return Object.fromEntries(Object.entries({ hit, provider, status, maxAge, age })
        .filter(([, value]) => value !== undefined));
}
49
/** Builds a predicate that is true when `get(name)` does not return null. */
const headerPresent = (name) => (headers) => headers.get(name) !== null;

/** Builds a predicate that is true when the named header contains `needle`, ignoring case. */
const headerContains = (name, needle) => (headers) => (headers.get(name) || '').toLowerCase().includes(needle);

/** Extractor for platforms that expose no extra details. */
const noDetails = (_headers) => ({});

/**
 * Ordered table of platform fingerprints. Each entry has:
 *  - `name` / `category`: values reported for the platform,
 *  - `headers`: header names associated with the platform (informational),
 *  - `detect(headers)`: boolean predicate,
 *  - `extract(headers)`: optional `region`, `server` and `metadata` details.
 * Order matters to consumers that stop at the first matching entry.
 */
const PLATFORM_DETECTORS = [
    {
        name: 'cloudflare',
        category: 'cdn',
        headers: ['cf-ray', 'cf-cache-status'],
        detect: headerPresent('cf-ray'),
        extract: (headers) => {
            const ray = headers.get('cf-ray');
            if (!ray) {
                return { metadata: {} };
            }
            // The second dash-separated segment of cf-ray is reported as the region.
            const parts = ray.split('-');
            return parts.length > 1
                ? { region: parts[1], metadata: { ray } }
                : { metadata: { ray } };
        }
    },
    {
        name: 'fastly',
        category: 'cdn',
        headers: ['fastly-debug-digest', 'x-served-by'],
        detect: (headers) => (headers.get('x-served-by') || '').includes('cache-')
            || headers.get('fastly-debug-digest') !== null,
        extract: (headers) => ({
            server: headers.get('x-served-by') || undefined,
            metadata: { digest: headers.get('fastly-debug-digest') }
        })
    },
    {
        name: 'akamai',
        category: 'cdn',
        headers: ['x-akamai-request-id', 'x-akamai-cache-status'],
        detect: headerPresent('x-akamai-request-id'),
        extract: (headers) => ({
            metadata: {
                requestId: headers.get('x-akamai-request-id'),
                cacheId: headers.get('x-akamai-cache-status')
            }
        })
    },
    {
        name: 'aws-cloudfront',
        category: 'cloud',
        headers: ['x-amz-cf-id', 'x-amz-cf-pop'],
        detect: headerPresent('x-amz-cf-id'),
        extract: (headers) => ({
            region: headers.get('x-amz-cf-pop') || undefined,
            metadata: {
                requestId: headers.get('x-amz-cf-id'),
                pop: headers.get('x-amz-cf-pop')
            }
        })
    },
    {
        name: 'gcp',
        category: 'cloud',
        headers: ['x-cloud-trace-context', 'x-goog-request-id'],
        detect: (headers) => headers.get('x-cloud-trace-context') !== null
            || headers.get('x-goog-request-id') !== null,
        extract: (headers) => {
            const trace = headers.get('x-cloud-trace-context');
            return {
                metadata: {
                    // Only the part before the first '/' is kept as the trace id.
                    traceId: trace ? trace.split('/')[0] : undefined,
                    requestId: headers.get('x-goog-request-id')
                }
            };
        }
    },
    {
        name: 'azure',
        category: 'cloud',
        headers: ['x-ms-request-id', 'x-azure-ref'],
        detect: headerPresent('x-ms-request-id'),
        extract: (headers) => ({
            metadata: {
                requestId: headers.get('x-ms-request-id'),
                ref: headers.get('x-azure-ref')
            }
        })
    },
    {
        name: 'oracle-cloud',
        category: 'cloud',
        headers: ['x-oracle-dms-rid', 'x-oracle-dms-ecid'],
        detect: headerPresent('x-oracle-dms-rid'),
        extract: (headers) => ({
            metadata: {
                requestId: headers.get('x-oracle-dms-rid'),
                ecid: headers.get('x-oracle-dms-ecid')
            }
        })
    },
    {
        name: 'alibaba-cloud',
        category: 'cloud',
        headers: ['ali-swift-global-savetime', 'eagleeye-traceid'],
        detect: headerPresent('eagleeye-traceid'),
        extract: (headers) => ({
            metadata: { traceId: headers.get('eagleeye-traceid') }
        })
    },
    {
        name: 'vercel',
        category: 'hosting',
        headers: ['x-vercel-id', 'x-vercel-cache'],
        detect: headerPresent('x-vercel-id'),
        extract: (headers) => ({
            metadata: {
                requestId: headers.get('x-vercel-id'),
                cache: headers.get('x-vercel-cache')
            }
        })
    },
    {
        name: 'netlify',
        category: 'hosting',
        headers: ['x-nf-request-id', 'x-nf-trace-id'],
        detect: headerPresent('x-nf-request-id'),
        extract: (headers) => ({
            metadata: {
                requestId: headers.get('x-nf-request-id'),
                traceId: headers.get('x-nf-trace-id')
            }
        })
    },
    {
        name: 'render',
        category: 'hosting',
        headers: ['x-render-origin-server'],
        detect: headerPresent('x-render-origin-server'),
        extract: (headers) => ({
            metadata: { origin: headers.get('x-render-origin-server') }
        })
    },
    {
        name: 'datadome',
        category: 'security',
        headers: ['x-datadome', 'x-datadome-cid'],
        detect: (headers) => headers.get('x-datadome') !== null
            || headers.get('x-datadome-cid') !== null,
        extract: (headers) => ({
            metadata: { cid: headers.get('x-datadome-cid') }
        })
    },
    {
        name: 'incapsula',
        category: 'security',
        headers: ['x-iinfo', 'x-cdn'],
        // The x-cdn comparison is intentionally left case-sensitive.
        detect: (headers) => headers.get('x-iinfo') !== null
            || (headers.get('x-cdn') || '').includes('Incapsula'),
        extract: (headers) => ({
            metadata: { iinfo: headers.get('x-iinfo') }
        })
    },
    {
        name: 'imperva',
        category: 'security',
        headers: ['x-imperva-uuid'],
        detect: headerPresent('x-imperva-uuid'),
        extract: (headers) => ({
            metadata: { uuid: headers.get('x-imperva-uuid') }
        })
    },
    {
        name: 'aws-waf',
        category: 'security',
        headers: ['x-amzn-waf-action'],
        // Check the WAF header this entry advertises; the load-balancer
        // Server value is kept as a secondary signal for compatibility.
        detect: (headers) => headers.get('x-amzn-waf-action') !== null
            || headers.get('server') === 'awselb/2.0',
        extract: (headers) => {
            const action = headers.get('x-amzn-waf-action');
            return action ? { metadata: { action } } : {};
        }
    },
    {
        name: 'envoy',
        category: 'proxy',
        headers: ['x-envoy-upstream-service-time'],
        detect: (headers) => headers.get('x-envoy-upstream-service-time') !== null
            || headers.get('server') === 'envoy',
        extract: noDetails
    },
    {
        name: 'traefik',
        category: 'proxy',
        headers: [],
        detect: headerContains('server', 'traefik'),
        extract: noDetails
    },
    {
        name: 'caddy',
        category: 'server',
        headers: [],
        detect: headerContains('server', 'caddy'),
        extract: noDetails
    },
    {
        name: 'haproxy',
        category: 'proxy',
        headers: [],
        detect: headerContains('server', 'haproxy'),
        extract: noDetails
    },
    {
        name: 'iis',
        category: 'server',
        headers: ['x-powered-by'],
        detect: headerContains('server', 'iis'),
        extract: noDetails
    },
    {
        name: 'kestrel',
        category: 'server',
        headers: [],
        detect: headerContains('server', 'kestrel'),
        extract: noDetails
    },
    {
        name: 'express',
        category: 'framework',
        headers: ['x-powered-by'],
        detect: headerContains('x-powered-by', 'express'),
        extract: noDetails
    },
    {
        name: 'rails',
        category: 'framework',
        headers: ['x-runtime', 'x-request-id'],
        // Express can also emit x-runtime; exclude it with the same
        // case-insensitive test the express entry uses.
        detect: (headers) => headers.get('x-runtime') !== null
            && !headerContains('x-powered-by', 'express')(headers),
        extract: (headers) => ({
            metadata: { runtime: headers.get('x-runtime') }
        })
    },
    {
        name: 'django',
        category: 'framework',
        headers: [],
        detect: headerContains('server', 'wsgi'),
        extract: noDetails
    },
    {
        name: 'aspnet',
        category: 'framework',
        headers: ['x-aspnet-version', 'x-powered-by'],
        detect: (headers) => headers.get('x-aspnet-version') !== null
            || (headers.get('x-powered-by') || '').includes('ASP.NET'),
        extract: (headers) => ({
            metadata: { version: headers.get('x-aspnet-version') }
        })
    },
    {
        name: 'php',
        category: 'framework',
        headers: ['x-powered-by'],
        detect: headerContains('x-powered-by', 'php'),
        extract: (headers) => ({
            metadata: { version: headers.get('x-powered-by') }
        })
    }
];
311
/** Substrings looked for in the lower-cased `Server` header, in priority order. */
const SERVER_HEADER_SIGNATURES = [
    { needle: 'cloudflare', provider: 'cloudflare', category: 'cdn' },
    { needle: 'cloudfront', provider: 'aws-cloudfront', category: 'cloud' },
    { needle: 'fastly', provider: 'fastly', category: 'cdn' },
    { needle: 'akamai', provider: 'akamai', category: 'cdn' },
    { needle: 'nginx', provider: 'nginx', category: 'server' },
    { needle: 'apache', provider: 'apache', category: 'server' },
    { needle: 'varnish', provider: 'varnish', category: 'proxy' }
];

/** Substrings looked for in the lower-cased `Via` header, in priority order. */
const VIA_HEADER_SIGNATURES = [
    { needle: 'cloudflare', provider: 'cloudflare', category: 'cdn' },
    { needle: 'akamai', provider: 'akamai', category: 'cdn' },
    { needle: 'varnish', provider: 'varnish', category: 'proxy' },
    { needle: 'squid', provider: 'squid', category: 'proxy' }
];

/**
 * Identifies the platform that served a response.
 *
 * Resolution order: the first matching entry of `PLATFORM_DETECTORS`, then
 * known substrings in the `Server` header, then known substrings in `Via`.
 * If a `Server` header exists but nothing is recognised, provider and
 * category are reported as `'unknown'`.
 *
 * @param headers Object exposing `get(name)`.
 * @returns Object with whichever of `metadata`, `provider`, `category`,
 *          `region` and `server` could be determined; `metadata` is omitted
 *          when empty.
 */
export function parsePlatformInfo(headers) {
    const findSignature = (value, signatures) => {
        const lowered = value.toLowerCase();
        return signatures.find((signature) => lowered.includes(signature.needle));
    };

    let provider;
    let category;
    let region;
    let server;
    const metadata = {};

    const detector = PLATFORM_DETECTORS.find((candidate) => candidate.detect(headers));
    if (detector) {
        provider = detector.name;
        category = detector.category;
        const extracted = detector.extract(headers);
        if (extracted.region) {
            region = extracted.region;
        }
        if (extracted.server) {
            server = extracted.server;
        }
        if (extracted.metadata) {
            Object.assign(metadata, extracted.metadata);
        }
    }

    // The Server header is only consulted when no detector matched.
    const serverHeader = provider ? null : headers.get('server');
    if (serverHeader) {
        server = serverHeader;
        const known = findSignature(serverHeader, SERVER_HEADER_SIGNATURES);
        if (known) {
            provider = known.provider;
            category = known.category;
        }
    }

    if (!provider) {
        const via = headers.get('via');
        const known = via ? findSignature(via, VIA_HEADER_SIGNATURES) : undefined;
        if (known) {
            provider = known.provider;
            category = known.category;
        }
    }

    // Settle on 'unknown' only after Via has had its chance; marking it
    // earlier would make the Via fallback unreachable whenever an
    // unrecognised Server header is present.
    if (!provider && serverHeader) {
        provider = 'unknown';
        category = 'unknown';
    }

    const fields = {
        metadata: Object.keys(metadata).length > 0 ? metadata : undefined,
        provider,
        category,
        region,
        server
    };
    return Object.fromEntries(Object.entries(fields).filter(([, value]) => value !== undefined));
}
396
/**
 * Extracts rate-limit state from response headers.
 *
 * Reads the `x-ratelimit-*` headers (falling back to the unprefixed
 * `ratelimit-*` names), `retry-after` and `ratelimit-policy`.
 *
 * @param headers Object exposing `get(name)`.
 * @param status  HTTP status code of the response.
 * @returns Object with `limited` (true for status 429 or zero remaining)
 *          plus `limit`, `remaining`, `reset` (Date), `retryAfter` (seconds)
 *          and `policy` when present and parseable.
 */
export function parseRateLimitInfo(headers, status) {
    // Non-numeric header values are dropped rather than reported as NaN.
    const toInt = (raw) => {
        const parsed = parseInt(raw, 10);
        return Number.isNaN(parsed) ? undefined : parsed;
    };

    const limitHeader = headers.get('x-ratelimit-limit') || headers.get('ratelimit-limit');
    const remainingHeader = headers.get('x-ratelimit-remaining') || headers.get('ratelimit-remaining');
    const resetHeader = headers.get('x-ratelimit-reset') || headers.get('ratelimit-reset');

    const limit = limitHeader ? toInt(limitHeader) : undefined;
    const remaining = remainingHeader ? toInt(remainingHeader) : undefined;

    let reset;
    const resetValue = resetHeader ? toInt(resetHeader) : undefined;
    if (resetValue !== undefined) {
        // Values above 1e9 are treated as epoch seconds, smaller ones as a
        // delay in seconds from now.
        reset = resetValue > 1000000000
            ? new Date(resetValue * 1000)
            : new Date(Date.now() + resetValue * 1000);
    }

    let retryAfter;
    const retryHeader = headers.get('retry-after');
    if (retryHeader) {
        const seconds = parseInt(retryHeader, 10);
        if (!Number.isNaN(seconds)) {
            retryAfter = seconds;
        }
        else {
            const retryAt = new Date(retryHeader).getTime();
            if (!Number.isNaN(retryAt)) {
                // A date already in the past means "retry now", not a negative wait.
                retryAfter = Math.max(0, Math.ceil((retryAt - Date.now()) / 1000));
            }
        }
    }

    const policy = headers.get('ratelimit-policy') || undefined;
    const limited = status === 429 || remaining === 0;

    return Object.fromEntries(Object.entries({ limited, limit, remaining, reset, retryAfter, policy })
        .filter(([, value]) => value !== undefined));
}
436
/**
 * Reports how a response body was compressed.
 *
 * @param headers Object exposing `get(name)`.
 * @returns Object with `encoding` (lower-cased `content-encoding`),
 *          `originalSize` (from `x-original-size` or `x-uncompressed-size`),
 *          `compressedSize` (from `content-length`) and `ratio`
 *          (original / compressed) — each only when available.
 */
export function parseCompressionInfo(headers) {
    // Non-numeric sizes are dropped rather than reported as NaN.
    const toInt = (raw) => {
        const parsed = parseInt(raw, 10);
        return Number.isNaN(parsed) ? undefined : parsed;
    };

    const encodingHeader = headers.get('content-encoding');
    const encoding = encodingHeader ? encodingHeader.toLowerCase() : undefined;

    const originalHeader = headers.get('x-original-size') || headers.get('x-uncompressed-size');
    const originalSize = originalHeader ? toInt(originalHeader) : undefined;

    const lengthHeader = headers.get('content-length');
    const compressedSize = lengthHeader ? toInt(lengthHeader) : undefined;

    // The truthiness test also rules out a zero divisor.
    const ratio = originalSize && compressedSize ? originalSize / compressedSize : undefined;

    return Object.fromEntries(Object.entries({ encoding, originalSize, compressedSize, ratio })
        .filter(([, value]) => value !== undefined));
}
455
/**
 * Parses the Content-Security-Policy of a response.
 *
 * `content-security-policy` is preferred; when it is absent,
 * `content-security-policy-report-only` is used and `reportOnly` is set.
 *
 * @param headers Object exposing `get(name)`.
 * @returns Object with `directives` (lower-cased directive name → array of
 *          values), `reportOnly`, and `policy` (the raw header) when a
 *          policy header was found.
 */
export function parseCSPInfo(headers) {
    const directives = {};
    let reportOnly = false;

    let policy = headers.get('content-security-policy');
    if (!policy) {
        policy = headers.get('content-security-policy-report-only');
        reportOnly = Boolean(policy);
    }
    if (!policy) {
        return { directives, reportOnly };
    }

    for (const directive of policy.split(';').map((part) => part.trim()).filter(Boolean)) {
        const [rawName, ...values] = directive.split(/\s+/);
        // Directive names are case-insensitive, so normalise them.
        const name = rawName.toLowerCase();
        // When a directive is repeated only its first occurrence applies;
        // the own-property check keeps names like "constructor" safe.
        if (!Object.prototype.hasOwnProperty.call(directives, name)) {
            directives[name] = values;
        }
    }
    return { directives, reportOnly, policy };
}
479
/**
 * Splits a `content-type` header into its parts.
 *
 * @param headers Object exposing `get(name)`.
 * @returns Object with `mediaType`, `type`, `subtype`, `charset` and
 *          `boundary`, each only when present; empty when the header is
 *          missing.
 */
export function parseContentType(headers) {
    const contentType = headers.get('content-type');
    if (!contentType) {
        return {};
    }

    const [essence, ...params] = contentType.split(';').map((part) => part.trim());
    let mediaType;
    let type;
    let subtype;
    let charset;
    let boundary;

    if (essence) {
        mediaType = essence;
        const [main, sub] = essence.split('/');
        if (main) {
            type = main;
        }
        if (sub) {
            subtype = sub;
        }
    }

    for (const param of params) {
        // Split on the FIRST '=' only: values such as multipart boundaries
        // may legally contain '=' themselves.
        const separator = param.indexOf('=');
        if (separator === -1) {
            continue;
        }
        // Parameter names are case-insensitive.
        const key = param.slice(0, separator).trim().toLowerCase();
        const value = param.slice(separator + 1).trim();
        if (!value) {
            continue;
        }
        if (key === 'charset') {
            charset = value.replace(/['"]/g, '');
        }
        else if (key === 'boundary') {
            boundary = value.replace(/['"]/g, '');
        }
    }

    return Object.fromEntries(Object.entries({ mediaType, type, subtype, charset, boundary })
        .filter(([, value]) => value !== undefined));
}
505
/**
 * Parses the `accept`, `accept-encoding` and `accept-language` headers into
 * lists ordered by descending quality value.
 *
 * @param headers Object exposing `get(name)`.
 * @returns `{ types, encodings, languages }`; each list is empty when its
 *          header is missing. Entries carry `q` (1.0 when absent or
 *          unparseable); an explicit `q=0` is preserved.
 */
export function parseAcceptInfo(headers) {
    // Shared parser for "value;q=0.x, value, ..." style lists.
    const parseWeightedList = (raw) => {
        if (!raw) {
            return [];
        }
        return raw
            .split(',')
            .map((item) => item.trim())
            .filter(Boolean) // ignore empty list members such as "a,,b"
            .map((item) => {
                const [value, ...params] = item.split(';').map((part) => part.trim());
                let q = 1.0;
                for (const param of params) {
                    if (param.toLowerCase().startsWith('q=')) {
                        const parsed = parseFloat(param.substring(2));
                        // Only fall back on unparseable input: 0 is a valid
                        // weight meaning "not acceptable" and must be kept.
                        q = Number.isNaN(parsed) ? 1.0 : parsed;
                    }
                }
                return { value, q };
            })
            .sort((a, b) => b.q - a.q);
    };

    const types = parseWeightedList(headers.get('accept')).map(({ value, q }) => {
        const [type, subtype] = value.split('/');
        return {
            mediaType: value,
            q,
            type: type || undefined,
            subtype: subtype || undefined
        };
    });
    const encodings = parseWeightedList(headers.get('accept-encoding'))
        .map(({ value, q }) => ({ encoding: value, q }));
    const languages = parseWeightedList(headers.get('accept-language'))
        .map(({ value, q }) => ({ language: value, q }));

    return { types, encodings, languages };
}
564
/**
 * Collects authentication hints from response headers.
 *
 * Scheme names are looked for anywhere in the lower-cased
 * `www-authenticate` value; `realm`, `error` and `error_description` are
 * taken from the first double-quoted occurrence of each parameter. When
 * that leaves `error` unset, `x-auth-error` (or `x-authentication-error`)
 * is used instead.
 *
 * @param headers Object exposing `get(name)`.
 * @returns Object with `methods` (always) and optionally `realm`, `error`
 *          and `errorDescription`.
 */
export function parseAuthInfo(headers) {
    // [substring searched in the lower-cased header, label reported]
    const schemeTable = [
        ['basic', 'Basic'],
        ['bearer', 'Bearer'],
        ['digest', 'Digest'],
        ['negotiate', 'Negotiate'],
        ['aws4-hmac-sha256', 'AWS4']
    ];
    const methods = [];
    let realm;
    let error;
    let errorDescription;

    const challenge = headers.get('www-authenticate');
    if (challenge) {
        const lowered = challenge.toLowerCase();
        for (const [needle, label] of schemeTable) {
            if (lowered.includes(needle)) {
                methods.push(label);
            }
        }
        const firstCapture = (pattern) => {
            const found = challenge.match(pattern);
            return found ? found[1] : undefined;
        };
        realm = firstCapture(/realm="([^"]+)"/);
        error = firstCapture(/error="([^"]+)"/);
        errorDescription = firstCapture(/error_description="([^"]+)"/);
    }

    const fallbackError = headers.get('x-auth-error') || headers.get('x-authentication-error');
    if (fallbackError && !error) {
        error = fallbackError;
    }

    return Object.fromEntries(Object.entries({ methods, realm, error, errorDescription })
        .filter(([, value]) => value !== undefined));
}
595
/**
 * Compares the server's `date` header with the local clock.
 *
 * @param headers Object exposing `get(name)`.
 * @returns `{ serverTime, skewMs }`, where `skewMs` is server time minus
 *          local time in milliseconds, or an empty object when the header
 *          is missing or not a parseable date.
 */
export function parseClockSkew(headers) {
    const rawDate = headers.get('date');
    const serverTime = rawDate ? new Date(rawDate) : null;
    if (serverTime === null || Number.isNaN(serverTime.getTime())) {
        return {};
    }
    return {
        serverTime,
        skewMs: serverTime.getTime() - Date.now()
    };
}
605
/**
 * Runs every header parser in this module and bundles the results.
 *
 * @param headers Object exposing `get(name)`.
 * @param status  HTTP status code; forwarded to the rate-limit parser only.
 * @returns Object keyed by `cache`, `platform`, `rateLimit`, `compression`,
 *          `csp`, `contentType`, `accept`, `auth` and `clockSkew`.
 */
export function parseHeaders(headers, status) {
    const cache = parseCacheInfo(headers);
    const platform = parsePlatformInfo(headers);
    const rateLimit = parseRateLimitInfo(headers, status);
    const compression = parseCompressionInfo(headers);
    const csp = parseCSPInfo(headers);
    const contentType = parseContentType(headers);
    const accept = parseAcceptInfo(headers);
    const auth = parseAuthInfo(headers);
    const clockSkew = parseClockSkew(headers);
    return { cache, platform, rateLimit, compression, csp, contentType, accept, auth, clockSkew };
}
@@ -0,0 +1 @@
1
/**
 * Converts an HTML string to plain text: script/style blocks, comments and
 * tags are removed, closing block-level tags and br/hr become newlines, a
 * small set of entities is decoded, and whitespace is collapsed.
 *
 * @param html Markup to clean; a falsy value yields an empty string.
 * @returns The trimmed plain-text result.
 */
+ export declare function cleanHtml(html: string): string;
@@ -0,0 +1,21 @@
1
/**
 * Converts an HTML string to plain text.
 *
 * Steps, in order: drop `<script>`/`<style>` blocks and comments, turn
 * closing block-level tags and `<br>`/`<hr>` into newlines, replace every
 * remaining tag with a space, decode a small set of entities, collapse runs
 * of spaces/tabs, and limit blank-line runs to one blank line.
 *
 * @param html Markup to clean; a falsy value yields an empty string.
 * @returns The trimmed plain-text result.
 */
export function cleanHtml(html) {
    if (!html) {
        return '';
    }
    const entities = {
        '&nbsp;': ' ',
        '&amp;': '&',
        '&lt;': '<',
        '&gt;': '>',
        '&quot;': '"',
        '&#39;': "'"
    };
    return html
        .replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, '')
        .replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, '')
        .replace(/<!--[\s\S]*?-->/g, '')
        .replace(/<\/(div|p|h[1-6]|li|ul|ol|tr|table|section|article|main|header|footer|nav)>/gi, '\n')
        .replace(/<(br|hr)\s*\/?>/gi, '\n')
        .replace(/<[^>]+>/g, ' ')
        // Decode in ONE pass: chained replacements would decode twice and
        // turn the escaped text "&amp;lt;" into "<" instead of "&lt;".
        .replace(/&(?:nbsp|amp|lt|gt|quot|#39);/g, (entity) => entities[entity])
        .replace(/[ \t]+/g, ' ')
        .replace(/\n\s*\n\s*\n+/g, '\n\n')
        .trim();
}