rezo 1.0.66 → 1.0.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/dist/adapters/entries/curl.d.ts +5 -0
  2. package/dist/adapters/entries/fetch.d.ts +5 -0
  3. package/dist/adapters/entries/http.d.ts +5 -0
  4. package/dist/adapters/entries/http2.d.ts +5 -0
  5. package/dist/adapters/entries/react-native.d.ts +5 -0
  6. package/dist/adapters/entries/xhr.d.ts +5 -0
  7. package/dist/adapters/index.cjs +6 -6
  8. package/dist/cache/index.cjs +9 -9
  9. package/dist/crawler/crawler.cjs +26 -5
  10. package/dist/crawler/crawler.js +26 -5
  11. package/dist/crawler/index.cjs +40 -40
  12. package/dist/crawler.d.ts +10 -0
  13. package/dist/entries/crawler.cjs +4 -4
  14. package/dist/index.cjs +27 -27
  15. package/dist/index.d.ts +5 -0
  16. package/dist/internal/agents/index.cjs +10 -10
  17. package/dist/platform/browser.d.ts +5 -0
  18. package/dist/platform/bun.d.ts +5 -0
  19. package/dist/platform/deno.d.ts +5 -0
  20. package/dist/platform/node.d.ts +5 -0
  21. package/dist/platform/react-native.d.ts +5 -0
  22. package/dist/platform/worker.d.ts +5 -0
  23. package/dist/proxy/index.cjs +4 -4
  24. package/dist/proxy/manager.cjs +1 -1
  25. package/dist/proxy/manager.js +1 -1
  26. package/dist/queue/index.cjs +8 -8
  27. package/dist/queue/queue.cjs +3 -1
  28. package/dist/queue/queue.js +3 -1
  29. package/dist/responses/universal/index.cjs +11 -11
  30. package/dist/wget/asset-extractor.cjs +556 -0
  31. package/dist/wget/asset-extractor.js +553 -0
  32. package/dist/wget/asset-organizer.cjs +230 -0
  33. package/dist/wget/asset-organizer.js +227 -0
  34. package/dist/wget/download-cache.cjs +221 -0
  35. package/dist/wget/download-cache.js +218 -0
  36. package/dist/wget/downloader.cjs +607 -0
  37. package/dist/wget/downloader.js +604 -0
  38. package/dist/wget/file-writer.cjs +349 -0
  39. package/dist/wget/file-writer.js +346 -0
  40. package/dist/wget/filter-lists.cjs +1330 -0
  41. package/dist/wget/filter-lists.js +1330 -0
  42. package/dist/wget/index.cjs +633 -0
  43. package/dist/wget/index.d.ts +8486 -0
  44. package/dist/wget/index.js +614 -0
  45. package/dist/wget/link-converter.cjs +297 -0
  46. package/dist/wget/link-converter.js +294 -0
  47. package/dist/wget/progress.cjs +271 -0
  48. package/dist/wget/progress.js +266 -0
  49. package/dist/wget/resume.cjs +166 -0
  50. package/dist/wget/resume.js +163 -0
  51. package/dist/wget/robots.cjs +303 -0
  52. package/dist/wget/robots.js +300 -0
  53. package/dist/wget/types.cjs +200 -0
  54. package/dist/wget/types.js +197 -0
  55. package/dist/wget/url-filter.cjs +351 -0
  56. package/dist/wget/url-filter.js +348 -0
  57. package/package.json +6 -1
@@ -2128,6 +2128,8 @@ declare class RezoQueue<T = any> {
2128
2128
  private readonly throughputWindowSize;
2129
2129
  private idlePromise?;
2130
2130
  private emptyPromise?;
2131
+ /** Tracks if queue has ever had work added - ensures onIdle waits for first task */
2132
+ private hasEverBeenActive;
2131
2133
  readonly config: Required<QueueConfig>;
2132
2134
  /**
2133
2135
  * Create a new RezoQueue
@@ -2202,6 +2204,9 @@ declare class RezoQueue<T = any> {
2202
2204
  }) => boolean): number;
2203
2205
  /**
2204
2206
  * Wait for queue to become idle (no running or pending tasks)
2207
+ *
2208
+ * Unlike a simple "isIdle" check, this properly waits for work to be added
2209
+ * and completed if called before any tasks are queued (matches p-queue behavior).
2205
2210
  */
2206
2211
  onIdle(): Promise<void>;
2207
2212
  /**
@@ -1,9 +1,9 @@
1
1
  const { Agent, HttpProxyAgent, HttpsProxyAgent, SocksProxyAgent } = require('../internal/agents/index.cjs');
2
2
  const { parseProxyString } = require('./parse.cjs');
3
- const _mod_8l1pku = require('./manager.cjs');
4
- exports.ProxyManager = _mod_8l1pku.ProxyManager;;
5
- const _mod_ampgfr = require('./parse.cjs');
6
- exports.parseProxyString = _mod_ampgfr.parseProxyString;;
3
+ const _mod_nqcj71 = require('./manager.cjs');
4
+ exports.ProxyManager = _mod_nqcj71.ProxyManager;;
5
+ const _mod_ipg17b = require('./parse.cjs');
6
+ exports.parseProxyString = _mod_ipg17b.parseProxyString;;
7
7
  function createOptions(uri, opts) {
8
8
  if (uri instanceof URL || typeof uri === "string") {
9
9
  return {
@@ -1,4 +1,4 @@
1
- const { parseProxyString } = require('./index.cjs');
1
+ const { parseProxyString } = require('./parse.cjs');
2
2
  function generateProxyId() {
3
3
  return `proxy_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`;
4
4
  }
@@ -1,4 +1,4 @@
1
- import { parseProxyString } from './index.js';
1
+ import { parseProxyString } from './parse.js';
2
2
  function generateProxyId() {
3
3
  return `proxy_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`;
4
4
  }
@@ -1,8 +1,8 @@
1
- const _mod_3rd9kt = require('./queue.cjs');
2
- exports.RezoQueue = _mod_3rd9kt.RezoQueue;;
3
- const _mod_yfrbwq = require('./http-queue.cjs');
4
- exports.HttpQueue = _mod_yfrbwq.HttpQueue;
5
- exports.extractDomain = _mod_yfrbwq.extractDomain;;
6
- const _mod_2qwp27 = require('./types.cjs');
7
- exports.Priority = _mod_2qwp27.Priority;
8
- exports.HttpMethodPriority = _mod_2qwp27.HttpMethodPriority;;
1
+ const _mod_uimwsu = require('./queue.cjs');
2
+ exports.RezoQueue = _mod_uimwsu.RezoQueue;;
3
+ const _mod_u4qw7d = require('./http-queue.cjs');
4
+ exports.HttpQueue = _mod_u4qw7d.HttpQueue;
5
+ exports.extractDomain = _mod_u4qw7d.extractDomain;;
6
+ const _mod_gb74bt = require('./types.cjs');
7
+ exports.Priority = _mod_gb74bt.Priority;
8
+ exports.HttpMethodPriority = _mod_gb74bt.HttpMethodPriority;;
@@ -25,6 +25,7 @@ class RezoQueue {
25
25
  throughputWindowSize = 60;
26
26
  idlePromise;
27
27
  emptyPromise;
28
+ hasEverBeenActive = false;
28
29
  config;
29
30
  constructor(config = {}) {
30
31
  this.config = {
@@ -92,6 +93,7 @@ class RezoQueue {
92
93
  });
93
94
  this.insertByPriority(task);
94
95
  this.statsData.added++;
96
+ this.hasEverBeenActive = true;
95
97
  this.emit("add", { id: task.id, priority: task.priority });
96
98
  if (this.config.autoStart && !this.isPausedFlag) {
97
99
  this.tryRunNext();
@@ -156,7 +158,7 @@ class RezoQueue {
156
158
  return count;
157
159
  }
158
160
  onIdle() {
159
- if (this.state.isIdle) {
161
+ if (this.hasEverBeenActive && this.state.isIdle) {
160
162
  return Promise.resolve();
161
163
  }
162
164
  if (!this.idlePromise) {
@@ -25,6 +25,7 @@ export class RezoQueue {
25
25
  throughputWindowSize = 60;
26
26
  idlePromise;
27
27
  emptyPromise;
28
+ hasEverBeenActive = false;
28
29
  config;
29
30
  constructor(config = {}) {
30
31
  this.config = {
@@ -92,6 +93,7 @@ export class RezoQueue {
92
93
  });
93
94
  this.insertByPriority(task);
94
95
  this.statsData.added++;
96
+ this.hasEverBeenActive = true;
95
97
  this.emit("add", { id: task.id, priority: task.priority });
96
98
  if (this.config.autoStart && !this.isPausedFlag) {
97
99
  this.tryRunNext();
@@ -156,7 +158,7 @@ export class RezoQueue {
156
158
  return count;
157
159
  }
158
160
  onIdle() {
159
- if (this.state.isIdle) {
161
+ if (this.hasEverBeenActive && this.state.isIdle) {
160
162
  return Promise.resolve();
161
163
  }
162
164
  if (!this.idlePromise) {
@@ -1,11 +1,11 @@
1
- const _mod_v16hwz = require('./event-emitter.cjs');
2
- exports.UniversalEventEmitter = _mod_v16hwz.UniversalEventEmitter;;
3
- const _mod_495yjz = require('./stream.cjs');
4
- exports.UniversalStreamResponse = _mod_495yjz.UniversalStreamResponse;
5
- exports.StreamResponse = _mod_495yjz.StreamResponse;;
6
- const _mod_rc9q4g = require('./download.cjs');
7
- exports.UniversalDownloadResponse = _mod_rc9q4g.UniversalDownloadResponse;
8
- exports.DownloadResponse = _mod_rc9q4g.DownloadResponse;;
9
- const _mod_q7jgxt = require('./upload.cjs');
10
- exports.UniversalUploadResponse = _mod_q7jgxt.UniversalUploadResponse;
11
- exports.UploadResponse = _mod_q7jgxt.UploadResponse;;
1
+ const _mod_n7nj5e = require('./event-emitter.cjs');
2
+ exports.UniversalEventEmitter = _mod_n7nj5e.UniversalEventEmitter;;
3
+ const _mod_r33s45 = require('./stream.cjs');
4
+ exports.UniversalStreamResponse = _mod_r33s45.UniversalStreamResponse;
5
+ exports.StreamResponse = _mod_r33s45.StreamResponse;;
6
+ const _mod_cvx40y = require('./download.cjs');
7
+ exports.UniversalDownloadResponse = _mod_cvx40y.UniversalDownloadResponse;
8
+ exports.DownloadResponse = _mod_cvx40y.DownloadResponse;;
9
+ const _mod_yoj35o = require('./upload.cjs');
10
+ exports.UniversalUploadResponse = _mod_yoj35o.UniversalUploadResponse;
11
+ exports.UploadResponse = _mod_yoj35o.UploadResponse;;
@@ -0,0 +1,556 @@
1
+ const { parseHTML, DOMParser } = require('../dom/index.cjs');
2
// Maps an HTML tag name to the attributes on that tag that are read as URLs.
// AssetExtractor.extractFromHTML iterates this table in key order, so the key
// order determines the order of the returned assets. Tags with an empty list
// contribute no URL attributes, but their elements are still visited and have
// their inline `style` attribute scanned.
+ const HTML_URL_ATTRIBUTES = {
3
+ a: ["href"],
4
+ area: ["href"],
5
+ link: ["href"],
6
+ base: ["href"],
7
+ img: ["src", "srcset", "data-src", "data-srcset", "data-lazy-src"],
8
+ picture: [],
9
+ source: ["src", "srcset"],
10
+ video: ["src", "poster"],
11
+ audio: ["src"],
12
+ track: ["src"],
13
+ script: ["src"],
14
+ style: [],
15
+ iframe: ["src"],
16
+ frame: ["src"],
17
+ embed: ["src"],
18
+ object: ["data", "codebase"],
19
+ form: ["action"],
20
+ input: ["src"],
21
+ button: ["formaction"],
22
+ meta: ["content"],
23
+ body: ["background"],
24
+ table: ["background"],
25
+ td: ["background"],
26
+ th: ["background"],
27
+ blockquote: ["cite"],
28
+ q: ["cite"],
29
+ del: ["cite"],
30
+ ins: ["cite"],
31
+ applet: ["code", "codebase", "archive"]
32
+ };
33
// Lower-case `property`/`name` values of <meta> tags whose `content` attribute
// is treated as a URL. extractFromHTML skips the `content` of any <meta> whose
// property/name (lower-cased) is not in this list.
+ const META_URL_PROPERTIES = [
34
+ "og:image",
35
+ "og:image:url",
36
+ "og:image:secure_url",
37
+ "og:video",
38
+ "og:video:url",
39
+ "og:video:secure_url",
40
+ "og:audio",
41
+ "og:audio:url",
42
+ "og:audio:secure_url",
43
+ "og:url",
44
+ "twitter:image",
45
+ "twitter:image:src",
46
+ "twitter:player",
47
+ "twitter:player:stream"
48
+ ];
49
// `rel` fragments that make a <link> a page requisite. isPageRequisite matches
// them as case-insensitive substrings of the element's `rel` attribute.
+ const REQUISITE_LINK_RELS = [
50
+ "stylesheet",
51
+ "icon",
52
+ "shortcut icon",
53
+ "apple-touch-icon",
54
+ "apple-touch-icon-precomposed",
55
+ "manifest",
56
+ "preload",
57
+ "modulepreload"
58
+ ];
59
/** Asset type implied by the element's tag alone, regardless of the URL. */
const TAG_ASSET_TYPES = new Map([
  ["script", "script"],
  ["style", "stylesheet"],
  ["img", "image"],
  ["picture", "image"],
  ["video", "video"],
  ["audio", "audio"],
  ["iframe", "iframe"],
  ["frame", "iframe"],
  ["embed", "object"],
  ["object", "object"]
]);

/** Asset type implied by the URL's file extension (lower-case, no dot). */
const EXTENSION_ASSET_TYPES = new Map([
  ["css", "stylesheet"],
  ["js", "script"],
  ["mjs", "script"],
  ["cjs", "script"],
  ["png", "image"],
  ["jpg", "image"],
  ["jpeg", "image"],
  ["gif", "image"],
  ["webp", "image"],
  ["avif", "image"],
  ["svg", "image"],
  ["ico", "image"],
  ["bmp", "image"],
  ["tiff", "image"],
  ["tif", "image"],
  ["mp4", "video"],
  ["webm", "video"],
  ["ogg", "video"],
  ["ogv", "video"],
  ["mov", "video"],
  ["avi", "video"],
  ["mkv", "video"],
  ["mp3", "audio"],
  ["wav", "audio"],
  ["flac", "audio"],
  ["aac", "audio"],
  ["m4a", "audio"],
  ["oga", "audio"],
  ["woff", "font"],
  ["woff2", "font"],
  ["ttf", "font"],
  ["otf", "font"],
  ["eot", "font"],
  ["html", "document"],
  ["htm", "document"],
  ["xhtml", "document"],
  ["php", "document"],
  ["asp", "document"],
  ["aspx", "document"],
  ["jsp", "document"],
  ["json", "data"],
  ["xml", "data"],
  ["webmanifest", "manifest"]
]);

/**
 * Classifies a URL found in an HTML element.
 *
 * Resolution order:
 *   1. the tag itself (script, style, img, video, ...);
 *   2. for <link>, the `rel` value (stylesheet, icon, manifest, modulepreload);
 *   3. the URL's file extension;
 *   4. "document" for an <a> with an unrecognised extension, otherwise "other".
 *
 * A `<link rel="preload">` is classified by its extension instead of always
 * being reported as "other", and `rel="modulepreload"` is reported as "script"
 * because that relation only ever loads module scripts.
 *
 * @param url       Resolved URL of the asset.
 * @param tag       Tag name of the element the URL came from (any case).
 * @param attribute Attribute the URL came from; not used for classification,
 *                  kept so existing call sites remain valid.
 * @param rel       The element's `rel` attribute, or null/undefined.
 * @returns The asset type string.
 */
function determineAssetType(url, tag, attribute, rel) {
  const lowerTag = tag.toLowerCase();
  const lowerUrl = url.toLowerCase();

  const typeFromTag = TAG_ASSET_TYPES.get(lowerTag);
  if (typeFromTag) {
    return typeFromTag;
  }

  if (lowerTag === "link" && rel) {
    const lowerRel = rel.toLowerCase();
    if (lowerRel.includes("stylesheet")) return "stylesheet";
    if (lowerRel.includes("icon")) return "favicon";
    if (lowerRel.includes("manifest")) return "manifest";
    if (lowerRel.includes("modulepreload")) return "script";
    // Plain "preload" carries no type of its own: fall through so the
    // extension decides (unknown extensions still end up as "other").
  }

  const typeFromExtension = EXTENSION_ASSET_TYPES.get(getUrlExtension(lowerUrl));
  if (typeFromExtension) {
    return typeFromExtension;
  }
  return lowerTag === "a" ? "document" : "other";
}
148
/**
 * Returns the lower-cased file extension (without the dot) of a URL's path,
 * or "" when the last path segment has none.
 *
 * The URL is parsed relative to "http://localhost" so relative references
 * work. If parsing throws, a regex looks for ".ext" directly before "?", "#"
 * or the end of the string.
 */
function getUrlExtension(url) {
  let path;
  try {
    path = new URL(url, "http://localhost").pathname;
  } catch {
    const fallback = url.match(/\.([a-zA-Z0-9]+)(?:\?|#|$)/);
    return fallback ? fallback[1].toLowerCase() : "";
  }
  const dotAt = path.lastIndexOf(".");
  // The dot must sit in the last segment and must not be the final character.
  const hasExtension = dotAt > path.lastIndexOf("/") && dotAt < path.length - 1;
  return hasExtension ? path.slice(dotAt + 1).toLowerCase() : "";
}
163
/**
 * Decides whether an asset is needed to render the page.
 *
 * Stylesheets, scripts, fonts, favicons, manifests and images always are.
 * Anything else is only when it comes from a <link> whose `rel` contains one
 * of the REQUISITE_LINK_RELS entries (case-insensitive substring match).
 */
function isPageRequisite(type, tag, rel) {
  switch (type) {
    case "stylesheet":
    case "script":
    case "font":
    case "favicon":
    case "manifest":
    case "image":
      return true;
  }
  if (tag.toLowerCase() !== "link" || !rel) {
    return false;
  }
  const relValue = rel.toLowerCase();
  for (const requisiteRel of REQUISITE_LINK_RELS) {
    if (relValue.includes(requisiteRel)) {
      return true;
    }
  }
  return false;
}
175
+
176
/**
 * Extension -> asset type lookup used by AssetExtractor.guessAssetTypeFromUrl.
 * It covers the same extensions as the switch in determineAssetType, so a URL
 * is classified the same way whether it was found in HTML, CSS, XML or JS.
 */
const URL_EXTENSION_ASSET_TYPES = new Map([
  ["css", "stylesheet"],
  ["js", "script"],
  ["mjs", "script"],
  ["cjs", "script"],
  ["png", "image"],
  ["jpg", "image"],
  ["jpeg", "image"],
  ["gif", "image"],
  ["webp", "image"],
  ["avif", "image"],
  ["svg", "image"],
  ["ico", "image"],
  ["bmp", "image"],
  ["tiff", "image"],
  ["tif", "image"],
  ["mp4", "video"],
  ["webm", "video"],
  ["ogg", "video"],
  ["ogv", "video"],
  ["mov", "video"],
  ["avi", "video"],
  ["mkv", "video"],
  ["mp3", "audio"],
  ["wav", "audio"],
  ["flac", "audio"],
  ["aac", "audio"],
  ["m4a", "audio"],
  ["oga", "audio"],
  ["woff", "font"],
  ["woff2", "font"],
  ["ttf", "font"],
  ["otf", "font"],
  ["eot", "font"],
  ["html", "document"],
  ["htm", "document"],
  ["xhtml", "document"],
  ["php", "document"],
  ["asp", "document"],
  ["aspx", "document"],
  ["jsp", "document"],
  ["json", "data"],
  ["xml", "data"],
  ["webmanifest", "manifest"]
]);

/** Extensions that extractFromJS accepts as plausible asset references. */
const JS_ASSET_EXTENSIONS = new Set(["js", "css", "png", "jpg", "jpeg", "gif", "svg", "webp", "json", "html"]);

/** URL prefixes that never identify a downloadable resource. */
const NON_FETCHABLE_URL_PREFIXES = ["#", "javascript:", "data:", "mailto:", "tel:"];

/**
 * Compiles a wget-style glob (`*` = any run, `?` = any single character) into
 * a case-insensitive, unanchored RegExp.
 */
function compileGlobPattern(pattern) {
  const regexStr = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
  // The original `^x$|x` alternation is equivalent to plain `x` for test().
  return new RegExp(regexStr, "i");
}

/**
 * Turns an `accept`/`reject` option (array or comma-separated string) into
 * compiled globs. Blank entries, e.g. from a trailing comma, are dropped
 * because an empty glob would match every URL.
 */
function compileGlobList(patterns) {
  if (!patterns) return [];
  const list = Array.isArray(patterns) ? patterns : patterns.split(",");
  return list
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0)
    .map(compileGlobPattern);
}

/** Accepts a RegExp or a pattern string; returns a RegExp, or null if unset. */
function toFilterRegExp(value) {
  if (!value) return null;
  return value instanceof RegExp ? value : new RegExp(value);
}

/**
 * Tests `text` from its start. Resetting lastIndex keeps global/sticky
 * regexes from carrying state from one asset to the next.
 */
function testFromStart(regex, text) {
  regex.lastIndex = 0;
  return regex.test(text);
}

/**
 * Finds the URLs referenced by HTML, CSS, XML/SVG and JavaScript content and
 * describes each one as an asset record:
 * `{ url, type, source, tag?, attribute?, required, inline }`.
 */
class AssetExtractor {
  /**
   * Extracts assets from an HTML document.
   *
   * Reads the URL attributes listed in HTML_URL_ATTRIBUTES, inline `style`
   * attributes on any element, and the content of <style> elements. A
   * `<base href>` replaces `baseUrl` for resolution when it resolves.
   *
   * @param html    HTML source text.
   * @param baseUrl URL the document was fetched from.
   * @param options Optional `{ followTags, ignoreTags }` tag-name lists
   *                (case-insensitive). They do not restrict <style> content.
   * @returns Array of asset records.
   */
  extractFromHTML(html, baseUrl, options) {
    const assets = [];
    const { document } = parseHTML(html);

    const baseElement = document.querySelector("base[href]");
    if (baseElement) {
      const baseHref = baseElement.getAttribute("href");
      if (baseHref) {
        baseUrl = this.resolveUrl(baseHref, baseUrl) || baseUrl;
      }
    }

    const followTags = options?.followTags ? new Set(options.followTags.map((t) => t.toLowerCase())) : null;
    const ignoreTags = options?.ignoreTags ? new Set(options.ignoreTags.map((t) => t.toLowerCase())) : null;
    const isTagAllowed = (tag) => {
      if (followTags && !followTags.has(tag)) return false;
      if (ignoreTags && ignoreTags.has(tag)) return false;
      return true;
    };

    // Collects url(...) references from an element's inline style attribute.
    const collectInlineStyle = (element, tag) => {
      const styleAttr = element.getAttribute("style");
      if (!styleAttr) return;
      for (const cssAsset of this.extractUrlsFromCSSText(styleAttr, baseUrl)) {
        assets.push({
          ...cssAsset,
          source: "html",
          tag,
          attribute: "style",
          inline: true
        });
      }
    };

    for (const [tag, attributes] of Object.entries(HTML_URL_ATTRIBUTES)) {
      const lowerTag = tag.toLowerCase();
      if (!isTagAllowed(lowerTag)) continue;

      for (const element of Array.from(document.querySelectorAll(tag))) {
        const rel = element.getAttribute("rel");
        for (const attr of attributes) {
          const value = element.getAttribute(attr);
          if (!value) continue;

          if (attr === "srcset" || attr === "data-srcset") {
            for (const url of this.parseSrcset(value, baseUrl)) {
              assets.push({
                url,
                type: "image",
                source: "html",
                tag: lowerTag,
                attribute: attr,
                required: true,
                inline: false
              });
            }
            continue;
          }

          // Only <meta> tags that are known to carry a URL are considered.
          if (lowerTag === "meta" && attr === "content") {
            const property = element.getAttribute("property") || element.getAttribute("name");
            if (!property || !META_URL_PROPERTIES.includes(property.toLowerCase())) {
              continue;
            }
          }

          const resolvedUrl = this.resolveUrl(value, baseUrl);
          if (!resolvedUrl) continue;

          const assetType = determineAssetType(resolvedUrl, lowerTag, attr, rel);
          assets.push({
            url: resolvedUrl,
            type: assetType,
            source: "html",
            tag: lowerTag,
            attribute: attr,
            required: isPageRequisite(assetType, lowerTag, rel),
            inline: false
          });
        }
        collectInlineStyle(element, lowerTag);
      }
    }

    // Inline styles on tags that are not in HTML_URL_ATTRIBUTES (div, span,
    // section, ...) were previously never scanned. They are appended after
    // the table-driven results so the existing output order is preserved.
    for (const element of Array.from(document.querySelectorAll("[style]"))) {
      const tag = element.tagName.toLowerCase();
      if (Object.prototype.hasOwnProperty.call(HTML_URL_ATTRIBUTES, tag)) continue;
      if (!isTagAllowed(tag)) continue;
      collectInlineStyle(element, tag);
    }

    for (const styleTag of Array.from(document.querySelectorAll("style"))) {
      const cssContent = styleTag.textContent;
      if (!cssContent) continue;
      for (const asset of this.extractFromCSS(cssContent, baseUrl)) {
        assets.push({
          ...asset,
          source: "html",
          tag: "style",
          inline: true
        });
      }
    }
    return assets;
  }

  /**
   * Extracts assets from a stylesheet: `@import` targets (typed "stylesheet")
   * followed by every other `url(...)` reference. A URL already reported as
   * an import is not reported again for its `url(...)` form.
   *
   * @param css     CSS source text.
   * @param baseUrl URL used to resolve relative references.
   * @returns Array of asset records with `source: "css"`.
   */
  extractFromCSS(css, baseUrl) {
    const assets = [];
    const importedUrls = new Set();
    // Target is either "double-quoted", 'single-quoted' or a bare token.
    const importRegex = /@import\s+(?:url\s*\(\s*)?(?:"([^"]*)"|'([^']*)'|([^'"\)\s;]+))\s*\)?[^;]*;/gi;
    let match;
    while ((match = importRegex.exec(css)) !== null) {
      const url = this.resolveUrl(match[1] ?? match[2] ?? match[3], baseUrl);
      if (url) {
        importedUrls.add(url);
        assets.push({
          url,
          type: "stylesheet",
          source: "css",
          required: true,
          inline: false
        });
      }
    }
    for (const asset of this.extractUrlsFromCSSText(css, baseUrl)) {
      if (!importedUrls.has(asset.url)) {
        assets.push(asset);
      }
    }
    return assets;
  }

  /**
   * Extracts every `url(...)` reference from CSS text. Quoted URLs may
   * contain spaces and parentheses. Fragment-only, `data:` and other
   * non-HTTP(S) references are skipped by resolveUrl.
   *
   * @param css     CSS text (a stylesheet or an inline style value).
   * @param baseUrl URL used to resolve relative references.
   * @returns Array of asset records with `source: "css"`.
   */
  extractUrlsFromCSSText(css, baseUrl) {
    const assets = [];
    const urlRegex = /url\s*\(\s*(?:"([^"]*)"|'([^']*)'|([^'"\)\s]+))\s*\)/gi;
    let match;
    while ((match = urlRegex.exec(css)) !== null) {
      const resolvedUrl = this.resolveUrl(match[1] ?? match[2] ?? match[3], baseUrl);
      if (!resolvedUrl) continue;
      assets.push({
        url: resolvedUrl,
        type: this.guessAssetTypeFromUrl(resolvedUrl),
        source: "css",
        required: true,
        inline: false
      });
    }
    return assets;
  }

  /**
   * Extracts assets from XML or SVG by reading `href`, `src` and `xlink:href`
   * on every element. Each resolved URL is reported once. In SVG documents,
   * <image> and <use> targets are typed "image" and marked required.
   * Parse failures are logged with console.warn and yield the assets
   * collected so far.
   *
   * @param xml     XML/SVG source text.
   * @param baseUrl URL used to resolve relative references.
   * @returns Array of asset records with `source` "svg" or "xml".
   */
  extractFromXML(xml, baseUrl) {
    const assets = [];
    try {
      const doc = new DOMParser().parseFromString(xml, "text/xml");
      const isSVG = doc.documentElement?.tagName.toLowerCase() === "svg";
      const source = isSVG ? "svg" : "xml";
      const seenUrls = new Set();
      for (const el of Array.from(doc.querySelectorAll("*"))) {
        for (const attr of ["href", "src", "xlink:href"]) {
          const resolvedUrl = this.resolveUrl(el.getAttribute(attr), baseUrl);
          if (!resolvedUrl || seenUrls.has(resolvedUrl)) continue;
          seenUrls.add(resolvedUrl);

          const tagName = el.tagName.toLowerCase();
          const isSvgReference = isSVG && (tagName === "image" || tagName === "use");
          assets.push({
            url: resolvedUrl,
            type: isSvgReference ? "image" : this.guessAssetTypeFromUrl(resolvedUrl),
            source,
            tag: tagName,
            attribute: attr,
            required: isSvgReference,
            inline: false
          });
        }
      }
    } catch (error) {
      console.warn("Failed to parse XML/SVG:", error);
    }
    return assets;
  }

  /**
   * Heuristically extracts asset URLs from JavaScript source by scanning
   * string literals that look like absolute http(s) URLs, root-relative
   * paths or "./" paths, keeping those with a known asset extension.
   *
   * @param js      JavaScript source text.
   * @param baseUrl URL used to resolve relative references.
   * @returns Array of asset records with `source: "js"`, never required.
   */
  extractFromJS(js, baseUrl) {
    const assets = [];
    const seen = new Set();
    const patterns = [
      /['"`](https?:\/\/[^'"`\s]+)['"`]/gi,
      /['"`](\/[a-zA-Z0-9._\-/]+\.[a-zA-Z0-9]+)['"`]/gi,
      /['"`](\.\/[a-zA-Z0-9._\-/]+\.[a-zA-Z0-9]+)['"`]/gi
    ];
    for (const pattern of patterns) {
      let match;
      while ((match = pattern.exec(js)) !== null) {
        const urlCandidate = match[1];
        if (seen.has(urlCandidate)) continue;
        seen.add(urlCandidate);

        const resolvedUrl = this.resolveUrl(urlCandidate, baseUrl);
        if (!resolvedUrl) continue;
        if (!JS_ASSET_EXTENSIONS.has(getUrlExtension(resolvedUrl))) continue;

        assets.push({
          url: resolvedUrl,
          type: this.guessAssetTypeFromUrl(resolvedUrl),
          source: "js",
          required: false,
          inline: false
        });
      }
    }
    return assets;
  }

  /**
   * Resolves the URL of each candidate in a `srcset` value. Candidates are
   * split on commas; the descriptor after the URL is ignored.
   *
   * @returns Array of absolute URLs, in source order.
   */
  parseSrcset(srcset, baseUrl) {
    const urls = [];
    for (const candidate of srcset.split(/,\s*(?=[^\s])/)) {
      const [reference] = candidate.trim().split(/\s+/);
      if (!reference) continue;
      const url = this.resolveUrl(reference, baseUrl);
      if (url) {
        urls.push(url);
      }
    }
    return urls;
  }

  /**
   * Resolves `url` against `baseUrl`.
   *
   * @returns The absolute href, or null when the input is empty, is a
   *          fragment / javascript: / data: / mailto: / tel: reference,
   *          cannot be parsed, or does not resolve to http(s).
   */
  resolveUrl(url, baseUrl) {
    if (!url) return null;
    const candidate = url.trim();
    if (!candidate || NON_FETCHABLE_URL_PREFIXES.some((prefix) => candidate.startsWith(prefix))) {
      return null;
    }
    try {
      const resolved = new URL(candidate, baseUrl);
      const isHttp = resolved.protocol === "http:" || resolved.protocol === "https:";
      return isHttp ? resolved.href : null;
    } catch {
      return null;
    }
  }

  /**
   * Classifies a URL by file extension alone.
   *
   * @returns The asset type, or "other" for unknown or missing extensions.
   */
  guessAssetTypeFromUrl(url) {
    return URL_EXTENSION_ASSET_TYPES.get(getUrlExtension(url)) ?? "other";
  }

  /**
   * Returns the assets that pass every configured filter.
   *
   * Supported options: `acceptAssetTypes`, `rejectAssetTypes`, `followTags`,
   * `ignoreTags` (case-insensitive), `accept` / `reject` (glob array or
   * comma-separated string; blank entries are ignored, and a list with no
   * usable entry applies no filter), `acceptRegex` / `rejectRegex` (RegExp or
   * pattern string) and `excludeExtensions` (with or without leading dot).
   *
   * @param assets  Asset records to filter.
   * @param options Filter options object.
   * @returns A new array holding the assets that were kept.
   */
  filterAssets(assets, options) {
    // Everything that depends only on `options` is prepared once, not per asset.
    const toLowerSet = (tags) => (tags ? new Set(tags.map((t) => t.toLowerCase())) : null);
    const acceptTypes = options.acceptAssetTypes && options.acceptAssetTypes.length > 0 ? options.acceptAssetTypes : null;
    const rejectTypes = options.rejectAssetTypes && options.rejectAssetTypes.length > 0 ? options.rejectAssetTypes : null;
    const followTags = toLowerSet(options.followTags);
    const ignoreTags = toLowerSet(options.ignoreTags);
    const acceptGlobs = compileGlobList(options.accept);
    const rejectGlobs = compileGlobList(options.reject);
    const acceptRegex = toFilterRegExp(options.acceptRegex);
    const rejectRegex = toFilterRegExp(options.rejectRegex);
    const excludedExtensions = new Set(
      (options.excludeExtensions || []).map((ext) => (ext.startsWith(".") ? ext : "." + ext).toLowerCase())
    );

    return assets.filter((asset) => {
      if (acceptTypes && !acceptTypes.includes(asset.type)) return false;
      if (rejectTypes && rejectTypes.includes(asset.type)) return false;

      if (asset.tag) {
        // extractFromHTML lower-cases tag names, so compare the same way here.
        const tag = asset.tag.toLowerCase();
        if (followTags && !followTags.has(tag)) return false;
        if (ignoreTags && ignoreTags.has(tag)) return false;
      }

      if (acceptGlobs.length > 0 && !acceptGlobs.some((glob) => glob.test(asset.url))) return false;
      if (rejectGlobs.some((glob) => glob.test(asset.url))) return false;

      if (acceptRegex && !testFromStart(acceptRegex, asset.url)) return false;
      if (rejectRegex && testFromStart(rejectRegex, asset.url)) return false;

      if (excludedExtensions.size > 0) {
        const ext = getUrlExtension(asset.url);
        if (ext && excludedExtensions.has("." + ext.toLowerCase())) return false;
      }
      return true;
    });
  }

  /**
   * Tests a URL against a glob pattern (`*` = any run, `?` = any single
   * character). Matching is case-insensitive and unanchored.
   */
  matchGlob(url, pattern) {
    return compileGlobPattern(pattern).test(url);
  }

  /**
   * Dispatches to the extractor that matches `mimeType`.
   *
   * @param content  Response body as text.
   * @param mimeType Content type; matched case-insensitively by substring.
   * @param baseUrl  URL used to resolve relative references.
   * @param options  Passed through to extractFromHTML only.
   * @returns Array of asset records, empty for unsupported types.
   */
  extract(content, mimeType, baseUrl, options) {
    const lowerMime = mimeType.toLowerCase();
    // "xhtml" contains "html", so one check covers both.
    if (lowerMime.includes("html")) {
      return this.extractFromHTML(content, baseUrl, options);
    }
    if (lowerMime.includes("css")) {
      return this.extractFromCSS(content, baseUrl);
    }
    if (lowerMime.includes("svg") || lowerMime.includes("xml")) {
      return this.extractFromXML(content, baseUrl);
    }
    if (lowerMime.includes("javascript") || lowerMime.includes("ecmascript")) {
      return this.extractFromJS(content, baseUrl);
    }
    return [];
  }
}
553
+
554
+ exports.AssetExtractor = AssetExtractor;
555
+ exports.default = AssetExtractor;
556
+ module.exports = Object.assign(AssetExtractor, exports);