rezo 1.0.66 → 1.0.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/dist/adapters/entries/curl.d.ts +5 -0
  2. package/dist/adapters/entries/fetch.d.ts +5 -0
  3. package/dist/adapters/entries/http.d.ts +5 -0
  4. package/dist/adapters/entries/http2.d.ts +5 -0
  5. package/dist/adapters/entries/react-native.d.ts +5 -0
  6. package/dist/adapters/entries/xhr.d.ts +5 -0
  7. package/dist/adapters/index.cjs +6 -6
  8. package/dist/cache/index.cjs +9 -9
  9. package/dist/crawler/crawler.cjs +26 -5
  10. package/dist/crawler/crawler.js +26 -5
  11. package/dist/crawler/index.cjs +40 -40
  12. package/dist/crawler.d.ts +10 -0
  13. package/dist/entries/crawler.cjs +4 -4
  14. package/dist/index.cjs +27 -27
  15. package/dist/index.d.ts +5 -0
  16. package/dist/internal/agents/index.cjs +10 -10
  17. package/dist/platform/browser.d.ts +5 -0
  18. package/dist/platform/bun.d.ts +5 -0
  19. package/dist/platform/deno.d.ts +5 -0
  20. package/dist/platform/node.d.ts +5 -0
  21. package/dist/platform/react-native.d.ts +5 -0
  22. package/dist/platform/worker.d.ts +5 -0
  23. package/dist/proxy/index.cjs +4 -4
  24. package/dist/proxy/manager.cjs +1 -1
  25. package/dist/proxy/manager.js +1 -1
  26. package/dist/queue/index.cjs +8 -8
  27. package/dist/queue/queue.cjs +3 -1
  28. package/dist/queue/queue.js +3 -1
  29. package/dist/responses/universal/index.cjs +11 -11
  30. package/dist/wget/asset-extractor.cjs +556 -0
  31. package/dist/wget/asset-extractor.js +553 -0
  32. package/dist/wget/asset-organizer.cjs +230 -0
  33. package/dist/wget/asset-organizer.js +227 -0
  34. package/dist/wget/download-cache.cjs +221 -0
  35. package/dist/wget/download-cache.js +218 -0
  36. package/dist/wget/downloader.cjs +607 -0
  37. package/dist/wget/downloader.js +604 -0
  38. package/dist/wget/file-writer.cjs +349 -0
  39. package/dist/wget/file-writer.js +346 -0
  40. package/dist/wget/filter-lists.cjs +1330 -0
  41. package/dist/wget/filter-lists.js +1330 -0
  42. package/dist/wget/index.cjs +633 -0
  43. package/dist/wget/index.d.ts +8486 -0
  44. package/dist/wget/index.js +614 -0
  45. package/dist/wget/link-converter.cjs +297 -0
  46. package/dist/wget/link-converter.js +294 -0
  47. package/dist/wget/progress.cjs +271 -0
  48. package/dist/wget/progress.js +266 -0
  49. package/dist/wget/resume.cjs +166 -0
  50. package/dist/wget/resume.js +163 -0
  51. package/dist/wget/robots.cjs +303 -0
  52. package/dist/wget/robots.js +300 -0
  53. package/dist/wget/types.cjs +200 -0
  54. package/dist/wget/types.js +197 -0
  55. package/dist/wget/url-filter.cjs +351 -0
  56. package/dist/wget/url-filter.js +348 -0
  57. package/package.json +6 -1
@@ -0,0 +1,614 @@
1
+ export { WgetError } from './types.js';
2
+ export { AssetExtractor } from './asset-extractor.js';
3
+ export { UrlFilter } from './url-filter.js';
4
+ export { FileWriter } from './file-writer.js';
5
+ export { RobotsHandler } from './robots.js';
6
+ export { ResumeHandler } from './resume.js';
7
+ export { ProgressReporter, ProgressTracker, parseSize } from './progress.js';
8
+ export { LinkConverter } from './link-converter.js';
9
+ export { Downloader } from './downloader.js';
10
+ export { AssetOrganizer, DEFAULT_ASSET_FOLDERS } from './asset-organizer.js';
11
+ export { DownloadCache } from './download-cache.js';
12
+ export {
13
+ EXECUTABLE_EXTENSIONS,
14
+ ARCHIVE_EXTENSIONS,
15
+ DOCUMENT_EXTENSIONS,
16
+ IMAGE_EXTENSIONS,
17
+ VIDEO_EXTENSIONS,
18
+ AUDIO_EXTENSIONS,
19
+ FONT_EXTENSIONS,
20
+ WEB_ASSET_EXTENSIONS,
21
+ DATA_EXTENSIONS,
22
+ EXECUTABLE_MIME_TYPES,
23
+ ARCHIVE_MIME_TYPES,
24
+ DOCUMENT_MIME_TYPES,
25
+ IMAGE_MIME_TYPES,
26
+ VIDEO_MIME_TYPES,
27
+ AUDIO_MIME_TYPES,
28
+ FONT_MIME_TYPES,
29
+ WEB_ASSET_MIME_TYPES,
30
+ DATA_MIME_TYPES,
31
+ SAFE_WEB_PRESET,
32
+ DOCUMENTS_ONLY_PRESET,
33
+ NO_MEDIA_PRESET,
34
+ MINIMAL_MIRROR_PRESET,
35
+ TEXT_ONLY_PRESET
36
+ } from './filter-lists.js';
37
+ import { Downloader } from './downloader.js';
38
+ import rezo from '../index.js';
39
+ import { promises as fs } from "node:fs";
40
+ import { flattenWgetOptions } from './types.js';
41
+ import { parseProxyString } from '../proxy/parse.js';
42
+
43
/**
 * Fluent, wget-style front end over a {@link Downloader}.
 *
 * Options are kept grouped by category (`download`, `http`, `recursive`,
 * `filter`, ...) in `this.options`; every download call merges them with the
 * per-call options, flattens the result and hands it to a fresh Downloader.
 *
 * NOTE(review): the HTTP client (`this.http`) is built once in the constructor
 * from the options supplied at that time. The fluent setters below only update
 * `this.options`; whether Downloader re-applies timeout / headers / concurrency
 * from the flattened options is not visible in this file — confirm.
 */
export class Wget {
  options = {};
  downloader = null;
  eventHandlers = new Map();
  http;

  /**
   * @param {object} [options] Categorised wget options. They are copied via a
   *   JSON round-trip, so functions and `undefined` values are dropped and
   *   non-finite numbers (e.g. `Infinity`) become `null`.
   */
  constructor(options = {}) {
    this.options = JSON.parse(JSON.stringify(options));
    this.options.proxy = this.parseProxyString(this.options.proxy || "");
    const timeout = this.options.download?.timeout ?? 30;
    const maxRedirects = this.options.http?.maxRedirects;
    const userAgent = this.options.http?.userAgent ?? "Rezo-Wget/1.0";
    const headers = this.options.http?.headers ?? {};
    const noCheckCert = this.options.http?.noCheckCertificate ?? false;
    const proxyConfig = this.options.proxy;
    const concurrency = this.options.download?.concurrency ?? 1;
    const wait = this.options.download?.wait ?? 0;
    this.http = rezo.create({
      // `timeout` and `wait` are multiplied by 1000 before reaching the client.
      timeout: timeout * 1000,
      maxRedirects,
      headers: {
        "User-Agent": userAgent,
        // Caller headers come last so they can override the default User-Agent.
        ...headers
      },
      rejectUnauthorized: !noCheckCert,
      proxy: proxyConfig,
      queueOptions: {
        enable: true,
        options: {
          concurrency,
          // A positive wait throttles the queue to one request per interval.
          ...(wait > 0 ? { interval: wait * 1000, intervalCap: 1 } : {})
        }
      }
    });
  }

  /**
   * Normalises a proxy setting.
   * @param {string|object|undefined} proxy
   * @returns {object|undefined} `undefined` for a falsy input or a string the
   *   proxy parser rejects; non-string inputs are returned unchanged.
   */
  parseProxyString(proxy) {
    if (!proxy)
      return;
    if (typeof proxy === "string") {
      return parseProxyString(proxy) || undefined;
    }
    return proxy;
  }

  /** Registers `handler` for `event`; it is attached to each Downloader created later. */
  on(event, handler) {
    if (!this.eventHandlers.has(event)) {
      this.eventHandlers.set(event, []);
    }
    this.eventHandlers.get(event).push(handler);
    return this;
  }

  /** Removes the first registration of `handler` for `event`, if any. */
  off(event, handler) {
    const handlers = this.eventHandlers.get(event);
    if (handlers) {
      const index = handlers.indexOf(handler);
      if (index !== -1) {
        handlers.splice(index, 1);
      }
    }
    return this;
  }

  /** Shorthand for `on("progress", callback)`. */
  onProgress(callback) {
    return this.on("progress", callback);
  }

  /** Shorthand for `on("complete", callback)`. */
  onDownload(callback) {
    return this.on("complete", callback);
  }

  /**
   * Calls `callback(error, url)` for every "error" event.
   * The callback is wrapped, so it cannot be removed again through `off()`.
   */
  onError(callback) {
    return this.on("error", (event) => callback(event.error, event.url));
  }

  /**
   * Calls `callback(stats)` for every "finish" event.
   * The callback is wrapped, so it cannot be removed again through `off()`.
   */
  onComplete(callback) {
    return this.on("finish", (event) => callback(event.stats));
  }

  /**
   * Downloads `url` with the instance options merged with `options`.
   * Each call replaces `this.downloader` with a new Downloader.
   * @returns {Promise<*>} Whatever `Downloader#download` resolves to.
   */
  async get(url, options) {
    const mergedOptions = this.mergeOptions(this.options, options);
    const flatOptions = flattenWgetOptions(mergedOptions);
    this.downloader = new Downloader(flatOptions, this.http);
    this.attachEventHandlers();
    return this.downloader.download(url);
  }

  /** Same as {@link Wget#get}, for a list of URLs. */
  async getAll(urls, options) {
    return this.get(urls, options);
  }

  /**
   * Recursive mirror of `url`: recursion enabled, mirror mode, unlimited depth
   * and timestamping. Caller `options` are merged per category, so passing e.g.
   * `{ recursive: { convertLinks: true } }` adds to the mirror defaults instead
   * of replacing them; caller values still win for the keys they set.
   */
  async mirror(url, options) {
    const mirrorDefaults = {
      recursive: {
        enabled: true,
        mirror: true,
        depth: Infinity
      },
      download: {
        timestamping: true
      }
    };
    return this.get(url, this.mergeOptions(mirrorDefaults, options));
  }

  /**
   * Downloads every URL listed in `inputFile` (one per line). Blank lines and
   * lines starting with `#` are skipped.
   */
  async fromFile(inputFile, options) {
    const content = await fs.readFile(inputFile, "utf-8");
    const urls = content
      .split("\n")
      .map((line) => line.trim())
      .filter((line) => line && !line.startsWith("#"));
    return this.getAll(urls, options);
  }

  /** Enables recursion; also sets the depth when `depth` is given. */
  recursive(depth) {
    this.options.recursive = { ...this.options.recursive, enabled: true };
    if (depth !== undefined) {
      this.options.recursive.depth = depth;
    }
    return this;
  }

  /** Sets `recursive.depth`. */
  depth(depth) {
    this.options.recursive = { ...this.options.recursive, depth };
    return this;
  }

  /** Sets `recursive.pageRequisites` to true. */
  pageRequisites() {
    this.options.recursive = { ...this.options.recursive, pageRequisites: true };
    return this;
  }

  /** Sets `recursive.convertLinks` to true. */
  convertLinks() {
    this.options.recursive = { ...this.options.recursive, convertLinks: true };
    return this;
  }

  /** Sets `filter.noParent` to true. */
  noParent() {
    this.options.filter = { ...this.options.filter, noParent: true };
    return this;
  }

  /** Sets `filter.domains` to the given list and turns on `filter.spanHosts`. */
  domains(...domains) {
    this.options.filter = { ...this.options.filter, domains, spanHosts: true };
    return this;
  }

  /** Sets `filter.accept` to the given patterns. */
  accept(...patterns) {
    this.options.filter = { ...this.options.filter, accept: patterns };
    return this;
  }

  /** Sets `filter.reject` to the given patterns. */
  reject(...patterns) {
    this.options.filter = { ...this.options.filter, reject: patterns };
    return this;
  }

  /** Sets `download.wait`. */
  wait(seconds) {
    this.options.download = { ...this.options.download, wait: seconds };
    return this;
  }

  /** Sets `download.randomWait` to true. */
  randomWait() {
    this.options.download = { ...this.options.download, randomWait: true };
    return this;
  }

  /** Sets `download.limitRate`. */
  limitRate(rate) {
    this.options.download = { ...this.options.download, limitRate: rate };
    return this;
  }

  /** Sets `http.userAgent`. */
  userAgent(ua) {
    this.options.http = { ...this.options.http, userAgent: ua };
    return this;
  }

  /** Adds or replaces one entry of `http.headers`. */
  header(name, value) {
    const headers = { ...this.options.http?.headers, [name]: value };
    this.options.http = { ...this.options.http, headers };
    return this;
  }

  /** Sets `download.outputDir`. */
  outputDir(dir) {
    this.options.download = { ...this.options.download, outputDir: dir };
    return this;
  }

  /** Sets `download.output`. */
  output(filename) {
    this.options.download = { ...this.options.download, output: filename };
    return this;
  }

  /** Sets `download.continue` to true. */
  continue() {
    this.options.download = { ...this.options.download, continue: true };
    return this;
  }

  /** Sets `download.timestamping` to true. */
  timestamping() {
    this.options.download = { ...this.options.download, timestamping: true };
    return this;
  }

  /** Sets `download.concurrency`. */
  concurrency(n) {
    this.options.download = { ...this.options.download, concurrency: n };
    return this;
  }

  /** Sets `robots.enabled` to false. */
  noRobots() {
    this.options.robots = { ...this.options.robots, enabled: false };
    return this;
  }

  /** Sets `logging.quiet` to true. */
  quiet() {
    this.options.logging = { ...this.options.logging, quiet: true };
    return this;
  }

  /** Sets `logging.verbose` to true. */
  verbose() {
    this.options.logging = { ...this.options.logging, verbose: true };
    return this;
  }

  /** Sets `logging.debug` to true. */
  debug() {
    this.options.logging = { ...this.options.logging, debug: true };
    return this;
  }

  /** Stores the proxy (string or object) in normalised form; falsy clears it. */
  setProxy(proxy) {
    this.options.proxy = this.parseProxyString(proxy || "");
    return this;
  }

  /** Sets `download.timeout`. */
  timeout(seconds) {
    this.options.download = { ...this.options.download, timeout: seconds };
    return this;
  }

  /** Sets `download.tries`. */
  tries(n) {
    this.options.download = { ...this.options.download, tries: n };
    return this;
  }

  /** Sets `filter.spanHosts` to true. */
  spanHosts() {
    this.options.filter = { ...this.options.filter, spanHosts: true };
    return this;
  }

  /** Sets `http.noCheckCertificate` to true. */
  noCheckCertificate() {
    this.options.http = { ...this.options.http, noCheckCertificate: true };
    return this;
  }

  /** Sets the top-level `cache` option (default `true`). */
  cache(enabled = true) {
    this.options.cache = enabled;
    return this;
  }

  /** Sets the top-level `cache` option to false. */
  noCache() {
    this.options.cache = false;
    return this;
  }

  /** Sets `directories.organizeAssets` (default `true`). */
  organizeAssets(enabled = true) {
    this.options.directories = { ...this.options.directories, organizeAssets: enabled };
    return this;
  }

  /** Sets `directories.assetFolders`. */
  assetFolders(folders) {
    this.options.directories = { ...this.options.directories, assetFolders: folders };
    return this;
  }

  /** Sets `filter.excludeDirectories`. */
  excludeDirectories(directories) {
    this.options.filter = { ...this.options.filter, excludeDirectories: directories };
    return this;
  }

  /** Sets `filter.includeDirectories`. */
  includeDirectories(directories) {
    this.options.filter = { ...this.options.filter, includeDirectories: directories };
    return this;
  }

  /** Sets `filter.excludeExtensions`. */
  excludeExtensions(extensions) {
    this.options.filter = { ...this.options.filter, excludeExtensions: extensions };
    return this;
  }

  /** Sets `filter.excludeMimeTypes`. */
  excludeMimeTypes(mimeTypes) {
    this.options.filter = { ...this.options.filter, excludeMimeTypes: mimeTypes };
    return this;
  }

  /** Sets `filter.acceptAssetTypes`. */
  includeTypes(types) {
    this.options.filter = { ...this.options.filter, acceptAssetTypes: types };
    return this;
  }

  /** Sets `filter.rejectAssetTypes`. */
  excludeTypes(types) {
    this.options.filter = { ...this.options.filter, rejectAssetTypes: types };
    return this;
  }

  /** Sets `filter.maxFileSize`. */
  maxFileSize(bytes) {
    this.options.filter = { ...this.options.filter, maxFileSize: bytes };
    return this;
  }

  /** Sets `filter.minFileSize`. */
  minFileSize(bytes) {
    this.options.filter = { ...this.options.filter, minFileSize: bytes };
    return this;
  }

  /** Aborts the current Downloader, if one exists. */
  abort() {
    if (this.downloader) {
      this.downloader.abort();
    }
  }

  /** Returns a shallow copy of the current options (nested groups are shared). */
  getOptions() {
    return { ...this.options };
  }

  /** Returns the current Downloader's URL map, or `null` when there is none. */
  getUrlMap() {
    return this.downloader?.getUrlMap() || null;
  }

  /** Destroys the current Downloader (if any) and forgets all event handlers. */
  async destroy() {
    if (this.downloader) {
      await this.downloader.destroy();
      this.downloader = null;
    }
    this.eventHandlers.clear();
  }

  /** Attaches every registered handler to the current Downloader. */
  attachEventHandlers() {
    if (!this.downloader)
      return;
    for (const [event, handlers] of this.eventHandlers) {
      for (const handler of handlers) {
        this.downloader.on(event, handler);
      }
    }
  }

  /**
   * Merges two categorised option objects into a new one.
   *
   * Known categories are merged one level deep with `override` winning per
   * key. Top-level settings outside the category list (such as `cache`) are
   * carried over as well. A string `proxy` override is parsed and replaces the
   * base proxy rather than being spread character by character.
   *
   * @param {object} base
   * @param {object} [override]
   * @returns {object} A shallow copy of `base` when `override` is missing.
   */
  mergeOptions(base, override) {
    if (!override)
      return { ...base };
    const categories = [
      "logging",
      "download",
      "directories",
      "http",
      "recursive",
      "filter",
      "robots",
      "proxy",
      "network",
      "input",
      "misc"
    ];
    const result = {};
    // Keep non-category settings; later sources win, `undefined` never overrides.
    for (const source of [base, override]) {
      for (const key of Object.keys(source)) {
        if (!categories.includes(key) && source[key] !== undefined) {
          result[key] = source[key];
        }
      }
    }
    for (const cat of categories) {
      const baseVal = base[cat];
      const overrideVal = override[cat];
      if (cat === "proxy" && typeof overrideVal === "string") {
        // Same normalisation as the constructor and setProxy(); an empty or
        // unparseable string falls back to the base proxy.
        const proxy = this.parseProxyString(overrideVal) ?? baseVal;
        if (proxy) {
          result.proxy = { ...proxy };
        }
        continue;
      }
      if (baseVal || overrideVal) {
        result[cat] = {
          ...(baseVal || {}),
          ...(overrideVal || {})
        };
      }
    }
    return result;
  }
}
368
/**
 * One-shot convenience wrapper: builds a throwaway {@link Wget} from `options`
 * and downloads `url` with it.
 *
 * @param {*} url Passed through to `Wget#get`.
 * @param {object} [options] Constructor options for the temporary instance.
 * @returns {Promise<*>} Whatever `Wget#get` resolves to.
 */
export async function wget(url, options) {
  return new Wget(options).get(url);
}
372
/**
 * One-shot convenience wrapper: builds a throwaway {@link Wget} from `options`
 * and downloads every entry of `urls` with it.
 *
 * @param {*} urls Passed through to `Wget#getAll`.
 * @param {object} [options] Constructor options for the temporary instance.
 * @returns {Promise<*>} Whatever `Wget#getAll` resolves to.
 */
export async function wgetAll(urls, options) {
  return new Wget(options).getAll(urls);
}
376
/** Expands `[[aliases], ...target]` rows into a Map keyed by every alias. */
const expandWgetFlagTable = (rows) =>
  new Map(rows.flatMap(([aliases, ...target]) => aliases.map((alias) => [alias, target])));

/** Flags that take no value: alias → [option group, property set to `true`]. */
const WGET_SWITCH_FLAGS = expandWgetFlagTable([
  [["-r", "--recursive"], "recursive", "enabled"],
  [["-p", "--page-requisites"], "recursive", "pageRequisites"],
  [["-k", "--convert-links"], "recursive", "convertLinks"],
  [["-K", "--backup-converted"], "recursive", "backupConverted"],
  [["-m", "--mirror"], "recursive", "mirror"],
  [["-c", "--continue"], "download", "continue"],
  [["-N", "--timestamping"], "download", "timestamping"],
  [["--random-wait"], "download", "randomWait"],
  [["-nc", "--no-clobber"], "download", "noClobber"],
  [["-E", "--adjust-extension"], "download", "adjustExtension"],
  [["-np", "--no-parent"], "filter", "noParent"],
  [["-H", "--span-hosts"], "filter", "spanHosts"],
  [["-q", "--quiet"], "logging", "quiet"],
  [["-v", "--verbose"], "logging", "verbose"],
  [["-d", "--debug"], "logging", "debug"],
  [["-nv", "--no-verbose"], "logging", "noVerbose"],
  [["-nd", "--no-directories"], "directories", "noDirectories"],
  [["-x", "--force-directories"], "directories", "forceDirectories"],
  [["-nH", "--no-host-directories"], "directories", "noHostDirectories"],
  [["--no-check-certificate"], "http", "noCheckCertificate"]
]);

const wgetArgAsInt = (value) => Number.parseInt(value, 10);
const wgetArgAsFloat = (value) => Number.parseFloat(value);
const wgetArgAsIs = (value) => value;

/** Flags followed by a value: alias → [option group, property, converter]. */
const WGET_VALUE_FLAGS = expandWgetFlagTable([
  [["-l", "--level"], "recursive", "depth", wgetArgAsInt],
  [["-w", "--wait"], "download", "wait", wgetArgAsFloat],
  [["-T", "--timeout"], "download", "timeout", wgetArgAsInt],
  [["-t", "--tries"], "download", "tries", wgetArgAsInt],
  [["-O", "--output-document"], "download", "output", wgetArgAsIs],
  [["-P", "--directory-prefix"], "download", "outputDir", wgetArgAsIs],
  [["-A", "--accept"], "filter", "accept", wgetArgAsIs],
  [["-R", "--reject"], "filter", "reject", wgetArgAsIs],
  [["-D", "--domains"], "filter", "domains", wgetArgAsIs],
  [["-U", "--user-agent"], "http", "userAgent", wgetArgAsIs],
  [["-i", "--input-file"], "input", "file", wgetArgAsIs]
]);

/**
 * Parses a wget-style argument vector into categorised options plus the list
 * of URLs.
 *
 * - Arguments that do not start with `-` are collected as URLs.
 * - `--key=value` is split at the FIRST `=` only, so values that themselves
 *   contain `=` (URLs with query strings, header values, ...) stay intact, and
 *   is then handed to `setOption`.
 * - Known switches set a boolean; known valued flags consume the next argument.
 * - Any other flag is handed to `setOption` together with the next argument,
 *   which is consumed even if `setOption` does not recognise the flag.
 *
 * Option groups that end up empty are removed from the result.
 *
 * @param {string[]} args Command-line style arguments.
 * @returns {{ options: object, urls: string[] }}
 */
export function parseWgetArgs(args) {
  const options = {
    logging: {},
    download: {},
    directories: {},
    http: {},
    recursive: {},
    filter: {},
    robots: {},
    proxy: undefined,
    network: {},
    input: {},
    misc: {}
  };
  const urls = [];
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (!arg.startsWith("-")) {
      urls.push(arg);
      continue;
    }
    const eq = arg.indexOf("=");
    if (eq !== -1) {
      // Split once: everything after the first "=" belongs to the value.
      setOption(options, arg.slice(0, eq), arg.slice(eq + 1));
      continue;
    }
    const switchTarget = WGET_SWITCH_FLAGS.get(arg);
    if (switchTarget) {
      const [group, prop] = switchTarget;
      options[group][prop] = true;
      continue;
    }
    const valueTarget = WGET_VALUE_FLAGS.get(arg);
    if (valueTarget) {
      const [group, prop, convert] = valueTarget;
      options[group][prop] = convert(args[++i]);
      continue;
    }
    setOption(options, arg, args[++i]);
  }
  // Drop groups nothing was written to; `proxy` (undefined or a string) stays.
  for (const key of Object.keys(options)) {
    if (options[key] && Object.keys(options[key]).length === 0) {
      delete options[key];
    }
  }
  return { options, urls };
}
536
/**
 * Applies one `key`/`value` pair (from `--key=value` or `--key value`) to the
 * categorised `options` object, mutating it in place.
 *
 * Leading `-` / `--` is stripped from `key`. Unrecognised keys are ignored.
 * A missing value (`undefined`, e.g. a valued flag given as the last argument)
 * is ignored as well instead of throwing on `.split` or storing `NaN`.
 *
 * @param {object} options Categorised options; missing groups are created.
 * @param {string} key Option name, with or without leading dashes.
 * @param {string|undefined} value Raw string value.
 */
function setOption(options, key, value) {
  const cleanKey = key.replace(/^--?/, "");
  options.download ||= {};
  options.recursive ||= {};
  options.directories ||= {};
  options.http ||= {};
  options.filter ||= {};
  options.input ||= {};
  if (value === undefined) {
    return;
  }
  const asInt = (raw) => Number.parseInt(raw, 10);
  const asFloat = (raw) => Number.parseFloat(raw);
  const asList = (raw) => raw.split(",");
  const asIs = (raw) => raw;
  // key → [option group, property, converter]
  const targets = {
    "limit-rate": ["download", "limitRate", asIs],
    "wait": ["download", "wait", asFloat],
    "timeout": ["download", "timeout", asInt],
    "tries": ["download", "tries", asInt],
    "quota": ["download", "quota", asIs],
    "output-document": ["download", "output", asIs],
    "directory-prefix": ["download", "outputDir", asIs],
    "level": ["recursive", "depth", asInt],
    "cut-dirs": ["directories", "cutDirs", asInt],
    "user-agent": ["http", "userAgent", asIs],
    "referer": ["http", "referer", asIs],
    "http-user": ["http", "user", asIs],
    "http-password": ["http", "password", asIs],
    "accept": ["filter", "accept", asIs],
    "reject": ["filter", "reject", asIs],
    "domains": ["filter", "domains", asIs],
    "exclude-domains": ["filter", "excludeDomains", asList],
    "include-directories": ["filter", "includeDirectories", asList],
    "exclude-directories": ["filter", "excludeDirectories", asList],
    "input-file": ["input", "file", asIs],
    "base": ["input", "base", asIs]
  };
  if (cleanKey === "proxy") {
    // The proxy is a top-level setting, not a member of a group.
    options.proxy = value;
    return;
  }
  // Own-property check so keys like "constructor" never hit the prototype.
  if (!Object.hasOwn(targets, cleanKey)) {
    return;
  }
  const [group, prop, convert] = targets[cleanKey];
  options[group][prop] = convert(value);
}
614
+ export default Wget;