wayback-dl 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,73 @@
1
+ # wayback-dl
2
+
3
+ Download archived websites from the [Internet Archive Wayback Machine](https://web.archive.org/).
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install -g wayback-dl
9
+ ```
10
+
11
+ Or run without installing:
12
+
13
+ ```bash
14
+ npx wayback-dl https://example.com
15
+ ```
16
+
17
+ ## Usage
18
+
19
+ ```bash
20
+ wayback-dl <url> [options]
21
+ ```
22
+
23
+ ### Examples
24
+
25
+ ```bash
26
+ # Download a site to ./websites/example.com/
27
+ wayback-dl https://example.com
28
+
29
+ # Download to a custom directory
30
+ wayback-dl https://example.com -d ./my-archive/
31
+
32
+ # Use 20 concurrent downloads
33
+ wayback-dl https://example.com -c 20
34
+
35
+ # Only download images
36
+ wayback-dl https://example.com -o "/\.(jpg|png|gif)$/i"
37
+
38
+ # Exclude certain paths
39
+ wayback-dl https://example.com -x "/ads/"
40
+
41
+ # Restrict by timestamp (YYYYMMDDHHmmss)
42
+ wayback-dl https://example.com -f 20060101000000 -t 20101231235959
43
+
44
+ # List files as JSON without downloading
45
+ wayback-dl https://example.com -l
46
+ ```
47
+
48
+ ### Options
49
+
50
+ | Flag | Description |
51
+ |------|-------------|
52
+ | `-d, --directory <path>` | Output directory (default: `./websites/{domain}/`) |
53
+ | `-c, --concurrency <n>` | Parallel downloads (default: 5) |
54
+ | `-f, --from <timestamp>` | Only snapshots from this timestamp (`YYYYMMDDHHmmss`) |
55
+ | `-t, --to <timestamp>` | Only snapshots up to this timestamp (`YYYYMMDDHHmmss`) |
56
+ | `-o, --only <filter>` | Only download URLs matching filter (use `/regex/` for regex) |
57
+ | `-x, --exclude <filter>` | Exclude URLs matching filter |
58
+ | `-a, --all` | Include non-200 responses (errors, redirects) |
59
+ | `-s, --all-timestamps` | Download all snapshot versions |
60
+ | `-e, --exact-url` | Download only the exact URL, not the full site |
61
+ | `-l, --list` | List files as JSON without downloading |
62
+ | `--max-pages <n>` | Max CDX API pages (default: 100) |
63
+ | `--overwrite` | Re-download existing files |
64
+ | `--retry <n>` | Max retries per file (default: 5) |
65
+ | `--no-color` | Disable colored output |
66
+
67
+ ## Requirements
68
+
69
+ - Node.js 18+
70
+
71
+ ## License
72
+
73
+ MIT
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+
3
+ import '../dist/cli.js';
package/dist/cdx.d.ts ADDED
@@ -0,0 +1,24 @@
1
+ import type { Snapshot } from './types.js';
2
+ export interface CdxOptions { // Inputs used to build one CDX search request URL.
3
+ url: string; // Sent as the `url` query parameter.
4
+ fromTimestamp?: number; // Sent as `from`; omitted when undefined or 0.
5
+ toTimestamp?: number; // Sent as `to`; omitted when undefined or 0.
6
+ all?: boolean; // When falsy, the request adds `filter=statuscode:200`.
7
+ page?: number; // Sent as `page` whenever it is defined (0 included).
8
+ }
9
+ /**
10
+ * Fetch a single page of snapshots from the CDX API. Resolves to an empty array when the request still fails after retries, on HTTP 400/404, or when the body is not a non-empty JSON array.
11
+ */
12
+ export declare function fetchCdxPage(options: CdxOptions): Promise<Snapshot[]>;
13
+ /**
14
+ * Fetch all snapshots for a URL, paginating through the CDX API. The exact URL is queried first, then `<baseUrl>/*` page by page until an empty page or `maxPages` is reached.
15
+ */
16
+ export declare function fetchAllSnapshots(baseUrl: string, options?: {
17
+ exactUrl?: boolean; // When true, only the exact-URL query is made (no wildcard pages).
18
+ fromTimestamp?: number; // Forwarded to every CDX request.
19
+ toTimestamp?: number; // Forwarded to every CDX request.
20
+ all?: boolean; // Forwarded to every CDX request.
21
+ maxPages?: number; // Upper bound on wildcard pages; defaults to 100.
22
+ onPage?: (page: number, count: number) => void; // Called with 0 for the exact-URL query, then wildcard page index + 1.
23
+ }): Promise<Snapshot[]>;
24
+ //# sourceMappingURL=cdx.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cdx.d.ts","sourceRoot":"","sources":["../src/cdx.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAK3C,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AA2BD;;GAEG;AACH,wBAAsB,YAAY,CAAC,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,CA4D3E;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,OAAO,EAAE,MAAM,EACf,OAAO,GAAE;IACP,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CAC3C,GACL,OAAO,CAAC,QAAQ,EAAE,CAAC,CAkCrB"}
package/dist/cdx.js ADDED
@@ -0,0 +1,113 @@
1
+ import { retry } from './retry.js';
2
const CDX_BASE = 'https://web.archive.org/cdx/search/xd';

/**
 * Build the CDX search URL for a single request.
 *
 * Fixed parameters (`output`, `fl`, `collapse`, `gzip`) always come first,
 * followed by the optional `filter`, `from`, `to` and `page` parameters.
 *
 * @param options Request options: `url`, `all`, `fromTimestamp`,
 *   `toTimestamp` and `page`.
 * @returns The full request URL including the encoded query string.
 */
function buildCdxUrl(options) {
    const { url, all, fromTimestamp, toTimestamp, page } = options;
    // A timestamp bound is only sent when it is neither undefined nor 0.
    const isBound = (value) => value !== undefined && value !== 0;
    const entries = [
        ['output', 'json'],
        ['url', url],
        ['fl', 'timestamp,original'],
        ['collapse', 'digest'],
        ['gzip', 'false'],
    ];
    if (!all) {
        // Default mode: restrict results to HTTP 200 captures.
        entries.push(['filter', 'statuscode:200']);
    }
    if (isBound(fromTimestamp)) {
        entries.push(['from', String(fromTimestamp)]);
    }
    if (isBound(toTimestamp)) {
        entries.push(['to', String(toTimestamp)]);
    }
    if (page !== undefined) {
        entries.push(['page', String(page)]);
    }
    const query = new URLSearchParams(entries);
    return `${CDX_BASE}?${query.toString()}`;
}
25
/**
 * Fetch a single page of snapshots from the CDX API.
 *
 * The request goes through `retry` (5 retries, 1s base delay, 30s max delay).
 * HTTP 400 and 404 are treated as "no results" and are not retried; any other
 * non-OK status throws inside the retried callback.
 *
 * @param options CDX request options passed to `buildCdxUrl`.
 * @returns A list of `{ timestamp, url }` records. The list is empty when the
 *   request ultimately fails, the body is empty or not valid JSON, or the
 *   parsed value is not a non-empty array.
 */
export async function fetchCdxPage(options) {
    const requestUrl = buildCdxUrl(options);
    const loadBody = async () => {
        const res = await fetch(requestUrl);
        if (res.status === 400 || res.status === 404) {
            return '';
        }
        if (!res.ok) {
            throw new Error(`CDX API error: ${res.status} ${res.statusText}`);
        }
        return res.text();
    };
    let parsed;
    try {
        const body = await retry(loadBody, {
            retries: 5,
            baseDelay: 1000,
            maxDelay: 30000,
        });
        if (body === '') {
            return [];
        }
        parsed = JSON.parse(body);
    }
    catch {
        // Best effort: request and parse failures both yield "no snapshots".
        return [];
    }
    if (!Array.isArray(parsed) || parsed.length === 0) {
        return [];
    }
    // The JSON output may start with a header row naming the requested fields.
    const [first] = parsed;
    const hasHeader = Array.isArray(first) &&
        first[0] === 'timestamp' &&
        first[1] === 'original';
    const rows = hasHeader ? parsed.slice(1) : parsed;
    return rows
        .filter((row) => Array.isArray(row) && row.length >= 2)
        .map((row) => ({
            timestamp: String(row[0]),
            url: String(row[1]),
        }));
}
80
+ /**
81
+ * Fetch all snapshots for a URL, paginating through the CDX API.
82
+ */
83
+ export async function fetchAllSnapshots(baseUrl, options = {}) {
84
+ const maxPages = options.maxPages ?? 100;
85
+ const all = [];
86
+ const baseOptions = {
87
+ url: baseUrl,
88
+ fromTimestamp: options.fromTimestamp,
89
+ toTimestamp: options.toTimestamp,
90
+ all: options.all,
91
+ };
92
+ const firstPage = await fetchCdxPage(baseOptions);
93
+ all.push(...firstPage);
94
+ options.onPage?.(0, firstPage.length);
95
+ if (options.exactUrl || firstPage.length === 0) {
96
+ return all;
97
+ }
98
+ const wildcardUrl = baseUrl.endsWith('/') ? `${baseUrl}*` : `${baseUrl}/*`;
99
+ for (let page = 0; page < maxPages; page++) {
100
+ const pageSnapshots = await fetchCdxPage({
101
+ ...baseOptions,
102
+ url: wildcardUrl,
103
+ page,
104
+ });
105
+ if (pageSnapshots.length === 0) {
106
+ break;
107
+ }
108
+ all.push(...pageSnapshots);
109
+ options.onPage?.(page + 1, pageSnapshots.length);
110
+ }
111
+ return all;
112
+ }
113
+ //# sourceMappingURL=cdx.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cdx.js","sourceRoot":"","sources":["../src/cdx.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAEnC,MAAM,QAAQ,GAAG,uCAAuC,CAAC;AAUzD,SAAS,WAAW,CAAC,OAAmB;IACtC,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC;QACjC,MAAM,EAAE,MAAM;QACd,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,EAAE,EAAE,oBAAoB;QACxB,QAAQ,EAAE,QAAQ;QAClB,IAAI,EAAE,OAAO;KACd,CAAC,CAAC;IAEH,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC;QACjB,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAC;IACzC,CAAC;IACD,IAAI,OAAO,CAAC,aAAa,KAAK,SAAS,IAAI,OAAO,CAAC,aAAa,KAAK,CAAC,EAAE,CAAC;QACvE,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC;IACpD,CAAC;IACD,IAAI,OAAO,CAAC,WAAW,KAAK,SAAS,IAAI,OAAO,CAAC,WAAW,KAAK,CAAC,EAAE,CAAC;QACnE,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC;IAChD,CAAC;IACD,IAAI,OAAO,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QAC/B,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;IAC3C,CAAC;IAED,OAAO,GAAG,QAAQ,IAAI,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC;AAC5C,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,OAAmB;IACpD,MAAM,GAAG,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IAEjC,IAAI,QAAgB,CAAC;IACrB,IAAI,CAAC;QACH,QAAQ,GAAG,MAAM,KAAK,CACpB,KAAK,IAAI,EAAE;YACT,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;YAC7B,IAAI,GAAG,CAAC,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;gBAC7C,OAAO,EAAE,CAAC;YACZ,CAAC;YACD,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;gBACZ,MAAM,IAAI,KAAK,CAAC,kBAAkB,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;YACpE,CAAC;YACD,OAAO,GAAG,CAAC,IAAI,EAAE,CAAC;QACpB,CAAC,EACD;YACE,OAAO,EAAE,CAAC;YACV,SAAS,EAAE,IAAI;YACf,QAAQ,EAAE,KAAK;SAChB,CACF,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,QAAQ,KAAK,EAAE,EAAE,CAAC;QACpB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,IAAa,CAAC;IAClB,IAAI,CAAC;QACH,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC9B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9C,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,CAAC
,CAAC,CAAC,CAAC;IACvB,IACE,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;QACrB,MAAM,CAAC,CAAC,CAAC,KAAK,WAAW;QACzB,MAAM,CAAC,CAAC,CAAC,KAAK,UAAU,EACxB,CAAC;QACD,IAAI,CAAC,KAAK,EAAE,CAAC;IACf,CAAC;IAED,MAAM,SAAS,GAAe,EAAE,CAAC;IACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YAC1C,SAAS,CAAC,IAAI,CAAC;gBACb,SAAS,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;gBACzB,GAAG,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;aACpB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,OAAe,EACf,UAOI,EAAE;IAEN,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,GAAG,CAAC;IACzC,MAAM,GAAG,GAAe,EAAE,CAAC;IAE3B,MAAM,WAAW,GAA6B;QAC5C,GAAG,EAAE,OAAO;QACZ,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,WAAW,EAAE,OAAO,CAAC,WAAW;QAChC,GAAG,EAAE,OAAO,CAAC,GAAG;KACjB,CAAC;IAEF,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,WAAW,CAAC,CAAC;IAClD,GAAG,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;IACvB,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;IAEtC,IAAI,OAAO,CAAC,QAAQ,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/C,OAAO,GAAG,CAAC;IACb,CAAC;IAED,MAAM,WAAW,GAAG,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC,GAAG,OAAO,IAAI,CAAC;IAC3E,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,QAAQ,EAAE,IAAI,EAAE,EAAE,CAAC;QAC3C,MAAM,aAAa,GAAG,MAAM,YAAY,CAAC;YACvC,GAAG,WAAW;YACd,GAAG,EAAE,WAAW;YAChB,IAAI;SACL,CAAC,CAAC;QACH,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/B,MAAM;QACR,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;QAC3B,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,GAAG,CAAC,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;IACnD,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC"}
package/dist/cli.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=cli.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}
package/dist/cli.js ADDED
@@ -0,0 +1,105 @@
1
+ import { createRequire } from 'node:module';
2
+ import { Command } from 'commander';
3
+ import chalk from 'chalk';
4
+ import { download, listFiles } from './downloader.js';
5
+ import { createProgressBar, createSnapshotProgressBar, printSummary } from './progress.js';
6
+ import { getBackupName } from './utils.js';
7
const require = createRequire(import.meta.url);
// package.json is loaded through `require` to read the version for `--version`.
const pkg = require('../package.json');
const program = new Command();

/**
 * Parse the `--retry` option.
 *
 * An explicit `0` is kept (it means "no retries"); only a value that does not
 * parse as an integer falls back to the default. Using `|| fallback` here
 * would silently turn `--retry 0` into the default.
 *
 * @param value Raw option value from commander.
 * @param fallback Value used when `value` is not an integer.
 * @returns The parsed retry count, or `fallback`.
 */
function parseRetryCount(value, fallback) {
    const parsed = parseInt(String(value), 10);
    return Number.isNaN(parsed) ? fallback : parsed;
}

program
    .name('wayback-dl')
    .description('Download archived websites from the Internet Archive Wayback Machine')
    .version(pkg.version)
    .argument('<url>', 'Website URL to download (e.g. https://example.com)')
    .option('-d, --directory <path>', 'Output directory (default: ./websites/{domain}/)')
    .option('-c, --concurrency <n>', 'Number of parallel downloads', '5')
    .option('-f, --from <timestamp>', 'Only snapshots from this timestamp (e.g. 20060716231334)')
    .option('-t, --to <timestamp>', 'Only snapshots up to this timestamp')
    .option('-o, --only <filter>', 'Only download URLs matching filter (use /pattern/ for regex)')
    .option('-x, --exclude <filter>', 'Exclude URLs matching filter')
    .option('-a, --all', 'Include non-200 responses (errors, redirects)')
    .option('-s, --all-timestamps', 'Download all snapshot versions')
    .option('-e, --exact-url', 'Download only the exact URL, not the full site')
    .option('-l, --list', 'List files as JSON without downloading')
    .option('--max-pages <n>', 'Max CDX API pages to fetch', '100')
    .option('--overwrite', 'Re-download existing files')
    .option('--retry <n>', 'Max retries per file', '5')
    .option('--no-color', 'Disable colored output')
    .action(async (url, opts) => {
        // `--no-color` makes commander set `opts.color` to false.
        const useColors = opts.color !== false;
        const options = {
            baseUrl: url,
            directory: opts.directory,
            exactUrl: opts.exactUrl === true,
            allTimestamps: opts.allTimestamps === true,
            fromTimestamp: opts.from ? parseInt(String(opts.from), 10) : undefined,
            toTimestamp: opts.to ? parseInt(String(opts.to), 10) : undefined,
            onlyFilter: opts.only,
            excludeFilter: opts.exclude,
            all: opts.all === true,
            // 0 or unparsable values fall back to the documented defaults.
            maxPages: parseInt(String(opts.maxPages), 10) || 100,
            concurrency: parseInt(String(opts.concurrency), 10) || 5,
            overwrite: opts.overwrite === true,
            maxRetries: parseRetryCount(opts.retry, 5),
        };
        try {
            if (opts.list === true) {
                // List mode: print the curated file list as JSON and stop.
                const files = await listFiles(options);
                console.log(JSON.stringify(files, null, 2));
                return;
            }
            const backupPath = options.directory ?? `websites/${getBackupName(url)}/`;
            console.log(chalk.cyan(`Downloading ${chalk.bold(url)} to ${chalk.bold(backupPath)} from Wayback Machine.`));
            console.log();
            const snapshotBar = createSnapshotProgressBar(useColors);
            snapshotBar.start();
            const snapshotBarUpdate = (page, count) => {
                snapshotBar.update(`... page ${page + 1} (${count} snapshots)`);
            };
            const progress = createProgressBar(useColors);
            const { files, stats } = await download({
                ...options,
                onSnapshotPage: snapshotBarUpdate,
                onFileListReady: (count) => {
                    // Snapshot discovery is done; switch to the download bar.
                    snapshotBar.stop();
                    if (count > 0) {
                        console.log(chalk.cyan(`${count} files to download.`));
                        console.log();
                        progress.start(count);
                    }
                },
                onProgress: (s) => {
                    progress.update(s.downloaded + s.skipped + s.errors, s.downloaded, s.skipped, s.errors);
                },
            });
            if (files.length === 0) {
                snapshotBar.stop();
                console.log(chalk.yellow('No files to download.'));
                console.log(chalk.gray('Possible reasons:'));
                console.log(chalk.gray(' • Site is not in Wayback Machine Archive.'));
                if (options.fromTimestamp) {
                    console.log(chalk.gray(' • From timestamp too far in the future.'));
                }
                if (options.toTimestamp) {
                    console.log(chalk.gray(' • To timestamp too far in the past.'));
                }
                if (options.onlyFilter) {
                    console.log(chalk.gray(` • Only filter too restrictive: ${options.onlyFilter}`));
                }
                if (options.excludeFilter) {
                    console.log(chalk.gray(` • Exclude filter too wide: ${options.excludeFilter}`));
                }
                return;
            }
            progress.stop();
            printSummary(stats);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            console.error(chalk.red(`\nError: ${message}`));
            process.exit(1);
        }
    });
program.parse();
105
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,yBAAyB,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC3F,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAE3C,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,MAAM,GAAG,GAAG,OAAO,CAAC,iBAAiB,CAAwB,CAAC;AAE9D,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,YAAY,CAAC;KAClB,WAAW,CAAC,sEAAsE,CAAC;KACnF,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC;KACpB,QAAQ,CAAC,OAAO,EAAE,oDAAoD,CAAC;KACvE,MAAM,CAAC,wBAAwB,EAAE,kDAAkD,CAAC;KACpF,MAAM,CAAC,uBAAuB,EAAE,8BAA8B,EAAE,GAAG,CAAC;KACpE,MAAM,CAAC,wBAAwB,EAAE,0DAA0D,CAAC;KAC5F,MAAM,CAAC,sBAAsB,EAAE,qCAAqC,CAAC;KACrE,MAAM,CAAC,qBAAqB,EAAE,8DAA8D,CAAC;KAC7F,MAAM,CAAC,wBAAwB,EAAE,8BAA8B,CAAC;KAChE,MAAM,CAAC,WAAW,EAAE,+CAA+C,CAAC;KACpE,MAAM,CAAC,sBAAsB,EAAE,gCAAgC,CAAC;KAChE,MAAM,CAAC,iBAAiB,EAAE,gDAAgD,CAAC;KAC3E,MAAM,CAAC,YAAY,EAAE,wCAAwC,CAAC;KAC9D,MAAM,CAAC,iBAAiB,EAAE,4BAA4B,EAAE,KAAK,CAAC;KAC9D,MAAM,CAAC,aAAa,EAAE,4BAA4B,CAAC;KACnD,MAAM,CAAC,aAAa,EAAE,sBAAsB,EAAE,GAAG,CAAC;KAClD,MAAM,CAAC,YAAY,EAAE,wBAAwB,CAAC;KAC9C,MAAM,CAAC,KAAK,EAAE,GAAW,EAAE,IAAkD,EAAE,EAAE;IAChF,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,KAAK,KAAK,CAAC;IAEvC,MAAM,OAAO,GAAG;QACd,OAAO,EAAE,GAAG;QACZ,SAAS,EAAE,IAAI,CAAC,SAA+B;QAC/C,QAAQ,EAAE,IAAI,CAAC,QAAQ,KAAK,IAAI;QAChC,aAAa,EAAE,IAAI,CAAC,aAAa,KAAK,IAAI;QAC1C,aAAa,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;QACtE,WAAW,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;QAChE,UAAU,EAAE,IAAI,CAAC,IAA0B;QAC3C,aAAa,EAAE,IAAI,CAAC,OAA6B;QACjD,GAAG,EAAE,IAAI,CAAC,GAAG,KAAK,IAAI;QACtB,QAAQ,EAAE,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC,IAAI,GAAG;QACpD,WAAW,EAAE,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,EAAE,CAAC,I
AAI,CAAC;QACxD,SAAS,EAAE,IAAI,CAAC,SAAS,KAAK,IAAI;QAClC,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC;KAClD,CAAC;IAEF,IAAI,CAAC;QACH,IAAI,IAAI,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;YACvB,MAAM,KAAK,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,CAAC;YACvC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YAC5C,OAAO;QACT,CAAC;QAED,MAAM,UAAU,GAAG,OAAO,CAAC,SAAS,IAAI,YAAY,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC;QAC1E,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,IAAI,CAAC,eAAe,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,wBAAwB,CAAC,CAChG,CAAC;QACF,OAAO,CAAC,GAAG,EAAE,CAAC;QAEd,MAAM,WAAW,GAAG,yBAAyB,CAAC,SAAS,CAAC,CAAC;QACzD,WAAW,CAAC,KAAK,EAAE,CAAC;QACpB,MAAM,iBAAiB,GAAG,CAAC,IAAY,EAAE,KAAa,EAAE,EAAE;YACxD,WAAW,CAAC,MAAM,CAAC,YAAY,IAAI,GAAG,CAAC,KAAK,KAAK,aAAa,CAAC,CAAC;QAClE,CAAC,CAAC;QAEF,MAAM,QAAQ,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;QAE9C,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,MAAM,QAAQ,CAAC;YACtC,GAAG,OAAO;YACV,cAAc,EAAE,iBAAiB;YACjC,eAAe,EAAE,CAAC,KAAK,EAAE,EAAE;gBACzB,WAAW,CAAC,IAAI,EAAE,CAAC;gBACnB,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;oBACd,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,KAAK,qBAAqB,CAAC,CAAC,CAAC;oBACvD,OAAO,CAAC,GAAG,EAAE,CAAC;oBACd,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;gBACxB,CAAC;YACH,CAAC;YACD,UAAU,EAAE,CAAC,CAAC,EAAE,EAAE;gBAChB,QAAQ,CAAC,MAAM,CACb,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,MAAM,EACnC,CAAC,CAAC,UAAU,EACZ,CAAC,CAAC,OAAO,EACT,CAAC,CAAC,MAAM,CACT,CAAC;YACJ,CAAC;SACF,CAAC,CAAC;QAEH,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,WAAW,CAAC,IAAI,EAAE,CAAC;YACnB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,uBAAuB,CAAC,CAAC,CAAC;YACnD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,6CAA6C,CAAC,CAAC,CAAC;YACvE,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;gBAC1B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC,CAAC;YACvE,CAAC;YACD,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;gBACxB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC,CAAC;YACnE,CAAC;YACD,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;
gBACvB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,oCAAoC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;YACpF,CAAC;YACD,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;gBAC1B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,gCAAgC,OAAO,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;YACnF,CAAC;YACD,OAAO;QACT,CAAC;QAED,QAAQ,CAAC,IAAI,EAAE,CAAC;QAChB,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACjE,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,YAAY,OAAO,EAAE,CAAC,CAAC,CAAC;QAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
@@ -0,0 +1,19 @@
1
+ import type { FileToDownload, DownloadOptions } from './types.js';
2
+ import type { ProgressStats } from './progress.js';
3
+ export interface DownloadResult { // Value resolved by download().
4
+ files: FileToDownload[]; // Curated file list that was queued for download.
5
+ stats: ProgressStats; // Counters filled in by download(): total, downloaded, skipped, errors, retries, durationMs.
6
+ }
7
+ export type ProgressCallback = (stats: { // Invoked by download() after each file finishes, with the running totals.
8
+ downloaded: number; // Files whose result status was 'downloaded'.
9
+ skipped: number; // Files whose result status was 'skipped'.
10
+ errors: number; // Files with any other result status.
11
+ }) => void;
12
+ export type SnapshotPageCallback = (page: number, count: number) => void; // Passed to fetchAllSnapshots as `onPage`: page number and number of snapshots on that page.
13
+ export declare function download(options: DownloadOptions & { // Fetches snapshots, curates the file list, then downloads through a concurrency-limited queue.
14
+ onProgress?: ProgressCallback; // Called after every processed file.
15
+ onSnapshotPage?: SnapshotPageCallback; // Called for every CDX page fetched.
16
+ onFileListReady?: (count: number) => void; // Called once with the curated file count before downloads start.
17
+ }): Promise<DownloadResult>;
18
+ export declare function listFiles(options: DownloadOptions): Promise<FileToDownload[]>; // Resolves the curated file list (snapshot fetch + filtering) without downloading anything.
19
+ //# sourceMappingURL=downloader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"downloader.d.ts","sourceRoot":"","sources":["../src/downloader.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAY,cAAc,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAU5E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAEnD,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,cAAc,EAAE,CAAC;IACxB,KAAK,EAAE,aAAa,CAAC;CACtB;AAED,MAAM,MAAM,gBAAgB,GAAG,CAAC,KAAK,EAAE;IACrC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB,KAAK,IAAI,CAAC;AAEX,MAAM,MAAM,oBAAoB,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;AAyDzE,wBAAsB,QAAQ,CAC5B,OAAO,EAAE,eAAe,GAAG;IACzB,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,cAAc,CAAC,EAAE,oBAAoB,CAAC;IACtC,eAAe,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CAC3C,GACA,OAAO,CAAC,cAAc,CAAC,CAmEzB;AAED,wBAAsB,SAAS,CAAC,OAAO,EAAE,eAAe,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CAcnF"}
@@ -0,0 +1,118 @@
1
+ import PQueue from 'p-queue';
2
+ import { fetchAllSnapshots } from './cdx.js';
3
+ import { downloadFile } from './file-manager.js';
4
+ import { getBackupName, extractFileId, tidyBytes, matchOnlyFilter, matchExcludeFilter, } from './utils.js';
5
/**
 * Work out the output directory prefix for a download run.
 *
 * @param directory User-supplied directory, if any.
 * @param baseUrl Site URL; only used to derive the default directory name.
 * @returns `directory` with exactly one trailing slash appended when it does
 *   not already end with one, or `websites/<backup name>/` when no directory
 *   was given.
 */
function getBackupPath(directory, baseUrl) {
    if (!directory) {
        return `websites/${getBackupName(baseUrl)}/`;
    }
    if (directory.endsWith('/')) {
        return directory;
    }
    return `${directory}/`;
}
11
+ function curateFileList(snapshots, options) {
12
+ const map = new Map();
13
+ for (const { timestamp, url } of snapshots) {
14
+ const fileId = extractFileId(url);
15
+ if (fileId === null) {
16
+ continue;
17
+ }
18
+ const tidiedId = fileId === '' ? fileId : tidyBytes(fileId);
19
+ if (matchExcludeFilter(url, options.excludeFilter)) {
20
+ continue;
21
+ }
22
+ if (!matchOnlyFilter(url, options.onlyFilter)) {
23
+ continue;
24
+ }
25
+ const key = options.allTimestamps ? `${timestamp}/${tidiedId}` : tidiedId;
26
+ const existing = map.get(key);
27
+ if (existing) {
28
+ if (!options.allTimestamps && existing.timestamp < timestamp) {
29
+ map.set(key, { fileUrl: url, timestamp });
30
+ }
31
+ }
32
+ else {
33
+ map.set(key, { fileUrl: url, timestamp });
34
+ }
35
+ }
36
+ const result = [];
37
+ for (const [key, { fileUrl, timestamp }] of map) {
38
+ result.push({
39
+ fileId: key,
40
+ fileUrl,
41
+ timestamp,
42
+ });
43
+ }
44
+ return options.allTimestamps
45
+ ? result
46
+ : result.sort((a, b) => (b.timestamp > a.timestamp ? 1 : -1));
47
+ }
48
/**
 * Fetch the snapshot list for `options.baseUrl`, curate it, and download every
 * resulting file through a concurrency-limited queue.
 *
 * Defaults: `concurrency` 5 and `maxRetries` 5 when not provided.
 *
 * Callbacks on `options`:
 * - `onSnapshotPage` is forwarded to `fetchAllSnapshots` as `onPage`.
 * - `onFileListReady(count)` fires once, before any download starts.
 * - `onProgress({ downloaded, skipped, errors })` fires after each file.
 *
 * @param options Download options plus the optional callbacks above.
 * @returns `{ files, stats }`: the curated file list and the final counters
 *   (including `retries` and `durationMs`).
 */
export async function download(options) {
    const backupPath = getBackupPath(options.directory, options.baseUrl);
    const concurrency = options.concurrency ?? 5;
    const maxRetries = options.maxRetries ?? 5;
    const snapshots = await fetchAllSnapshots(options.baseUrl, {
        exactUrl: options.exactUrl,
        fromTimestamp: options.fromTimestamp,
        toTimestamp: options.toTimestamp,
        all: options.all,
        maxPages: options.maxPages,
        onPage: options.onSnapshotPage,
    });
    const files = curateFileList(snapshots, {
        onlyFilter: options.onlyFilter,
        excludeFilter: options.excludeFilter,
        allTimestamps: options.allTimestamps,
    });
    const stats = {
        total: files.length,
        downloaded: 0,
        skipped: 0,
        errors: 0,
        retries: 0,
        durationMs: 0,
    };
    options.onFileListReady?.(files.length);
    const startTime = Date.now();
    const queue = new PQueue({ concurrency });
    // Download one file, fold its outcome into `stats`, then report progress.
    const handleFile = async (file) => {
        const result = await downloadFile(file, {
            backupPath,
            overwrite: options.overwrite,
            maxRetries,
            saveErrors: options.all,
            onRetry: () => {
                stats.retries++;
            },
        });
        switch (result.status) {
            case 'downloaded':
                stats.downloaded++;
                break;
            case 'skipped':
                stats.skipped++;
                break;
            default:
                // Any other status counts as an error.
                stats.errors++;
        }
        options.onProgress?.({
            downloaded: stats.downloaded,
            skipped: stats.skipped,
            errors: stats.errors,
        });
    };
    const pending = [];
    for (const file of files) {
        pending.push(queue.add(() => handleFile(file)));
    }
    await Promise.all(pending);
    stats.durationMs = Date.now() - startTime;
    return { files, stats };
}
104
/**
 * Resolve the curated list of files for `options.baseUrl` without downloading
 * anything.
 *
 * @param options Download options; only the snapshot query fields and the
 *   filter fields are read here.
 * @returns The curated file list produced by `curateFileList`.
 */
export async function listFiles(options) {
    const { exactUrl, fromTimestamp, toTimestamp, all, maxPages } = options;
    const snapshots = await fetchAllSnapshots(options.baseUrl, {
        exactUrl,
        fromTimestamp,
        toTimestamp,
        all,
        maxPages,
    });
    const { onlyFilter, excludeFilter, allTimestamps } = options;
    return curateFileList(snapshots, { onlyFilter, excludeFilter, allTimestamps });
}
118
+ //# sourceMappingURL=downloader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"downloader.js","sourceRoot":"","sources":["../src/downloader.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAE7B,OAAO,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACjD,OAAO,EACL,aAAa,EACb,aAAa,EACb,SAAS,EACT,eAAe,EACf,kBAAkB,GACnB,MAAM,YAAY,CAAC;AAgBpB,SAAS,aAAa,CAAC,SAA6B,EAAE,OAAe;IACnE,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,SAAS,GAAG,CAAC;IAC/D,CAAC;IACD,OAAO,YAAY,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC;AAC/C,CAAC;AAED,SAAS,cAAc,CACrB,SAAqB,EACrB,OAGC;IAED,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkD,CAAC;IAEtE,KAAK,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,IAAI,SAAS,EAAE,CAAC;QAC3C,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;YACpB,SAAS;QACX,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,KAAK,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAC5D,IAAI,kBAAkB,CAAC,GAAG,EAAE,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;YACnD,SAAS;QACX,CAAC;QACD,IAAI,CAAC,eAAe,CAAC,GAAG,EAAE,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YAC9C,SAAS;QACX,CAAC;QAED,MAAM,GAAG,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,SAAS,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC1E,MAAM,QAAQ,GAAG,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,CAAC,OAAO,CAAC,aAAa,IAAI,QAAQ,CAAC,SAAS,GAAG,SAAS,EAAE,CAAC;gBAC7D,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC,CAAC;YAC5C,CAAC;QACH,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAqB,EAAE,CAAC;IACpC,KAAK,MAAM,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC,IAAI,GAAG,EAAE,CAAC;QAChD,MAAM,CAAC,IAAI,CAAC;YACV,MAAM,EAAE,GAAG;YACX,OAAO;YACP,SAAS;SACV,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC,aAAa;QAC1B,CAAC,CAAC,MAAM;QACR,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAClE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,OAIC;IAED,MAAM,UAAU,GA
AG,aAAa,CAAC,OAAO,CAAC,SAAS,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;IACrE,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;IAC7C,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,SAAS,GAAG,MAAM,iBAAiB,CAAC,OAAO,CAAC,OAAO,EAAE;QACzD,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,WAAW,EAAE,OAAO,CAAC,WAAW;QAChC,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,MAAM,EAAE,OAAO,CAAC,cAAc;KAC/B,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,cAAc,CAAC,SAAS,EAAE;QACtC,UAAU,EAAE,OAAO,CAAC,UAAU;QAC9B,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,aAAa,EAAE,OAAO,CAAC,aAAa;KACrC,CAAC,CAAC;IAEH,MAAM,KAAK,GAAkB;QAC3B,KAAK,EAAE,KAAK,CAAC,MAAM;QACnB,UAAU,EAAE,CAAC;QACb,OAAO,EAAE,CAAC;QACV,MAAM,EAAE,CAAC;QACT,OAAO,EAAE,CAAC;QACV,UAAU,EAAE,CAAC;KACd,CAAC;IAEF,OAAO,CAAC,eAAe,EAAE,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAExC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC;IAE1C,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CACjB,KAAK,CAAC,GAAG,CAAC,KAAK,IAAI,EAAE;QACnB,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,IAAI,EAAE;YACtC,UAAU;YACV,SAAS,EAAE,OAAO,CAAC,SAAS;YAC5B,UAAU;YACV,UAAU,EAAE,OAAO,CAAC,GAAG;YACvB,OAAO,EAAE,GAAG,EAAE;gBACZ,KAAK,CAAC,OAAO,EAAE,CAAC;YAClB,CAAC;SACF,CAAC,CAAC;QAEH,IAAI,MAAM,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;YACnC,KAAK,CAAC,UAAU,EAAE,CAAC;QACrB,CAAC;aAAM,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YACvC,KAAK,CAAC,OAAO,EAAE,CAAC;QAClB,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,MAAM,EAAE,CAAC;QACjB,CAAC;QACD,OAAO,CAAC,UAAU,EAAE,CAAC;YACnB,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,MAAM,EAAE,KAAK,CAAC,MAAM;SACrB,CAAC,CAAC;IACL,CAAC,CAAC,CACH,CACF,CAAC;IAEF,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;IAE1C,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;AAC1B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,OAAwB;IACtD,MAAM,SAAS,GAAG,MAAM,iBAAiB,CAAC,OAAO,CAAC,OAAO,EAAE;QACzD,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,WAAW,EAAE,OAAO,CAAC,WAAW;QAChC,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,QAAQ,EAAE,OAAO,CAAC,QAAQ;KAC3B,CAAC,CAAC;IAEH,OA
AO,cAAc,CAAC,SAAS,EAAE;QAC/B,UAAU,EAAE,OAAO,CAAC,UAAU;QAC9B,aAAa,EAAE,OAAO,CAAC,aAAa;QACpC,aAAa,EAAE,OAAO,CAAC,aAAa;KACrC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,15 @@
1
+ import type { FileToDownload } from './types.js';
2
+ export interface FileManagerOptions { // Options accepted by downloadFile().
3
+ backupPath: string; // Output prefix; file ids are appended directly, so it is expected to end with '/'.
4
+ overwrite?: boolean; // Defaults to false in downloadFile().
5
+ maxRetries?: number; // Defaults to 5 in downloadFile().
6
+ saveErrors?: boolean; // Defaults to false in downloadFile(); download() passes its `all` option here.
7
+ onRetry?: (url: string, error: Error, attempt: number) => void; // Retry hook; download() uses it to count retries.
8
+ }
9
+ export interface DownloadResult { // Outcome of downloadFile() for a single file.
10
+ status: 'downloaded' | 'skipped' | 'error'; // download() tallies 'downloaded' and 'skipped'; anything else counts as an error.
11
+ filePath?: string; // NOTE(review): presumably the local path involved; confirm against downloadFile().
12
+ error?: Error; // NOTE(review): presumably set when status is 'error'; confirm against downloadFile().
13
+ }
14
+ export declare function downloadFile(file: FileToDownload, options: FileManagerOptions): Promise<DownloadResult>; // Processes one curated file; defaults applied: overwrite=false, maxRetries=5, saveErrors=false.
15
+ //# sourceMappingURL=file-manager.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"file-manager.d.ts","sourceRoot":"","sources":["../src/file-manager.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAMjD,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,OAAO,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;CAChE;AA0DD,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,YAAY,GAAG,SAAS,GAAG,OAAO,CAAC;IAC3C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,KAAK,CAAC;CACf;AAED,wBAAsB,YAAY,CAChC,IAAI,EAAE,cAAc,EACpB,OAAO,EAAE,kBAAkB,GAC1B,OAAO,CAAC,cAAc,CAAC,CAsEzB"}
@@ -0,0 +1,119 @@
1
+ import { mkdir, writeFile, stat, unlink, rename } from 'node:fs/promises';
2
+ import { retry } from './retry.js';
3
+ import { sanitizePath } from './utils.js';
4
+ const WAYBACK_BASE = 'https://web.archive.org/web';
5
/**
 * Build the Wayback Machine URL for one capture of `originalUrl`.
 *
 * The capture timestamp is suffixed with `id_` before the original URL is
 * appended (NOTE(review): presumably this asks the archive for the unmodified
 * capture body; confirm against the Wayback Machine URL documentation).
 *
 * @param timestamp   Capture timestamp as supplied by the snapshot listing.
 * @param originalUrl URL of the archived resource.
 * @returns The absolute archive URL.
 */
function buildWaybackUrl(timestamp, originalUrl) {
  const captureSegment = `${timestamp}id_`;
  return `${WAYBACK_BASE}/${captureSegment}/${originalUrl}`;
}
8
+ function resolvePaths(backupPath, fileId, fileUrl) {
9
+ const pathElements = fileId.split('/');
10
+ const isDir = fileUrl.endsWith('/') || !pathElements[pathElements.length - 1]?.includes('.');
11
+ let dirPath;
12
+ let filePath;
13
+ if (fileId === '') {
14
+ dirPath = backupPath;
15
+ filePath = `${backupPath}index.html`;
16
+ }
17
+ else if (isDir) {
18
+ dirPath = `${backupPath}${pathElements.join('/')}`;
19
+ filePath = `${dirPath}/index.html`;
20
+ }
21
+ else {
22
+ dirPath = `${backupPath}${pathElements.slice(0, -1).join('/')}`;
23
+ filePath = `${backupPath}${pathElements.join('/')}`;
24
+ }
25
+ if (process.platform === 'win32') {
26
+ dirPath = sanitizePath(dirPath);
27
+ filePath = sanitizePath(filePath);
28
+ }
29
+ return { dirPath, filePath };
30
+ }
31
/**
 * Create `dirPath` (and any missing parents).
 *
 * If creation fails with EEXIST because a regular file occupies the reported
 * path, that file is moved to `<path>/index.html` inside a newly created
 * directory of the same name, and creation of `dirPath` is attempted again.
 * An EEXIST caused by something that is not a regular file is ignored.
 *
 * @param dirPath Directory to create.
 * @throws Any `mkdir` error other than EEXIST, and any error raised by the
 *         `stat` / `rename` / `mkdir` calls of the recovery sequence.
 */
async function ensureDirectory(dirPath) {
  let failure;
  try {
    await mkdir(dirPath, { recursive: true });
    return;
  } catch (err) {
    failure = err;
  }

  if (failure.code !== 'EEXIST') {
    throw failure;
  }

  const blockingPath = failure.path ?? dirPath;
  const info = await stat(blockingPath);
  if (!info.isFile()) {
    return;
  }

  // Park the file, put a directory in its place, then move the file inside it.
  const parkedPath = `${blockingPath}.temp`;
  const relocatedPath = `${blockingPath}/index.html`;
  await rename(blockingPath, parkedPath);
  await mkdir(blockingPath, { recursive: true });
  await rename(parkedPath, relocatedPath);
  await ensureDirectory(dirPath);
}
54
/**
 * Download one archived file into the backup tree.
 *
 * Unless `overwrite` is set, a file that already exists at the resolved path
 * is reported as `'skipped'`. Otherwise the target directory is created and
 * the capture is fetched and written, retrying through `retry`. A non-OK HTTP
 * response counts as a failure unless `saveErrors` is set. A zero-byte result
 * is deleted and reported as an error unless `saveErrors` is set.
 *
 * @param file    Entry providing `fileId`, `fileUrl` and `timestamp`.
 * @param options `backupPath`, plus optional `overwrite` (default false),
 *                `maxRetries` (default 5), `saveErrors` (default false) and
 *                `onRetry(url, error, attempt)`.
 * @returns `{ status, filePath }`, with `error` set when `status` is `'error'`.
 *          Download failures are reported through the result; errors raised
 *          while resolving paths or creating the directory propagate.
 */
export async function downloadFile(file, options) {
  const { backupPath, overwrite = false, maxRetries = 5, saveErrors = false } = options;
  const target = resolvePaths(backupPath, file.fileId, file.fileUrl);
  const filePath = target.filePath;

  // Deletes a zero-byte target (unless error bodies are kept). Resolves to true
  // only when a file was actually removed; any stat/unlink failure yields false.
  const discardIfEmpty = async () => {
    try {
      const info = await stat(filePath);
      if (info.size !== 0 || saveErrors) {
        return false;
      }
      await unlink(filePath);
      return true;
    } catch {
      return false;
    }
  };

  if (!overwrite) {
    let alreadyPresent = true;
    try {
      await stat(filePath);
    } catch {
      // File doesn't exist, proceed
      alreadyPresent = false;
    }
    if (alreadyPresent) {
      return { status: 'skipped', filePath };
    }
  }

  await ensureDirectory(target.dirPath);

  const fetchAndStore = async () => {
    const url = buildWaybackUrl(file.timestamp, file.fileUrl);
    const response = await fetch(url, {
      headers: { 'Accept-Encoding': 'identity' },
    });
    if (!response.ok && !saveErrors) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }
    const payload = Buffer.from(await response.arrayBuffer());
    await writeFile(filePath, payload);
  };

  try {
    await retry(fetchAndStore, {
      retries: maxRetries,
      baseDelay: 1000,
      maxDelay: 30000,
      onRetry: (err, attempt) => {
        options.onRetry?.(file.fileUrl, err, attempt);
      },
    });
  } catch (err) {
    await discardIfEmpty();
    return {
      status: 'error',
      filePath,
      error: err instanceof Error ? err : new Error(String(err)),
    };
  }

  if (await discardIfEmpty()) {
    return {
      status: 'error',
      filePath,
      error: new Error('Downloaded file was empty'),
    };
  }
  return { status: 'downloaded', filePath };
}
119
+ //# sourceMappingURL=file-manager.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"file-manager.js","sourceRoot":"","sources":["../src/file-manager.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAG1E,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE1C,MAAM,YAAY,GAAG,6BAA6B,CAAC;AAUnD,SAAS,eAAe,CAAC,SAAiB,EAAE,WAAmB;IAC7D,OAAO,GAAG,YAAY,IAAI,SAAS,OAAO,WAAW,EAAE,CAAC;AAC1D,CAAC;AAED,SAAS,YAAY,CACnB,UAAkB,EAClB,MAAc,EACd,OAAe;IAEf,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACvC,MAAM,KAAK,GAAG,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC;IAE7F,IAAI,OAAe,CAAC;IACpB,IAAI,QAAgB,CAAC;IAErB,IAAI,MAAM,KAAK,EAAE,EAAE,CAAC;QAClB,OAAO,GAAG,UAAU,CAAC;QACrB,QAAQ,GAAG,GAAG,UAAU,YAAY,CAAC;IACvC,CAAC;SAAM,IAAI,KAAK,EAAE,CAAC;QACjB,OAAO,GAAG,GAAG,UAAU,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QACnD,QAAQ,GAAG,GAAG,OAAO,aAAa,CAAC;IACrC,CAAC;SAAM,CAAC;QACN,OAAO,GAAG,GAAG,UAAU,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QAChE,QAAQ,GAAG,GAAG,UAAU,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;IACtD,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;QACjC,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;QAChC,QAAQ,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACpC,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;AAC/B,CAAC;AAED,KAAK,UAAU,eAAe,CAAC,OAAe;IAC5C,IAAI,CAAC;QACH,MAAM,KAAK,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,KAAK,GAAG,GAA4B,CAAC;QAC3C,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,YAAY,GAAG,KAAK,CAAC,IAAI,IAAI,OAAO,CAAC;YAC3C,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,CAAC;YACvC,IAAI,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;gBACnB,MAAM,QAAQ,GAAG,GAAG,YAAY,OAAO,CAAC;gBACxC,MAAM,aAAa,GAAG,GAAG,YAAY,aAAa,CAAC;gBACnD,MAAM,MAAM,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;gBACrC,MAAM,KAAK,CAAC,YAAY,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC/C,MAAM,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;gBACtC,MAAM,eAAe,CAAC,OAAO,CAAC,CAAC;YACjC,CAAC;QACH,CA
AC;aAAM,CAAC;YACN,MAAM,GAAG,CAAC;QACZ,CAAC;IACH,CAAC;AACH,CAAC;AAQD,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,IAAoB,EACpB,OAA2B;IAE3B,MAAM,EAAE,UAAU,EAAE,SAAS,GAAG,KAAK,EAAE,UAAU,GAAG,CAAC,EAAE,UAAU,GAAG,KAAK,EAAE,GAAG,OAAO,CAAC;IACtF,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,YAAY,CAAC,UAAU,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;IAElF,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;YACrB,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;QACzC,CAAC;QAAC,MAAM,CAAC;YACP,8BAA8B;QAChC,CAAC;IACH,CAAC;IAED,MAAM,eAAe,CAAC,OAAO,CAAC,CAAC;IAE/B,IAAI,CAAC;QACH,MAAM,KAAK,CACT,KAAK,IAAI,EAAE;YACT,MAAM,GAAG,GAAG,eAAe,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;YAC1D,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO,EAAE,EAAE,iBAAiB,EAAE,UAAU,EAAE;aAC3C,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChC,MAAM,IAAI,KAAK,CAAC,QAAQ,QAAQ,CAAC,MAAM,KAAK,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YACrE,CAAC;YAED,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;YACzD,MAAM,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QACpC,CAAC,EACD;YACE,OAAO,EAAE,UAAU;YACnB,SAAS,EAAE,IAAI;YACf,QAAQ,EAAE,KAAK;YACf,OAAO,EAAE,CAAC,GAAG,EAAE,OAAO,EAAE,EAAE;gBACxB,OAAO,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;YAChD,CAAC;SACF,CACF,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,CAAC;YACH,MAAM,CAAC,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC/B,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChC,MAAM,MAAM,CAAC,QAAQ,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;QACD,OAAO;YACL,MAAM,EAAE,OAAO;YACf,QAAQ;YACR,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;SAC3D,CAAC;IACJ,CAAC;IAED,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC/B,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;YAChC,MAAM,MAAM,CAAC,QAAQ,CAAC,CAAC;YACvB,OAAO;gBACL,MAAM,EAAE,OAAO;gBACf,QAAQ;gBACR,KAAK,EAAE,IAAI,KAAK,CAAC,2BAA2B,CAAC;aAC9C,CAAC;QACJ,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,SAAS;
IACX,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,CAAC;AAC5C,CAAC"}
@@ -0,0 +1,4 @@
1
+ export { download, listFiles } from './downloader.js';
2
+ export type { DownloadOptions, FileToDownload, Snapshot } from './types.js';
3
+ export type { DownloadResult, ProgressCallback, SnapshotPageCallback } from './downloader.js';
4
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACtD,YAAY,EAAE,eAAe,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAC5E,YAAY,EAAE,cAAc,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,2 @@
1
+ export { download, listFiles } from './downloader.js';
2
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,30 @@
1
+ export interface ProgressCallbacks {
2
+ onStart?: (total: number) => void;
3
+ onFileStart?: (url: string) => void;
4
+ onFileComplete?: (url: string, status: 'downloaded' | 'skipped') => void;
5
+ onFileError?: (url: string, error: Error) => void;
6
+ onFileRetry?: (url: string, attempt: number) => void;
7
+ onComplete?: (stats: ProgressStats) => void;
8
+ }
9
+ export interface ProgressStats {
10
+ total: number;
11
+ downloaded: number;
12
+ skipped: number;
13
+ errors: number;
14
+ retries: number;
15
+ durationMs: number;
16
+ }
17
+ export declare function createProgressBar(useColors?: boolean): {
18
+ start: (total: number) => void;
19
+ update: (completed: number, downloaded: number, skipped: number, errors: number) => void;
20
+ stop: () => void;
21
+ increment: (status: 'downloaded' | 'skipped' | 'error') => void;
22
+ log: (message: string, type?: 'success' | 'warn' | 'error' | 'info') => void;
23
+ };
24
+ export declare function createSnapshotProgressBar(useColors?: boolean): {
25
+ start: () => void;
26
+ update: (message: string) => void;
27
+ stop: () => void;
28
+ };
29
+ export declare function printSummary(stats: ProgressStats): void;
30
+ //# sourceMappingURL=progress.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"progress.d.ts","sourceRoot":"","sources":["../src/progress.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,iBAAiB;IAChC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IAClC,WAAW,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAC;IACpC,cAAc,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,GAAG,SAAS,KAAK,IAAI,CAAC;IACzE,WAAW,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,KAAK,IAAI,CAAC;IAClD,WAAW,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;IACrD,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,aAAa,KAAK,IAAI,CAAC;CAC7C;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,iBAAiB,CAAC,SAAS,UAAO,GAAG;IACnD,KAAK,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IAC/B,MAAM,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;IACzF,IAAI,EAAE,MAAM,IAAI,CAAC;IACjB,SAAS,EAAE,CAAC,MAAM,EAAE,YAAY,GAAG,SAAS,GAAG,OAAO,KAAK,IAAI,CAAC;IAChE,GAAG,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,KAAK,IAAI,CAAC;CAC9E,CA6EA;AAED,wBAAgB,yBAAyB,CAAC,SAAS,UAAO,GAAG;IAC3D,KAAK,EAAE,MAAM,IAAI,CAAC;IAClB,MAAM,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;IAClC,IAAI,EAAE,MAAM,IAAI,CAAC;CAClB,CA8BA;AAED,wBAAgB,YAAY,CAAC,KAAK,EAAE,aAAa,GAAG,IAAI,CAYvD"}
@@ -0,0 +1,115 @@
1
+ import cliProgress from 'cli-progress';
2
+ import chalk from 'chalk';
3
+ export function createProgressBar(useColors = true) {
4
+ let multibar = null;
5
+ let mainBar = null;
6
+ let total = 0;
7
+ let downloaded = 0;
8
+ let skipped = 0;
9
+ let errors = 0;
10
+ let startTime = 0;
11
+ const format = useColors
12
+ ? ` ${chalk.cyan('{bar}')} | {percentage}% | {value}/{total} files | ${chalk.green('✓')} {downloaded} ${chalk.gray('○')} {skipped} ${chalk.red('✗')} {errors}`
13
+ : ' {bar} | {percentage}% | {value}/{total} files | ✓ {downloaded} ○ {skipped} ✗ {errors}';
14
+ return {
15
+ start(t) {
16
+ total = t;
17
+ startTime = Date.now();
18
+ multibar = new cliProgress.MultiBar({
19
+ clearOnComplete: false,
20
+ hideCursor: true,
21
+ format,
22
+ barCompleteChar: useColors ? chalk.green('█') : '█',
23
+ barIncompleteChar: useColors ? chalk.gray('░') : '░',
24
+ });
25
+ mainBar = multibar.create(total, 0, {
26
+ downloaded: 0,
27
+ skipped: 0,
28
+ errors: 0,
29
+ });
30
+ mainBar.setTotal(total);
31
+ },
32
+ update(completed, d, s, e) {
33
+ downloaded = d;
34
+ skipped = s;
35
+ errors = e;
36
+ mainBar?.update(completed, { downloaded: d, skipped: s, errors: e });
37
+ },
38
+ increment(status) {
39
+ if (status === 'downloaded')
40
+ downloaded++;
41
+ else if (status === 'skipped')
42
+ skipped++;
43
+ else
44
+ errors++;
45
+ const completed = downloaded + skipped + errors;
46
+ mainBar?.increment(1, { downloaded, skipped, errors });
47
+ },
48
+ stop() {
49
+ multibar?.stop();
50
+ multibar = null;
51
+ mainBar = null;
52
+ },
53
+ log(message, type = 'info') {
54
+ if (mainBar) {
55
+ multibar?.log(`${getPrefix(type)} ${message}\n`);
56
+ }
57
+ else {
58
+ console.log(`${getPrefix(type)} ${message}`);
59
+ }
60
+ },
61
+ };
62
+ function getPrefix(t) {
63
+ if (!useColors) {
64
+ return t === 'error' ? '[ERROR]' : t === 'warn' ? '[WARN]' : t === 'success' ? '[OK]' : '[INFO]';
65
+ }
66
+ switch (t) {
67
+ case 'success':
68
+ return chalk.green('✓');
69
+ case 'warn':
70
+ return chalk.yellow('⚠');
71
+ case 'error':
72
+ return chalk.red('✗');
73
+ default:
74
+ return chalk.blue('ℹ');
75
+ }
76
+ }
77
+ }
78
+ export function createSnapshotProgressBar(useColors = true) {
79
+ let bar = null;
80
+ const format = useColors
81
+ ? ` ${chalk.cyan('Fetching snapshots')} {message}`
82
+ : ' Fetching snapshots {message}';
83
+ return {
84
+ start() {
85
+ bar = new cliProgress.SingleBar({
86
+ format,
87
+ barCompleteChar: ' ',
88
+ barIncompleteChar: ' ',
89
+ hideCursor: true,
90
+ }, cliProgress.Presets.shades_classic);
91
+ bar.start(1, 0, { message: '' });
92
+ },
93
+ update(message) {
94
+ bar?.update(1, { message });
95
+ },
96
+ stop() {
97
+ bar?.stop();
98
+ bar = null;
99
+ },
100
+ };
101
+ }
102
+ export function printSummary(stats) {
103
+ const durationSec = (stats.durationMs / 1000).toFixed(2);
104
+ console.log();
105
+ console.log(chalk.bold('Download complete'));
106
+ console.log(chalk.gray(` Duration: ${durationSec}s`));
107
+ console.log(chalk.green(` Downloaded: ${stats.downloaded}`));
108
+ if (stats.skipped > 0) {
109
+ console.log(chalk.gray(` Skipped (already exist): ${stats.skipped}`));
110
+ }
111
+ if (stats.errors > 0) {
112
+ console.log(chalk.red(` Errors: ${stats.errors}`));
113
+ }
114
+ }
115
+ //# sourceMappingURL=progress.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"progress.js","sourceRoot":"","sources":["../src/progress.ts"],"names":[],"mappings":"AAAA,OAAO,WAAW,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,MAAM,OAAO,CAAC;AAoB1B,MAAM,UAAU,iBAAiB,CAAC,SAAS,GAAG,IAAI;IAOhD,IAAI,QAAQ,GAAgC,IAAI,CAAC;IACjD,IAAI,OAAO,GAAiC,IAAI,CAAC;IACjD,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,MAAM,MAAM,GAAG,SAAS;QACtB,CAAC,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,8CAA8C,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,iBAAiB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,cAAc,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW;QAC/J,CAAC,CAAC,yFAAyF,CAAC;IAE9F,OAAO;QACL,KAAK,CAAC,CAAS;YACb,KAAK,GAAG,CAAC,CAAC;YACV,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACvB,QAAQ,GAAG,IAAI,WAAW,CAAC,QAAQ,CAAC;gBAClC,eAAe,EAAE,KAAK;gBACtB,UAAU,EAAE,IAAI;gBAChB,MAAM;gBACN,eAAe,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG;gBACnD,iBAAiB,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG;aACrD,CAAC,CAAC;YACH,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,EAAE;gBAClC,UAAU,EAAE,CAAC;gBACb,OAAO,EAAE,CAAC;gBACV,MAAM,EAAE,CAAC;aACV,CAAC,CAAC;YACH,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QAC1B,CAAC;QAED,MAAM,CAAC,SAAiB,EAAE,CAAS,EAAE,CAAS,EAAE,CAAS;YACvD,UAAU,GAAG,CAAC,CAAC;YACf,OAAO,GAAG,CAAC,CAAC;YACZ,MAAM,GAAG,CAAC,CAAC;YACX,OAAO,EAAE,MAAM,CAAC,SAAS,EAAE,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC;QACvE,CAAC;QAED,SAAS,CAAC,MAA0C;YAClD,IAAI,MAAM,KAAK,YAAY;gBAAE,UAAU,EAAE,CAAC;iBACrC,IAAI,MAAM,KAAK,SAAS;gBAAE,OAAO,EAAE,CAAC;;gBACpC,MAAM,EAAE,CAAC;YACd,MAAM,SAAS,GAAG,UAAU,GAAG,OAAO,GAAG,MAAM,CAAC;YAChD,OAAO,EAAE,SAAS,CAAC,CAAC,EAAE,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;QACzD,CAAC;QAED,IAAI;YACF,QAAQ,EAAE,IAAI,EAAE,CAAC;YACjB,QAAQ,GAAG,IAAI,CAAC;YAChB,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC;QAED,GAAG,CAAC,OAAe,EAAE,OAA8C,MAAM;YACvE,IAAI,OAAO,EAAE,CAAC;gBACZ,QAAQ,EAAE,GAAG,CAAC,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,OAAO,IAAI,CAAC,CAAC;YACnD,CAAC;i
BAAM,CAAC;gBACN,OAAO,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;YAC/C,CAAC;QACH,CAAC;KACF,CAAC;IAEF,SAAS,SAAS,CAAC,CAAwC;QACzD,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QACnG,CAAC;QACD,QAAQ,CAAC,EAAE,CAAC;YACV,KAAK,SAAS;gBACZ,OAAO,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAC1B,KAAK,MAAM;gBACT,OAAO,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC3B,KAAK,OAAO;gBACV,OAAO,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACxB;gBACE,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC3B,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,SAAS,GAAG,IAAI;IAKxD,IAAI,GAAG,GAAiC,IAAI,CAAC;IAE7C,MAAM,MAAM,GAAG,SAAS;QACtB,CAAC,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC,YAAY;QACnD,CAAC,CAAC,gCAAgC,CAAC;IAErC,OAAO;QACL,KAAK;YACH,GAAG,GAAG,IAAI,WAAW,CAAC,SAAS,CAC7B;gBACE,MAAM;gBACN,eAAe,EAAE,GAAG;gBACpB,iBAAiB,EAAE,GAAG;gBACtB,UAAU,EAAE,IAAI;aACjB,EACD,WAAW,CAAC,OAAO,CAAC,cAAc,CACnC,CAAC;YACF,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC;QACnC,CAAC;QAED,MAAM,CAAC,OAAe;YACpB,GAAG,EAAE,MAAM,CAAC,CAAC,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;QAC9B,CAAC;QAED,IAAI;YACF,GAAG,EAAE,IAAI,EAAE,CAAC;YACZ,GAAG,GAAG,IAAI,CAAC;QACb,CAAC;KACF,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,KAAoB;IAC/C,MAAM,WAAW,GAAG,CAAC,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IACzD,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC;IAC7C,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,eAAe,WAAW,GAAG,CAAC,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,iBAAiB,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IAC9D,IAAI,KAAK,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;QACtB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,8BAA8B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IACzE,CAAC;IACD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,aAAa,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACtD,CAAC;AACH,CAAC"}
@@ -0,0 +1,11 @@
1
+ export interface RetryOptions {
2
+ retries?: number;
3
+ baseDelay?: number;
4
+ maxDelay?: number;
5
+ onRetry?: (error: Error, attempt: number) => void;
6
+ }
7
+ /**
8
+ * Execute an async function with exponential backoff on failure.
9
+ */
10
+ export declare function retry<T>(fn: () => Promise<T>, options?: RetryOptions): Promise<T>;
11
+ //# sourceMappingURL=retry.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"retry.d.ts","sourceRoot":"","sources":["../src/retry.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;CACnD;AAqBD;;GAEG;AACH,wBAAsB,KAAK,CAAC,CAAC,EAC3B,EAAE,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACpB,OAAO,GAAE,YAAiB,GACzB,OAAO,CAAC,CAAC,CAAC,CAsBZ"}
package/dist/retry.js ADDED
@@ -0,0 +1,43 @@
1
+ const DEFAULT_RETRIES = 5;
2
+ const DEFAULT_BASE_DELAY = 1000;
3
+ const DEFAULT_MAX_DELAY = 30000;
4
+ /**
5
+ * Sleep for a given number of milliseconds.
6
+ */
7
+ function sleep(ms) {
8
+ return new Promise((resolve) => setTimeout(resolve, ms));
9
+ }
10
+ /**
11
+ * Add jitter to a delay to avoid thundering herd.
12
+ */
13
+ function jitter(delay) {
14
+ const jitterRange = delay * 0.2;
15
+ return delay + (Math.random() * 2 - 1) * jitterRange;
16
+ }
17
+ /**
18
+ * Execute an async function with exponential backoff on failure.
19
+ */
20
+ export async function retry(fn, options = {}) {
21
+ const retries = options.retries ?? DEFAULT_RETRIES;
22
+ const baseDelay = options.baseDelay ?? DEFAULT_BASE_DELAY;
23
+ const maxDelay = options.maxDelay ?? DEFAULT_MAX_DELAY;
24
+ const onRetry = options.onRetry;
25
+ let lastError;
26
+ for (let attempt = 0; attempt <= retries; attempt++) {
27
+ try {
28
+ return await fn();
29
+ }
30
+ catch (err) {
31
+ lastError = err instanceof Error ? err : new Error(String(err));
32
+ if (attempt === retries) {
33
+ throw lastError;
34
+ }
35
+ const delay = Math.min(baseDelay * Math.pow(2, attempt), maxDelay);
36
+ const delayWithJitter = Math.max(0, jitter(delay));
37
+ onRetry?.(lastError, attempt + 1);
38
+ await sleep(delayWithJitter);
39
+ }
40
+ }
41
+ throw lastError;
42
+ }
43
+ //# sourceMappingURL=retry.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"retry.js","sourceRoot":"","sources":["../src/retry.ts"],"names":[],"mappings":"AAOA,MAAM,eAAe,GAAG,CAAC,CAAC;AAC1B,MAAM,kBAAkB,GAAG,IAAI,CAAC;AAChC,MAAM,iBAAiB,GAAG,KAAK,CAAC;AAEhC;;GAEG;AACH,SAAS,KAAK,CAAC,EAAU;IACvB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;AAC3D,CAAC;AAED;;GAEG;AACH,SAAS,MAAM,CAAC,KAAa;IAC3B,MAAM,WAAW,GAAG,KAAK,GAAG,GAAG,CAAC;IAChC,OAAO,KAAK,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,WAAW,CAAC;AACvD,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CACzB,EAAoB,EACpB,UAAwB,EAAE;IAE1B,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,eAAe,CAAC;IACnD,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,kBAAkB,CAAC;IAC1D,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,iBAAiB,CAAC;IACvD,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC;IAEhC,IAAI,SAA4B,CAAC;IACjC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;QACpD,IAAI,CAAC;YACH,OAAO,MAAM,EAAE,EAAE,CAAC;QACpB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,SAAS,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;YAChE,IAAI,OAAO,KAAK,OAAO,EAAE,CAAC;gBACxB,MAAM,SAAS,CAAC;YAClB,CAAC;YACD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,EAAE,QAAQ,CAAC,CAAC;YACnE,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YACnD,OAAO,EAAE,CAAC,SAAS,EAAE,OAAO,GAAG,CAAC,CAAC,CAAC;YAClC,MAAM,KAAK,CAAC,eAAe,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IACD,MAAM,SAAS,CAAC;AAClB,CAAC"}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * A single snapshot from the Wayback Machine CDX API.
3
+ */
4
+ export interface Snapshot {
5
+ timestamp: string;
6
+ url: string;
7
+ }
8
+ /**
9
+ * A file to download, derived from a snapshot after filtering and deduplication.
10
+ */
11
+ export interface FileToDownload {
12
+ fileId: string;
13
+ fileUrl: string;
14
+ timestamp: string;
15
+ }
16
+ /**
17
+ * Options for the download orchestrator.
18
+ */
19
+ export interface DownloadOptions {
20
+ baseUrl: string;
21
+ directory?: string;
22
+ exactUrl?: boolean;
23
+ allTimestamps?: boolean;
24
+ fromTimestamp?: number;
25
+ toTimestamp?: number;
26
+ onlyFilter?: string;
27
+ excludeFilter?: string;
28
+ all?: boolean;
29
+ maxPages?: number;
30
+ concurrency?: number;
31
+ overwrite?: boolean;
32
+ maxRetries?: number;
33
+ }
34
+ /**
35
+ * Progress event emitted during download.
36
+ */
37
+ export interface ProgressEvent {
38
+ type: 'started' | 'completed' | 'skipped' | 'error' | 'retry';
39
+ fileUrl?: string;
40
+ filePath?: string;
41
+ error?: Error;
42
+ completed: number;
43
+ total: number;
44
+ }
45
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,GAAG,EAAE,MAAM,CAAC;CACb;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,SAAS,GAAG,WAAW,GAAG,SAAS,GAAG,OAAO,GAAG,OAAO,CAAC;IAC9D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;CACf"}
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Extract the domain/host from a URL for use as backup name.
3
+ * e.g. "https://example.com/path" -> "example.com"
4
+ */
5
+ export declare function getBackupName(url: string): string;
6
+ /**
7
+ * Extract the file path from a full URL (path after domain).
8
+ * e.g. "https://example.com/foo/bar.html" -> "foo/bar.html"
9
+ */
10
+ export declare function extractFileId(url: string): string | null;
11
+ /**
12
+ * Fix invalid UTF-8 bytes (similar to Ruby tidy_bytes).
13
+ * Interprets string as latin1 and re-encodes to UTF-8.
14
+ */
15
+ export declare function tidyBytes(str: string): string;
16
+ /**
17
+ * Parse a filter string into a RegExp or null for literal matching.
18
+ * Supports /pattern/ and /pattern/i notation.
19
+ */
20
+ export declare function parseFilterToRegex(filter: string): RegExp | null;
21
+ /**
22
+ * Check if a URL matches the "only" filter (include).
23
+ */
24
+ export declare function matchOnlyFilter(fileUrl: string, onlyFilter: string | undefined): boolean;
25
+ /**
26
+ * Check if a URL matches the "exclude" filter.
27
+ */
28
+ export declare function matchExcludeFilter(fileUrl: string, excludeFilter: string | undefined): boolean;
29
+ /**
30
+ * Sanitize a path for the filesystem (Windows-incompatible chars).
31
+ */
32
+ export declare function sanitizePath(path: string): string;
33
+ //# sourceMappingURL=utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAMjD;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAexD;AAED;;;GAGG;AACH,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAO7C;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAehE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,SAAS,GAAG,OAAO,CASxF;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,GAAG,SAAS,GAAG,OAAO,CAS9F;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEjD"}
package/dist/utils.js ADDED
@@ -0,0 +1,99 @@
1
/**
 * Derive the backup name (the host part) from a URL.
 * e.g. "https://example.com/path" -> "example.com"
 *
 * A string without `//` is returned unchanged.
 *
 * @param url URL or bare host.
 * @returns The third `/`-separated component when `//` is present, otherwise `url`.
 */
export function getBackupName(url) {
  if (!url.includes('//')) {
    return url;
  }
  const [, , host] = url.split('/');
  return host ?? url;
}
12
+ /**
13
+ * Extract the file path from a full URL (path after domain).
14
+ * e.g. "https://example.com/foo/bar.html" -> "foo/bar.html"
15
+ */
16
+ export function extractFileId(url) {
17
+ if (!url.includes('/')) {
18
+ return null;
19
+ }
20
+ const parts = url.split('/');
21
+ const pathParts = parts.slice(3);
22
+ if (pathParts.length === 0) {
23
+ return '';
24
+ }
25
+ const joined = pathParts.join('/');
26
+ try {
27
+ return decodeURIComponent(joined);
28
+ }
29
+ catch {
30
+ return tidyBytes(joined);
31
+ }
32
+ }
33
+ /**
34
+ * Fix invalid UTF-8 bytes (similar to Ruby tidy_bytes).
35
+ * Interprets string as latin1 and re-encodes to UTF-8.
36
+ */
37
+ export function tidyBytes(str) {
38
+ try {
39
+ const buf = Buffer.from(str, 'latin1');
40
+ return buf.toString('utf8');
41
+ }
42
+ catch {
43
+ return str;
44
+ }
45
+ }
46
+ /**
47
+ * Parse a filter string into a RegExp or null for literal matching.
48
+ * Supports /pattern/ and /pattern/i notation.
49
+ */
50
+ export function parseFilterToRegex(filter) {
51
+ if (!filter || filter.length < 2) {
52
+ return null;
53
+ }
54
+ if (filter.startsWith('/') && filter.includes('/')) {
55
+ const lastSlash = filter.lastIndexOf('/');
56
+ const pattern = filter.slice(1, lastSlash).replace(/\\\//g, '/');
57
+ const flags = filter.slice(lastSlash + 1);
58
+ try {
59
+ return new RegExp(pattern, flags);
60
+ }
61
+ catch {
62
+ return null;
63
+ }
64
+ }
65
+ return null;
66
+ }
67
+ /**
68
+ * Check if a URL matches the "only" filter (include).
69
+ */
70
+ export function matchOnlyFilter(fileUrl, onlyFilter) {
71
+ if (!onlyFilter) {
72
+ return true;
73
+ }
74
+ const regex = parseFilterToRegex(onlyFilter);
75
+ if (regex !== null) {
76
+ return regex.test(fileUrl);
77
+ }
78
+ return fileUrl.toLowerCase().includes(onlyFilter.toLowerCase());
79
+ }
80
+ /**
81
+ * Check if a URL matches the "exclude" filter.
82
+ */
83
+ export function matchExcludeFilter(fileUrl, excludeFilter) {
84
+ if (!excludeFilter) {
85
+ return false;
86
+ }
87
+ const regex = parseFilterToRegex(excludeFilter);
88
+ if (regex !== null) {
89
+ return regex.test(fileUrl);
90
+ }
91
+ return fileUrl.toLowerCase().includes(excludeFilter.toLowerCase());
92
+ }
93
/**
 * Make a path safe for filesystems that reject certain characters (Windows):
 * each of `: * ? & = < > \ |` is replaced by `%` followed by its character
 * code in lowercase hexadecimal.
 *
 * @param path Path to sanitize.
 * @returns The path with every listed character percent-encoded.
 */
export function sanitizePath(path) {
  const reservedChars = /[:*?&=<>\\|]/g;
  return path.replace(reservedChars, (ch) => `%${ch.charCodeAt(0).toString(16)}`);
}
99
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACvB,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC7B,OAAO,KAAK,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;IACzB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC7B,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACjC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACnC,IAAI,CAAC;QACH,OAAO,kBAAkB,CAAC,MAAM,CAAC,CAAC;IACpC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC;IAC3B,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QACvC,OAAO,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC9B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,MAAc;IAC/C,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACjC,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACnD,MAAM,SAAS,GAAG,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAC1C,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QACjE,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;QAC1C,IAAI,CAAC;YACH,OAAO,IAAI,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QACpC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,OAAe,EAAE,UAA8B;IAC7E,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,KAAK,GAAG,kBAAkB,CAAC,UAAU,CAAC,CAAC;IAC7C,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QACnB,OAAO,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,WAAW,
EAAE,CAAC,CAAC;AAClE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAe,EAAE,aAAiC;IACnF,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,KAAK,GAAG,kBAAkB,CAAC,aAAa,CAAC,CAAC;IAChD,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QACnB,OAAO,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,CAAC;AACrE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,OAAO,IAAI,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;AAClF,CAAC"}
package/package.json ADDED
@@ -0,0 +1,50 @@
1
+ {
2
+ "name": "wayback-dl",
3
+ "version": "1.0.0",
4
+ "description": "Download archived websites from the Internet Archive Wayback Machine",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "bin": {
9
+ "wayback-dl": "bin/wayback-dl.js"
10
+ },
11
+ "files": [
12
+ "dist",
13
+ "bin"
14
+ ],
15
+ "scripts": {
16
+ "build": "tsc",
17
+ "prepare": "pnpm run build",
18
+ "test": "node --import tsx --test test/*.test.ts",
19
+ "start": "node --import tsx bin/wayback-dl.js",
20
+ "dev": "node --import tsx src/cli.ts"
21
+ },
22
+ "keywords": [
23
+ "wayback-machine",
24
+ "internet-archive",
25
+ "archive",
26
+ "download",
27
+ "cli"
28
+ ],
29
+ "author": "Scott Weaver <scottmweaver@gmail.com>",
30
+ "license": "MIT",
31
+ "repository": {
32
+ "type": "git",
33
+ "url": "https://github.com/tdlm/wayback-machine-downloader.git"
34
+ },
35
+ "engines": {
36
+ "node": ">=18.0.0"
37
+ },
38
+ "dependencies": {
39
+ "chalk": "^5.3.0",
40
+ "cli-progress": "^3.12.0",
41
+ "commander": "^12.1.0",
42
+ "p-queue": "^8.0.1"
43
+ },
44
+ "devDependencies": {
45
+ "@types/cli-progress": "^3.11.6",
46
+ "@types/node": "^22.10.1",
47
+ "tsx": "^4.19.2",
48
+ "typescript": "^5.7.2"
49
+ }
50
+ }