@d-zero/replicator 0.6.1 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +3 -0
- package/dist/index.js +6 -4
- package/dist/resource-downloader.d.ts +12 -1
- package/dist/resource-downloader.js +23 -11
- package/dist/types.d.ts +2 -0
- package/package.json +9 -9
package/dist/cli.js
CHANGED
|
@@ -18,6 +18,8 @@ const { options, args } = createCLI({
|
|
|
18
18
|
' -t, --timeout <ms> Request timeout in milliseconds (default: 30000)',
|
|
19
19
|
' -d, --devices <devices> Device presets (comma-separated, default: desktop-compact,mobile)',
|
|
20
20
|
' -l, --limit <number> Parallel execution limit (default: 3)',
|
|
21
|
+
' --interval <ms> Interval between parallel executions (default: none)',
|
|
22
|
+
' Format: number or "min-max" for random range',
|
|
21
23
|
' --only <type> Download only specified type: page or resource',
|
|
22
24
|
' -v, --verbose Enable verbose logging',
|
|
23
25
|
'',
|
|
@@ -65,6 +67,7 @@ try {
|
|
|
65
67
|
devices,
|
|
66
68
|
limit: options.limit,
|
|
67
69
|
only: options.only,
|
|
70
|
+
interval: options.interval,
|
|
68
71
|
});
|
|
69
72
|
}
|
|
70
73
|
catch (error) {
|
package/dist/index.js
CHANGED
|
@@ -54,9 +54,10 @@ function collectPageUrlsOnly(urls, progress) {
|
|
|
54
54
|
* @param verbose - Enable verbose logging
|
|
55
55
|
* @param limit - Parallel execution limit
|
|
56
56
|
* @param progress - Progress logger function
|
|
57
|
+
* @param interval
|
|
57
58
|
* @returns Set of encoded URLs
|
|
58
59
|
*/
|
|
59
|
-
async function collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit, progress) {
|
|
60
|
+
async function collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit, progress, interval) {
|
|
60
61
|
progress(c.bold.yellow('📡 Phase 1: Collecting resource metadata...'));
|
|
61
62
|
const results = [];
|
|
62
63
|
await deal(urls.map((url) => ({ id: null, url })), (_, done, total) => {
|
|
@@ -68,6 +69,7 @@ async function collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit
|
|
|
68
69
|
}, {}), {
|
|
69
70
|
verbose,
|
|
70
71
|
limit,
|
|
72
|
+
interval,
|
|
71
73
|
each: (result) => {
|
|
72
74
|
results.push(result);
|
|
73
75
|
},
|
|
@@ -119,7 +121,7 @@ async function collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit
|
|
|
119
121
|
* @param options - Replication options
|
|
120
122
|
*/
|
|
121
123
|
export async function replicate(options) {
|
|
122
|
-
const { urls, outputDir, verbose = false, timeout = 30_000, devices, limit = 3, only, } = options;
|
|
124
|
+
const { urls, outputDir, verbose = false, timeout = 30_000, devices, limit = 3, only, interval, } = options;
|
|
123
125
|
if (urls.length === 0) {
|
|
124
126
|
throw new Error('At least one URL is required');
|
|
125
127
|
}
|
|
@@ -154,7 +156,7 @@ export async function replicate(options) {
|
|
|
154
156
|
}
|
|
155
157
|
case 'resource':
|
|
156
158
|
case undefined: {
|
|
157
|
-
allEncodedUrls = await collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit, progress);
|
|
159
|
+
allEncodedUrls = await collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit, progress, interval);
|
|
158
160
|
break;
|
|
159
161
|
}
|
|
160
162
|
default: {
|
|
@@ -168,7 +170,7 @@ export async function replicate(options) {
|
|
|
168
170
|
// Use the first URL as base URL for constructing full URLs
|
|
169
171
|
const baseUrl = urls[0];
|
|
170
172
|
// Download all resources
|
|
171
|
-
await downloadResources([...allEncodedUrls], baseUrl, outputDir, progress, verbose, only);
|
|
173
|
+
await downloadResources([...allEncodedUrls], baseUrl, outputDir, progress, verbose, only, interval);
|
|
172
174
|
progress('');
|
|
173
175
|
progress(c.bold.green(`✅ Replication complete!`));
|
|
174
176
|
progress(c.gray(` All resources saved to: ${outputDir}`));
|
|
package/dist/resource-downloader.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { DelayOptions } from '@d-zero/shared/delay';
|
|
1
2
|
/**
|
|
2
3
|
* Download and save resources to disk
|
|
3
4
|
* @param encodedPaths - Array of encoded pathnames
|
|
@@ -7,4 +8,14 @@
|
|
|
7
8
|
* @param verbose - Enable verbose output
|
|
8
9
|
* @param only - Download only specified type: page or resource
|
|
9
10
|
*/
|
|
10
|
-
|
|
11
|
+
/**
|
|
12
|
+
*
|
|
13
|
+
* @param encodedPaths
|
|
14
|
+
* @param baseUrl
|
|
15
|
+
* @param outputDir
|
|
16
|
+
* @param logger
|
|
17
|
+
* @param verbose
|
|
18
|
+
* @param only
|
|
19
|
+
* @param interval
|
|
20
|
+
*/
|
|
21
|
+
export declare function downloadResources(encodedPaths: string[], baseUrl: string, outputDir: string, logger: (message: string) => void, verbose?: boolean, only?: 'page' | 'resource', interval?: number | DelayOptions): Promise<void>;
|
|
package/dist/resource-downloader.js
CHANGED
|
@@ -35,7 +35,17 @@ function parseEncodedPath(encodedPath, baseUrl) {
|
|
|
35
35
|
* @param verbose - Enable verbose output
|
|
36
36
|
* @param only - Download only specified type: page or resource
|
|
37
37
|
*/
|
|
38
|
-
|
|
38
|
+
/**
|
|
39
|
+
*
|
|
40
|
+
* @param encodedPaths
|
|
41
|
+
* @param baseUrl
|
|
42
|
+
* @param outputDir
|
|
43
|
+
* @param logger
|
|
44
|
+
* @param verbose
|
|
45
|
+
* @param only
|
|
46
|
+
* @param interval
|
|
47
|
+
*/
|
|
48
|
+
export async function downloadResources(encodedPaths, baseUrl, outputDir, logger, verbose = false, only, interval) {
|
|
39
49
|
const uniqueResources = new Map();
|
|
40
50
|
// Parse all encoded pathnames
|
|
41
51
|
for (const encodedPath of encodedPaths) {
|
|
@@ -67,13 +77,14 @@ export async function downloadResources(encodedPaths, baseUrl, outputDir, logger
|
|
|
67
77
|
logger('');
|
|
68
78
|
let downloaded = 0;
|
|
69
79
|
let failed = 0;
|
|
70
|
-
await deal(tasks, (task, update, index) => {
|
|
80
|
+
await deal(tasks, (task, update, index, setLineHeader) => {
|
|
71
81
|
const fileId = index.toString().padStart(4, '0');
|
|
72
82
|
const lineHeader = `%braille% ${c.bgWhite(` ${fileId} `)} ${c.gray(task.localPath)}: `;
|
|
83
|
+
setLineHeader(lineHeader);
|
|
73
84
|
return async () => {
|
|
74
|
-
update(
|
|
85
|
+
update('Fetching%dots%');
|
|
75
86
|
const response = await fetch(task.url).catch((error) => {
|
|
76
|
-
update(
|
|
87
|
+
update(c.red(`❌ Fetch failed: ${error.message}`));
|
|
77
88
|
failed++;
|
|
78
89
|
return null;
|
|
79
90
|
});
|
|
@@ -81,30 +92,30 @@ export async function downloadResources(encodedPaths, baseUrl, outputDir, logger
|
|
|
81
92
|
return;
|
|
82
93
|
}
|
|
83
94
|
if (!response.ok) {
|
|
84
|
-
update(
|
|
95
|
+
update(c.red(`❌ HTTP ${response.status}`));
|
|
85
96
|
failed++;
|
|
86
97
|
return;
|
|
87
98
|
}
|
|
88
|
-
update(
|
|
99
|
+
update('Reading content%dots%');
|
|
89
100
|
const content = Buffer.from(await response.arrayBuffer());
|
|
90
101
|
const fullPath = path.join(outputDir, task.localPath);
|
|
91
102
|
const dir = path.dirname(fullPath);
|
|
92
|
-
update(
|
|
103
|
+
update('Creating directory%dots%');
|
|
93
104
|
const mkdirSuccess = await mkdir(dir, { recursive: true })
|
|
94
105
|
.then(() => true)
|
|
95
106
|
.catch((error) => {
|
|
96
|
-
update(
|
|
107
|
+
update(c.red(`❌ Failed to create directory: ${error.message}`));
|
|
97
108
|
failed++;
|
|
98
109
|
return false;
|
|
99
110
|
});
|
|
100
111
|
if (!mkdirSuccess) {
|
|
101
112
|
return;
|
|
102
113
|
}
|
|
103
|
-
update(
|
|
114
|
+
update('Writing file%dots%');
|
|
104
115
|
const writeSuccess = await writeFile(fullPath, content)
|
|
105
116
|
.then(() => true)
|
|
106
117
|
.catch((error) => {
|
|
107
|
-
update(
|
|
118
|
+
update(c.red(`❌ Failed to write: ${error.message}`));
|
|
108
119
|
failed++;
|
|
109
120
|
return false;
|
|
110
121
|
});
|
|
@@ -112,11 +123,12 @@ export async function downloadResources(encodedPaths, baseUrl, outputDir, logger
|
|
|
112
123
|
return;
|
|
113
124
|
}
|
|
114
125
|
downloaded++;
|
|
115
|
-
update(
|
|
126
|
+
update(c.green('✅ Downloaded'));
|
|
116
127
|
};
|
|
117
128
|
}, {
|
|
118
129
|
limit: 10,
|
|
119
130
|
verbose,
|
|
131
|
+
interval,
|
|
120
132
|
header: (progress, done, total, limit) => {
|
|
121
133
|
const percentage = Math.round(progress * 100);
|
|
122
134
|
if (progress === 1) {
|
package/dist/types.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { DelayOptions } from '@d-zero/shared/delay';
|
|
1
2
|
export interface ReplicateOptions {
|
|
2
3
|
urls: string[];
|
|
3
4
|
outputDir: string;
|
|
@@ -9,6 +10,7 @@ export interface ReplicateOptions {
|
|
|
9
10
|
}>;
|
|
10
11
|
limit?: number;
|
|
11
12
|
only?: 'page' | 'resource';
|
|
13
|
+
interval?: number | DelayOptions;
|
|
12
14
|
}
|
|
13
15
|
export interface Resource {
|
|
14
16
|
url: string;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@d-zero/replicator",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.1",
|
|
4
4
|
"description": "Replicate web pages with all their resources to local directories",
|
|
5
5
|
"author": "D-ZERO",
|
|
6
6
|
"license": "MIT",
|
|
@@ -24,18 +24,18 @@
|
|
|
24
24
|
"clean": "tsc --build --clean"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@d-zero/cli-core": "1.1
|
|
28
|
-
"@d-zero/dealer": "1.
|
|
29
|
-
"@d-zero/puppeteer-dealer": "0.5.
|
|
30
|
-
"@d-zero/puppeteer-page-scan": "4.2.
|
|
31
|
-
"@d-zero/puppeteer-scroll": "3.
|
|
32
|
-
"@d-zero/shared": "0.
|
|
27
|
+
"@d-zero/cli-core": "1.2.1",
|
|
28
|
+
"@d-zero/dealer": "1.4.1",
|
|
29
|
+
"@d-zero/puppeteer-dealer": "0.5.11",
|
|
30
|
+
"@d-zero/puppeteer-page-scan": "4.2.7",
|
|
31
|
+
"@d-zero/puppeteer-scroll": "3.1.1",
|
|
32
|
+
"@d-zero/shared": "0.14.0",
|
|
33
33
|
"ansi-colors": "4.1.3",
|
|
34
34
|
"minimist": "1.2.8",
|
|
35
|
-
"puppeteer": "24.
|
|
35
|
+
"puppeteer": "24.27.0"
|
|
36
36
|
},
|
|
37
37
|
"devDependencies": {
|
|
38
38
|
"@types/minimist": "1.2.5"
|
|
39
39
|
},
|
|
40
|
-
"gitHead": "
|
|
40
|
+
"gitHead": "2397932a3e3fe5f0631c0d540bdcac732fc71f19"
|
|
41
41
|
}
|