@d-zero/replicator 0.9.6 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/dist/cli.js +4 -1
- package/dist/index.js +3 -2
- package/dist/resource-downloader.d.ts +2 -1
- package/dist/resource-downloader.js +3 -2
- package/dist/types.d.ts +1 -0
- package/package.json +8 -8
package/README.md
CHANGED
|
@@ -22,7 +22,8 @@ npx @d-zero/replicator <url...> -o <output-directory> [options]
|
|
|
22
22
|
- `-o, --output <dir>`: 出力ディレクトリ(必須)
|
|
23
23
|
- `-t, --timeout <ms>`: リクエストタイムアウト(ミリ秒、デフォルト: 30000)
|
|
24
24
|
- `-d, --devices <devices>`: デバイスプリセット(カンマ区切り、デフォルト: desktop-compact,mobile)
|
|
25
|
-
- `-l, --limit <number>`:
|
|
25
|
+
- `-l, --limit <number>`: ページスキャン(Phase 1)の並列処理数の上限(デフォルト: 3)
|
|
26
|
+
- `--download-limit <number>`: リソースダウンロード(Phase 2)の並列処理数の上限(デフォルト: 10)
|
|
26
27
|
- `--interval <ms>`: 並列実行間の間隔(デフォルト: なし)
|
|
27
28
|
- 数値または"min-max"形式でランダム範囲を指定可能
|
|
28
29
|
- `--only <type>`: ダウンロード対象を限定(`page` または `resource`)
|
|
@@ -92,7 +93,8 @@ await replicate({
|
|
|
92
93
|
'https://example.com/page3',
|
|
93
94
|
],
|
|
94
95
|
outputDir: './output',
|
|
95
|
-
limit: 2, //
|
|
96
|
+
limit: 2, // ページスキャン(Phase 1)は最大2並列
|
|
97
|
+
downloadLimit: 5, // リソースダウンロード(Phase 2)は最大5並列
|
|
96
98
|
});
|
|
97
99
|
|
|
98
100
|
// カスタムデバイス
|
package/dist/cli.js
CHANGED
|
@@ -23,7 +23,8 @@ const { options, args } = createCLI({
|
|
|
23
23
|
' -o, --output <dir> Output directory (required)',
|
|
24
24
|
' -t, --timeout <ms> Request timeout in milliseconds (default: 30000)',
|
|
25
25
|
' -d, --devices <devices> Device presets (comma-separated, default: desktop-compact,mobile)',
|
|
26
|
-
' -l, --limit <number> Parallel execution limit (default: 3)',
|
|
26
|
+
' -l, --limit <number> Parallel execution limit for page scan (default: 3)',
|
|
27
|
+
' --download-limit <number> Parallel execution limit for resource download (default: 10)',
|
|
27
28
|
' --interval <ms> Interval between parallel executions (default: none)',
|
|
28
29
|
' Format: number or "min-max" for random range',
|
|
29
30
|
' --only <type> Download only specified type: page or resource',
|
|
@@ -47,6 +48,7 @@ const { options, args } = createCLI({
|
|
|
47
48
|
timeout: cli.timeout ? Number(cli.timeout) : undefined,
|
|
48
49
|
devices: cli.devices,
|
|
49
50
|
limit: cli.limit ? Number(cli.limit) : undefined,
|
|
51
|
+
downloadLimit: cli['download-limit'] ? Number(cli['download-limit']) : undefined,
|
|
50
52
|
only: cli.only,
|
|
51
53
|
auth: cli.auth,
|
|
52
54
|
}),
|
|
@@ -77,6 +79,7 @@ try {
|
|
|
77
79
|
timeout: options.timeout,
|
|
78
80
|
devices,
|
|
79
81
|
limit: options.limit,
|
|
82
|
+
downloadLimit: options.downloadLimit,
|
|
80
83
|
only: options.only,
|
|
81
84
|
interval: options.interval,
|
|
82
85
|
username,
|
package/dist/index.js
CHANGED
|
@@ -103,7 +103,7 @@ async function collectAllResourceUrls(urls, targetSizes, timeout, verbose, limit
|
|
|
103
103
|
* @param options - Replication options
|
|
104
104
|
*/
|
|
105
105
|
export async function replicate(options) {
|
|
106
|
-
const { urls, outputDir, verbose = false, timeout = 30_000, devices, limit = 3, only, interval, username, password, } = options;
|
|
106
|
+
const { urls, outputDir, verbose = false, timeout = 30_000, devices, limit = 3, downloadLimit = 10, only, interval, username, password, } = options;
|
|
107
107
|
if (urls.length === 0) {
|
|
108
108
|
throw new Error('At least one URL is required');
|
|
109
109
|
}
|
|
@@ -128,6 +128,7 @@ export async function replicate(options) {
|
|
|
128
128
|
progress(c.bold.cyan(`🌐 Replicating ${urls.length} URL(s)`));
|
|
129
129
|
progress(c.gray(` Output: ${outputDir}`));
|
|
130
130
|
progress(c.gray(` Parallel limit: ${limit}`));
|
|
131
|
+
progress(c.gray(` Download limit: ${downloadLimit}`));
|
|
131
132
|
progress('');
|
|
132
133
|
// Phase 1: Collect resource metadata from all URLs
|
|
133
134
|
let allEncodedUrls;
|
|
@@ -152,7 +153,7 @@ export async function replicate(options) {
|
|
|
152
153
|
// Use the first URL as base URL for constructing full URLs
|
|
153
154
|
const baseUrl = urls[0];
|
|
154
155
|
// Download all resources
|
|
155
|
-
await downloadResources([...allEncodedUrls], baseUrl, outputDir, progress, verbose, only, interval, username, password);
|
|
156
|
+
await downloadResources([...allEncodedUrls], baseUrl, outputDir, progress, verbose, only, interval, username, password, downloadLimit);
|
|
156
157
|
progress('');
|
|
157
158
|
progress(c.bold.green(`✅ Replication complete!`));
|
|
158
159
|
progress(c.gray(` All resources saved to: ${outputDir}`));
|
|
package/dist/resource-downloader.d.ts
CHANGED
|
@@ -19,5 +19,6 @@ import type { DelayOptions } from '@d-zero/shared/delay';
|
|
|
19
19
|
* @param interval
|
|
20
20
|
* @param username
|
|
21
21
|
* @param password
|
|
22
|
+
* @param limit
|
|
22
23
|
*/
|
|
23
|
-
export declare function downloadResources(encodedPaths: string[], baseUrl: string, outputDir: string, logger: (message: string) => void, verbose?: boolean, only?: 'page' | 'resource', interval?: number | DelayOptions, username?: string, password?: string): Promise<void>;
|
|
24
|
+
export declare function downloadResources(encodedPaths: string[], baseUrl: string, outputDir: string, logger: (message: string) => void, verbose?: boolean, only?: 'page' | 'resource', interval?: number | DelayOptions, username?: string, password?: string, limit?: number): Promise<void>;
|
|
package/dist/resource-downloader.js
CHANGED
|
@@ -23,8 +23,9 @@ import c from 'ansi-colors';
|
|
|
23
23
|
* @param interval
|
|
24
24
|
* @param username
|
|
25
25
|
* @param password
|
|
26
|
+
* @param limit
|
|
26
27
|
*/
|
|
27
|
-
export async function downloadResources(encodedPaths, baseUrl, outputDir, logger, verbose = false, only, interval, username, password) {
|
|
28
|
+
export async function downloadResources(encodedPaths, baseUrl, outputDir, logger, verbose = false, only, interval, username, password, limit = 10) {
|
|
28
29
|
const uniqueResources = new Map();
|
|
29
30
|
// Parse all encoded pathnames
|
|
30
31
|
for (const encodedPath of encodedPaths) {
|
|
@@ -110,7 +111,7 @@ export async function downloadResources(encodedPaths, baseUrl, outputDir, logger
|
|
|
110
111
|
update(c.green('✅ Downloaded'));
|
|
111
112
|
};
|
|
112
113
|
}, {
|
|
113
|
-
limit
|
|
114
|
+
limit,
|
|
114
115
|
verbose,
|
|
115
116
|
interval,
|
|
116
117
|
header: (progress, done, total, limit) => {
|
package/dist/types.d.ts
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@d-zero/replicator",
|
|
3
|
-
"version": "0.9.6",
|
|
3
|
+
"version": "0.10.1",
|
|
4
4
|
"description": "Replicate web pages with all their resources to local directories",
|
|
5
5
|
"author": "D-ZERO",
|
|
6
6
|
"license": "MIT",
|
|
@@ -24,12 +24,12 @@
|
|
|
24
24
|
"clean": "tsc --build --clean"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@d-zero/cli-core": "1.3.
|
|
28
|
-
"@d-zero/dealer": "1.7.
|
|
29
|
-
"@d-zero/puppeteer-dealer": "0.7.
|
|
30
|
-
"@d-zero/puppeteer-page-scan": "4.
|
|
31
|
-
"@d-zero/puppeteer-scroll": "
|
|
32
|
-
"@d-zero/shared": "0.
|
|
27
|
+
"@d-zero/cli-core": "1.3.9",
|
|
28
|
+
"@d-zero/dealer": "1.7.5",
|
|
29
|
+
"@d-zero/puppeteer-dealer": "0.7.11",
|
|
30
|
+
"@d-zero/puppeteer-page-scan": "4.5.0",
|
|
31
|
+
"@d-zero/puppeteer-scroll": "4.0.0",
|
|
32
|
+
"@d-zero/shared": "0.22.0",
|
|
33
33
|
"ansi-colors": "4.1.3",
|
|
34
34
|
"minimist": "1.2.8",
|
|
35
35
|
"puppeteer": "24.37.5"
|
|
@@ -42,5 +42,5 @@
|
|
|
42
42
|
"url": "https://github.com/d-zero-dev/tools.git",
|
|
43
43
|
"directory": "packages/@d-zero/replicator"
|
|
44
44
|
},
|
|
45
|
-
"gitHead": "
|
|
45
|
+
"gitHead": "2d24e08c0cb516b7ea9d07a4301eb991193cca11"
|
|
46
46
|
}
|