@d-zero/replicator 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -12
- package/dist/cli.js +27 -2
- package/dist/index.js +138 -46
- package/package.json +7 -5
package/README.md
CHANGED
|
@@ -1,36 +1,80 @@
|
|
|
1
|
-
#
|
|
1
|
+
# `@d-zero/replicator`
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
ウェブページとそのリソースをレスポンシブ画像対応でローカルディレクトリに複製するツールです。
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## インストール
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
8
|
npm install @d-zero/replicator
|
|
9
9
|
```
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## 使い方
|
|
12
12
|
|
|
13
13
|
### CLI
|
|
14
14
|
|
|
15
15
|
```bash
|
|
16
|
-
npx @d-zero/replicator <url> -o <output-directory>
|
|
16
|
+
npx @d-zero/replicator <url> -o <output-directory> [options]
|
|
17
17
|
```
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
#### オプション
|
|
20
|
+
|
|
21
|
+
- `-o, --output <dir>`: 出力ディレクトリ(必須)
|
|
22
|
+
- `-t, --timeout <ms>`: リクエストタイムアウト(ミリ秒、デフォルト: 30000)
|
|
23
|
+
- `-d, --devices <devices>`: デバイスプリセット(カンマ区切り、デフォルト: desktop-compact,mobile)
|
|
24
|
+
- `-v, --verbose`: 詳細ログモード
|
|
25
|
+
|
|
26
|
+
#### 利用可能なデバイスプリセット
|
|
27
|
+
|
|
28
|
+
- `desktop`: 1400px幅
|
|
29
|
+
- `tablet`: 768px幅
|
|
30
|
+
- `mobile`: 375px幅(2倍解像度)
|
|
31
|
+
- `desktop-hd`: 1920px幅
|
|
32
|
+
- `desktop-compact`: 1280px幅
|
|
33
|
+
- `mobile-large`: 414px幅(3倍解像度)
|
|
34
|
+
- `mobile-small`: 320px幅(2倍解像度)
|
|
35
|
+
|
|
36
|
+
#### 使用例
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# デフォルトデバイス(desktop-compact, mobile)
|
|
40
|
+
npx @d-zero/replicator https://example.com -o ./output
|
|
41
|
+
|
|
42
|
+
# カスタムデバイス指定
|
|
43
|
+
npx @d-zero/replicator https://example.com -o ./output --devices desktop,tablet,mobile
|
|
44
|
+
|
|
45
|
+
# タイムアウト指定
|
|
46
|
+
npx @d-zero/replicator https://example.com -o ./output --timeout 60000
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### プログラマティック使用
|
|
20
50
|
|
|
21
51
|
```typescript
|
|
22
52
|
import { replicate } from '@d-zero/replicator';
|
|
23
53
|
|
|
54
|
+
// デフォルトデバイス
|
|
24
55
|
await replicate('https://example.com', './output');
|
|
56
|
+
|
|
57
|
+
// カスタムデバイス
|
|
58
|
+
await replicate('https://example.com', './output', {
|
|
59
|
+
devices: {
|
|
60
|
+
desktop: { width: 1400 },
|
|
61
|
+
mobile: { width: 375, resolution: 2 },
|
|
62
|
+
},
|
|
63
|
+
timeout: 30000,
|
|
64
|
+
verbose: true,
|
|
65
|
+
});
|
|
25
66
|
```
|
|
26
67
|
|
|
27
|
-
##
|
|
68
|
+
## 機能
|
|
28
69
|
|
|
29
|
-
-
|
|
30
|
-
-
|
|
31
|
-
-
|
|
32
|
-
-
|
|
33
|
-
-
|
|
70
|
+
- **レスポンシブ画像対応**: 複数のデバイス幅で`<picture>`要素やメディアクエリのリソースを取得
|
|
71
|
+
- **遅延読み込み対応**: ページを自動スクロールして`loading=lazy`や`IntersectionObserver`ベースのコンテンツを取得
|
|
72
|
+
- **マルチデバイスシミュレーション**: 様々なデバイス幅と解像度をシミュレートして包括的なリソース取得を実現
|
|
73
|
+
- HTMLページのディレクトリ構造を保持してダウンロード
|
|
74
|
+
- 関連するすべてのリソース(CSS、JS、画像など)を取得
|
|
75
|
+
- リソース間の相対リンクを維持
|
|
76
|
+
- 同一ホストのリソースのみサポート
|
|
77
|
+
- 元のファイル拡張子とパスを保持
|
|
34
78
|
|
|
35
79
|
## License
|
|
36
80
|
|
package/dist/cli.js
CHANGED
|
@@ -1,15 +1,36 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { createCLI, parseCommonOptions } from '@d-zero/cli-core';
|
|
2
|
+
import { createCLI, parseCommonOptions, parseList } from '@d-zero/cli-core';
|
|
3
|
+
import { parseDevicesOption } from '@d-zero/puppeteer-page-scan';
|
|
3
4
|
import { replicate } from './index.js';
|
|
4
5
|
const { options, args } = createCLI({
|
|
5
6
|
aliases: {
|
|
6
7
|
o: 'output',
|
|
7
8
|
v: 'verbose',
|
|
9
|
+
t: 'timeout',
|
|
10
|
+
d: 'devices',
|
|
8
11
|
},
|
|
9
|
-
usage: [
|
|
12
|
+
usage: [
|
|
13
|
+
'Usage: replicator <url> -o <output-directory> [options]',
|
|
14
|
+
'',
|
|
15
|
+
'Options:',
|
|
16
|
+
' -o, --output <dir> Output directory (required)',
|
|
17
|
+
' -t, --timeout <ms> Request timeout in milliseconds (default: 30000)',
|
|
18
|
+
' -d, --devices <devices> Device presets (comma-separated, default: desktop-compact,mobile)',
|
|
19
|
+
' -v, --verbose Enable verbose logging',
|
|
20
|
+
'',
|
|
21
|
+
'Available device presets:',
|
|
22
|
+
' desktop, tablet, mobile, desktop-hd, desktop-compact, mobile-large, mobile-small',
|
|
23
|
+
'',
|
|
24
|
+
'Examples:',
|
|
25
|
+
' replicator https://example.com -o ./output',
|
|
26
|
+
' replicator https://example.com -o ./output --devices desktop,tablet',
|
|
27
|
+
' replicator https://example.com -o ./output --timeout 60000',
|
|
28
|
+
],
|
|
10
29
|
parseArgs: (cli) => ({
|
|
11
30
|
...parseCommonOptions(cli),
|
|
12
31
|
output: cli.output,
|
|
32
|
+
timeout: cli.timeout ? Number(cli.timeout) : undefined,
|
|
33
|
+
devices: cli.devices,
|
|
13
34
|
}),
|
|
14
35
|
validateArgs: (options, cli) => {
|
|
15
36
|
return !!(cli._.length > 0 && options.output);
|
|
@@ -23,8 +44,12 @@ if (!url || typeof url !== 'string') {
|
|
|
23
44
|
process.exit(1);
|
|
24
45
|
}
|
|
25
46
|
try {
|
|
47
|
+
const deviceNames = options.devices ? parseList(options.devices) : undefined;
|
|
48
|
+
const devices = parseDevicesOption(deviceNames);
|
|
26
49
|
await replicate(url, outputDir, {
|
|
27
50
|
verbose: options.verbose ?? false,
|
|
51
|
+
timeout: options.timeout,
|
|
52
|
+
devices,
|
|
28
53
|
});
|
|
29
54
|
// eslint-disable-next-line no-console
|
|
30
55
|
console.log(`✅ Successfully replicated ${url} to ${outputDir}`);
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { promises as fs } from 'node:fs';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { URL } from 'node:url';
|
|
4
|
+
import { beforePageScan, devicePresets } from '@d-zero/puppeteer-page-scan';
|
|
4
5
|
import { launch } from 'puppeteer';
|
|
5
6
|
/**
|
|
6
7
|
*
|
|
@@ -9,7 +10,12 @@ import { launch } from 'puppeteer';
|
|
|
9
10
|
* @param options
|
|
10
11
|
*/
|
|
11
12
|
export async function replicate(url, outputDir, options = {}) {
|
|
12
|
-
const { verbose = false,
|
|
13
|
+
const { verbose = false, timeout = 30_000, devices } = options;
|
|
14
|
+
const defaultSizes = {
|
|
15
|
+
'desktop-compact': devicePresets['desktop-compact'],
|
|
16
|
+
mobile: devicePresets.mobile,
|
|
17
|
+
};
|
|
18
|
+
const targetSizes = devices ?? defaultSizes;
|
|
13
19
|
const log = (message) => {
|
|
14
20
|
if (verbose) {
|
|
15
21
|
// eslint-disable-next-line no-console
|
|
@@ -22,39 +28,105 @@ export async function replicate(url, outputDir, options = {}) {
|
|
|
22
28
|
console.log(message);
|
|
23
29
|
};
|
|
24
30
|
const baseUrl = new URL(url);
|
|
25
|
-
const
|
|
31
|
+
const allResources = [];
|
|
26
32
|
progress(`🚀 Starting replication of ${url}`);
|
|
27
33
|
log(` Output directory: ${outputDir}`);
|
|
34
|
+
log(` Device sizes: ${Object.keys(targetSizes).join(', ')}`);
|
|
28
35
|
progress(`🌐 Launching browser...`);
|
|
29
36
|
const browser = await launch({
|
|
30
37
|
headless: true,
|
|
31
38
|
timeout,
|
|
32
39
|
});
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
40
|
+
try {
|
|
41
|
+
// Process each device size
|
|
42
|
+
for (const [sizeName, sizeConfig] of Object.entries(targetSizes)) {
|
|
43
|
+
const { width } = sizeConfig;
|
|
44
|
+
const resolution = 'resolution' in sizeConfig ? sizeConfig.resolution : undefined;
|
|
45
|
+
progress(`📱 Processing ${sizeName} (${width}px${resolution ? `, ${resolution}x` : ''})...`);
|
|
46
|
+
const page = await browser.newPage();
|
|
47
|
+
const sizeResources = [];
|
|
48
|
+
try {
|
|
49
|
+
await processPageForSize(page, url, baseUrl, sizeResources, {
|
|
50
|
+
sizeName,
|
|
51
|
+
width,
|
|
52
|
+
resolution,
|
|
53
|
+
timeout,
|
|
54
|
+
log,
|
|
55
|
+
progress,
|
|
56
|
+
});
|
|
57
|
+
// Merge resources, avoiding duplicates
|
|
58
|
+
for (const resource of sizeResources) {
|
|
59
|
+
const existing = allResources.find((r) => r.url === resource.url);
|
|
60
|
+
if (!existing) {
|
|
61
|
+
allResources.push(resource);
|
|
62
|
+
}
|
|
63
|
+
else if (!existing.content && resource.content) {
|
|
64
|
+
existing.content = resource.content;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
finally {
|
|
69
|
+
await page.close().catch((error) => {
|
|
70
|
+
log(`⚠️ Warning: Failed to close page for ${sizeName}: ${error instanceof Error ? error.message : String(error)}`);
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
const resourceCount = allResources.length;
|
|
75
|
+
const downloadedCount = allResources.filter((r) => r.content).length;
|
|
76
|
+
progress(`📄 Found ${resourceCount} total resources (${downloadedCount} downloaded successfully)`);
|
|
77
|
+
// Ensure output directory exists
|
|
78
|
+
progress(`📁 Creating output directory...`);
|
|
79
|
+
await fs.mkdir(outputDir, { recursive: true });
|
|
80
|
+
// Save all resources
|
|
81
|
+
progress(`💾 Saving files to disk...`);
|
|
82
|
+
const savedCount = await saveResources(allResources, outputDir, log, progress);
|
|
83
|
+
progress(`🎉 Replication complete! ${savedCount} files saved to ${outputDir}`);
|
|
84
|
+
}
|
|
85
|
+
finally {
|
|
86
|
+
progress(`🔧 Cleaning up browser...`);
|
|
87
|
+
await browser.close().catch((error) => {
|
|
88
|
+
// Log browser close errors but don't throw them
|
|
89
|
+
log(`⚠️ Warning: Failed to close browser: ${error instanceof Error ? error.message : String(error)}`);
|
|
90
|
+
});
|
|
38
91
|
}
|
|
39
|
-
|
|
40
|
-
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Process a page for a specific device size
|
|
95
|
+
* @param page
|
|
96
|
+
* @param url
|
|
97
|
+
* @param baseUrl
|
|
98
|
+
* @param resources
|
|
99
|
+
* @param options
|
|
100
|
+
* @param options.sizeName
|
|
101
|
+
* @param options.width
|
|
102
|
+
* @param options.resolution
|
|
103
|
+
* @param options.timeout
|
|
104
|
+
* @param options.log
|
|
105
|
+
* @param options.progress
|
|
106
|
+
*/
|
|
107
|
+
async function processPageForSize(page, url, baseUrl, resources, options) {
|
|
108
|
+
const { sizeName, width, resolution, log, progress } = options;
|
|
41
109
|
const requestPromises = [];
|
|
110
|
+
// Set up resource detection
|
|
42
111
|
page.on('request', (request) => {
|
|
43
112
|
const requestUrl = request.url();
|
|
44
113
|
const requestUrlObj = new URL(requestUrl);
|
|
45
114
|
// Only handle same-host resources
|
|
46
115
|
if (requestUrlObj.hostname === baseUrl.hostname) {
|
|
47
|
-
log(`📥 Intercepting: ${requestUrl}`);
|
|
116
|
+
log(`📥 [${sizeName}] Intercepting: ${requestUrl}`);
|
|
48
117
|
const localPath = urlToLocalPath(requestUrl);
|
|
49
118
|
const resourceType = getResourceType(requestUrl);
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
119
|
+
// Check if this resource is already tracked
|
|
120
|
+
if (!resources.some((r) => r.url === requestUrl)) {
|
|
121
|
+
resources.push({
|
|
122
|
+
url: requestUrl,
|
|
123
|
+
localPath,
|
|
124
|
+
type: resourceType,
|
|
125
|
+
});
|
|
126
|
+
}
|
|
55
127
|
}
|
|
56
128
|
else {
|
|
57
|
-
log(`🚫 Skipping external resource: ${requestUrl}`);
|
|
129
|
+
log(`🚫 [${sizeName}] Skipping external resource: ${requestUrl}`);
|
|
58
130
|
}
|
|
59
131
|
});
|
|
60
132
|
page.on('response', (response) => {
|
|
@@ -69,47 +141,67 @@ export async function replicate(url, outputDir, options = {}) {
|
|
|
69
141
|
.buffer()
|
|
70
142
|
.then((buffer) => {
|
|
71
143
|
resource.content = buffer;
|
|
72
|
-
log(`✅ Downloaded: ${responseUrl}`);
|
|
144
|
+
log(`✅ [${sizeName}] Downloaded: ${responseUrl}`);
|
|
73
145
|
})
|
|
74
146
|
.catch((error) => {
|
|
75
147
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
76
|
-
log(`❌ Failed to download: ${responseUrl} - ${errorMessage}`);
|
|
77
|
-
// Don't rethrow here as this would break the entire operation
|
|
78
|
-
// Individual resource failures should not stop the whole process
|
|
148
|
+
log(`❌ [${sizeName}] Failed to download: ${responseUrl} - ${errorMessage}`);
|
|
79
149
|
});
|
|
80
150
|
}
|
|
81
151
|
else if (resource) {
|
|
82
|
-
log(`❌ Resource failed (${response.status()}): ${responseUrl}`);
|
|
152
|
+
log(`❌ [${sizeName}] Resource failed (${response.status()}): ${responseUrl}`);
|
|
83
153
|
}
|
|
84
154
|
})();
|
|
85
155
|
requestPromises.push(promise);
|
|
86
156
|
}
|
|
87
157
|
});
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
158
|
+
// Set viewport and navigate using beforePageScan (which includes scrolling)
|
|
159
|
+
progress(`📡 [${sizeName}] Setting viewport and navigating...`);
|
|
160
|
+
await beforePageScan(page, url, {
|
|
161
|
+
name: sizeName,
|
|
162
|
+
width,
|
|
163
|
+
resolution,
|
|
164
|
+
listener: (phase, data) => {
|
|
165
|
+
switch (phase) {
|
|
166
|
+
case 'setViewport': {
|
|
167
|
+
const setViewportData = data;
|
|
168
|
+
log(`📱 [${sizeName}] Viewport set: ${setViewportData.width}px${setViewportData.resolution ? ` @ ${setViewportData.resolution}x` : ''}`);
|
|
169
|
+
break;
|
|
170
|
+
}
|
|
171
|
+
case 'load': {
|
|
172
|
+
const loadData = data;
|
|
173
|
+
log(`📄 [${sizeName}] Page loaded (${loadData.type})`);
|
|
174
|
+
break;
|
|
175
|
+
}
|
|
176
|
+
case 'scroll': {
|
|
177
|
+
const scrollData = data;
|
|
178
|
+
switch (scrollData.message) {
|
|
179
|
+
case 'Start scrolling': {
|
|
180
|
+
log(`📜 [${sizeName}] Starting scroll to trigger lazy loading...`);
|
|
181
|
+
break;
|
|
182
|
+
}
|
|
183
|
+
case 'End of page': {
|
|
184
|
+
log(`📜 [${sizeName}] Scroll completed (${scrollData.scrollY}/${scrollData.scrollHeight}px)`);
|
|
185
|
+
break;
|
|
186
|
+
}
|
|
187
|
+
case 'Scrolling': {
|
|
188
|
+
const progress = Math.round((scrollData.scrollY / scrollData.scrollHeight) * 100);
|
|
189
|
+
log(`📜 [${sizeName}] Scrolling progress: ${progress}% (${scrollData.scrollY}/${scrollData.scrollHeight}px)`);
|
|
190
|
+
break;
|
|
191
|
+
}
|
|
192
|
+
// No default
|
|
193
|
+
}
|
|
194
|
+
break;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
},
|
|
198
|
+
});
|
|
199
|
+
progress(`⏳ [${sizeName}] Waiting for all resources to load...`);
|
|
200
|
+
// Wait for all downloads to complete
|
|
201
|
+
await Promise.all(requestPromises);
|
|
202
|
+
const resourceCount = resources.length;
|
|
203
|
+
const downloadedCount = resources.filter((r) => r.content).length;
|
|
204
|
+
progress(`📄 [${sizeName}] Found ${resourceCount} resources (${downloadedCount} downloaded)`);
|
|
113
205
|
}
|
|
114
206
|
/**
|
|
115
207
|
*
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@d-zero/replicator",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Replicate web pages with all their resources to local directories",
|
|
5
5
|
"author": "D-ZERO",
|
|
6
6
|
"license": "MIT",
|
|
@@ -24,14 +24,16 @@
|
|
|
24
24
|
"clean": "tsc --build --clean"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@d-zero/cli-core": "1.1.
|
|
28
|
-
"@d-zero/
|
|
27
|
+
"@d-zero/cli-core": "1.1.1",
|
|
28
|
+
"@d-zero/puppeteer-page-scan": "4.1.0",
|
|
29
|
+
"@d-zero/puppeteer-scroll": "3.0.6",
|
|
30
|
+
"@d-zero/shared": "0.9.2",
|
|
29
31
|
"ansi-colors": "4.1.3",
|
|
30
32
|
"minimist": "1.2.8",
|
|
31
|
-
"puppeteer": "24.
|
|
33
|
+
"puppeteer": "24.18.0"
|
|
32
34
|
},
|
|
33
35
|
"devDependencies": {
|
|
34
36
|
"@types/minimist": "1.2.5"
|
|
35
37
|
},
|
|
36
|
-
"gitHead": "
|
|
38
|
+
"gitHead": "8081edac801400fed7c0b7ebeccc0ce66ccfe131"
|
|
37
39
|
}
|