@d-zero/replicator 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,36 +1,80 @@
1
- # @d-zero/replicator
1
+ # `@d-zero/replicator`
2
2
 
3
- Replicate web pages with all their resources to local directories
3
+ ウェブページとそのリソースをレスポンシブ画像対応でローカルディレクトリに複製するツールです。
4
4
 
5
- ## Installation
5
+ ## インストール
6
6
 
7
7
  ```bash
8
8
  npm install @d-zero/replicator
9
9
  ```
10
10
 
11
- ## Usage
11
+ ## 使い方
12
12
 
13
13
  ### CLI
14
14
 
15
15
  ```bash
16
- npx @d-zero/replicator <url> -o <output-directory>
16
+ npx @d-zero/replicator <url> -o <output-directory> [options]
17
17
  ```
18
18
 
19
- ### Programmatic
19
+ #### オプション
20
+
21
+ - `-o, --output <dir>`: 出力ディレクトリ(必須)
22
+ - `-t, --timeout <ms>`: リクエストタイムアウト(ミリ秒、デフォルト: 30000)
23
+ - `-d, --devices <devices>`: デバイスプリセット(カンマ区切り、デフォルト: desktop-compact,mobile)
24
+ - `-v, --verbose`: 詳細ログモード
25
+
26
+ #### 利用可能なデバイスプリセット
27
+
28
+ - `desktop`: 1400px幅
29
+ - `tablet`: 768px幅
30
+ - `mobile`: 375px幅(2倍解像度)
31
+ - `desktop-hd`: 1920px幅
32
+ - `desktop-compact`: 1280px幅
33
+ - `mobile-large`: 414px幅(3倍解像度)
34
+ - `mobile-small`: 320px幅(2倍解像度)
35
+
36
+ #### 使用例
37
+
38
+ ```bash
39
+ # デフォルトデバイス(desktop-compact, mobile)
40
+ npx @d-zero/replicator https://example.com -o ./output
41
+
42
+ # カスタムデバイス指定
43
+ npx @d-zero/replicator https://example.com -o ./output --devices desktop,tablet,mobile
44
+
45
+ # タイムアウト指定
46
+ npx @d-zero/replicator https://example.com -o ./output --timeout 60000
47
+ ```
48
+
49
+ ### プログラマティック使用
20
50
 
21
51
  ```typescript
22
52
  import { replicate } from '@d-zero/replicator';
23
53
 
54
+ // デフォルトデバイス
24
55
  await replicate('https://example.com', './output');
56
+
57
+ // カスタムデバイス
58
+ await replicate('https://example.com', './output', {
59
+ devices: {
60
+ desktop: { width: 1400 },
61
+ mobile: { width: 375, resolution: 2 },
62
+ },
63
+ timeout: 30000,
64
+ verbose: true,
65
+ });
25
66
  ```
26
67
 
27
- ## Features
68
+ ## 機能
28
69
 
29
- - Download HTML pages preserving directory structure
30
- - Fetch all related resources (CSS, JS, images, etc.)
31
- - Maintain relative links between resources
32
- - Support for same-host resources only
33
- - Preserve original file extensions and paths
70
+ - **レスポンシブ画像対応**: 複数のデバイス幅で`<picture>`要素やメディアクエリのリソースを取得
71
+ - **遅延読み込み対応**: ページを自動スクロールして`loading=lazy`や`IntersectionObserver`ベースのコンテンツを取得
72
+ - **マルチデバイスシミュレーション**: 様々なデバイス幅と解像度をシミュレートして包括的なリソース取得を実現
73
+ - HTMLページのディレクトリ構造を保持してダウンロード
74
+ - 関連するすべてのリソース(CSS、JS、画像など)を取得
75
+ - リソース間の相対リンクを維持
76
+ - 同一ホストのリソースのみサポート
77
+ - 元のファイル拡張子とパスを保持
34
78
 
35
79
  ## License
36
80
 
package/dist/cli.js CHANGED
@@ -1,15 +1,36 @@
1
1
  #!/usr/bin/env node
2
- import { createCLI, parseCommonOptions } from '@d-zero/cli-core';
2
+ import { createCLI, parseCommonOptions, parseList } from '@d-zero/cli-core';
3
+ import { parseDevicesOption } from '@d-zero/puppeteer-page-scan';
3
4
  import { replicate } from './index.js';
4
5
  const { options, args } = createCLI({
5
6
  aliases: {
6
7
  o: 'output',
7
8
  v: 'verbose',
9
+ t: 'timeout',
10
+ d: 'devices',
8
11
  },
9
- usage: ['Usage: replicator <url> -o <output-directory> [--verbose]'],
12
+ usage: [
13
+ 'Usage: replicator <url> -o <output-directory> [options]',
14
+ '',
15
+ 'Options:',
16
+ ' -o, --output <dir> Output directory (required)',
17
+ ' -t, --timeout <ms> Request timeout in milliseconds (default: 30000)',
18
+ ' -d, --devices <devices> Device presets (comma-separated, default: desktop-compact,mobile)',
19
+ ' -v, --verbose Enable verbose logging',
20
+ '',
21
+ 'Available device presets:',
22
+ ' desktop, tablet, mobile, desktop-hd, desktop-compact, mobile-large, mobile-small',
23
+ '',
24
+ 'Examples:',
25
+ ' replicator https://example.com -o ./output',
26
+ ' replicator https://example.com -o ./output --devices desktop,tablet',
27
+ ' replicator https://example.com -o ./output --timeout 60000',
28
+ ],
10
29
  parseArgs: (cli) => ({
11
30
  ...parseCommonOptions(cli),
12
31
  output: cli.output,
32
+ timeout: cli.timeout ? Number(cli.timeout) : undefined,
33
+ devices: cli.devices,
13
34
  }),
14
35
  validateArgs: (options, cli) => {
15
36
  return !!(cli._.length > 0 && options.output);
@@ -23,8 +44,12 @@ if (!url || typeof url !== 'string') {
23
44
  process.exit(1);
24
45
  }
25
46
  try {
47
+ const deviceNames = options.devices ? parseList(options.devices) : undefined;
48
+ const devices = parseDevicesOption(deviceNames);
26
49
  await replicate(url, outputDir, {
27
50
  verbose: options.verbose ?? false,
51
+ timeout: options.timeout,
52
+ devices,
28
53
  });
29
54
  // eslint-disable-next-line no-console
30
55
  console.log(`✅ Successfully replicated ${url} to ${outputDir}`);
package/dist/index.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import { promises as fs } from 'node:fs';
2
2
  import path from 'node:path';
3
3
  import { URL } from 'node:url';
4
+ import { beforePageScan, devicePresets } from '@d-zero/puppeteer-page-scan';
4
5
  import { launch } from 'puppeteer';
5
6
  /**
6
7
  *
@@ -9,7 +10,12 @@ import { launch } from 'puppeteer';
9
10
  * @param options
10
11
  */
11
12
  export async function replicate(url, outputDir, options = {}) {
12
- const { verbose = false, userAgent, timeout = 30_000 } = options;
13
+ const { verbose = false, timeout = 30_000, devices } = options;
14
+ const defaultSizes = {
15
+ 'desktop-compact': devicePresets['desktop-compact'],
16
+ mobile: devicePresets.mobile,
17
+ };
18
+ const targetSizes = devices ?? defaultSizes;
13
19
  const log = (message) => {
14
20
  if (verbose) {
15
21
  // eslint-disable-next-line no-console
@@ -22,39 +28,105 @@ export async function replicate(url, outputDir, options = {}) {
22
28
  console.log(message);
23
29
  };
24
30
  const baseUrl = new URL(url);
25
- const resources = [];
31
+ const allResources = [];
26
32
  progress(`🚀 Starting replication of ${url}`);
27
33
  log(` Output directory: ${outputDir}`);
34
+ log(` Device sizes: ${Object.keys(targetSizes).join(', ')}`);
28
35
  progress(`🌐 Launching browser...`);
29
36
  const browser = await launch({
30
37
  headless: true,
31
38
  timeout,
32
39
  });
33
- progress(`📄 Creating new page...`);
34
- const page = await browser.newPage();
35
- if (userAgent) {
36
- log(` Setting user agent: ${userAgent}`);
37
- await page.setUserAgent(userAgent);
40
+ try {
41
+ // Process each device size
42
+ for (const [sizeName, sizeConfig] of Object.entries(targetSizes)) {
43
+ const { width } = sizeConfig;
44
+ const resolution = 'resolution' in sizeConfig ? sizeConfig.resolution : undefined;
45
+ progress(`📱 Processing ${sizeName} (${width}px${resolution ? `, ${resolution}x` : ''})...`);
46
+ const page = await browser.newPage();
47
+ const sizeResources = [];
48
+ try {
49
+ await processPageForSize(page, url, baseUrl, sizeResources, {
50
+ sizeName,
51
+ width,
52
+ resolution,
53
+ timeout,
54
+ log,
55
+ progress,
56
+ });
57
+ // Merge resources, avoiding duplicates
58
+ for (const resource of sizeResources) {
59
+ const existing = allResources.find((r) => r.url === resource.url);
60
+ if (!existing) {
61
+ allResources.push(resource);
62
+ }
63
+ else if (!existing.content && resource.content) {
64
+ existing.content = resource.content;
65
+ }
66
+ }
67
+ }
68
+ finally {
69
+ await page.close().catch((error) => {
70
+ log(`⚠️ Warning: Failed to close page for ${sizeName}: ${error instanceof Error ? error.message : String(error)}`);
71
+ });
72
+ }
73
+ }
74
+ const resourceCount = allResources.length;
75
+ const downloadedCount = allResources.filter((r) => r.content).length;
76
+ progress(`📄 Found ${resourceCount} total resources (${downloadedCount} downloaded successfully)`);
77
+ // Ensure output directory exists
78
+ progress(`📁 Creating output directory...`);
79
+ await fs.mkdir(outputDir, { recursive: true });
80
+ // Save all resources
81
+ progress(`💾 Saving files to disk...`);
82
+ const savedCount = await saveResources(allResources, outputDir, log, progress);
83
+ progress(`🎉 Replication complete! ${savedCount} files saved to ${outputDir}`);
84
+ }
85
+ finally {
86
+ progress(`🔧 Cleaning up browser...`);
87
+ await browser.close().catch((error) => {
88
+ // Log browser close errors but don't throw them
89
+ log(`⚠️ Warning: Failed to close browser: ${error instanceof Error ? error.message : String(error)}`);
90
+ });
38
91
  }
39
- progress(`🔍 Setting up resource detection...`);
40
- // Collect all requests
92
+ }
93
+ /**
94
+ * Process a page for a specific device size
95
+ * @param page
96
+ * @param url
97
+ * @param baseUrl
98
+ * @param resources
99
+ * @param options
100
+ * @param options.sizeName
101
+ * @param options.width
102
+ * @param options.resolution
103
+ * @param options.timeout
104
+ * @param options.log
105
+ * @param options.progress
106
+ */
107
+ async function processPageForSize(page, url, baseUrl, resources, options) {
108
+ const { sizeName, width, resolution, log, progress } = options;
41
109
  const requestPromises = [];
110
+ // Set up resource detection
42
111
  page.on('request', (request) => {
43
112
  const requestUrl = request.url();
44
113
  const requestUrlObj = new URL(requestUrl);
45
114
  // Only handle same-host resources
46
115
  if (requestUrlObj.hostname === baseUrl.hostname) {
47
- log(`📥 Intercepting: ${requestUrl}`);
116
+ log(`📥 [${sizeName}] Intercepting: ${requestUrl}`);
48
117
  const localPath = urlToLocalPath(requestUrl);
49
118
  const resourceType = getResourceType(requestUrl);
50
- resources.push({
51
- url: requestUrl,
52
- localPath,
53
- type: resourceType,
54
- });
119
+ // Check if this resource is already tracked
120
+ if (!resources.some((r) => r.url === requestUrl)) {
121
+ resources.push({
122
+ url: requestUrl,
123
+ localPath,
124
+ type: resourceType,
125
+ });
126
+ }
55
127
  }
56
128
  else {
57
- log(`🚫 Skipping external resource: ${requestUrl}`);
129
+ log(`🚫 [${sizeName}] Skipping external resource: ${requestUrl}`);
58
130
  }
59
131
  });
60
132
  page.on('response', (response) => {
@@ -69,47 +141,67 @@ export async function replicate(url, outputDir, options = {}) {
69
141
  .buffer()
70
142
  .then((buffer) => {
71
143
  resource.content = buffer;
72
- log(`✅ Downloaded: ${responseUrl}`);
144
+ log(`✅ [${sizeName}] Downloaded: ${responseUrl}`);
73
145
  })
74
146
  .catch((error) => {
75
147
  const errorMessage = error instanceof Error ? error.message : String(error);
76
- log(`❌ Failed to download: ${responseUrl} - ${errorMessage}`);
77
- // Don't rethrow here as this would break the entire operation
78
- // Individual resource failures should not stop the whole process
148
+ log(`❌ [${sizeName}] Failed to download: ${responseUrl} - ${errorMessage}`);
79
149
  });
80
150
  }
81
151
  else if (resource) {
82
- log(`❌ Resource failed (${response.status()}): ${responseUrl}`);
152
+ log(`❌ [${sizeName}] Resource failed (${response.status()}): ${responseUrl}`);
83
153
  }
84
154
  })();
85
155
  requestPromises.push(promise);
86
156
  }
87
157
  });
88
- try {
89
- // Navigate to the page
90
- progress(`📡 Navigating to ${url}...`);
91
- await page.goto(url, { waitUntil: 'networkidle2', timeout });
92
- progress(`⏳ Waiting for all resources to load...`);
93
- // Wait for all downloads to complete
94
- await Promise.all(requestPromises);
95
- const resourceCount = resources.length;
96
- const downloadedCount = resources.filter((r) => r.content).length;
97
- progress(`📄 Found ${resourceCount} resources (${downloadedCount} downloaded successfully)`);
98
- // Ensure output directory exists
99
- progress(`📁 Creating output directory...`);
100
- await fs.mkdir(outputDir, { recursive: true });
101
- // Save all resources
102
- progress(`💾 Saving files to disk...`);
103
- const savedCount = await saveResources(resources, outputDir, log, progress);
104
- progress(`🎉 Replication complete! ${savedCount} files saved to ${outputDir}`);
105
- }
106
- finally {
107
- progress(`🔧 Cleaning up browser...`);
108
- await browser.close().catch((error) => {
109
- // Log browser close errors but don't throw them
110
- log(`⚠️ Warning: Failed to close browser: ${error instanceof Error ? error.message : String(error)}`);
111
- });
112
- }
158
+ // Set viewport and navigate using beforePageScan (which includes scrolling)
159
+ progress(`📡 [${sizeName}] Setting viewport and navigating...`);
160
+ await beforePageScan(page, url, {
161
+ name: sizeName,
162
+ width,
163
+ resolution,
164
+ listener: (phase, data) => {
165
+ switch (phase) {
166
+ case 'setViewport': {
167
+ const setViewportData = data;
168
+ log(`📱 [${sizeName}] Viewport set: ${setViewportData.width}px${setViewportData.resolution ? ` @ ${setViewportData.resolution}x` : ''}`);
169
+ break;
170
+ }
171
+ case 'load': {
172
+ const loadData = data;
173
+ log(`📄 [${sizeName}] Page loaded (${loadData.type})`);
174
+ break;
175
+ }
176
+ case 'scroll': {
177
+ const scrollData = data;
178
+ switch (scrollData.message) {
179
+ case 'Start scrolling': {
180
+ log(`📜 [${sizeName}] Starting scroll to trigger lazy loading...`);
181
+ break;
182
+ }
183
+ case 'End of page': {
184
+ log(`📜 [${sizeName}] Scroll completed (${scrollData.scrollY}/${scrollData.scrollHeight}px)`);
185
+ break;
186
+ }
187
+ case 'Scrolling': {
188
+ const progress = Math.round((scrollData.scrollY / scrollData.scrollHeight) * 100);
189
+ log(`📜 [${sizeName}] Scrolling progress: ${progress}% (${scrollData.scrollY}/${scrollData.scrollHeight}px)`);
190
+ break;
191
+ }
192
+ // No default
193
+ }
194
+ break;
195
+ }
196
+ }
197
+ },
198
+ });
199
+ progress(`⏳ [${sizeName}] Waiting for all resources to load...`);
200
+ // Wait for all downloads to complete
201
+ await Promise.all(requestPromises);
202
+ const resourceCount = resources.length;
203
+ const downloadedCount = resources.filter((r) => r.content).length;
204
+ progress(`📄 [${sizeName}] Found ${resourceCount} resources (${downloadedCount} downloaded)`);
113
205
  }
114
206
  /**
115
207
  *
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@d-zero/replicator",
3
- "version": "0.2.0",
3
+ "version": "0.4.0",
4
4
  "description": "Replicate web pages with all their resources to local directories",
5
5
  "author": "D-ZERO",
6
6
  "license": "MIT",
@@ -24,14 +24,16 @@
24
24
  "clean": "tsc --build --clean"
25
25
  },
26
26
  "dependencies": {
27
- "@d-zero/cli-core": "1.1.0",
28
- "@d-zero/shared": "0.9.1",
27
+ "@d-zero/cli-core": "1.1.1",
28
+ "@d-zero/puppeteer-page-scan": "4.1.0",
29
+ "@d-zero/puppeteer-scroll": "3.0.6",
30
+ "@d-zero/shared": "0.9.2",
29
31
  "ansi-colors": "4.1.3",
30
32
  "minimist": "1.2.8",
31
- "puppeteer": "24.12.0"
33
+ "puppeteer": "24.18.0"
32
34
  },
33
35
  "devDependencies": {
34
36
  "@types/minimist": "1.2.5"
35
37
  },
36
- "gitHead": "7cc778738d8c811adb69cee528655e12eba52e87"
38
+ "gitHead": "8081edac801400fed7c0b7ebeccc0ce66ccfe131"
37
39
  }