@d-zero/replicator 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +138 -46
  2. package/package.json +7 -5
package/dist/index.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import { promises as fs } from 'node:fs';
2
2
  import path from 'node:path';
3
3
  import { URL } from 'node:url';
4
+ import { beforePageScan } from '@d-zero/puppeteer-page-scan';
4
5
  import { launch } from 'puppeteer';
5
6
  /**
6
7
  *
@@ -9,7 +10,12 @@ import { launch } from 'puppeteer';
9
10
  * @param options
10
11
  */
11
12
  export async function replicate(url, outputDir, options = {}) {
12
- const { verbose = false, userAgent, timeout = 30_000 } = options;
13
+ const { verbose = false, timeout = 30_000, sizes } = options;
14
+ const defaultSizes = {
15
+ desktop: { width: 1280 },
16
+ mobile: { width: 375, resolution: 2 },
17
+ };
18
+ const targetSizes = sizes ?? defaultSizes;
13
19
  const log = (message) => {
14
20
  if (verbose) {
15
21
  // eslint-disable-next-line no-console
@@ -22,39 +28,105 @@ export async function replicate(url, outputDir, options = {}) {
22
28
  console.log(message);
23
29
  };
24
30
  const baseUrl = new URL(url);
25
- const resources = [];
31
+ const allResources = [];
26
32
  progress(`🚀 Starting replication of ${url}`);
27
33
  log(` Output directory: ${outputDir}`);
34
+ log(` Device sizes: ${Object.keys(targetSizes).join(', ')}`);
28
35
  progress(`🌐 Launching browser...`);
29
36
  const browser = await launch({
30
37
  headless: true,
31
38
  timeout,
32
39
  });
33
- progress(`📄 Creating new page...`);
34
- const page = await browser.newPage();
35
- if (userAgent) {
36
- log(` Setting user agent: ${userAgent}`);
37
- await page.setUserAgent(userAgent);
40
+ try {
41
+ // Process each device size
42
+ for (const [sizeName, sizeConfig] of Object.entries(targetSizes)) {
43
+ const { width } = sizeConfig;
44
+ const resolution = 'resolution' in sizeConfig ? sizeConfig.resolution : undefined;
45
+ progress(`📱 Processing ${sizeName} (${width}px${resolution ? `, ${resolution}x` : ''})...`);
46
+ const page = await browser.newPage();
47
+ const sizeResources = [];
48
+ try {
49
+ await processPageForSize(page, url, baseUrl, sizeResources, {
50
+ sizeName,
51
+ width,
52
+ resolution,
53
+ timeout,
54
+ log,
55
+ progress,
56
+ });
57
+ // Merge resources, avoiding duplicates
58
+ for (const resource of sizeResources) {
59
+ const existing = allResources.find((r) => r.url === resource.url);
60
+ if (!existing) {
61
+ allResources.push(resource);
62
+ }
63
+ else if (!existing.content && resource.content) {
64
+ existing.content = resource.content;
65
+ }
66
+ }
67
+ }
68
+ finally {
69
+ await page.close().catch((error) => {
70
+ log(`⚠️ Warning: Failed to close page for ${sizeName}: ${error instanceof Error ? error.message : String(error)}`);
71
+ });
72
+ }
73
+ }
74
+ const resourceCount = allResources.length;
75
+ const downloadedCount = allResources.filter((r) => r.content).length;
76
+ progress(`📄 Found ${resourceCount} total resources (${downloadedCount} downloaded successfully)`);
77
+ // Ensure output directory exists
78
+ progress(`📁 Creating output directory...`);
79
+ await fs.mkdir(outputDir, { recursive: true });
80
+ // Save all resources
81
+ progress(`💾 Saving files to disk...`);
82
+ const savedCount = await saveResources(allResources, outputDir, log, progress);
83
+ progress(`🎉 Replication complete! ${savedCount} files saved to ${outputDir}`);
84
+ }
85
+ finally {
86
+ progress(`🔧 Cleaning up browser...`);
87
+ await browser.close().catch((error) => {
88
+ // Log browser close errors but don't throw them
89
+ log(`⚠️ Warning: Failed to close browser: ${error instanceof Error ? error.message : String(error)}`);
90
+ });
38
91
  }
39
- progress(`🔍 Setting up resource detection...`);
40
- // Collect all requests
92
+ }
93
+ /**
94
+ * Process a page for a specific device size
95
+ * @param page
96
+ * @param url
97
+ * @param baseUrl
98
+ * @param resources
99
+ * @param options
100
+ * @param options.sizeName
101
+ * @param options.width
102
+ * @param options.resolution
103
+ * @param options.timeout
104
+ * @param options.log
105
+ * @param options.progress
106
+ */
107
+ async function processPageForSize(page, url, baseUrl, resources, options) {
108
+ const { sizeName, width, resolution, log, progress } = options;
41
109
  const requestPromises = [];
110
+ // Set up resource detection
42
111
  page.on('request', (request) => {
43
112
  const requestUrl = request.url();
44
113
  const requestUrlObj = new URL(requestUrl);
45
114
  // Only handle same-host resources
46
115
  if (requestUrlObj.hostname === baseUrl.hostname) {
47
- log(`📥 Intercepting: ${requestUrl}`);
116
+ log(`📥 [${sizeName}] Intercepting: ${requestUrl}`);
48
117
  const localPath = urlToLocalPath(requestUrl);
49
118
  const resourceType = getResourceType(requestUrl);
50
- resources.push({
51
- url: requestUrl,
52
- localPath,
53
- type: resourceType,
54
- });
119
+ // Check if this resource is already tracked
120
+ if (!resources.some((r) => r.url === requestUrl)) {
121
+ resources.push({
122
+ url: requestUrl,
123
+ localPath,
124
+ type: resourceType,
125
+ });
126
+ }
55
127
  }
56
128
  else {
57
- log(`🚫 Skipping external resource: ${requestUrl}`);
129
+ log(`🚫 [${sizeName}] Skipping external resource: ${requestUrl}`);
58
130
  }
59
131
  });
60
132
  page.on('response', (response) => {
@@ -69,47 +141,67 @@ export async function replicate(url, outputDir, options = {}) {
69
141
  .buffer()
70
142
  .then((buffer) => {
71
143
  resource.content = buffer;
72
- log(`✅ Downloaded: ${responseUrl}`);
144
+ log(`✅ [${sizeName}] Downloaded: ${responseUrl}`);
73
145
  })
74
146
  .catch((error) => {
75
147
  const errorMessage = error instanceof Error ? error.message : String(error);
76
- log(`❌ Failed to download: ${responseUrl} - ${errorMessage}`);
77
- // Don't rethrow here as this would break the entire operation
78
- // Individual resource failures should not stop the whole process
148
+ log(`❌ [${sizeName}] Failed to download: ${responseUrl} - ${errorMessage}`);
79
149
  });
80
150
  }
81
151
  else if (resource) {
82
- log(`❌ Resource failed (${response.status()}): ${responseUrl}`);
152
+ log(`❌ [${sizeName}] Resource failed (${response.status()}): ${responseUrl}`);
83
153
  }
84
154
  })();
85
155
  requestPromises.push(promise);
86
156
  }
87
157
  });
88
- try {
89
- // Navigate to the page
90
- progress(`📡 Navigating to ${url}...`);
91
- await page.goto(url, { waitUntil: 'networkidle2', timeout });
92
- progress(`⏳ Waiting for all resources to load...`);
93
- // Wait for all downloads to complete
94
- await Promise.all(requestPromises);
95
- const resourceCount = resources.length;
96
- const downloadedCount = resources.filter((r) => r.content).length;
97
- progress(`📄 Found ${resourceCount} resources (${downloadedCount} downloaded successfully)`);
98
- // Ensure output directory exists
99
- progress(`📁 Creating output directory...`);
100
- await fs.mkdir(outputDir, { recursive: true });
101
- // Save all resources
102
- progress(`💾 Saving files to disk...`);
103
- const savedCount = await saveResources(resources, outputDir, log, progress);
104
- progress(`🎉 Replication complete! ${savedCount} files saved to ${outputDir}`);
105
- }
106
- finally {
107
- progress(`🔧 Cleaning up browser...`);
108
- await browser.close().catch((error) => {
109
- // Log browser close errors but don't throw them
110
- log(`⚠️ Warning: Failed to close browser: ${error instanceof Error ? error.message : String(error)}`);
111
- });
112
- }
158
+ // Set viewport and navigate using beforePageScan (which includes scrolling)
159
+ progress(`📡 [${sizeName}] Setting viewport and navigating...`);
160
+ await beforePageScan(page, url, {
161
+ name: sizeName,
162
+ width,
163
+ resolution,
164
+ listener: (phase, data) => {
165
+ switch (phase) {
166
+ case 'setViewport': {
167
+ const setViewportData = data;
168
+ log(`📱 [${sizeName}] Viewport set: ${setViewportData.width}px${setViewportData.resolution ? ` @ ${setViewportData.resolution}x` : ''}`);
169
+ break;
170
+ }
171
+ case 'load': {
172
+ const loadData = data;
173
+ log(`📄 [${sizeName}] Page loaded (${loadData.type})`);
174
+ break;
175
+ }
176
+ case 'scroll': {
177
+ const scrollData = data;
178
+ switch (scrollData.message) {
179
+ case 'Start scrolling': {
180
+ log(`📜 [${sizeName}] Starting scroll to trigger lazy loading...`);
181
+ break;
182
+ }
183
+ case 'End of page': {
184
+ log(`📜 [${sizeName}] Scroll completed (${scrollData.scrollY}/${scrollData.scrollHeight}px)`);
185
+ break;
186
+ }
187
+ case 'Scrolling': {
188
+ const progress = Math.round((scrollData.scrollY / scrollData.scrollHeight) * 100);
189
+ log(`📜 [${sizeName}] Scrolling progress: ${progress}% (${scrollData.scrollY}/${scrollData.scrollHeight}px)`);
190
+ break;
191
+ }
192
+ // No default
193
+ }
194
+ break;
195
+ }
196
+ }
197
+ },
198
+ });
199
+ progress(`⏳ [${sizeName}] Waiting for all resources to load...`);
200
+ // Wait for all downloads to complete
201
+ await Promise.all(requestPromises);
202
+ const resourceCount = resources.length;
203
+ const downloadedCount = resources.filter((r) => r.content).length;
204
+ progress(`📄 [${sizeName}] Found ${resourceCount} resources (${downloadedCount} downloaded)`);
113
205
  }
114
206
  /**
115
207
  *
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@d-zero/replicator",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "Replicate web pages with all their resources to local directories",
5
5
  "author": "D-ZERO",
6
6
  "license": "MIT",
@@ -24,14 +24,16 @@
24
24
  "clean": "tsc --build --clean"
25
25
  },
26
26
  "dependencies": {
27
- "@d-zero/cli-core": "1.1.0",
28
- "@d-zero/shared": "0.9.1",
27
+ "@d-zero/cli-core": "1.1.1",
28
+ "@d-zero/puppeteer-page-scan": "4.0.4",
29
+ "@d-zero/puppeteer-scroll": "3.0.5",
30
+ "@d-zero/shared": "0.9.2",
29
31
  "ansi-colors": "4.1.3",
30
32
  "minimist": "1.2.8",
31
- "puppeteer": "24.12.0"
33
+ "puppeteer": "24.17.1"
32
34
  },
33
35
  "devDependencies": {
34
36
  "@types/minimist": "1.2.5"
35
37
  },
36
- "gitHead": "7cc778738d8c811adb69cee528655e12eba52e87"
38
+ "gitHead": "6fa0fd190c54351762a70039cd8dfcafc51caa0b"
37
39
  }