@d-zero/replicator 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +138 -46
- package/package.json +7 -5
package/dist/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { promises as fs } from 'node:fs';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { URL } from 'node:url';
|
|
4
|
+
import { beforePageScan } from '@d-zero/puppeteer-page-scan';
|
|
4
5
|
import { launch } from 'puppeteer';
|
|
5
6
|
/**
|
|
6
7
|
*
|
|
@@ -9,7 +10,12 @@ import { launch } from 'puppeteer';
|
|
|
9
10
|
* @param options
|
|
10
11
|
*/
|
|
11
12
|
export async function replicate(url, outputDir, options = {}) {
|
|
12
|
-
const { verbose = false,
|
|
13
|
+
const { verbose = false, timeout = 30_000, sizes } = options;
|
|
14
|
+
const defaultSizes = {
|
|
15
|
+
desktop: { width: 1280 },
|
|
16
|
+
mobile: { width: 375, resolution: 2 },
|
|
17
|
+
};
|
|
18
|
+
const targetSizes = sizes ?? defaultSizes;
|
|
13
19
|
const log = (message) => {
|
|
14
20
|
if (verbose) {
|
|
15
21
|
// eslint-disable-next-line no-console
|
|
@@ -22,39 +28,105 @@ export async function replicate(url, outputDir, options = {}) {
|
|
|
22
28
|
console.log(message);
|
|
23
29
|
};
|
|
24
30
|
const baseUrl = new URL(url);
|
|
25
|
-
const
|
|
31
|
+
const allResources = [];
|
|
26
32
|
progress(`🚀 Starting replication of ${url}`);
|
|
27
33
|
log(` Output directory: ${outputDir}`);
|
|
34
|
+
log(` Device sizes: ${Object.keys(targetSizes).join(', ')}`);
|
|
28
35
|
progress(`🌐 Launching browser...`);
|
|
29
36
|
const browser = await launch({
|
|
30
37
|
headless: true,
|
|
31
38
|
timeout,
|
|
32
39
|
});
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
40
|
+
try {
|
|
41
|
+
// Process each device size
|
|
42
|
+
for (const [sizeName, sizeConfig] of Object.entries(targetSizes)) {
|
|
43
|
+
const { width } = sizeConfig;
|
|
44
|
+
const resolution = 'resolution' in sizeConfig ? sizeConfig.resolution : undefined;
|
|
45
|
+
progress(`📱 Processing ${sizeName} (${width}px${resolution ? `, ${resolution}x` : ''})...`);
|
|
46
|
+
const page = await browser.newPage();
|
|
47
|
+
const sizeResources = [];
|
|
48
|
+
try {
|
|
49
|
+
await processPageForSize(page, url, baseUrl, sizeResources, {
|
|
50
|
+
sizeName,
|
|
51
|
+
width,
|
|
52
|
+
resolution,
|
|
53
|
+
timeout,
|
|
54
|
+
log,
|
|
55
|
+
progress,
|
|
56
|
+
});
|
|
57
|
+
// Merge resources, avoiding duplicates
|
|
58
|
+
for (const resource of sizeResources) {
|
|
59
|
+
const existing = allResources.find((r) => r.url === resource.url);
|
|
60
|
+
if (!existing) {
|
|
61
|
+
allResources.push(resource);
|
|
62
|
+
}
|
|
63
|
+
else if (!existing.content && resource.content) {
|
|
64
|
+
existing.content = resource.content;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
finally {
|
|
69
|
+
await page.close().catch((error) => {
|
|
70
|
+
log(`⚠️ Warning: Failed to close page for ${sizeName}: ${error instanceof Error ? error.message : String(error)}`);
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
const resourceCount = allResources.length;
|
|
75
|
+
const downloadedCount = allResources.filter((r) => r.content).length;
|
|
76
|
+
progress(`📄 Found ${resourceCount} total resources (${downloadedCount} downloaded successfully)`);
|
|
77
|
+
// Ensure output directory exists
|
|
78
|
+
progress(`📁 Creating output directory...`);
|
|
79
|
+
await fs.mkdir(outputDir, { recursive: true });
|
|
80
|
+
// Save all resources
|
|
81
|
+
progress(`💾 Saving files to disk...`);
|
|
82
|
+
const savedCount = await saveResources(allResources, outputDir, log, progress);
|
|
83
|
+
progress(`🎉 Replication complete! ${savedCount} files saved to ${outputDir}`);
|
|
84
|
+
}
|
|
85
|
+
finally {
|
|
86
|
+
progress(`🔧 Cleaning up browser...`);
|
|
87
|
+
await browser.close().catch((error) => {
|
|
88
|
+
// Log browser close errors but don't throw them
|
|
89
|
+
log(`⚠️ Warning: Failed to close browser: ${error instanceof Error ? error.message : String(error)}`);
|
|
90
|
+
});
|
|
38
91
|
}
|
|
39
|
-
|
|
40
|
-
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Process a page for a specific device size
|
|
95
|
+
* @param page
|
|
96
|
+
* @param url
|
|
97
|
+
* @param baseUrl
|
|
98
|
+
* @param resources
|
|
99
|
+
* @param options
|
|
100
|
+
* @param options.sizeName
|
|
101
|
+
* @param options.width
|
|
102
|
+
* @param options.resolution
|
|
103
|
+
* @param options.timeout
|
|
104
|
+
* @param options.log
|
|
105
|
+
* @param options.progress
|
|
106
|
+
*/
|
|
107
|
+
async function processPageForSize(page, url, baseUrl, resources, options) {
|
|
108
|
+
const { sizeName, width, resolution, log, progress } = options;
|
|
41
109
|
const requestPromises = [];
|
|
110
|
+
// Set up resource detection
|
|
42
111
|
page.on('request', (request) => {
|
|
43
112
|
const requestUrl = request.url();
|
|
44
113
|
const requestUrlObj = new URL(requestUrl);
|
|
45
114
|
// Only handle same-host resources
|
|
46
115
|
if (requestUrlObj.hostname === baseUrl.hostname) {
|
|
47
|
-
log(`📥 Intercepting: ${requestUrl}`);
|
|
116
|
+
log(`📥 [${sizeName}] Intercepting: ${requestUrl}`);
|
|
48
117
|
const localPath = urlToLocalPath(requestUrl);
|
|
49
118
|
const resourceType = getResourceType(requestUrl);
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
119
|
+
// Check if this resource is already tracked
|
|
120
|
+
if (!resources.some((r) => r.url === requestUrl)) {
|
|
121
|
+
resources.push({
|
|
122
|
+
url: requestUrl,
|
|
123
|
+
localPath,
|
|
124
|
+
type: resourceType,
|
|
125
|
+
});
|
|
126
|
+
}
|
|
55
127
|
}
|
|
56
128
|
else {
|
|
57
|
-
log(`🚫 Skipping external resource: ${requestUrl}`);
|
|
129
|
+
log(`🚫 [${sizeName}] Skipping external resource: ${requestUrl}`);
|
|
58
130
|
}
|
|
59
131
|
});
|
|
60
132
|
page.on('response', (response) => {
|
|
@@ -69,47 +141,67 @@ export async function replicate(url, outputDir, options = {}) {
|
|
|
69
141
|
.buffer()
|
|
70
142
|
.then((buffer) => {
|
|
71
143
|
resource.content = buffer;
|
|
72
|
-
log(`✅ Downloaded: ${responseUrl}`);
|
|
144
|
+
log(`✅ [${sizeName}] Downloaded: ${responseUrl}`);
|
|
73
145
|
})
|
|
74
146
|
.catch((error) => {
|
|
75
147
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
76
|
-
log(`❌ Failed to download: ${responseUrl} - ${errorMessage}`);
|
|
77
|
-
// Don't rethrow here as this would break the entire operation
|
|
78
|
-
// Individual resource failures should not stop the whole process
|
|
148
|
+
log(`❌ [${sizeName}] Failed to download: ${responseUrl} - ${errorMessage}`);
|
|
79
149
|
});
|
|
80
150
|
}
|
|
81
151
|
else if (resource) {
|
|
82
|
-
log(`❌ Resource failed (${response.status()}): ${responseUrl}`);
|
|
152
|
+
log(`❌ [${sizeName}] Resource failed (${response.status()}): ${responseUrl}`);
|
|
83
153
|
}
|
|
84
154
|
})();
|
|
85
155
|
requestPromises.push(promise);
|
|
86
156
|
}
|
|
87
157
|
});
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
158
|
+
// Set viewport and navigate using beforePageScan (which includes scrolling)
|
|
159
|
+
progress(`📡 [${sizeName}] Setting viewport and navigating...`);
|
|
160
|
+
await beforePageScan(page, url, {
|
|
161
|
+
name: sizeName,
|
|
162
|
+
width,
|
|
163
|
+
resolution,
|
|
164
|
+
listener: (phase, data) => {
|
|
165
|
+
switch (phase) {
|
|
166
|
+
case 'setViewport': {
|
|
167
|
+
const setViewportData = data;
|
|
168
|
+
log(`📱 [${sizeName}] Viewport set: ${setViewportData.width}px${setViewportData.resolution ? ` @ ${setViewportData.resolution}x` : ''}`);
|
|
169
|
+
break;
|
|
170
|
+
}
|
|
171
|
+
case 'load': {
|
|
172
|
+
const loadData = data;
|
|
173
|
+
log(`📄 [${sizeName}] Page loaded (${loadData.type})`);
|
|
174
|
+
break;
|
|
175
|
+
}
|
|
176
|
+
case 'scroll': {
|
|
177
|
+
const scrollData = data;
|
|
178
|
+
switch (scrollData.message) {
|
|
179
|
+
case 'Start scrolling': {
|
|
180
|
+
log(`📜 [${sizeName}] Starting scroll to trigger lazy loading...`);
|
|
181
|
+
break;
|
|
182
|
+
}
|
|
183
|
+
case 'End of page': {
|
|
184
|
+
log(`📜 [${sizeName}] Scroll completed (${scrollData.scrollY}/${scrollData.scrollHeight}px)`);
|
|
185
|
+
break;
|
|
186
|
+
}
|
|
187
|
+
case 'Scrolling': {
|
|
188
|
+
const progress = Math.round((scrollData.scrollY / scrollData.scrollHeight) * 100);
|
|
189
|
+
log(`📜 [${sizeName}] Scrolling progress: ${progress}% (${scrollData.scrollY}/${scrollData.scrollHeight}px)`);
|
|
190
|
+
break;
|
|
191
|
+
}
|
|
192
|
+
// No default
|
|
193
|
+
}
|
|
194
|
+
break;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
},
|
|
198
|
+
});
|
|
199
|
+
progress(`⏳ [${sizeName}] Waiting for all resources to load...`);
|
|
200
|
+
// Wait for all downloads to complete
|
|
201
|
+
await Promise.all(requestPromises);
|
|
202
|
+
const resourceCount = resources.length;
|
|
203
|
+
const downloadedCount = resources.filter((r) => r.content).length;
|
|
204
|
+
progress(`📄 [${sizeName}] Found ${resourceCount} resources (${downloadedCount} downloaded)`);
|
|
113
205
|
}
|
|
114
206
|
/**
|
|
115
207
|
*
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@d-zero/replicator",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Replicate web pages with all their resources to local directories",
|
|
5
5
|
"author": "D-ZERO",
|
|
6
6
|
"license": "MIT",
|
|
@@ -24,14 +24,16 @@
|
|
|
24
24
|
"clean": "tsc --build --clean"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@d-zero/cli-core": "1.1.
|
|
28
|
-
"@d-zero/
|
|
27
|
+
"@d-zero/cli-core": "1.1.1",
|
|
28
|
+
"@d-zero/puppeteer-page-scan": "4.0.4",
|
|
29
|
+
"@d-zero/puppeteer-scroll": "3.0.5",
|
|
30
|
+
"@d-zero/shared": "0.9.2",
|
|
29
31
|
"ansi-colors": "4.1.3",
|
|
30
32
|
"minimist": "1.2.8",
|
|
31
|
-
"puppeteer": "24.
|
|
33
|
+
"puppeteer": "24.17.1"
|
|
32
34
|
},
|
|
33
35
|
"devDependencies": {
|
|
34
36
|
"@types/minimist": "1.2.5"
|
|
35
37
|
},
|
|
36
|
-
"gitHead": "
|
|
38
|
+
"gitHead": "6fa0fd190c54351762a70039cd8dfcafc51caa0b"
|
|
37
39
|
}
|