smippo 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +116 -0
- package/bin/smippo.js +5 -0
- package/package.json +100 -0
- package/src/cli.js +437 -0
- package/src/crawler.js +408 -0
- package/src/filter.js +155 -0
- package/src/index.js +60 -0
- package/src/interactive.js +391 -0
- package/src/link-extractor.js +212 -0
- package/src/link-rewriter.js +293 -0
- package/src/manifest.js +163 -0
- package/src/page-capture.js +151 -0
- package/src/progress.js +190 -0
- package/src/resource-saver.js +210 -0
- package/src/robots.js +104 -0
- package/src/screenshot.js +185 -0
- package/src/server.js +603 -0
- package/src/utils/logger.js +74 -0
- package/src/utils/path.js +76 -0
- package/src/utils/url.js +295 -0
- package/src/utils/version.js +14 -0
package/src/progress.js
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
// @flow
|
|
2
|
+
import cliProgress from 'cli-progress';
|
|
3
|
+
import chalk from 'chalk';
|
|
4
|
+
|
|
5
|
+
/**
 * Create a styled multi-bar progress display for Smippo.
 *
 * Three rows are rendered through cli-progress: a "Pages" bar, an "Assets"
 * bar and a "Current" status line. When `quiet` is set, a handle with the
 * same method names is returned whose methods all do nothing.
 *
 * @param {{quiet?: boolean, verbose?: boolean}} [options]
 * @returns {{start: Function, stop: Function, updatePage: Function,
 *   updateAsset: Function, addAssets: Function, log: Function,
 *   setStatus: Function}} Progress handle.
 */
export function createProgressDisplay(options = {}) {
  const {quiet = false, verbose = false} = options;

  if (quiet) {
    // Expose the same method set as the real display so callers can invoke
    // addAssets()/setStatus() without checking whether quiet mode is on.
    const noop = () => {};
    return {
      start: noop,
      stop: noop,
      updatePage: noop,
      updateAsset: noop,
      addAssets: noop,
      log: noop,
      setStatus: noop,
    };
  }

  // Width of the drawn bar, in characters.
  const BAR_WIDTH = 30;

  // Create multi-bar container
  const multibar = new cliProgress.MultiBar(
    {
      clearOnComplete: false,
      hideCursor: true,
      // Custom formatter: the row layout is selected by payload.type.
      format: (barOptions, params, payload) => {
        const {type, name} = payload;
        const filled = Math.round(params.progress * BAR_WIDTH);
        const bar = barOptions.barCompleteChar.repeat(filled);
        const empty = barOptions.barIncompleteChar.repeat(BAR_WIDTH - filled);
        const percent = Math.round(params.progress * 100);

        if (type === 'pages') {
          return ` ${chalk.cyan('Pages')} ${chalk.cyan('[')}${chalk.cyan(bar)}${chalk.dim(empty)}${chalk.cyan(']')} ${chalk.bold(params.value)}/${params.total} ${chalk.dim(`(${percent}%)`)}`;
        } else if (type === 'assets') {
          return ` ${chalk.magenta('Assets')} ${chalk.magenta('[')}${chalk.magenta(bar)}${chalk.dim(empty)}${chalk.magenta(']')} ${chalk.bold(params.value)}/${params.total} ${chalk.dim(`(${percent}%)`)}`;
        } else if (type === 'current') {
          // A missing status must not crash rendering.
          const label = name == null ? '' : String(name);
          // Keep the tail of long names: show '...' plus the last 47 chars.
          const truncated =
            label.length > 50 ? '...' + label.slice(-47) : label;
          return ` ${chalk.dim('Current:')} ${chalk.white(truncated)}`;
        }
        return '';
      },
      barCompleteChar: '█',
      barIncompleteChar: '░',
    },
    cliProgress.Presets.shades_classic,
  );

  let pageBar = null;
  let assetBar = null;
  let currentBar = null;
  let totalPages = 0;
  let totalAssets = 0;
  let completedPages = 0;
  let completedAssets = 0;

  return {
    /**
     * Start the progress display
     * @param {number} [estimatedPages=1] Initial total for the pages bar.
     * @param {number} [estimatedAssets=10] Initial total for the assets bar.
     */
    start(estimatedPages = 1, estimatedAssets = 10) {
      totalPages = estimatedPages;
      totalAssets = estimatedAssets;
      completedPages = 0;
      completedAssets = 0;

      console.log('');
      pageBar = multibar.create(totalPages, 0, {type: 'pages'});
      assetBar = multibar.create(totalAssets, 0, {type: 'assets'});
      currentBar = multibar.create(100, 0, {
        type: 'current',
        name: 'Initializing...',
      });
    },

    /**
     * Stop the progress display
     */
    stop() {
      multibar.stop();
      console.log('');
    },

    /**
     * Update page progress: counts one more completed page and shows `url`
     * on the status line.
     * @param {string} url Page just processed.
     * @param {?number} [total] New page total; only applied when it is
     *   larger than the current total.
     */
    updatePage(url, total = null) {
      completedPages++;
      if (total && total > totalPages) {
        totalPages = total;
        if (pageBar) pageBar.setTotal(totalPages);
      }
      if (pageBar) pageBar.update(completedPages);
      if (currentBar) currentBar.update(0, {name: url});
    },

    /**
     * Update asset progress: counts one more completed asset. The status
     * line is only changed in verbose mode.
     * @param {string} url Asset just processed.
     * @param {?number} [total] New asset total; only applied when it is
     *   larger than the current total.
     */
    updateAsset(url, total = null) {
      completedAssets++;
      if (total && total > totalAssets) {
        totalAssets = total;
        if (assetBar) assetBar.setTotal(totalAssets);
      }
      if (assetBar) assetBar.update(completedAssets);
      if (currentBar && verbose) currentBar.update(0, {name: url});
    },

    /**
     * Increment total assets estimate
     * @param {number} count Number of assets to add to the total.
     */
    addAssets(count) {
      totalAssets += count;
      if (assetBar) assetBar.setTotal(totalAssets);
    },

    /**
     * Log a message through the multi-bar (verbose mode only)
     * @param {string} message
     */
    log(message) {
      if (verbose) {
        multibar.log(message + '\n');
      }
    },

    /**
     * Update current status
     * @param {string} status Text for the "Current" line.
     */
    setStatus(status) {
      if (currentBar) currentBar.update(0, {name: status});
    },
  };
}
|
|
140
|
+
|
|
141
|
+
/**
 * Simple single-line progress for non-TTY environments.
 *
 * Running page/asset counters are written to stdout with a leading carriage
 * return. Page updates are printed at most every 500 ms unless `verbose` is
 * set; asset updates are printed only in verbose mode. `quiet` suppresses all
 * output, including `log`.
 *
 * @param {{quiet?: boolean, verbose?: boolean}} [options]
 * @returns {{start: Function, stop: Function, updatePage: Function,
 *   updateAsset: Function, addAssets: Function, log: Function,
 *   setStatus: Function}} Progress handle.
 */
export function createSimpleProgress(options = {}) {
  const {quiet = false, verbose = false} = options;

  let pageCount = 0;
  let assetCount = 0;
  // Timestamp of the last printed line, used to throttle page updates.
  let lastUpdate = Date.now();

  return {
    start: () => {
      if (!quiet) console.log(chalk.cyan('\n Starting capture...\n'));
    },
    stop: () => {},
    updatePage: url => {
      pageCount++;
      if (!quiet && (verbose || Date.now() - lastUpdate > 500)) {
        // Tolerate a missing URL instead of throwing from a progress call.
        const label = url == null ? '' : String(url);
        process.stdout.write(
          `\r ${chalk.cyan('Pages:')} ${pageCount} ${chalk.magenta('Assets:')} ${assetCount} ${chalk.dim('Current:')} ${label.slice(0, 50)}${label.length > 50 ? '...' : ''} `,
        );
        lastUpdate = Date.now();
      }
    },
    updateAsset: () => {
      assetCount++;
      if (!quiet && verbose) {
        process.stdout.write(
          `\r ${chalk.cyan('Pages:')} ${pageCount} ${chalk.magenta('Assets:')} ${assetCount} `,
        );
      }
    },
    addAssets: () => {},
    log: message => {
      // quiet wins over verbose, matching the quiet no-op log of
      // createProgressDisplay.
      if (verbose && !quiet) console.log(message);
    },
    setStatus: () => {},
  };
}
|
|
180
|
+
|
|
181
|
+
/**
 * Pick the progress implementation for the current environment.
 *
 * The simple line-based reporter is used when stdout is not a TTY or when
 * `options.simple` is truthy; otherwise the multi-bar display is used.
 * `options` is forwarded unchanged to whichever factory is selected.
 *
 * @param {object} [options]
 * @returns {object} Progress handle from the selected factory.
 */
export function createProgress(options = {}) {
  const wantsSimple = !process.stdout.isTTY || options.simple;
  const factory = wantsSimple ? createSimpleProgress : createProgressDisplay;
  return factory(options);
}
|
|
package/src/resource-saver.js
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
// @flow
|
|
2
|
+
import fs from 'fs-extra';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import {urlToPath} from './utils/url.js';
|
|
5
|
+
import {sanitizePath, joinPath} from './utils/path.js';
|
|
6
|
+
import mime from 'mime-types';
|
|
7
|
+
|
|
8
|
+
/**
 * Save captured pages, resources, screenshots and PDFs to disk.
 *
 * Keeps a URL -> relative-path map of everything written through saveHtml()
 * and saveResource(), plus running file/byte counters in `stats`.
 */
export class ResourceSaver {
  /**
   * @param {{output?: string, structure?: string}} [options]
   *   `output` is the destination directory (default './site');
   *   `structure` is forwarded to urlToPath (default 'original').
   */
  constructor(options = {}) {
    this.outputDir = options.output || './site';
    this.structure = options.structure || 'original';
    this.savedFiles = new Map(); // URL -> local path
    this.stats = {
      files: 0,
      bytes: 0,
    };
  }

  /**
   * Get the local path for a URL
   * @param {string} url
   * @returns {string} Sanitized path joined onto the output directory.
   */
  getLocalPath(url) {
    const relativePath = urlToPath(url, this.structure);
    return joinPath(this.outputDir, sanitizePath(relativePath));
  }

  /**
   * Save HTML content
   * @param {string} url Page URL; used as the key in the URL map.
   * @param {string} html Markup, written as UTF-8.
   * @param {object} [_options] Currently unused.
   * @returns {Promise<string>} Path the file was written to.
   */
  async saveHtml(url, html, _options = {}) {
    const relativePath = sanitizePath(urlToPath(url, this.structure));
    const localPath = joinPath(this.outputDir, relativePath);

    // Ensure directory exists
    await fs.ensureDir(path.dirname(localPath));

    // Write the file
    await fs.writeFile(localPath, html, 'utf8');

    // Store the relative path (not full path) for link rewriting
    this.savedFiles.set(url, relativePath);
    this.stats.files++;
    this.stats.bytes += Buffer.byteLength(html, 'utf8');

    return localPath;
  }

  /**
   * Save a resource (binary or text)
   * @param {string} url Resource URL; used as the key in the URL map.
   * @param {{body: *, contentType?: string, size?: number}} resource
   * @returns {Promise<string>} Path the file was written to.
   */
  async saveResource(url, resource) {
    let relativePath = urlToPath(url, this.structure);
    relativePath = sanitizePath(relativePath);
    let localPath = joinPath(this.outputDir, relativePath);

    // Fix extension based on content type if needed
    localPath = this._fixExtension(localPath, resource.contentType);
    relativePath = this._fixExtension(relativePath, resource.contentType);

    // Ensure directory exists
    await fs.ensureDir(path.dirname(localPath));

    // Write the file
    await fs.writeFile(localPath, resource.body);

    // Store the relative path (not full path) for link rewriting
    this.savedFiles.set(url, relativePath);
    this.stats.files++;
    // A missing or non-numeric size would turn the byte counter into NaN for
    // the rest of the run, so measure the body in that case.
    this.stats.bytes += Number.isFinite(resource.size)
      ? resource.size
      : Buffer.byteLength(resource.body);

    return localPath;
  }

  /**
   * Save multiple resources
   * @param {Iterable<[string, object]>} resources [url, resource] pairs.
   * @returns {Promise<Array<{url: string, localPath: string, size: *}>>}
   *   One entry per resource that was written successfully.
   */
  async saveResources(resources) {
    const saved = [];

    for (const [url, resource] of resources) {
      try {
        const localPath = await this.saveResource(url, resource);
        saved.push({url, localPath, size: resource.size});
      } catch {
        // Best effort: a resource that fails to save is left out of the
        // result and the remaining resources are still processed.
      }
    }

    return saved;
  }

  /**
   * Save a screenshot next to the page's local path, with a .png extension.
   * @param {string} url
   * @param {*} screenshot Image data handed to fs.writeFile.
   * @returns {Promise<string>} Path the screenshot was written to.
   */
  async saveScreenshot(url, screenshot) {
    const screenshotPath = this._replaceHtmlExtension(
      this.getLocalPath(url),
      'png',
    );

    await fs.ensureDir(path.dirname(screenshotPath));
    await fs.writeFile(screenshotPath, screenshot);

    return screenshotPath;
  }

  /**
   * Save a PDF next to the page's local path, with a .pdf extension.
   * @param {string} url
   * @param {*} pdf PDF data handed to fs.writeFile.
   * @returns {Promise<string>} Path the PDF was written to.
   */
  async savePdf(url, pdf) {
    const pdfPath = this._replaceHtmlExtension(this.getLocalPath(url), 'pdf');

    await fs.ensureDir(path.dirname(pdfPath));
    await fs.writeFile(pdfPath, pdf);

    return pdfPath;
  }

  /**
   * Get URL to local path mapping
   * @returns {Map<string, string>} A copy; mutating it does not affect the saver.
   */
  getUrlMap() {
    return new Map(this.savedFiles);
  }

  /**
   * Get relative path from output directory
   * @param {string} localPath
   * @returns {string}
   */
  getRelativePath(localPath) {
    return path.relative(this.outputDir, localPath);
  }

  /**
   * Swap a trailing .htm/.html for `.${ext}`. When the path has no HTML
   * extension the new extension is appended instead, so the derived file can
   * never share a path with the file it was derived from.
   * @param {string} filePath
   * @param {string} ext Extension without the leading dot.
   * @returns {string}
   */
  _replaceHtmlExtension(filePath, ext) {
    const htmlExtension = /\.html?$/i;
    return htmlExtension.test(filePath)
      ? filePath.replace(htmlExtension, `.${ext}`)
      : `${filePath}.${ext}`;
  }

  /**
   * Fix file extension based on content type
   *
   * The extension implied by `contentType` is appended only when the path has
   * no extension or one that isKnownExtension() does not recognise; a known
   * extension is never replaced, even if it disagrees with the content type.
   * @param {string} filePath
   * @param {string} [contentType] Content-Type value; parameters after ';' are ignored.
   * @returns {string}
   */
  _fixExtension(filePath, contentType) {
    if (!contentType) return filePath;

    const mimeType = contentType.split(';')[0].trim();
    const expectedExt = mime.extension(mimeType);

    if (!expectedExt) return filePath;

    const currentExt = path.extname(filePath).slice(1).toLowerCase();

    // Don't change if extension seems correct
    const equivalentExtensions = {
      jpeg: ['jpg', 'jpeg'],
      htm: ['html', 'htm'],
      js: ['js', 'mjs', 'cjs'],
    };

    const isEquivalent = Object.values(equivalentExtensions).some(
      group => group.includes(currentExt) && group.includes(expectedExt),
    );

    if (isEquivalent || currentExt === expectedExt) {
      return filePath;
    }

    // Only fix if current extension is wrong or missing
    if (!currentExt || !isKnownExtension(currentExt)) {
      return `${filePath}.${expectedExt}`;
    }

    return filePath;
  }
}
|
|
171
|
+
|
|
172
|
+
/**
 * Extensions that are kept as-is even when the response's content type
 * suggests a different one. Built once instead of on every call.
 */
const KNOWN_EXTENSIONS = new Set([
  'html',
  'htm',
  'css',
  'js',
  'mjs',
  // Listed alongside js/mjs so all three JavaScript module extensions are
  // treated the same way.
  'cjs',
  'json',
  'xml',
  'png',
  'jpg',
  'jpeg',
  'gif',
  'webp',
  'svg',
  'ico',
  'bmp',
  'woff',
  'woff2',
  'ttf',
  'eot',
  'otf',
  'mp3',
  'mp4',
  'webm',
  'ogg',
  'wav',
  'avi',
  'pdf',
  'zip',
  'tar',
  'gz',
]);

/**
 * Check if extension is known
 * @param {string} ext Extension without the leading dot; compared case-insensitively.
 * @returns {boolean}
 */
function isKnownExtension(ext) {
  return KNOWN_EXTENSIONS.has(String(ext).toLowerCase());
}
|
package/src/robots.js
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
// @flow
|
|
2
|
+
import robotsParser from 'robots-parser';
|
|
3
|
+
|
|
4
|
+
/**
 * robots.txt handler
 *
 * Fetches robots.txt through a caller-supplied fetch function, parses it with
 * robots-parser and caches one parser (or null when there is no content) per
 * origin.
 */
export class RobotsHandler {
  /**
   * @param {{ignoreRobots?: boolean, userAgent?: string}} [options]
   *   `ignoreRobots` disables all checks; `userAgent` is the agent name
   *   matched against robots.txt rules (default 'Smippo/0.0.1').
   */
  constructor(options = {}) {
    this.enabled = !options.ignoreRobots;
    this.userAgent = options.userAgent || 'Smippo/0.0.1';
    this.cache = new Map();
  }

  /**
   * Build the robots.txt URL for the origin of `url`.
   * Throws if `url` cannot be parsed; every caller wraps this in try/catch.
   * @param {string} url
   * @returns {string}
   */
  _robotsUrlFor(url) {
    return `${new URL(url).origin}/robots.txt`;
  }

  /**
   * Check if a URL is allowed by robots.txt
   *
   * Always resolves to a boolean. Access is allowed when checks are disabled,
   * when no robots.txt could be obtained, or when the parser gives no verdict
   * for the URL; only an explicit `false` from the parser blocks it.
   * @param {string} url
   * @param {(robotsUrl: string) => Promise<string>|string} fetchFn
   *   Returns the robots.txt body, or a falsy value when there is none.
   * @returns {Promise<boolean>}
   */
  async isAllowed(url, fetchFn) {
    if (!this.enabled) return true;

    try {
      const robots = await this.getRobots(url, fetchFn);
      if (!robots) return true;

      // The parser may return undefined when it has no answer for this URL;
      // returned raw, that falsy value would read as "disallowed".
      return robots.isAllowed(url, this.userAgent) !== false;
    } catch {
      // If we can't fetch/parse robots.txt, allow access
      return true;
    }
  }

  /**
   * Get robots.txt parser for a URL
   * @param {string} url Any URL on the origin of interest.
   * @param {(robotsUrl: string) => Promise<string>|string} fetchFn
   * @returns {Promise<?object>} The cached or newly parsed robots object, or
   *   null when there is no content or fetching/parsing failed.
   */
  async getRobots(url, fetchFn) {
    try {
      const robotsUrl = this._robotsUrlFor(url);

      // Check cache
      if (this.cache.has(robotsUrl)) {
        return this.cache.get(robotsUrl);
      }

      // Fetch robots.txt
      const robotsContent = await fetchFn(robotsUrl);

      if (!robotsContent) {
        // Remember the miss so this origin is not fetched again.
        this.cache.set(robotsUrl, null);
        return null;
      }

      // Parse robots.txt
      const robots = robotsParser(robotsUrl, robotsContent);
      this.cache.set(robotsUrl, robots);

      return robots;
    } catch {
      // Failures are not cached, so a later call will try again.
      return null;
    }
  }

  /**
   * Get crawl delay for a domain
   *
   * Reads only the cache; returns 0 until getRobots()/isAllowed() has loaded
   * robots.txt for that origin.
   * @param {string} url
   * @returns {number} The parser's crawl delay for this user agent, or 0.
   */
  getCrawlDelay(url) {
    if (!this.enabled) return 0;

    try {
      const robots = this.cache.get(this._robotsUrlFor(url));

      if (!robots) return 0;

      return robots.getCrawlDelay(this.userAgent) || 0;
    } catch {
      return 0;
    }
  }

  /**
   * Get sitemap URLs from robots.txt
   *
   * Reads only the cache, and is not affected by `ignoreRobots`.
   * @param {string} url
   * @returns {string[]} Sitemap URLs, or an empty array when none are known.
   */
  getSitemaps(url) {
    try {
      const robots = this.cache.get(this._robotsUrlFor(url));

      if (!robots) return [];

      return robots.getSitemaps() || [];
    } catch {
      return [];
    }
  }

  /**
   * Clear the cache
   */
  clearCache() {
    this.cache.clear();
  }
}
|
|
package/src/screenshot.js
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
// @flow
|
|
2
|
+
import {chromium} from 'playwright';
|
|
3
|
+
import fs from 'fs-extra';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import chalk from 'chalk';
|
|
6
|
+
|
|
7
|
+
/**
 * Capture a screenshot of a URL
 * Based on Playwright Screenshots API: https://playwright.dev/docs/screenshots
 *
 * Launches Chromium, navigates to the URL, writes the image to disk and
 * closes the browser again, even when a step fails.
 *
 * @param {string} url Target URL. 'https://' is prepended when it does not
 *   start with http:// or https:// (matched case-insensitively).
 * @param {object} [options]
 * @param {string} [options.output] Destination file; defaults to
 *   `<hostname>-<timestamp>.<ext>` in the current directory.
 * @param {boolean} [options.fullPage=false]
 * @param {string} [options.format='png'] Passed to Playwright as `type`.
 * @param {number} [options.quality] Applied only when format is 'jpeg'.
 * @param {{width: number, height: number}} [options.viewport]
 * @param {string} [options.device] Key into Playwright's `devices` map.
 * @param {string} [options.selector] When set, only the matching element is captured.
 * @param {string} [options.wait='networkidle'] Passed to goto() as `waitUntil`.
 * @param {number} [options.waitTime=0] Extra delay in ms after navigation.
 * @param {number} [options.timeout=30000] Navigation timeout in ms.
 * @param {string} [options.userAgent]
 * @param {boolean} [options.darkMode=false]
 * @param {string} [options.scale='device']
 * @param {boolean} [options.omitBackground=false]
 * @param {boolean} [options.verbose=false]
 * @param {boolean} [options.quiet=false]
 * @returns {Promise<{path: string, size: number, url: string}>} Output path,
 *   file size in bytes and the normalized URL.
 */
export async function captureScreenshot(url, options = {}) {
  const {
    output,
    fullPage = false,
    format = 'png',
    quality,
    viewport = {width: 1920, height: 1080},
    device,
    selector,
    wait = 'networkidle',
    waitTime = 0,
    timeout = 30000,
    userAgent,
    darkMode = false,
    scale = 'device',
    omitBackground = false,
    verbose = false,
    quiet = false,
  } = options;

  if (typeof url !== 'string') {
    throw new TypeError('captureScreenshot: url must be a string');
  }

  // Normalize URL. The scheme test ignores case so that "HTTPS://host" is
  // not turned into "https://HTTPS://host".
  const targetUrl = /^https?:\/\//i.test(url) ? url : 'https://' + url;

  // Determine output path
  let outputPath = output;
  if (!outputPath) {
    const urlObj = new URL(targetUrl);
    // ISO timestamp with ':' and '.' replaced by '-', cut to seconds precision.
    const timestamp = new Date()
      .toISOString()
      .replace(/[:.]/g, '-')
      .slice(0, 19);
    const ext = format === 'jpeg' ? 'jpg' : 'png';
    outputPath = `${urlObj.hostname}-${timestamp}.${ext}`;
  }

  // Ensure output directory exists
  const outputDir = path.dirname(outputPath);
  if (outputDir && outputDir !== '.') {
    await fs.ensureDir(outputDir);
  }

  if (!quiet) {
    console.log('');
    console.log(chalk.cyan(' 📸 Smippo Screenshot'));
    console.log(chalk.dim(` URL: ${targetUrl}`));
    console.log('');
  }

  const browser = await chromium.launch();

  try {
    // Set up context options
    const contextOptions = {
      viewport,
      userAgent: userAgent || 'Smippo/0.0.1 Screenshot',
    };

    // Device emulation
    if (device) {
      const {devices} = await import('playwright');
      if (devices[device]) {
        // Fields of the device descriptor overwrite matching keys set above.
        Object.assign(contextOptions, devices[device]);
        if (!quiet) console.log(chalk.dim(` Device: ${device}`));
      } else {
        console.warn(
          chalk.yellow(
            ` Warning: Unknown device "${device}", using default viewport`,
          ),
        );
      }
    }

    // Dark mode
    if (darkMode) {
      contextOptions.colorScheme = 'dark';
    }

    const context = await browser.newContext(contextOptions);
    const page = await context.newPage();

    // Navigate
    if (verbose) console.log(chalk.dim(` Navigating to ${targetUrl}...`));

    await page.goto(targetUrl, {
      waitUntil: wait,
      timeout,
    });

    // Additional wait time
    if (waitTime > 0) {
      if (verbose) console.log(chalk.dim(` Waiting ${waitTime}ms...`));
      await page.waitForTimeout(waitTime);
    }

    // Screenshot options
    const screenshotOptions = {
      path: outputPath,
      type: format,
      fullPage,
      scale,
      omitBackground,
    };

    // JPEG quality (only for jpeg format)
    if (format === 'jpeg' && quality) {
      screenshotOptions.quality = quality;
    }

    // Take screenshot
    if (selector) {
      // Element screenshot
      if (verbose) console.log(chalk.dim(` Capturing element: ${selector}`));
      const element = page.locator(selector);
      await element.screenshot(screenshotOptions);
    } else {
      // Page screenshot
      if (verbose) {
        console.log(
          chalk.dim(` Capturing ${fullPage ? 'full page' : 'viewport'}...`),
        );
      }
      await page.screenshot(screenshotOptions);
    }

    // Get file size
    const stats = await fs.stat(outputPath);
    const fileSize = formatFileSize(stats.size);

    if (!quiet) {
      console.log(chalk.green(` ✓ Screenshot saved`));
      console.log(chalk.dim(` File: ${outputPath}`));
      console.log(chalk.dim(` Size: ${fileSize}`));
      if (fullPage) {
        // Report the document's scroll dimensions as measured in the page.
        const dimensions = await page.evaluate(() => ({
          // eslint-disable-next-line no-undef
          width: document.documentElement.scrollWidth,
          // eslint-disable-next-line no-undef
          height: document.documentElement.scrollHeight,
        }));
        console.log(
          chalk.dim(` Dimensions: ${dimensions.width}x${dimensions.height}px`),
        );
      }
      console.log('');
    }

    return {
      path: outputPath,
      size: stats.size,
      url: targetUrl,
    };
  } finally {
    // Always release the browser, including when navigation or capture throws.
    await browser.close();
  }
}
|
|
168
|
+
|
|
169
|
+
/**
 * Render a byte count as a short human-readable string.
 *
 * Values below 1024 are shown as whole bytes ("512 B"); larger values are
 * shown with one decimal place in KB (below 1024 * 1024) or MB.
 *
 * @param {number} bytes
 * @returns {string}
 */
function formatFileSize(bytes) {
  const KIB = 1024;
  const MIB = KIB * KIB;

  if (bytes < KIB) {
    return `${bytes} B`;
  }

  const [divisor, unit] = bytes < MIB ? [KIB, 'KB'] : [MIB, 'MB'];
  return `${(bytes / divisor).toFixed(1)} ${unit}`;
}
|
|
177
|
+
|
|
178
|
+
/**
 * Parse viewport string (e.g., "1920x1080")
 *
 * The separator may be a lower- or upper-case "x". A dimension that is
 * missing, not a number, zero or negative falls back to its default
 * (1920 wide, 1080 high), so the result always has positive dimensions.
 *
 * @param {string} [viewportStr]
 * @returns {{width: number, height: number}}
 */
export function parseViewport(viewportStr) {
  const DEFAULT_WIDTH = 1920;
  const DEFAULT_HEIGHT = 1080;

  if (!viewportStr) return {width: DEFAULT_WIDTH, height: DEFAULT_HEIGHT};

  // Split case-insensitively so "1280X720" is not silently replaced by the
  // default size.
  const [width, height] = viewportStr.split(/x/i).map(Number);

  // `> 0` is false for NaN, 0 and negatives, none of which is a usable size.
  return {
    width: width > 0 ? width : DEFAULT_WIDTH,
    height: height > 0 ? height : DEFAULT_HEIGHT,
  };
}
|