smippo 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Smippo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,116 @@
1
+ <p align="center">
2
+ <img src="assets/logo.svg" alt="Smippo Logo" width="120" height="100">
3
+ </p>
4
+
5
+ <h1 align="center">SMIPPO</h1>
6
+
7
+ <p align="center">
8
+ <strong>S.M.I.P.P.O.</strong> = Structured Mirroring of Internet Pages and Public Objects
9
+ </p>
10
+
11
+ <p align="center">
12
+ Modern website copier — consumes everything fast. Hippos don't nibble. They vacuum.
13
+ </p>
14
+
15
+ <p align="center">
16
+ <a href="https://smippo.com"><img src="https://img.shields.io/badge/docs-smippo.com-blue" alt="Documentation"></a>
17
+ <a href="https://www.npmjs.com/package/smippo"><img src="https://img.shields.io/npm/v/smippo?color=cb0000&label=npm" alt="npm version"></a>
18
+ <a href="https://www.npmjs.com/package/smippo"><img src="https://img.shields.io/npm/dm/smippo?color=cb0000" alt="npm downloads"></a>
19
+ <a href="./LICENSE"><img src="https://img.shields.io/npm/l/smippo?color=blue" alt="license"></a>
20
+ <a href="https://nodejs.org"><img src="https://img.shields.io/node/v/smippo?color=339933" alt="node"></a>
21
+ <a href="https://github.com/pouyanafisi/smippo/pulls"><img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg" alt="PRs Welcome"></a>
22
+ </p>
23
+
24
+ ---
25
+
26
+ **S.M.I.P.P.O.** (Structured Mirroring of Internet Pages and Public Objects) is a command-line website copier and scraper that captures websites exactly as they appear in your browser. Create complete offline mirrors with all assets, styles, and dynamic content preserved. Perfect for website duplication, archiving, and offline browsing.
27
+
28
+ 📚 **[View complete documentation →](https://smippo.com)**
29
+
30
+ ## Quick Start
31
+
32
+ Install globally:
33
+
34
+ ```bash
35
+ npm install -g smippo
36
+ ```
37
+
38
+ Capture a single page:
39
+
40
+ ```bash
41
+ smippo https://example.com
42
+ ```
43
+
44
+ Mirror a site (3 levels deep):
45
+
46
+ ```bash
47
+ smippo https://example.com --depth 3
48
+ ```
49
+
50
+ Or use without installing:
51
+
52
+ ```bash
53
+ npx smippo https://example.com
54
+ ```
55
+
56
+ ## Commands
57
+
58
+ Smippo provides several commands for different use cases:
59
+
60
+ - **`smippo <url>`** — Capture and mirror websites with full fidelity
61
+ - **`smippo capture <url>`** — Take screenshots of web pages
62
+ - **`smippo serve <directory>`** — Serve captured sites locally
63
+ - **`smippo continue`** — Resume an interrupted capture
64
+ - **`smippo update`** — Update an existing mirror
65
+
66
+ Run `smippo` with no arguments to start the interactive guided mode.
67
+
68
+ ## Features
69
+
70
+ - **🚀 Vacuum Architecture** — Parallel workers consume sites rapidly
71
+ - **📸 Complete Fidelity** — Captures pages exactly as rendered, including CSS-in-JS, dynamic content, and lazy-loaded images
72
+ - **🎯 Smart Filtering** — Filter by URL patterns, MIME types, and file sizes. Respects robots.txt
73
+ - **🌐 Built-in Server** — Serve captured sites locally with directory browsing
74
+ - **📊 HAR Files** — Generates HTTP Archive files for debugging and replay
75
+ - **💻 Programmatic API** — Use Smippo in your Node.js applications
76
+
77
+ ## Documentation
78
+
79
+ For complete documentation, guides, and API reference, visit **[smippo.com](https://smippo.com)**:
80
+
81
+ - **[Installation Guide](https://smippo.com/getting-started/installation)** — Detailed installation instructions
82
+ - **[Commands Reference](https://smippo.com/commands)** — All available commands and options
83
+ - **[Configuration](https://smippo.com/configuration)** — Filtering, scope control, performance tuning
84
+ - **[Guides](https://smippo.com/guides)** — Output structure, link rewriting, troubleshooting
85
+ - **[Programmatic API](https://smippo.com/api/programmatic)** — Use Smippo in your Node.js code
86
+ - **[Examples](https://smippo.com/getting-started/examples)** — Real-world use cases
87
+
88
+ ## Requirements
89
+
90
+ - Node.js 18 or later
91
+ - Chromium (automatically downloaded on first install)
92
+
93
+ ## Contributing
94
+
95
+ Contributions are welcome! Whether it's bug reports, feature requests, or pull requests — all contributions help make Smippo better.
96
+
97
+ Please read our [Contributing Guide](CONTRIBUTING.md) for details on development setup, code style guidelines, and the pull request process.
98
+
99
+ Quick start:
100
+
101
+ ```bash
102
+ git clone https://github.com/pouyanafisi/smippo.git
103
+ cd smippo
104
+ npm install
105
+ npm test
106
+ ```
107
+
108
+ ## License
109
+
110
+ [MIT](./LICENSE) — feel free to use this in your own projects.
111
+
112
+ ## Acknowledgments
113
+
114
+ - Built with [Playwright](https://playwright.dev/) for reliable browser automation
115
+ - CLI powered by [Commander.js](https://github.com/tj/commander.js) and [@clack/prompts](https://github.com/natemoo-re/clack)
116
+ - Inspired by classic website copiers like [HTTrack](https://www.httrack.com/)
package/bin/smippo.js ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env node
2
+
3
+ import {run} from '../src/cli.js';
4
+
5
+ run();
package/package.json ADDED
@@ -0,0 +1,100 @@
1
+ {
2
+ "name": "smippo",
3
+ "version": "0.0.1",
4
+ "description": "S.M.I.P.P.O. — Structured Mirroring of Internet Pages and Public Objects. Modern website copier that captures sites exactly as they appear in your browser.",
5
+ "main": "src/index.js",
6
+ "bin": {
7
+ "smippo": "bin/smippo.js"
8
+ },
9
+ "type": "module",
10
+ "scripts": {
11
+ "start": "node bin/smippo.js",
12
+ "dev": "node bin/smippo.js",
13
+ "test": "mocha",
14
+ "test:unit": "mocha 'test/**/*.test.js'",
15
+ "test:integration": "mocha 'test/integration/**/*.test.js'",
16
+ "test:watch": "mocha --watch",
17
+ "lint": "eslint .",
18
+ "lint:fix": "eslint . --fix",
19
+ "format": "prettier --write .",
20
+ "format:check": "prettier --check .",
21
+ "typecheck": "tsc --noEmit",
22
+ "prepare": "husky install",
23
+ "postinstall": "npx playwright install chromium",
24
+ "prepublishOnly": "npm run lint && npm run format:check && npm test",
25
+ "verdaccio": "verdaccio --config verdaccio.yml",
26
+ "verdaccio:publish": "node verdaccioPublish.js",
27
+ "publish:github": "npm publish --registry=https://npm.pkg.github.com"
28
+ },
29
+ "keywords": [
30
+ "website",
31
+ "copier",
32
+ "mirror",
33
+ "offline",
34
+ "browser",
35
+ "playwright",
36
+ "crawler",
37
+ "scraper",
38
+ "archiver",
39
+ "smippo",
40
+ "website-mirror",
41
+ "site-copier",
42
+ "web-scraper",
43
+ "offline-browsing"
44
+ ],
45
+ "author": "Pouyan Afisi",
46
+ "license": "MIT",
47
+ "repository": {
48
+ "type": "git",
49
+ "url": "git+https://github.com/pouyanafisi/smippo.git"
50
+ },
51
+ "bugs": {
52
+ "url": "https://github.com/pouyanafisi/smippo/issues"
53
+ },
54
+ "homepage": "https://smippo.dev",
55
+ "publishConfig": {
56
+ "registry": "https://registry.npmjs.org"
57
+ },
58
+ "engines": {
59
+ "node": ">=18.0.0"
60
+ },
61
+ "files": [
62
+ "bin/",
63
+ "src/",
64
+ "LICENSE",
65
+ "README.md"
66
+ ],
67
+ "lint-staged": {
68
+ "*.js": [
69
+ "eslint --fix",
70
+ "prettier --write"
71
+ ],
72
+ "*.{json,md}": [
73
+ "prettier --write"
74
+ ]
75
+ },
76
+ "dependencies": {
77
+ "@clack/prompts": "^0.11.0",
78
+ "chalk": "^5.3.0",
79
+ "cheerio": "^1.0.0-rc.12",
80
+ "cli-progress": "^3.12.0",
81
+ "commander": "^12.0.0",
82
+ "figlet": "^1.9.4",
83
+ "fs-extra": "^11.2.0",
84
+ "glob": "^10.3.10",
85
+ "gradient-string": "^3.0.0",
86
+ "mime-types": "^2.1.35",
87
+ "minimatch": "^10.1.1",
88
+ "ora": "^8.0.1",
89
+ "p-queue": "^8.0.1",
90
+ "playwright": "^1.41.0",
91
+ "robots-parser": "^3.0.1"
92
+ },
93
+ "devDependencies": {
94
+ "eslint": "^8.57.0",
95
+ "husky": "^9.0.0",
96
+ "lint-staged": "^15.2.0",
97
+ "mocha": "^10.4.0",
98
+ "prettier": "^3.2.5"
99
+ }
100
+ }
package/src/cli.js ADDED
@@ -0,0 +1,437 @@
1
+ // @flow
2
+ import {Command} from 'commander';
3
+ import chalk from 'chalk';
4
+ import ora from 'ora';
5
+ import {Crawler} from './crawler.js';
6
+ import {readManifest, manifestExists} from './manifest.js';
7
+ import {version} from './utils/version.js';
8
+ import {
9
+ showHelp,
10
+ runInteractiveCapture,
11
+ shouldRunInteractive,
12
+ } from './interactive.js';
13
+
14
+ const program = new Command();
15
+
16
/**
 * CLI entry point.
 *
 * Order of precedence:
 *  1. If any argument is `help`, `--help` or `-h`, print the custom help and
 *     return.
 *  2. If `shouldRunInteractive(args)` says so, run the guided prompt flow and
 *     feed its answers into `capture()`.
 *  3. Otherwise register the main command and the `continue`, `update`,
 *     `serve`, `capture` and `help` sub-commands on the shared Commander
 *     `program` and parse `process.argv`.
 *
 * Failures are reported on stderr and terminate the process with exit code 1.
 *
 * @returns {void}
 */
export function run() {
  // Check for help command first
  const args = process.argv.slice(2);
  if (args.includes('help') || args.includes('--help') || args.includes('-h')) {
    showHelp();
    return;
  }

  // Check if we should run interactive mode
  if (shouldRunInteractive(args)) {
    runInteractiveCapture()
      .then(options => {
        return capture(options.url, {
          output: options.output,
          depth: options.depth,
          scope: options.scope,
          externalAssets: options.externalAssets,
          static: options.static,
          screenshot: options.screenshot,
          workers: options.workers,
        });
      })
      .catch(error => {
        console.error(chalk.red(`\n✗ Error: ${error.message}`));
        process.exit(1);
      });
    return;
  }

  program
    .name('smippo')
    .description(
      'Modern website copier powered by Playwright - capture JS-rendered pages for offline viewing',
    )
    .version(version);

  // Main capture command
  program
    .argument('[url]', 'URL to capture')
    .option('-o, --output <dir>', 'Output directory', './site')
    .option('-d, --depth <n>', 'Recursion depth (0 = single page)', '0')
    .option('--no-crawl', 'Disable link following (same as -d 0)')
    .option('--dry-run', 'Show what would be captured without downloading')

    // Scope options
    .option(
      '-s, --scope <type>',
      'Link scope: subdomain|domain|tld|all',
      'domain',
    )
    .option('--stay-in-dir', 'Only follow links in same directory or subdirs')
    .option('--external-assets', 'Capture assets from external domains')

    // Filter options
    .option('-I, --include <glob...>', 'Include URLs matching pattern')
    .option('-E, --exclude <glob...>', 'Exclude URLs matching pattern')
    .option('--mime-include <type...>', 'Include MIME types')
    .option('--mime-exclude <type...>', 'Exclude MIME types')
    .option('--max-size <size>', 'Maximum file size (e.g., 10MB)')
    .option('--min-size <size>', 'Minimum file size (e.g., 1KB)')

    // Browser options
    .option(
      '--wait <strategy>',
      'Wait strategy: networkidle|load|domcontentloaded',
      'networkidle',
    )
    .option('--wait-time <ms>', 'Additional wait time after network idle', '0')
    .option('--timeout <ms>', 'Page load timeout', '30000')
    .option('--user-agent <string>', 'Custom user agent')
    .option('--viewport <WxH>', 'Viewport size', '1920x1080')
    .option('--device <name>', 'Emulate device (e.g., "iPhone 13")')

    // Network options
    .option('--proxy <url>', 'Proxy server URL')
    .option('--cookies <file>', 'Load cookies from JSON file')
    .option('--headers <json>', 'Custom headers as JSON')
    .option('--capture-auth', 'Interactive authentication capture')

    // Output options
    .option(
      '--structure <type>',
      'Output structure: original|flat|domain',
      'original',
    )
    .option('--har', 'Generate HAR file', true)
    .option('--no-har', 'Disable HAR file generation')
    .option('--screenshot', 'Take screenshot of each page')
    .option('--pdf', 'Save PDF of each page')
    .option('--static', 'Remove scripts for static offline viewing')
    .option('--inline-css', 'Inline CSS into HTML for single-file output')

    // Performance options
    // `--workers` deliberately has no Commander default: capture() resolves
    // `options.workers || options.concurrency`, so a default here would always
    // win and silently discard a user-supplied `-c/--concurrency`. The
    // effective default of 8 is supplied by `--concurrency` below.
    .option('-w, --workers <n>', 'Parallel workers/pages (default: 8)')
    .option('-c, --concurrency <n>', 'Alias for --workers', '8')
    .option('--max-pages <n>', 'Maximum pages to capture')
    .option('--max-time <seconds>', 'Maximum total time')
    .option('--rate-limit <ms>', 'Delay between requests')

    // Robots options
    .option('--ignore-robots', 'Ignore robots.txt')
    .option('--respect-robots', 'Respect robots.txt', true)

    // Cache options
    .option('--no-cache', "Don't use cache")

    // Logging options
    .option('-v, --verbose', 'Verbose output')
    .option('-q, --quiet', 'Minimal output')
    .option('--log-file <path>', 'Write logs to file')
    .option('--debug', 'Debug mode with visible browser')

    // Interaction options
    .option('--no-interaction', 'Non-interactive mode (for CI/scripts)')
    .option('-y, --yes', 'Skip prompts, use defaults')

    .action(async (url, options) => {
      if (!url) {
        showHelp();
        return;
      }

      try {
        await capture(url, options);
      } catch (error) {
        console.error(chalk.red(`\n✗ Error: ${error.message}`));
        if (options.verbose || options.debug) {
          console.error(error.stack);
        }
        process.exit(1);
      }
    });

  // Continue command
  program
    .command('continue')
    .description('Resume an interrupted capture')
    .option('-o, --output <dir>', 'Output directory', './site')
    .option('-v, --verbose', 'Verbose output')
    .action(async options => {
      try {
        await continueCapture(options);
      } catch (error) {
        console.error(chalk.red(`\n✗ Error: ${error.message}`));
        process.exit(1);
      }
    });

  // Update command
  program
    .command('update')
    .description('Update an existing mirror')
    .option('-o, --output <dir>', 'Output directory', './site')
    .option('-v, --verbose', 'Verbose output')
    .action(async options => {
      try {
        await updateCapture(options);
      } catch (error) {
        console.error(chalk.red(`\n✗ Error: ${error.message}`));
        process.exit(1);
      }
    });

  // Serve command
  program
    .command('serve [directory]')
    .description('Serve a captured site locally')
    .option(
      '-p, --port <port>',
      'Port to serve on (auto-finds available)',
      '8080',
    )
    .option('-H, --host <host>', 'Host to bind to', '127.0.0.1')
    .option('-o, --open', 'Open browser automatically')
    .option('--no-cors', 'Disable CORS headers')
    .option('-v, --verbose', 'Show all requests')
    .option('-q, --quiet', 'Minimal output')
    .action(async (directory, options) => {
      // Same error reporting as the other commands, so a failed import or a
      // server start-up error does not surface as an unhandled rejection.
      try {
        const {serve} = await import('./server.js');
        await serve({
          directory: directory || './site',
          port: options.port,
          host: options.host,
          open: options.open,
          cors: options.cors,
          verbose: options.verbose,
          quiet: options.quiet,
        });
      } catch (error) {
        console.error(chalk.red(`\n✗ Error: ${error.message}`));
        if (options.verbose) {
          console.error(error.stack);
        }
        process.exit(1);
      }
    });

  // Screenshot capture command
  program
    .command('capture <url>')
    .description('Take a screenshot of a URL')
    .option(
      '-O, --out <file>',
      'Output file path (auto-generated if not specified)',
    )
    .option('-f, --full-page', 'Capture full scrollable page')
    .option('--format <type>', 'Image format: png|jpeg', 'png')
    .option('--quality <n>', 'JPEG quality (1-100)', '80')
    .option('--viewport <WxH>', 'Viewport size', '1920x1080')
    .option('--device <name>', 'Emulate device (e.g., "iPhone 13", "iPad Pro")')
    .option('--selector <css>', 'Capture specific element by CSS selector')
    .option(
      '--wait <strategy>',
      'Wait strategy: networkidle|load|domcontentloaded',
      'networkidle',
    )
    .option('--wait-time <ms>', 'Additional wait time after load', '0')
    .option('--timeout <ms>', 'Page load timeout', '30000')
    .option('--dark-mode', 'Use dark color scheme')
    .option('--no-background', 'Transparent background (PNG only)')
    .option('-v, --verbose', 'Verbose output')
    .option('-q, --quiet', 'Minimal output')
    .action(async (url, options) => {
      try {
        const {captureScreenshot, parseViewport} =
          await import('./screenshot.js');
        await captureScreenshot(url, {
          output: options.out,
          fullPage: options.fullPage,
          format: options.format,
          quality: options.quality ? parseInt(options.quality, 10) : undefined,
          viewport: parseViewport(options.viewport),
          device: options.device,
          selector: options.selector,
          wait: options.wait,
          waitTime: parseInt(options.waitTime, 10),
          timeout: parseInt(options.timeout, 10),
          darkMode: options.darkMode,
          omitBackground: !options.background,
          verbose: options.verbose,
          quiet: options.quiet,
        });
      } catch (error) {
        console.error(chalk.red(`\n✗ Error: ${error.message}`));
        if (options.verbose) {
          console.error(error.stack);
        }
        process.exit(1);
      }
    });

  // Help command
  program
    .command('help')
    .description('Show detailed help')
    .action(() => {
      showHelp();
    });

  // All action handlers are async, so parse with parseAsync() and report any
  // rejection that escapes a handler instead of leaving it unhandled.
  program.parseAsync().catch(error => {
    console.error(chalk.red(`\n✗ Error: ${error.message}`));
    process.exit(1);
  });
}
270
+
271
/**
 * Run a capture: translate CLI-style options into Crawler options, wire the
 * crawler's progress events to the spinner, and print a summary when done.
 *
 * The options object may come from Commander (string values with defaults),
 * from the interactive flow (a small subset of keys), or from a saved manifest
 * merged by `continueCapture` / `updateCapture`, so missing or already-parsed
 * values are tolerated.
 *
 * @param {string} url - Root URL to capture.
 * @param {object} options - CLI-style options (see the option list in `run`).
 * @returns {Promise<void>} Resolves after the summary has been printed.
 * @throws {Error} If `--headers` is not valid JSON, or if the crawl rejects.
 */
async function capture(url, options) {
  // parseInt yields NaN for undefined/garbage; fall back to the CLI default
  // instead of handing NaN to the crawler.
  const toInt = (value, fallback) => {
    const parsed = Number.parseInt(value, 10);
    return Number.isNaN(parsed) ? fallback : parsed;
  };

  // Headers arrive as a JSON string from the CLI, but may already be an
  // object when options were re-used from elsewhere.
  let headers = {};
  if (options.headers) {
    if (typeof options.headers === 'string') {
      try {
        headers = JSON.parse(options.headers);
      } catch (error) {
        throw new Error(`Invalid JSON for --headers: ${error.message}`);
      }
    } else if (typeof options.headers === 'object') {
      headers = options.headers;
    }
  }

  const spinner = ora({
    text: 'Initializing browser...',
    isSilent: options.quiet,
  }).start();

  const crawler = new Crawler({
    url,
    output: options.output,
    // `--no-crawl` is documented as "same as -d 0"; Commander exposes it as
    // `crawl === false`.
    depth: options.crawl === false ? 0 : toInt(options.depth, 0),
    scope: options.scope,
    stayInDir: options.stayInDir,
    externalAssets: options.externalAssets,
    include: options.include || [],
    exclude: options.exclude || [],
    mimeInclude: options.mimeInclude || [],
    mimeExclude: options.mimeExclude || [],
    maxSize: parseSize(options.maxSize),
    minSize: parseSize(options.minSize),
    wait: options.wait,
    waitTime: toInt(options.waitTime, 0),
    timeout: toInt(options.timeout, 30000),
    userAgent: options.userAgent,
    viewport: parseViewport(options.viewport),
    device: options.device,
    proxy: options.proxy,
    cookies: options.cookies,
    headers,
    captureAuth: options.captureAuth,
    structure: options.structure,
    har: options.har,
    screenshot: options.screenshot,
    pdf: options.pdf,
    noJs: options.static,
    inlineCss: options.inlineCss,
    concurrency: toInt(options.workers || options.concurrency, 8),
    maxPages: options.maxPages ? parseInt(options.maxPages, 10) : undefined,
    maxTime: options.maxTime ? parseInt(options.maxTime, 10) * 1000 : undefined,
    rateLimit: options.rateLimit ? parseInt(options.rateLimit, 10) : 0,
    ignoreRobots: options.ignoreRobots,
    // continueCapture/updateCapture pass `useCache: true` explicitly; the CLI
    // flag `--no-cache` is exposed by Commander as `cache`.
    useCache: options.useCache ?? options.cache,
    // Set by updateCapture.
    // NOTE(review): confirm the Crawler reads an `update` option.
    update: options.update,
    verbose: options.verbose,
    quiet: options.quiet,
    logFile: options.logFile,
    debug: options.debug,
    dryRun: options.dryRun,
    spinner,
  });

  crawler.on('page:start', ({url}) => {
    spinner.text = `Capturing: ${truncateUrl(url, 60)}`;
  });

  crawler.on('page:complete', ({url, size}) => {
    if (options.verbose) {
      // succeed() stops the spinner, so restart it for the next page.
      spinner.succeed(
        `Captured: ${truncateUrl(url, 50)} (${formatSize(size)})`,
      );
      spinner.start();
    }
  });

  crawler.on('asset:save', ({url, size}) => {
    if (options.verbose) {
      spinner.text = `Asset: ${truncateUrl(url, 60)} (${formatSize(size)})`;
    }
  });

  crawler.on('error', ({url, error}) => {
    if (!options.quiet) {
      spinner.warn(`Failed: ${truncateUrl(url, 50)} - ${error.message}`);
      spinner.start();
    }
  });

  let result;
  try {
    result = await crawler.start();
  } catch (error) {
    // Stop the animation so the caller's error message is not overwritten.
    spinner.stop();
    throw error;
  }

  spinner.succeed(chalk.green(`Capture complete!`));
  console.log('');
  console.log(chalk.cyan('  Summary:'));
  console.log(`    Pages captured: ${result.stats.pagesCapt}`);
  console.log(`    Assets saved:   ${result.stats.assetsCapt}`);
  console.log(`    Total size:     ${formatSize(result.stats.totalSize)}`);
  console.log(`    Duration:       ${formatDuration(result.stats.duration)}`);
  if (result.stats.errors > 0) {
    console.log(chalk.yellow(`    Errors:         ${result.stats.errors}`));
  }
  console.log('');
  console.log(`  Output: ${chalk.underline(options.output)}`);
}
361
+
362
/**
 * Resume a previous capture using the manifest stored in the output directory.
 *
 * The options saved in the manifest are used as a base, the options of the
 * `continue` command override them, and caching is forced on.
 *
 * @param {object} options - Options of the `continue` command; `output` is the
 *   directory that must contain an existing capture.
 * @returns {Promise<void>}
 * @throws {Error} If no manifest exists in `options.output`.
 */
async function continueCapture(options) {
  if (!manifestExists(options.output)) {
    throw new Error(
      'No capture found in the specified directory. Start a new capture first.',
    );
  }

  const manifest = await readManifest(options.output);
  console.log(chalk.cyan(`Continuing capture of ${manifest.rootUrl}...`));

  await capture(manifest.rootUrl, {
    ...manifest.options,
    ...options,
    // capture() derives the crawler's cache setting from `cache` (the key
    // Commander produces for `--no-cache`), so set it here; `useCache` is kept
    // for any consumer that reads that key instead.
    cache: true,
    useCache: true,
  });
}
378
+
379
/**
 * Refresh an existing mirror using the manifest stored in the output
 * directory.
 *
 * The options saved in the manifest are used as a base, the options of the
 * `update` command override them, and caching plus the `update` flag are
 * forced on.
 *
 * @param {object} options - Options of the `update` command; `output` is the
 *   directory that must contain an existing capture.
 * @returns {Promise<void>}
 * @throws {Error} If no manifest exists in `options.output`.
 */
async function updateCapture(options) {
  if (!manifestExists(options.output)) {
    throw new Error(
      'No capture found in the specified directory. Start a new capture first.',
    );
  }

  const manifest = await readManifest(options.output);
  console.log(chalk.cyan(`Updating mirror of ${manifest.rootUrl}...`));

  await capture(manifest.rootUrl, {
    ...manifest.options,
    ...options,
    // capture() derives the crawler's cache setting from `cache` (the key
    // Commander produces for `--no-cache`), so set it here; `useCache` is kept
    // for any consumer that reads that key instead.
    cache: true,
    useCache: true,
    // NOTE(review): verify that capture() forwards `update` to the Crawler;
    // otherwise this command behaves exactly like `continue`.
    update: true,
  });
}
396
+
397
/**
 * Convert a human-readable size such as "10MB" or "1.5 kb" into bytes.
 *
 * Units are binary multiples (1 KB = 1024 B) and case-insensitive; a bare
 * number is taken as bytes. Surrounding whitespace is ignored. A value that is
 * already a number is returned unchanged, so options that were parsed earlier
 * can be passed through again without throwing.
 *
 * @param {string|number|undefined} sizeStr - Size to parse.
 * @returns {number|undefined} Size in bytes, or undefined when the input is
 *   empty or not a recognisable size.
 */
function parseSize(sizeStr) {
  if (!sizeStr) return undefined;

  // Already-parsed value: accept it as bytes.
  if (typeof sizeStr === 'number') {
    return Number.isFinite(sizeStr) && sizeStr >= 0 ? sizeStr : undefined;
  }
  if (typeof sizeStr !== 'string') return undefined;

  const match = sizeStr.trim().match(/^(\d+(?:\.\d+)?)\s*(KB|MB|GB|TB|B)?$/i);
  if (!match) return undefined;

  const num = parseFloat(match[1]);
  const unit = (match[2] || 'B').toUpperCase();

  const multipliers = {
    B: 1,
    KB: 1024,
    MB: 1024 ** 2,
    GB: 1024 ** 3,
    TB: 1024 ** 4,
  };
  return num * multipliers[unit];
}
408
+
409
/**
 * Parse a "WIDTHxHEIGHT" viewport specification.
 *
 * The separator is matched case-insensitively ("1280X720" works). Any
 * dimension that is missing, non-numeric, zero or negative falls back to the
 * default 1920x1080 value for that dimension. An object that already has
 * `width` / `height` is normalised the same way instead of throwing.
 *
 * @param {string|{width?: number, height?: number}|undefined} viewportStr
 * @returns {{width: number, height: number}}
 */
function parseViewport(viewportStr) {
  const DEFAULT_WIDTH = 1920;
  const DEFAULT_HEIGHT = 1080;
  const pick = (value, fallback) =>
    Number.isFinite(value) && value > 0 ? value : fallback;

  if (!viewportStr) return {width: DEFAULT_WIDTH, height: DEFAULT_HEIGHT};

  // Already-parsed viewport object.
  if (typeof viewportStr === 'object') {
    return {
      width: pick(Number(viewportStr.width), DEFAULT_WIDTH),
      height: pick(Number(viewportStr.height), DEFAULT_HEIGHT),
    };
  }

  const [width, height] = String(viewportStr).split(/x/i).map(Number);
  return {
    width: pick(width, DEFAULT_WIDTH),
    height: pick(height, DEFAULT_HEIGHT),
  };
}
414
+
415
/**
 * Shorten a URL for single-line display, ending it with "..." when cut.
 *
 * The result never exceeds `maxLen` characters. When `maxLen` is too small to
 * hold any text plus the ellipsis, the URL is simply cut to `maxLen`
 * characters. A missing URL is treated as an empty string.
 *
 * @param {string} url - URL to display.
 * @param {number} maxLen - Maximum length of the returned string.
 * @returns {string}
 */
function truncateUrl(url, maxLen) {
  const text = String(url ?? '');
  if (text.length <= maxLen) return text;

  // With maxLen < 3, `maxLen - 3` is negative and slice() would count from
  // the end, producing a string longer than requested.
  if (maxLen < 3) return text.slice(0, Math.max(0, maxLen));

  return text.slice(0, maxLen - 3) + '...';
}
419
+
420
/**
 * Format a byte count for display using binary units (B, KB, MB, GB).
 *
 * Bytes are shown without decimals, larger units with one decimal. Values
 * that would round up to 1024 of a unit are promoted to the next unit, so
 * 1048575 bytes prints as "1.0 MB" rather than "1024.0 KB". Falsy input
 * (0, undefined, NaN) prints as "0 B".
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string}
 */
function formatSize(bytes) {
  if (!bytes) return '0 B';

  const units = ['B', 'KB', 'MB', 'GB'];
  const lastUnit = units.length - 1;

  let value = bytes;
  let unitIndex = 0;
  while (value >= 1024 && unitIndex < lastUnit) {
    value /= 1024;
    unitIndex++;
  }

  let text = value.toFixed(unitIndex > 0 ? 1 : 0);

  // Rounding for display can reach the unit boundary (e.g. 1023.99 -> "1024.0").
  if (Number(text) >= 1024 && unitIndex < lastUnit) {
    value /= 1024;
    unitIndex++;
    text = value.toFixed(1);
  }

  return `${text} ${units[unitIndex]}`;
}
430
+
431
/**
 * Format a duration in milliseconds for display.
 *
 * - under one second: "<ms>ms"
 * - under one minute: seconds with one decimal, e.g. "1.5s"
 * - otherwise: whole minutes and seconds, e.g. "2m 5s"
 *
 * The total is rounded to whole seconds before being split into minutes and
 * seconds, so the seconds part is always in the range 0-59.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string}
 */
function formatDuration(ms) {
  if (ms < 1000) return `${ms}ms`;

  if (ms < 60000) {
    const seconds = (ms / 1000).toFixed(1);
    // Values just below a minute round to "60.0"; show those as "1m 0s".
    if (seconds !== '60.0') return `${seconds}s`;
  }

  const totalSecs = Math.round(ms / 1000);
  const mins = Math.floor(totalSecs / 60);
  const secs = totalSecs % 60;
  return `${mins}m ${secs}s`;
}