smippo 0.0.9 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -354
- package/package.json +1 -1
- package/src/cli.js +88 -6
- package/src/crawler.js +6 -0
- package/src/page-capture.js +317 -3
- package/src/server.js +126 -38
- package/src/utils/home.js +175 -0
package/README.md
CHANGED
|
@@ -27,45 +27,6 @@
|
|
|
27
27
|
|
|
28
28
|
📚 **[View complete documentation →](https://smippo.com)**
|
|
29
29
|
|
|
30
|
-
## Table of Contents
|
|
31
|
-
|
|
32
|
-
- [Table of Contents](#table-of-contents)
|
|
33
|
-
- [Features](#features)
|
|
34
|
-
- [Quick Start](#quick-start)
|
|
35
|
-
- [Installation](#installation)
|
|
36
|
-
- [Requirements](#requirements)
|
|
37
|
-
- [npm (Global)](#npm-global)
|
|
38
|
-
- [Homebrew (Coming soon)](#homebrew-coming-soon)
|
|
39
|
-
- [Usage](#usage)
|
|
40
|
-
- [Basic Usage](#basic-usage)
|
|
41
|
-
- [Interactive Mode](#interactive-mode)
|
|
42
|
-
- [Filtering](#filtering)
|
|
43
|
-
- [Scope Control](#scope-control)
|
|
44
|
-
- [Browser Options](#browser-options)
|
|
45
|
-
- [Screenshots](#screenshots)
|
|
46
|
-
- [Authentication](#authentication)
|
|
47
|
-
- [Output Options](#output-options)
|
|
48
|
-
- [Performance \& Parallelism: The Vacuum Architecture](#performance--parallelism-the-vacuum-architecture)
|
|
49
|
-
- [Continue/Update](#continueupdate)
|
|
50
|
-
- [Serve](#serve)
|
|
51
|
-
- [Static Mode](#static-mode)
|
|
52
|
-
- [Structured Output](#structured-output)
|
|
53
|
-
- [Programmatic API](#programmatic-api)
|
|
54
|
-
- [Contributing](#contributing)
|
|
55
|
-
- [License](#license)
|
|
56
|
-
- [Acknowledgments](#acknowledgments)
|
|
57
|
-
|
|
58
|
-
## Features
|
|
59
|
-
|
|
60
|
-
- **🚀 Vacuum Architecture** — Parallel workers consume sites rapidly, just like hippos vacuum up everything in their path
|
|
61
|
-
- **📸 Structured Mirroring** — Every page, every resource, every network request captured in organized, structured output
|
|
62
|
-
- **🔍 Complete Fidelity** — Gets the page exactly as you see it, including CSS-in-JS, dynamic content, and lazy-loaded images
|
|
63
|
-
- **🎯 Smart Consumption** — Respects robots.txt, filters by URL patterns, MIME types, and file sizes
|
|
64
|
-
- **📦 Structured Output** — Organized mirror structure preserves original paths for seamless offline browsing
|
|
65
|
-
- **🎨 Beautiful CLI** — Interactive guided mode, progress bars, and elegant terminal output
|
|
66
|
-
- **🌐 Built-in Server** — Serve captured sites locally with directory browsing
|
|
67
|
-
- **📊 HAR Files** — Generates HTTP Archive files for debugging and replay
|
|
68
|
-
|
|
69
30
|
## Quick Start
|
|
70
31
|
|
|
71
32
|
Install globally:
|
|
@@ -92,336 +53,48 @@ Or use without installing:
|
|
|
92
53
|
npx smippo https://example.com
|
|
93
54
|
```
|
|
94
55
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
## Installation
|
|
98
|
-
|
|
99
|
-
### Requirements
|
|
100
|
-
|
|
101
|
-
- Node.js 18 or later
|
|
102
|
-
- Chromium (automatically downloaded on first install)
|
|
103
|
-
|
|
104
|
-
### npm (Global)
|
|
105
|
-
|
|
106
|
-
```bash
|
|
107
|
-
npm install -g smippo
|
|
108
|
-
```
|
|
109
|
-
|
|
110
|
-
### Homebrew (Coming soon)
|
|
111
|
-
|
|
112
|
-
```bash
|
|
113
|
-
brew install smippo
|
|
114
|
-
```
|
|
115
|
-
|
|
116
|
-
## Usage
|
|
117
|
-
|
|
118
|
-
### Basic Usage
|
|
119
|
-
|
|
120
|
-
```bash
|
|
121
|
-
# Capture a single page with all assets
|
|
122
|
-
smippo https://example.com
|
|
123
|
-
|
|
124
|
-
# Mirror a site with depth control
|
|
125
|
-
smippo https://example.com --depth 3
|
|
126
|
-
|
|
127
|
-
# Save to custom directory
|
|
128
|
-
smippo https://example.com --output ./my-mirror
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
### Interactive Mode
|
|
132
|
-
|
|
133
|
-
Just run `smippo` with no arguments to start the guided wizard:
|
|
134
|
-
|
|
135
|
-
```bash
|
|
136
|
-
smippo
|
|
137
|
-
```
|
|
138
|
-
|
|
139
|
-
This will walk you through:
|
|
140
|
-
|
|
141
|
-
- URL to capture
|
|
142
|
-
- Crawl depth
|
|
143
|
-
- Scope settings
|
|
144
|
-
- Asset options
|
|
145
|
-
- Advanced configuration
|
|
146
|
-
|
|
147
|
-
Perfect for beginners or when you want to explore options!
|
|
148
|
-
|
|
149
|
-
### Filtering
|
|
150
|
-
|
|
151
|
-
```bash
|
|
152
|
-
# Include only specific patterns
|
|
153
|
-
smippo https://example.com --include "*.html" --include "*.css"
|
|
154
|
-
|
|
155
|
-
# Exclude patterns
|
|
156
|
-
smippo https://example.com --exclude "*tracking*" --exclude "*ads*"
|
|
157
|
-
|
|
158
|
-
# Filter by MIME type
|
|
159
|
-
smippo https://example.com --mime-include "image/*" --mime-exclude "video/*"
|
|
160
|
-
|
|
161
|
-
# Filter by file size
|
|
162
|
-
smippo https://example.com --max-size 5MB --min-size 1KB
|
|
163
|
-
```
|
|
164
|
-
|
|
165
|
-
### Scope Control
|
|
166
|
-
|
|
167
|
-
```bash
|
|
168
|
-
# Stay on same subdomain (default)
|
|
169
|
-
smippo https://www.example.com --scope subdomain
|
|
170
|
-
|
|
171
|
-
# Allow all subdomains
|
|
172
|
-
smippo https://www.example.com --scope domain
|
|
173
|
-
|
|
174
|
-
# Go everywhere (use with caution!)
|
|
175
|
-
smippo https://example.com --scope all --depth 2
|
|
176
|
-
```
|
|
177
|
-
|
|
178
|
-
### Browser Options
|
|
56
|
+
## Commands
|
|
179
57
|
|
|
180
|
-
|
|
181
|
-
# Wait for specific condition
|
|
182
|
-
smippo https://example.com --wait networkidle
|
|
183
|
-
smippo https://example.com --wait domcontentloaded
|
|
184
|
-
|
|
185
|
-
# Add extra wait time for slow sites
|
|
186
|
-
smippo https://example.com --wait-time 5000
|
|
187
|
-
|
|
188
|
-
# Custom user agent
|
|
189
|
-
smippo https://example.com --user-agent "Mozilla/5.0..."
|
|
58
|
+
Smippo provides several commands for different use cases:
|
|
190
59
|
|
|
191
|
-
|
|
192
|
-
smippo
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
smippo
|
|
196
|
-
```
|
|
197
|
-
|
|
198
|
-
### Screenshots
|
|
199
|
-
|
|
200
|
-
Take quick screenshots without mirroring the full site:
|
|
201
|
-
|
|
202
|
-
```bash
|
|
203
|
-
# Basic screenshot
|
|
204
|
-
smippo capture https://example.com
|
|
60
|
+
- **`smippo <url>`** — Capture and mirror websites with full fidelity
|
|
61
|
+
- **`smippo capture <url>`** — Take screenshots of web pages
|
|
62
|
+
- **`smippo serve <directory>`** — Serve captured sites locally
|
|
63
|
+
- **`smippo continue`** — Resume an interrupted capture
|
|
64
|
+
- **`smippo update`** — Update an existing mirror
|
|
205
65
|
|
|
206
|
-
|
|
207
|
-
smippo capture https://example.com --full-page
|
|
66
|
+
Run `smippo` with no arguments to start the interactive guided mode.
|
|
208
67
|
|
|
209
|
-
|
|
210
|
-
smippo capture https://example.com -O ./screenshots/example.png
|
|
211
|
-
|
|
212
|
-
# Mobile device screenshot
|
|
213
|
-
smippo capture https://example.com --device "iPhone 13" -O mobile.png
|
|
214
|
-
|
|
215
|
-
# Screenshot with dark mode
|
|
216
|
-
smippo capture https://example.com --dark-mode
|
|
217
|
-
|
|
218
|
-
# Capture specific element
|
|
219
|
-
smippo capture https://example.com --selector ".hero-section"
|
|
220
|
-
|
|
221
|
-
# JPEG format with quality
|
|
222
|
-
smippo capture https://example.com --format jpeg --quality 90
|
|
223
|
-
```
|
|
224
|
-
|
|
225
|
-
### Authentication
|
|
226
|
-
|
|
227
|
-
```bash
|
|
228
|
-
# Basic auth
|
|
229
|
-
smippo https://user:pass@example.com
|
|
230
|
-
|
|
231
|
-
# Cookie-based auth
|
|
232
|
-
smippo https://example.com --cookies cookies.json
|
|
233
|
-
|
|
234
|
-
# Interactive login (opens browser window)
|
|
235
|
-
smippo https://example.com --capture-auth
|
|
236
|
-
```
|
|
237
|
-
|
|
238
|
-
### Output Options
|
|
239
|
-
|
|
240
|
-
```bash
|
|
241
|
-
# Generate screenshots
|
|
242
|
-
smippo https://example.com --screenshot
|
|
243
|
-
|
|
244
|
-
# Generate PDFs
|
|
245
|
-
smippo https://example.com --pdf
|
|
246
|
-
|
|
247
|
-
# Skip HAR file
|
|
248
|
-
smippo https://example.com --no-har
|
|
249
|
-
|
|
250
|
-
# Output structure
|
|
251
|
-
smippo https://example.com --structure original # URL paths (default)
|
|
252
|
-
smippo https://example.com --structure flat # All in one directory
|
|
253
|
-
smippo https://example.com --structure domain # Organized by domain
|
|
254
|
-
```
|
|
255
|
-
|
|
256
|
-
### Performance & Parallelism: The Vacuum Architecture
|
|
257
|
-
|
|
258
|
-
Smippo's parallel worker architecture mirrors how hippos consume everything in their path—rapidly and efficiently. Multiple workers operate simultaneously, each vacuuming up pages, resources, and network requests in parallel.
|
|
259
|
-
|
|
260
|
-
```bash
|
|
261
|
-
# Default: 8 parallel workers (8 hippos vacuuming simultaneously)
|
|
262
|
-
smippo https://example.com
|
|
263
|
-
|
|
264
|
-
# Limit to 4 workers (for rate-limited sites)
|
|
265
|
-
smippo https://example.com --workers 4
|
|
266
|
-
|
|
267
|
-
# Single worker (sequential, safest)
|
|
268
|
-
smippo https://example.com --workers 1
|
|
269
|
-
|
|
270
|
-
# Maximum speed (use with caution)
|
|
271
|
-
smippo https://example.com --workers 16
|
|
272
|
-
|
|
273
|
-
# Limit total pages
|
|
274
|
-
smippo https://example.com --max-pages 100
|
|
275
|
-
|
|
276
|
-
# Limit total time
|
|
277
|
-
smippo https://example.com --max-time 300 # 5 minutes
|
|
278
|
-
|
|
279
|
-
# Rate limiting (delay between requests per worker)
|
|
280
|
-
smippo https://example.com --rate-limit 1000 # 1 second between requests
|
|
281
|
-
```
|
|
282
|
-
|
|
283
|
-
**The Vacuum Architecture:**
|
|
284
|
-
|
|
285
|
-
Each worker operates like an independent hippo, vacuuming up:
|
|
286
|
-
|
|
287
|
-
- Fully rendered pages (after JavaScript execution)
|
|
288
|
-
- All network resources (images, fonts, stylesheets, API responses)
|
|
289
|
-
- Network metadata (captured in HAR files)
|
|
290
|
-
- Link structures (for recursive crawling)
|
|
291
|
-
|
|
292
|
-
All captured content is then **structured** into organized mirrors that preserve original paths and relationships.
|
|
293
|
-
|
|
294
|
-
**Tips for optimal performance:**
|
|
295
|
-
|
|
296
|
-
- Use `--workers 1` for sites with strict rate limiting
|
|
297
|
-
- Use `--workers 4-8` for most sites (default: 8)
|
|
298
|
-
- Use `--workers 16` only for fast servers you control
|
|
299
|
-
- Combine `--workers` with `--rate-limit` for polite crawling
|
|
300
|
-
|
|
301
|
-
### Continue/Update
|
|
302
|
-
|
|
303
|
-
```bash
|
|
304
|
-
# Continue an interrupted capture
|
|
305
|
-
smippo continue
|
|
306
|
-
|
|
307
|
-
# Update an existing mirror
|
|
308
|
-
smippo update
|
|
309
|
-
```
|
|
310
|
-
|
|
311
|
-
### Serve
|
|
312
|
-
|
|
313
|
-
Serve captured sites locally with a built-in web server:
|
|
314
|
-
|
|
315
|
-
```bash
|
|
316
|
-
# Serve with auto port detection
|
|
317
|
-
smippo serve ./site
|
|
318
|
-
|
|
319
|
-
# Specify port
|
|
320
|
-
smippo serve ./site --port 3000
|
|
321
|
-
|
|
322
|
-
# Open browser automatically
|
|
323
|
-
smippo serve ./site --open
|
|
324
|
-
|
|
325
|
-
# Show all requests
|
|
326
|
-
smippo serve ./site --verbose
|
|
327
|
-
```
|
|
328
|
-
|
|
329
|
-
The server provides:
|
|
330
|
-
|
|
331
|
-
- **Auto port detection** — Finds next available port if default is busy
|
|
332
|
-
- **Proper MIME types** — Correct content-type headers for all file types
|
|
333
|
-
- **CORS support** — Enabled by default for local development
|
|
334
|
-
- **Nice terminal UI** — Shows clickable URL and request logs
|
|
335
|
-
|
|
336
|
-
### Static Mode
|
|
337
|
-
|
|
338
|
-
For any site, use `--static` to strip scripts for true offline viewing:
|
|
339
|
-
|
|
340
|
-
```bash
|
|
341
|
-
# Capture as static HTML (removes JS, keeps rendered content)
|
|
342
|
-
smippo https://example.com --static --external-assets
|
|
343
|
-
|
|
344
|
-
# Then serve
|
|
345
|
-
smippo serve ./site --open
|
|
346
|
-
```
|
|
347
|
-
|
|
348
|
-
## Structured Output
|
|
349
|
-
|
|
350
|
-
Smippo creates **structured mirrors** that preserve the original URL structure and relationships. Every page, every resource, every network request is organized and stored in a logical hierarchy:
|
|
351
|
-
|
|
352
|
-
```
|
|
353
|
-
site/
|
|
354
|
-
├── example.com/
|
|
355
|
-
│ ├── index.html
|
|
356
|
-
│ ├── about/
|
|
357
|
-
│ │ └── index.html
|
|
358
|
-
│ └── assets/
|
|
359
|
-
│ ├── style.css
|
|
360
|
-
│ └── logo.png
|
|
361
|
-
├── .smippo/
|
|
362
|
-
│ ├── cache.json # Metadata cache
|
|
363
|
-
│ ├── network.har # HAR file
|
|
364
|
-
│ ├── manifest.json # Capture manifest
|
|
365
|
-
│ └── log.txt # Capture log
|
|
366
|
-
└── index.html # Entry point
|
|
367
|
-
```
|
|
368
|
-
|
|
369
|
-
## Programmatic API
|
|
370
|
-
|
|
371
|
-
```javascript
|
|
372
|
-
import {capture, Crawler, createServer} from 'smippo';
|
|
373
|
-
|
|
374
|
-
// Simple capture
|
|
375
|
-
const result = await capture('https://example.com', {
|
|
376
|
-
output: './mirror',
|
|
377
|
-
depth: 2,
|
|
378
|
-
});
|
|
379
|
-
|
|
380
|
-
console.log(`Captured ${result.stats.pagesCapt} pages`);
|
|
381
|
-
|
|
382
|
-
// Advanced usage with events
|
|
383
|
-
const crawler = new Crawler({
|
|
384
|
-
url: 'https://example.com',
|
|
385
|
-
output: './mirror',
|
|
386
|
-
depth: 3,
|
|
387
|
-
scope: 'domain',
|
|
388
|
-
});
|
|
389
|
-
|
|
390
|
-
crawler.on('page:complete', ({url, size}) => {
|
|
391
|
-
console.log(`Captured: ${url} (${size} bytes)`);
|
|
392
|
-
});
|
|
68
|
+
## Features
|
|
393
69
|
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
70
|
+
- **🚀 Vacuum Architecture** — Parallel workers consume sites rapidly
|
|
71
|
+
- **📸 Complete Fidelity** — Captures pages exactly as rendered, including CSS-in-JS, dynamic content, and lazy-loaded images
|
|
72
|
+
- **🎯 Smart Filtering** — Filter by URL patterns, MIME types, and file sizes. Respects robots.txt
|
|
73
|
+
- **🌐 Built-in Server** — Serve captured sites locally with directory browsing
|
|
74
|
+
- **📊 HAR Files** — Generates HTTP Archive files for debugging and replay
|
|
75
|
+
- **💻 Programmatic API** — Use Smippo in your Node.js applications
|
|
397
76
|
|
|
398
|
-
|
|
77
|
+
## Documentation
|
|
399
78
|
|
|
400
|
-
|
|
401
|
-
const server = await createServer({
|
|
402
|
-
directory: './mirror',
|
|
403
|
-
port: 8080,
|
|
404
|
-
open: true, // Opens browser automatically
|
|
405
|
-
});
|
|
79
|
+
For complete documentation, guides, and API reference, visit **[smippo.com](https://smippo.com)**:
|
|
406
80
|
|
|
407
|
-
|
|
81
|
+
- **[Installation Guide](https://smippo.com/getting-started/installation)** — Detailed installation instructions
|
|
82
|
+
- **[Commands Reference](https://smippo.com/commands)** — All available commands and options
|
|
83
|
+
- **[Configuration](https://smippo.com/configuration)** — Filtering, scope control, performance tuning
|
|
84
|
+
- **[Guides](https://smippo.com/guides)** — Output structure, link rewriting, troubleshooting
|
|
85
|
+
- **[Programmatic API](https://smippo.com/api/programmatic)** — Use Smippo in your Node.js code
|
|
86
|
+
- **[Examples](https://smippo.com/getting-started/examples)** — Real-world use cases
|
|
408
87
|
|
|
409
|
-
|
|
410
|
-
await server.close();
|
|
411
|
-
```
|
|
88
|
+
## Requirements
|
|
412
89
|
|
|
413
|
-
|
|
90
|
+
- Node.js 18 or later
|
|
91
|
+
- Chromium (automatically downloaded on first install)
|
|
414
92
|
|
|
415
93
|
## Contributing
|
|
416
94
|
|
|
417
95
|
Contributions are welcome! Whether it's bug reports, feature requests, or pull requests — all contributions help make Smippo better.
|
|
418
96
|
|
|
419
|
-
Please read our [Contributing Guide](CONTRIBUTING.md) for details on
|
|
420
|
-
|
|
421
|
-
- Development setup
|
|
422
|
-
- Code style guidelines
|
|
423
|
-
- Pull request process
|
|
424
|
-
- Testing requirements
|
|
97
|
+
Please read our [Contributing Guide](CONTRIBUTING.md) for details on development setup, code style guidelines, and the pull request process.
|
|
425
98
|
|
|
426
99
|
Quick start:
|
|
427
100
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "smippo",
|
|
3
|
-
"version": "0.0.9",
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"description": "S.M.I.P.P.O. — Structured Mirroring of Internet Pages and Public Objects. Modern website copier that captures sites exactly as they appear in your browser.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
package/src/cli.js
CHANGED
|
@@ -10,6 +10,12 @@ import {
|
|
|
10
10
|
runInteractiveCapture,
|
|
11
11
|
shouldRunInteractive,
|
|
12
12
|
} from './interactive.js';
|
|
13
|
+
import {
|
|
14
|
+
getSiteDir,
|
|
15
|
+
getDomainFromUrl,
|
|
16
|
+
addSiteToGlobalManifest,
|
|
17
|
+
ensureSmippoHome,
|
|
18
|
+
} from './utils/home.js';
|
|
13
19
|
|
|
14
20
|
const program = new Command();
|
|
15
21
|
|
|
@@ -52,7 +58,10 @@ export function run() {
|
|
|
52
58
|
// Main capture command
|
|
53
59
|
program
|
|
54
60
|
.argument('[url]', 'URL to capture')
|
|
55
|
-
.option(
|
|
61
|
+
.option(
|
|
62
|
+
'-o, --output <dir>',
|
|
63
|
+
'Output directory (default: ~/.smippo/sites/[domain])',
|
|
64
|
+
)
|
|
56
65
|
.option('-d, --depth <n>', 'Recursion depth (0 = single page)', '0')
|
|
57
66
|
.option('--no-crawl', 'Disable link following (same as -d 0)')
|
|
58
67
|
.option('--dry-run', 'Show what would be captured without downloading')
|
|
@@ -80,8 +89,47 @@ export function run() {
|
|
|
80
89
|
'Wait strategy: networkidle|load|domcontentloaded',
|
|
81
90
|
'networkidle',
|
|
82
91
|
)
|
|
83
|
-
.option(
|
|
92
|
+
.option(
|
|
93
|
+
'--wait-time <ms>',
|
|
94
|
+
'Additional wait time after network idle',
|
|
95
|
+
'500',
|
|
96
|
+
)
|
|
84
97
|
.option('--timeout <ms>', 'Page load timeout', '30000')
|
|
98
|
+
|
|
99
|
+
// Scroll and reveal options (for capturing dynamic content)
|
|
100
|
+
.option(
|
|
101
|
+
'--scroll',
|
|
102
|
+
'Pre-scroll page to trigger lazy content (default: true)',
|
|
103
|
+
)
|
|
104
|
+
.option('--no-scroll', 'Disable pre-scroll behavior')
|
|
105
|
+
.option(
|
|
106
|
+
'--scroll-wait <ms>',
|
|
107
|
+
'Wait time after scrolling for animations',
|
|
108
|
+
'1000',
|
|
109
|
+
)
|
|
110
|
+
.option(
|
|
111
|
+
'--scroll-step <px>',
|
|
112
|
+
'Pixels per scroll increment (default: 200)',
|
|
113
|
+
'200',
|
|
114
|
+
)
|
|
115
|
+
.option(
|
|
116
|
+
'--scroll-delay <ms>',
|
|
117
|
+
'Delay between scroll steps (default: 50)',
|
|
118
|
+
'50',
|
|
119
|
+
)
|
|
120
|
+
.option(
|
|
121
|
+
'--scroll-behavior <type>',
|
|
122
|
+
'Scroll behavior: smooth|instant (default: smooth)',
|
|
123
|
+
'smooth',
|
|
124
|
+
)
|
|
125
|
+
.option(
|
|
126
|
+
'--reveal-all',
|
|
127
|
+
'Force reveal scroll-triggered content like GSAP, AOS (default: true)',
|
|
128
|
+
)
|
|
129
|
+
.option(
|
|
130
|
+
'--no-reveal-all',
|
|
131
|
+
'Disable force-reveal of scroll-triggered content',
|
|
132
|
+
)
|
|
85
133
|
.option('--user-agent <string>', 'Custom user agent')
|
|
86
134
|
.option('--viewport <WxH>', 'Viewport size', '1920x1080')
|
|
87
135
|
.option('--device <name>', 'Emulate device (e.g., "iPhone 13")')
|
|
@@ -179,7 +227,7 @@ export function run() {
|
|
|
179
227
|
// Serve command
|
|
180
228
|
program
|
|
181
229
|
.command('serve [directory]')
|
|
182
|
-
.description('Serve a captured site locally')
|
|
230
|
+
.description('Serve a captured site locally (default: ~/.smippo/sites/)')
|
|
183
231
|
.option(
|
|
184
232
|
'-p, --port <port>',
|
|
185
233
|
'Port to serve on (auto-finds available)',
|
|
@@ -192,8 +240,13 @@ export function run() {
|
|
|
192
240
|
.option('-q, --quiet', 'Minimal output')
|
|
193
241
|
.action(async (directory, options) => {
|
|
194
242
|
const {serve} = await import('./server.js');
|
|
243
|
+
const {getSitesDir} = await import('./utils/home.js');
|
|
244
|
+
|
|
245
|
+
// If no directory specified, use global smippo sites directory
|
|
246
|
+
const serveDir = directory || getSitesDir();
|
|
247
|
+
|
|
195
248
|
await serve({
|
|
196
|
-
directory:
|
|
249
|
+
directory: serveDir,
|
|
197
250
|
port: options.port,
|
|
198
251
|
host: options.host,
|
|
199
252
|
open: options.open,
|
|
@@ -269,6 +322,14 @@ export function run() {
|
|
|
269
322
|
}
|
|
270
323
|
|
|
271
324
|
async function capture(url, options) {
|
|
325
|
+
// Compute output directory based on URL domain if not specified
|
|
326
|
+
let outputDir = options.output;
|
|
327
|
+
if (!outputDir) {
|
|
328
|
+
const domain = getDomainFromUrl(url);
|
|
329
|
+
outputDir = getSiteDir(domain);
|
|
330
|
+
await ensureSmippoHome();
|
|
331
|
+
}
|
|
332
|
+
|
|
272
333
|
const spinner = ora({
|
|
273
334
|
text: 'Initializing browser...',
|
|
274
335
|
isSilent: options.quiet,
|
|
@@ -276,7 +337,7 @@ async function capture(url, options) {
|
|
|
276
337
|
|
|
277
338
|
const crawler = new Crawler({
|
|
278
339
|
url,
|
|
279
|
-
output: options.output,
|
|
340
|
+
output: outputDir,
|
|
280
341
|
depth: parseInt(options.depth, 10),
|
|
281
342
|
scope: options.scope,
|
|
282
343
|
stayInDir: options.stayInDir,
|
|
@@ -290,6 +351,12 @@ async function capture(url, options) {
|
|
|
290
351
|
wait: options.wait,
|
|
291
352
|
waitTime: parseInt(options.waitTime, 10),
|
|
292
353
|
timeout: parseInt(options.timeout, 10),
|
|
354
|
+
scroll: options.scroll,
|
|
355
|
+
scrollWait: parseInt(options.scrollWait, 10),
|
|
356
|
+
scrollStep: parseInt(options.scrollStep, 10),
|
|
357
|
+
scrollDelay: parseInt(options.scrollDelay, 10),
|
|
358
|
+
scrollBehavior: options.scrollBehavior,
|
|
359
|
+
revealAll: options.revealAll,
|
|
293
360
|
userAgent: options.userAgent,
|
|
294
361
|
viewport: parseViewport(options.viewport),
|
|
295
362
|
device: options.device,
|
|
@@ -356,7 +423,22 @@ async function capture(url, options) {
|
|
|
356
423
|
console.log(chalk.yellow(` Errors: ${result.stats.errors}`));
|
|
357
424
|
}
|
|
358
425
|
console.log('');
|
|
359
|
-
console.log(` Output: ${chalk.underline(options.output)}`);
|
|
426
|
+
console.log(` Output: ${chalk.underline(outputDir)}`);
|
|
427
|
+
|
|
428
|
+
// Update global manifest with this capture (tracks all sites regardless of location)
|
|
429
|
+
try {
|
|
430
|
+
const domain = getDomainFromUrl(url);
|
|
431
|
+
await addSiteToGlobalManifest({
|
|
432
|
+
domain,
|
|
433
|
+
rootUrl: url,
|
|
434
|
+
outputDir: outputDir,
|
|
435
|
+
title: result.pages?.[0]?.title || domain,
|
|
436
|
+
pagesCount: result.stats.pagesCapt,
|
|
437
|
+
assetsCount: result.stats.assetsCapt,
|
|
438
|
+
});
|
|
439
|
+
} catch {
|
|
440
|
+
// Silently ignore manifest errors
|
|
441
|
+
}
|
|
360
442
|
}
|
|
361
443
|
|
|
362
444
|
async function continueCapture(options) {
|
package/src/crawler.js
CHANGED
|
@@ -270,6 +270,12 @@ export class Crawler extends EventEmitter {
|
|
|
270
270
|
mimeExclude: this.options.mimeExclude,
|
|
271
271
|
maxSize: this.options.maxSize,
|
|
272
272
|
minSize: this.options.minSize,
|
|
273
|
+
scroll: this.options.scroll,
|
|
274
|
+
scrollWait: this.options.scrollWait,
|
|
275
|
+
scrollStep: this.options.scrollStep,
|
|
276
|
+
scrollDelay: this.options.scrollDelay,
|
|
277
|
+
scrollBehavior: this.options.scrollBehavior,
|
|
278
|
+
revealAll: this.options.revealAll,
|
|
273
279
|
});
|
|
274
280
|
|
|
275
281
|
const result = await capture.capture(url);
|
package/src/page-capture.js
CHANGED
|
@@ -35,9 +35,26 @@ export class PageCapture {
|
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
-
// Additional wait time if specified
|
|
39
|
-
|
|
40
|
-
|
|
38
|
+
// Additional wait time if specified (default 500ms for animations to start)
|
|
39
|
+
const waitTime = this.options.waitTime ?? 500;
|
|
40
|
+
if (waitTime > 0) {
|
|
41
|
+
await this.page.waitForTimeout(waitTime);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Step 1: Force reveal all scroll-triggered content
|
|
45
|
+
if (this.options.revealAll !== false) {
|
|
46
|
+
await this._revealAllContent();
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
// Step 2: Pre-scroll the page to trigger scroll animations
|
|
50
|
+
if (this.options.scroll !== false) {
|
|
51
|
+
await this._scrollPage();
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// Step 3: Additional wait after scroll for animations to complete
|
|
55
|
+
const scrollWait = this.options.scrollWait ?? 1000;
|
|
56
|
+
if (scrollWait > 0 && this.options.scroll !== false) {
|
|
57
|
+
await this.page.waitForTimeout(scrollWait);
|
|
41
58
|
}
|
|
42
59
|
|
|
43
60
|
// Get the rendered HTML
|
|
@@ -148,4 +165,301 @@ export class PageCapture {
|
|
|
148
165
|
return type === filter || type.startsWith(filter + ';');
|
|
149
166
|
});
|
|
150
167
|
}
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* Pre-scroll the page to trigger scroll-based animations and lazy loading
|
|
171
|
+
* Performs smooth, incremental scrolling to trigger all scroll-based content
|
|
172
|
+
*/
|
|
173
|
+
async _scrollPage() {
|
|
174
|
+
const scrollBehavior = this.options.scrollBehavior || 'smooth';
|
|
175
|
+
const scrollStep = this.options.scrollStep || 200; // pixels per step
|
|
176
|
+
const scrollDelay = this.options.scrollDelay || 50; // ms between steps
|
|
177
|
+
|
|
178
|
+
/* eslint-disable no-undef */
|
|
179
|
+
await this.page.evaluate(
|
|
180
|
+
async ({step, delay, behavior}) => {
|
|
181
|
+
// Helper for smooth scrolling with requestAnimationFrame
|
|
182
|
+
const smoothScroll = (targetY, duration = 300) => {
|
|
183
|
+
return new Promise(resolve => {
|
|
184
|
+
const startY = window.scrollY;
|
|
185
|
+
const distance = targetY - startY;
|
|
186
|
+
const startTime = performance.now();
|
|
187
|
+
|
|
188
|
+
const animate = currentTime => {
|
|
189
|
+
const elapsed = currentTime - startTime;
|
|
190
|
+
const progress = Math.min(elapsed / duration, 1);
|
|
191
|
+
|
|
192
|
+
// Easing function (ease-out-cubic)
|
|
193
|
+
const eased = 1 - Math.pow(1 - progress, 3);
|
|
194
|
+
|
|
195
|
+
window.scrollTo(0, startY + distance * eased);
|
|
196
|
+
|
|
197
|
+
if (progress < 1) {
|
|
198
|
+
requestAnimationFrame(animate);
|
|
199
|
+
} else {
|
|
200
|
+
resolve();
|
|
201
|
+
}
|
|
202
|
+
};
|
|
203
|
+
|
|
204
|
+
requestAnimationFrame(animate);
|
|
205
|
+
});
|
|
206
|
+
};
|
|
207
|
+
|
|
208
|
+
// Get initial page height
|
|
209
|
+
let lastHeight = document.body.scrollHeight;
|
|
210
|
+
let currentY = 0;
|
|
211
|
+
|
|
212
|
+
// Phase 1: Scroll down incrementally
|
|
213
|
+
while (currentY < document.body.scrollHeight) {
|
|
214
|
+
const targetY = Math.min(currentY + step, document.body.scrollHeight);
|
|
215
|
+
|
|
216
|
+
if (behavior === 'smooth') {
|
|
217
|
+
await smoothScroll(targetY, delay * 2);
|
|
218
|
+
} else {
|
|
219
|
+
window.scrollTo(0, targetY);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
currentY = targetY;
|
|
223
|
+
await new Promise(r => setTimeout(r, delay));
|
|
224
|
+
|
|
225
|
+
// Check if page height increased (lazy content loaded)
|
|
226
|
+
const newHeight = document.body.scrollHeight;
|
|
227
|
+
if (newHeight > lastHeight) {
|
|
228
|
+
lastHeight = newHeight;
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
// Phase 2: Wait at bottom for any pending lazy loads
|
|
233
|
+
await new Promise(r => setTimeout(r, 500));
|
|
234
|
+
|
|
235
|
+
// Check if more content loaded while waiting
|
|
236
|
+
if (document.body.scrollHeight > lastHeight) {
|
|
237
|
+
// Scroll to the new bottom
|
|
238
|
+
if (behavior === 'smooth') {
|
|
239
|
+
await smoothScroll(document.body.scrollHeight, 300);
|
|
240
|
+
} else {
|
|
241
|
+
window.scrollTo(0, document.body.scrollHeight);
|
|
242
|
+
}
|
|
243
|
+
await new Promise(r => setTimeout(r, 300));
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
// Phase 3: Scroll back up slowly (some sites have scroll-up animations)
|
|
247
|
+
const scrollUpStep = step * 2; // Faster on the way up
|
|
248
|
+
currentY = window.scrollY;
|
|
249
|
+
|
|
250
|
+
while (currentY > 0) {
|
|
251
|
+
const targetY = Math.max(currentY - scrollUpStep, 0);
|
|
252
|
+
|
|
253
|
+
if (behavior === 'smooth') {
|
|
254
|
+
await smoothScroll(targetY, delay);
|
|
255
|
+
} else {
|
|
256
|
+
window.scrollTo(0, targetY);
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
currentY = targetY;
|
|
260
|
+
await new Promise(r => setTimeout(r, delay / 2));
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
// Phase 4: Return to top and wait
|
|
264
|
+
window.scrollTo(0, 0);
|
|
265
|
+
await new Promise(r => setTimeout(r, 200));
|
|
266
|
+
},
|
|
267
|
+
{step: scrollStep, delay: scrollDelay, behavior: scrollBehavior},
|
|
268
|
+
);
|
|
269
|
+
/* eslint-enable no-undef */
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
/**
|
|
273
|
+
* Force reveal all scroll-triggered content by disabling/triggering
|
|
274
|
+
* common animation libraries like GSAP ScrollTrigger, AOS, etc.
|
|
275
|
+
*/
|
|
276
|
+
async _revealAllContent() {
|
|
277
|
+
/* eslint-disable no-undef */
|
|
278
|
+
await this.page.evaluate(() => {
|
|
279
|
+
// Helper to safely access nested properties
|
|
280
|
+
const safeGet = (obj, path) => {
|
|
281
|
+
try {
|
|
282
|
+
return path.split('.').reduce((o, k) => o?.[k], obj);
|
|
283
|
+
} catch {
|
|
284
|
+
return undefined;
|
|
285
|
+
}
|
|
286
|
+
};
|
|
287
|
+
|
|
288
|
+
// 1. GSAP ScrollTrigger - kill all triggers and show content
|
|
289
|
+
const ScrollTrigger = safeGet(window, 'ScrollTrigger');
|
|
290
|
+
if (ScrollTrigger) {
|
|
291
|
+
try {
|
|
292
|
+
// Get all ScrollTrigger instances
|
|
293
|
+
const triggers = ScrollTrigger.getAll?.() || [];
|
|
294
|
+
triggers.forEach(trigger => {
|
|
295
|
+
try {
|
|
296
|
+
// Kill the trigger to prevent it from hiding content
|
|
297
|
+
trigger.kill?.();
|
|
298
|
+
} catch (e) {
|
|
299
|
+
/* ignore */
|
|
300
|
+
}
|
|
301
|
+
});
|
|
302
|
+
// Refresh to ensure proper state
|
|
303
|
+
ScrollTrigger.refresh?.();
|
|
304
|
+
} catch (e) {
|
|
305
|
+
/* ignore */
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
// Also check for gsap.ScrollTrigger
|
|
310
|
+
const gsapScrollTrigger = safeGet(window, 'gsap.ScrollTrigger');
|
|
311
|
+
if (gsapScrollTrigger && gsapScrollTrigger !== ScrollTrigger) {
|
|
312
|
+
try {
|
|
313
|
+
const triggers = gsapScrollTrigger.getAll?.() || [];
|
|
314
|
+
triggers.forEach(trigger => trigger.kill?.());
|
|
315
|
+
} catch (e) {
|
|
316
|
+
/* ignore */
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
// 2. AOS (Animate On Scroll) - reveal all elements
|
|
321
|
+
const AOS = safeGet(window, 'AOS');
|
|
322
|
+
if (AOS) {
|
|
323
|
+
try {
|
|
324
|
+
// Disable AOS and show all elements
|
|
325
|
+
document.querySelectorAll('[data-aos]').forEach(el => {
|
|
326
|
+
el.classList.add('aos-animate');
|
|
327
|
+
el.style.opacity = '1';
|
|
328
|
+
el.style.transform = 'none';
|
|
329
|
+
el.style.visibility = 'visible';
|
|
330
|
+
});
|
|
331
|
+
} catch (e) {
|
|
332
|
+
/* ignore */
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
// 3. WOW.js - reveal all elements
|
|
337
|
+
document.querySelectorAll('.wow').forEach(el => {
|
|
338
|
+
el.classList.add('animated');
|
|
339
|
+
el.style.visibility = 'visible';
|
|
340
|
+
el.style.opacity = '1';
|
|
341
|
+
el.style.animationName = 'none';
|
|
342
|
+
});
|
|
343
|
+
|
|
344
|
+
// 4. ScrollReveal - reveal all elements
|
|
345
|
+
const ScrollReveal = safeGet(window, 'ScrollReveal');
|
|
346
|
+
if (ScrollReveal) {
|
|
347
|
+
try {
|
|
348
|
+
document.querySelectorAll('[data-sr-id]').forEach(el => {
|
|
349
|
+
el.style.visibility = 'visible';
|
|
350
|
+
el.style.opacity = '1';
|
|
351
|
+
el.style.transform = 'none';
|
|
352
|
+
});
|
|
353
|
+
} catch (e) {
|
|
354
|
+
/* ignore */
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
// 5. Intersection Observer based lazy loading - trigger all observers
|
|
359
|
+
// This is tricky since we can't access observers directly,
|
|
360
|
+
// but we can trigger the elements they're watching
|
|
361
|
+
|
|
362
|
+
// 6. Generic fixes for common hidden patterns
|
|
363
|
+
// Elements with opacity: 0 that are meant to fade in
|
|
364
|
+
document
|
|
365
|
+
.querySelectorAll('[style*="opacity: 0"], [style*="opacity:0"]')
|
|
366
|
+
.forEach(el => {
|
|
367
|
+
// Only reveal if it seems intentionally hidden for animation
|
|
368
|
+
const computedStyle = window.getComputedStyle(el);
|
|
369
|
+
if (
|
|
370
|
+
computedStyle.opacity === '0' &&
|
|
371
|
+
!el.hasAttribute('aria-hidden')
|
|
372
|
+
) {
|
|
373
|
+
el.style.opacity = '1';
|
|
374
|
+
}
|
|
375
|
+
});
|
|
376
|
+
|
|
377
|
+
// Elements with visibility: hidden that may animate in
|
|
378
|
+
document
|
|
379
|
+
.querySelectorAll(
|
|
380
|
+
'[style*="visibility: hidden"], [style*="visibility:hidden"]',
|
|
381
|
+
)
|
|
382
|
+
.forEach(el => {
|
|
383
|
+
el.style.visibility = 'visible';
|
|
384
|
+
});
|
|
385
|
+
|
|
386
|
+
// Elements with transform: translateY that slide in
|
|
387
|
+
document.querySelectorAll('[style*="translateY"]').forEach(el => {
|
|
388
|
+
const style = el.getAttribute('style') || '';
|
|
389
|
+
// Only fix if it looks like a scroll animation starting position
|
|
390
|
+
if (
|
|
391
|
+
style.includes('translateY(') &&
|
|
392
|
+
(style.includes('opacity') || el.classList.length > 0)
|
|
393
|
+
) {
|
|
394
|
+
el.style.transform = 'none';
|
|
395
|
+
}
|
|
396
|
+
});
|
|
397
|
+
|
|
398
|
+
// 7. Lazy-loaded images - force load
|
|
399
|
+
document
|
|
400
|
+
.querySelectorAll('img[data-src], img[data-lazy], img[loading="lazy"]')
|
|
401
|
+
.forEach(img => {
|
|
402
|
+
const src =
|
|
403
|
+
img.getAttribute('data-src') || img.getAttribute('data-lazy');
|
|
404
|
+
if (src && !img.src) {
|
|
405
|
+
img.src = src;
|
|
406
|
+
}
|
|
407
|
+
// Remove lazy loading to ensure images load
|
|
408
|
+
img.removeAttribute('loading');
|
|
409
|
+
});
|
|
410
|
+
|
|
411
|
+
// 8. Lazy-loaded iframes
|
|
412
|
+
document.querySelectorAll('iframe[data-src]').forEach(iframe => {
|
|
413
|
+
const src = iframe.getAttribute('data-src');
|
|
414
|
+
if (src && !iframe.src) {
|
|
415
|
+
iframe.src = src;
|
|
416
|
+
}
|
|
417
|
+
});
|
|
418
|
+
|
|
419
|
+
// 9. Picture elements with lazy loading
|
|
420
|
+
document
|
|
421
|
+
.querySelectorAll('picture source[data-srcset]')
|
|
422
|
+
.forEach(source => {
|
|
423
|
+
const srcset = source.getAttribute('data-srcset');
|
|
424
|
+
if (srcset) {
|
|
425
|
+
source.srcset = srcset;
|
|
426
|
+
}
|
|
427
|
+
});
|
|
428
|
+
|
|
429
|
+
// 10. Background images in data attributes
|
|
430
|
+
document.querySelectorAll('[data-bg], [data-background]').forEach(el => {
|
|
431
|
+
const bg =
|
|
432
|
+
el.getAttribute('data-bg') || el.getAttribute('data-background');
|
|
433
|
+
if (bg && !el.style.backgroundImage) {
|
|
434
|
+
el.style.backgroundImage = `url(${bg})`;
|
|
435
|
+
}
|
|
436
|
+
});
|
|
437
|
+
|
|
438
|
+
// 11. Lottie animations - try to advance to final state
|
|
439
|
+
const lottieElements = document.querySelectorAll(
|
|
440
|
+
'lottie-player, [data-lottie]',
|
|
441
|
+
);
|
|
442
|
+
lottieElements.forEach(el => {
|
|
443
|
+
try {
|
|
444
|
+
if (el.goToAndStop) {
|
|
445
|
+
el.goToAndStop(el.totalFrames - 1, true);
|
|
446
|
+
}
|
|
447
|
+
} catch (e) {
|
|
448
|
+
/* ignore */
|
|
449
|
+
}
|
|
450
|
+
});
|
|
451
|
+
|
|
452
|
+
// 12. Force all CSS animations to complete
|
|
453
|
+
document.querySelectorAll('*').forEach(el => {
|
|
454
|
+
const style = window.getComputedStyle(el);
|
|
455
|
+
if (style.animationName && style.animationName !== 'none') {
|
|
456
|
+
// Set animation to end state
|
|
457
|
+
el.style.animationPlayState = 'paused';
|
|
458
|
+
el.style.animationDelay = '0s';
|
|
459
|
+
el.style.animationDuration = '0.001s';
|
|
460
|
+
}
|
|
461
|
+
});
|
|
462
|
+
});
|
|
463
|
+
/* eslint-enable no-undef */
|
|
464
|
+
}
|
|
151
465
|
}
|
package/src/server.js
CHANGED
|
@@ -6,6 +6,7 @@ import chalk from 'chalk';
|
|
|
6
6
|
import {exec} from 'child_process';
|
|
7
7
|
import * as p from '@clack/prompts';
|
|
8
8
|
import {readManifest} from './manifest.js';
|
|
9
|
+
import {getAllCapturedSites, getSitesDir} from './utils/home.js';
|
|
9
10
|
|
|
10
11
|
// MIME type mapping
|
|
11
12
|
const MIME_TYPES = {
|
|
@@ -256,7 +257,7 @@ async function generateDirectoryListing(dirPath, urlPath, rootDir) {
|
|
|
256
257
|
: ''
|
|
257
258
|
}
|
|
258
259
|
</div>
|
|
259
|
-
|
|
260
|
+
|
|
260
261
|
${
|
|
261
262
|
!isRoot
|
|
262
263
|
? `
|
|
@@ -273,7 +274,7 @@ async function generateDirectoryListing(dirPath, urlPath, rootDir) {
|
|
|
273
274
|
</div>`
|
|
274
275
|
: ''
|
|
275
276
|
}
|
|
276
|
-
|
|
277
|
+
|
|
277
278
|
<div class="listing">
|
|
278
279
|
<div class="listing-header">
|
|
279
280
|
${isRoot ? 'Captured Sites' : `Contents of ${urlPath}`}
|
|
@@ -317,7 +318,7 @@ async function generateDirectoryListing(dirPath, urlPath, rootDir) {
|
|
|
317
318
|
: ''
|
|
318
319
|
}
|
|
319
320
|
</div>
|
|
320
|
-
|
|
321
|
+
|
|
321
322
|
<div class="footer">
|
|
322
323
|
Powered by Smippo • Modern Website Copier
|
|
323
324
|
</div>
|
|
@@ -577,41 +578,103 @@ function truncatePath(p, maxLen) {
|
|
|
577
578
|
}
|
|
578
579
|
|
|
579
580
|
/**
|
|
580
|
-
* Get captured sites from a
|
|
581
|
+
* Get captured sites - either from global manifest or a specific directory
|
|
582
|
+
* @param {string|null} directory - Specific directory to serve, or null for global
|
|
581
583
|
*/
|
|
582
584
|
async function getCapturedSites(directory) {
|
|
583
585
|
const sites = [];
|
|
584
|
-
const smippoDir = path.join(directory, '.smippo');
|
|
585
586
|
|
|
587
|
+
// If no directory specified or it's the global sites dir, use global manifest
|
|
588
|
+
const globalSitesDir = getSitesDir();
|
|
589
|
+
const isGlobalMode =
|
|
590
|
+
!directory || path.resolve(directory) === path.resolve(globalSitesDir);
|
|
591
|
+
|
|
592
|
+
if (isGlobalMode) {
|
|
593
|
+
// Use global manifest to get all captured sites
|
|
594
|
+
const globalSites = await getAllCapturedSites();
|
|
595
|
+
for (const site of globalSites) {
|
|
596
|
+
// Find the domain subdirectory within the site path
|
|
597
|
+
const domainDir = path.join(site.path, site.domain);
|
|
598
|
+
const indexInDomain = path.join(domainDir, 'index.html');
|
|
599
|
+
const indexInRoot = path.join(site.path, 'index.html');
|
|
600
|
+
|
|
601
|
+
let hasIndex = false;
|
|
602
|
+
|
|
603
|
+
if (await fs.pathExists(indexInDomain)) {
|
|
604
|
+
hasIndex = true;
|
|
605
|
+
} else if (await fs.pathExists(indexInRoot)) {
|
|
606
|
+
hasIndex = true;
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
sites.push({
|
|
610
|
+
domain: site.domain,
|
|
611
|
+
fullPath: site.path, // Absolute path to serve from
|
|
612
|
+
domainPath: site.domain, // Domain subdirectory
|
|
613
|
+
hasIndex,
|
|
614
|
+
rootUrl: site.rootUrl,
|
|
615
|
+
title: site.title || site.domain,
|
|
616
|
+
pagesCount: site.pagesCount || 0,
|
|
617
|
+
assetsCount: site.assetsCount || 0,
|
|
618
|
+
lastUpdated: site.updated || null,
|
|
619
|
+
});
|
|
620
|
+
}
|
|
621
|
+
return sites;
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
// Specific directory mode: check for .smippo directory
|
|
625
|
+
const smippoDir = path.join(directory, '.smippo');
|
|
586
626
|
if (!(await fs.pathExists(smippoDir))) {
|
|
627
|
+
// Check if directory itself is a site directory (has index.html)
|
|
628
|
+
const indexPath = path.join(directory, 'index.html');
|
|
629
|
+
if (await fs.pathExists(indexPath)) {
|
|
630
|
+
const dirName = path.basename(directory);
|
|
631
|
+
sites.push({
|
|
632
|
+
domain: dirName,
|
|
633
|
+
fullPath: directory,
|
|
634
|
+
domainPath: '',
|
|
635
|
+
hasIndex: true,
|
|
636
|
+
rootUrl: null,
|
|
637
|
+
title: dirName,
|
|
638
|
+
pagesCount: 0,
|
|
639
|
+
assetsCount: 0,
|
|
640
|
+
lastUpdated: null,
|
|
641
|
+
});
|
|
642
|
+
}
|
|
587
643
|
return sites;
|
|
588
644
|
}
|
|
589
645
|
|
|
590
|
-
// Read manifest for site info
|
|
646
|
+
// Read local manifest for site info
|
|
591
647
|
const manifest = await readManifest(directory);
|
|
592
648
|
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
649
|
+
if (!manifest?.rootUrl) {
|
|
650
|
+
return sites;
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
// Extract domain from rootUrl
|
|
654
|
+
try {
|
|
655
|
+
const url = new URL(manifest.rootUrl);
|
|
656
|
+
const mainDomain = url.hostname;
|
|
657
|
+
|
|
658
|
+
// Check if the main domain directory exists
|
|
659
|
+
const domainPath = path.join(directory, mainDomain);
|
|
660
|
+
if (await fs.pathExists(domainPath)) {
|
|
661
|
+
const indexPath = path.join(domainPath, 'index.html');
|
|
602
662
|
const hasIndex = await fs.pathExists(indexPath);
|
|
603
663
|
|
|
604
664
|
sites.push({
|
|
605
|
-
domain:
|
|
606
|
-
|
|
665
|
+
domain: mainDomain,
|
|
666
|
+
fullPath: directory,
|
|
667
|
+
domainPath: mainDomain,
|
|
607
668
|
hasIndex,
|
|
608
|
-
rootUrl: manifest
|
|
609
|
-
title: manifest
|
|
610
|
-
pagesCount: manifest
|
|
611
|
-
assetsCount: manifest
|
|
612
|
-
lastUpdated: manifest
|
|
669
|
+
rootUrl: manifest.rootUrl,
|
|
670
|
+
title: manifest.pages?.[0]?.title || mainDomain,
|
|
671
|
+
pagesCount: manifest.stats?.pagesCapt || 0,
|
|
672
|
+
assetsCount: manifest.stats?.assetsCapt || 0,
|
|
673
|
+
lastUpdated: manifest.updated || null,
|
|
613
674
|
});
|
|
614
675
|
}
|
|
676
|
+
} catch {
|
|
677
|
+
// Invalid URL in manifest, fall back to directory scan
|
|
615
678
|
}
|
|
616
679
|
|
|
617
680
|
return sites;
|
|
@@ -622,12 +685,15 @@ async function getCapturedSites(directory) {
|
|
|
622
685
|
*/
|
|
623
686
|
export async function serve(options) {
|
|
624
687
|
try {
|
|
625
|
-
const directory = options.output || options.directory
|
|
626
|
-
const
|
|
688
|
+
const directory = options.output || options.directory;
|
|
689
|
+
const globalSitesDir = getSitesDir();
|
|
690
|
+
|
|
691
|
+
// Get captured sites (from global manifest if no directory specified)
|
|
692
|
+
const sites = await getCapturedSites(directory);
|
|
627
693
|
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
let
|
|
694
|
+
let serveDir = directory ? path.resolve(directory) : null;
|
|
695
|
+
let openPath = null;
|
|
696
|
+
let selectedSite = null;
|
|
631
697
|
|
|
632
698
|
if (sites.length > 0 && process.stdin.isTTY && !options.quiet) {
|
|
633
699
|
// Show interactive site selection
|
|
@@ -636,22 +702,30 @@ export async function serve(options) {
|
|
|
636
702
|
|
|
637
703
|
if (sites.length === 1) {
|
|
638
704
|
// Single site - auto-select but show info
|
|
639
|
-
|
|
705
|
+
selectedSite = sites[0];
|
|
640
706
|
console.log(
|
|
641
|
-
chalk.dim(' Found captured site: ') +
|
|
707
|
+
chalk.dim(' Found captured site: ') +
|
|
708
|
+
chalk.bold(selectedSite.domain),
|
|
642
709
|
);
|
|
643
|
-
if (
|
|
710
|
+
if (selectedSite.pagesCount > 0) {
|
|
644
711
|
console.log(
|
|
645
|
-
chalk.dim(
|
|
712
|
+
chalk.dim(
|
|
713
|
+
` ${selectedSite.pagesCount} pages, ${selectedSite.assetsCount} assets`,
|
|
714
|
+
),
|
|
646
715
|
);
|
|
647
716
|
}
|
|
648
|
-
|
|
717
|
+
if (selectedSite.fullPath) {
|
|
718
|
+
console.log(chalk.dim(` Location: ${selectedSite.fullPath}`));
|
|
719
|
+
}
|
|
649
720
|
} else {
|
|
650
721
|
// Multiple sites - let user choose
|
|
651
722
|
const siteOptions = sites.map(site => ({
|
|
652
|
-
value: site
|
|
723
|
+
value: site,
|
|
653
724
|
label: site.domain,
|
|
654
|
-
hint:
|
|
725
|
+
hint:
|
|
726
|
+
site.pagesCount > 0
|
|
727
|
+
? `${site.pagesCount} pages - ${site.fullPath}`
|
|
728
|
+
: site.fullPath,
|
|
655
729
|
}));
|
|
656
730
|
|
|
657
731
|
const selected = await p.select({
|
|
@@ -664,20 +738,34 @@ export async function serve(options) {
|
|
|
664
738
|
process.exit(0);
|
|
665
739
|
}
|
|
666
740
|
|
|
667
|
-
|
|
741
|
+
selectedSite = selected;
|
|
668
742
|
}
|
|
669
743
|
console.log('');
|
|
670
744
|
} else if (sites.length === 1) {
|
|
671
745
|
// Non-interactive mode with single site
|
|
672
|
-
|
|
746
|
+
selectedSite = sites[0];
|
|
747
|
+
} else if (sites.length === 0 && !directory) {
|
|
748
|
+
console.log(chalk.yellow('No captured sites found.'));
|
|
749
|
+
console.log(
|
|
750
|
+
chalk.dim(' Capture a site first: ') + chalk.cyan('smippo <url>'),
|
|
751
|
+
);
|
|
752
|
+
process.exit(0);
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
// Determine serve directory and open path
|
|
756
|
+
if (selectedSite) {
|
|
757
|
+
serveDir = selectedSite.fullPath;
|
|
758
|
+
openPath = selectedSite.domainPath || null;
|
|
759
|
+
} else if (!serveDir) {
|
|
760
|
+
serveDir = globalSitesDir;
|
|
673
761
|
}
|
|
674
762
|
|
|
675
763
|
const serverInfo = await createServer({
|
|
676
|
-
directory:
|
|
764
|
+
directory: serveDir,
|
|
677
765
|
port: options.port || 8080,
|
|
678
766
|
host: options.host || '127.0.0.1',
|
|
679
767
|
open: options.open,
|
|
680
|
-
openPath:
|
|
768
|
+
openPath: openPath,
|
|
681
769
|
cors: options.cors !== false,
|
|
682
770
|
verbose: options.verbose,
|
|
683
771
|
quiet: options.quiet,
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
// @flow
|
|
2
|
+
import fs from 'fs-extra';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import os from 'os';
|
|
5
|
+
|
|
6
|
+
const SMIPPO_HOME_DIR = '.smippo';
|
|
7
|
+
const SITES_DIR = 'sites';
|
|
8
|
+
const GLOBAL_MANIFEST_FILE = 'manifest.json';
|
|
9
|
+
|
|
10
|
+
/**
 * Resolve the global Smippo home directory.
 *
 * The location is the current user's home directory (as reported by
 * `os.homedir()`) joined with the `SMIPPO_HOME_DIR` folder name.
 *
 * @returns {string} Path to the Smippo home directory (e.g. `~/.smippo`)
 */
export function getSmippoHome() {
  const userHome = os.homedir();
  return path.join(userHome, SMIPPO_HOME_DIR);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Resolve the directory that holds captured sites.
 *
 * This is the `SITES_DIR` folder inside the Smippo home directory.
 *
 * @returns {string} Path to the sites directory (e.g. `~/.smippo/sites`)
 */
export function getSitesDir() {
  const smippoHome = getSmippoHome();
  return path.join(smippoHome, SITES_DIR);
}
|
|
23
|
+
|
|
24
|
+
/**
 * Resolve the output directory for one domain inside the sites directory.
 *
 * @param {string} domain - The domain name (e.g., 'example.com')
 * @returns {string} The sites directory joined with `domain`
 */
export function getSiteDir(domain) {
  const sitesRoot = getSitesDir();
  return path.join(sitesRoot, domain);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Extract the domain (hostname) from a URL.
 *
 * Besides fully qualified URLs, scheme-less input such as `example.com/docs`
 * or `localhost:3000` is supported by retrying with an assumed `https://`
 * prefix. Without that retry `localhost:3000` parses as a URL whose *scheme*
 * is `localhost` and whose hostname is empty, which would yield `''`.
 *
 * @param {string} url - The URL to extract domain from
 * @returns {string} The hostname, or the original input unchanged when no
 *   hostname can be determined
 */
export function getDomainFromUrl(url) {
  // Returns '' both when parsing fails and when the URL has no host part.
  const hostnameOf = candidate => {
    try {
      return new URL(candidate).hostname;
    } catch {
      return '';
    }
  };

  const direct = hostnameOf(url);
  if (direct) {
    return direct;
  }

  // Only guess a scheme when the input clearly does not carry one already;
  // inputs like `file:///tmp/x` legitimately have no hostname.
  if (typeof url === 'string' && !url.includes('://')) {
    const assumed = hostnameOf(`https://${url}`);
    if (assumed) {
      return assumed;
    }
  }

  // Nothing usable could be parsed: hand the input back untouched.
  return url;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Create the Smippo home directory and its sites subdirectory if they do
 * not exist yet.
 *
 * @returns {Promise<string>} The Smippo home directory path
 */
export async function ensureSmippoHome() {
  const smippoHome = getSmippoHome();

  // Home first, then the sites folder inside it.
  await fs.ensureDir(smippoHome);
  await fs.ensureDir(getSitesDir());

  return smippoHome;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Resolve the location of the global manifest file.
 *
 * The file is named by `GLOBAL_MANIFEST_FILE` and lives directly inside the
 * Smippo home directory.
 *
 * @returns {string} Path to the global manifest (e.g. `~/.smippo/manifest.json`)
 */
export function getGlobalManifestPath() {
  const smippoHome = getSmippoHome();
  return path.join(smippoHome, GLOBAL_MANIFEST_FILE);
}
|
|
67
|
+
|
|
68
|
+
/**
 * Read the global manifest (list of all captured sites).
 *
 * Always resolves to an object with a `sites` array. A fresh, empty manifest
 * is returned when the file is missing, unreadable, not valid JSON, or does
 * not contain a JSON object. If the file holds an object whose `sites` field
 * is missing or not an array, the other fields are kept and `sites` is reset
 * to `[]`, so callers can safely use `manifest.sites.filter(...)` etc.
 *
 * @returns {Promise<{version: string, sites: Array<Object>}>} The manifest
 */
export async function readGlobalManifest() {
  const createEmptyManifest = () => ({
    version: '1.0.0',
    sites: [],
  });

  const manifestPath = getGlobalManifestPath();

  if (!(await fs.pathExists(manifestPath))) {
    return createEmptyManifest();
  }

  try {
    const content = await fs.readFile(manifestPath, 'utf8');
    const parsed = JSON.parse(content);

    // Valid JSON is not necessarily a valid manifest (`null`, `[]`, `42`, ...).
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return createEmptyManifest();
    }

    if (!Array.isArray(parsed.sites)) {
      // Keep whatever else was stored, but guarantee the `sites` contract.
      return {...createEmptyManifest(), ...parsed, sites: []};
    }

    return parsed;
  } catch {
    // Unreadable or corrupt manifest: deliberately treated as "no sites yet".
    return createEmptyManifest();
  }
}
|
|
91
|
+
|
|
92
|
+
/**
 * Persist the global manifest to disk as pretty-printed JSON (2-space
 * indent, UTF-8), creating the Smippo home directory first if needed.
 *
 * @param {Object} manifest - The manifest object to serialize
 * @returns {Promise<void>}
 */
export async function writeGlobalManifest(manifest) {
  await ensureSmippoHome();

  const target = getGlobalManifestPath();
  const serialized = JSON.stringify(manifest, null, 2);

  await fs.writeFile(target, serialized, 'utf8');
}
|
|
100
|
+
|
|
101
|
+
/**
 * Add a site to the global manifest, or refresh its entry if the same
 * output directory is already registered.
 *
 * Entries are keyed by the resolved output directory, because the same
 * domain can be saved to several different directories.
 *
 * @param {Object} siteInfo - Site information
 * @param {string} siteInfo.domain - Domain name
 * @param {string} siteInfo.rootUrl - Original URL
 * @param {string} siteInfo.outputDir - Directory where site was saved
 * @param {string} [siteInfo.title] - Page title (defaults to the domain)
 * @param {string} [siteInfo.created] - Creation timestamp for a new entry
 * @param {number} [siteInfo.pagesCount] - Number of pages captured
 * @param {number} [siteInfo.assetsCount] - Number of assets captured
 * @returns {Promise<Object>} The updated manifest, after it has been written
 */
export async function addSiteToGlobalManifest(siteInfo) {
  const manifest = await readGlobalManifest();

  // Use outputDir as the unique key (same domain can be saved to different dirs)
  const outputPath = path.resolve(siteInfo.outputDir);
  const existingIndex = manifest.sites.findIndex(s => s.path === outputPath);

  // One timestamp so `created` and `updated` agree on a brand-new entry.
  const now = new Date().toISOString();

  const siteEntry = {
    domain: siteInfo.domain,
    rootUrl: siteInfo.rootUrl,
    title: siteInfo.title || siteInfo.domain,
    path: outputPath,
    created: siteInfo.created || now,
    updated: now,
    pagesCount: siteInfo.pagesCount || 0,
    assetsCount: siteInfo.assetsCount || 0,
  };

  if (existingIndex >= 0) {
    // Preserve the original creation date. If the stored entry has none,
    // keep the freshly computed one rather than overwriting it with
    // `undefined` (which JSON.stringify would silently drop).
    manifest.sites[existingIndex] = {
      ...siteEntry,
      created: manifest.sites[existingIndex].created ?? siteEntry.created,
    };
  } else {
    manifest.sites.push(siteEntry);
  }

  await writeGlobalManifest(manifest);
  return manifest;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Drop every entry for the given domain from the global manifest and
 * write the result back to disk.
 *
 * Matching is done on the entry's `domain` field, so all entries recorded
 * for that domain are removed.
 *
 * @param {string} domain - Domain whose entries should be removed
 * @returns {Promise<Object>} The updated manifest
 */
export async function removeSiteFromGlobalManifest(domain) {
  const manifest = await readGlobalManifest();

  const belongsToOtherDomain = entry => entry.domain !== domain;
  manifest.sites = manifest.sites.filter(belongsToOtherDomain);

  await writeGlobalManifest(manifest);
  return manifest;
}
|
|
153
|
+
|
|
154
|
+
/**
 * List the captured sites recorded in the global manifest whose directory
 * still exists on disk.
 *
 * Entries whose `path` no longer exists are left out of the result, and when
 * at least one such entry is found the pruned list is written back to the
 * manifest.
 *
 * @returns {Promise<Array<Object>>} Manifest entries that still exist
 */
export async function getAllCapturedSites() {
  const manifest = await readGlobalManifest();
  const recorded = manifest.sites;

  // Check entries one at a time, keeping manifest order.
  const stillOnDisk = [];
  for (const entry of recorded) {
    const present = await fs.pathExists(entry.path);
    if (!present) {
      continue;
    }
    stillOnDisk.push(entry);
  }

  // Persist the cleanup only when something actually disappeared.
  const hasStaleEntries = stillOnDisk.length !== recorded.length;
  if (hasStaleEntries) {
    manifest.sites = stillOnDisk;
    await writeGlobalManifest(manifest);
  }

  return stillOnDisk;
}
|