smippo 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,190 @@
1
+ // @flow
2
+ import cliProgress from 'cli-progress';
3
+ import chalk from 'chalk';
4
+
5
/**
 * Create a styled multi-bar progress display for Smippo.
 *
 * Three rows are rendered through a cli-progress `MultiBar`: a "Pages" bar,
 * an "Assets" bar and a "Current:" status row. When `options.quiet` is set,
 * an object with the same methods is returned but every method is a no-op,
 * so callers never have to branch on the mode.
 *
 * @param {{quiet?: boolean, verbose?: boolean}} [options]
 *   `quiet` disables all output; `verbose` enables `log()` output and makes
 *   asset URLs show up in the "Current:" row.
 * @returns {{
 *   start: Function, stop: Function, updatePage: Function,
 *   updateAsset: Function, addAssets: Function, log: Function,
 *   setStatus: Function
 * }} The progress controller.
 */
export function createProgressDisplay(options = {}) {
  const {quiet = false, verbose = false} = options;

  if (quiet) {
    // Expose the complete interface so quiet callers can still invoke
    // addAssets()/setStatus() without hitting "is not a function".
    const noop = () => {};
    return {
      start: noop,
      stop: noop,
      updatePage: noop,
      updateAsset: noop,
      addAssets: noop,
      log: noop,
      setStatus: noop,
    };
  }

  const BAR_WIDTH = 30;
  const MAX_NAME_LENGTH = 50;

  // Render one "<label> [████░░░░] value/total (NN%)" row in the given colour.
  const renderBar = (tint, label, barOptions, params) => {
    const filled = Math.round(params.progress * BAR_WIDTH);
    const bar = barOptions.barCompleteChar.repeat(filled);
    const empty = barOptions.barIncompleteChar.repeat(BAR_WIDTH - filled);
    const percent = Math.round(params.progress * 100);
    return ` ${tint(label)} ${tint('[')}${tint(bar)}${chalk.dim(empty)}${tint(']')} ${chalk.bold(params.value)}/${params.total} ${chalk.dim(`(${percent}%)`)}`;
  };

  // Create multi-bar container
  const multibar = new cliProgress.MultiBar(
    {
      clearOnComplete: false,
      hideCursor: true,
      format: (barOptions, params, payload) => {
        const {type, name} = payload;

        if (type === 'pages') {
          return renderBar(chalk.cyan, 'Pages', barOptions, params);
        }
        if (type === 'assets') {
          return renderBar(chalk.magenta, 'Assets', barOptions, params);
        }
        if (type === 'current') {
          // A status row may be updated without a name; never throw while
          // rendering. Long names keep their tail (the most specific part).
          const label = String(name ?? '');
          const truncated =
            label.length > MAX_NAME_LENGTH
              ? `...${label.slice(-(MAX_NAME_LENGTH - 3))}`
              : label;
          return ` ${chalk.dim('Current:')} ${chalk.white(truncated)}`;
        }
        return '';
      },
      barCompleteChar: '█',
      barIncompleteChar: '░',
    },
    cliProgress.Presets.shades_classic,
  );

  let pageBar = null;
  let assetBar = null;
  let currentBar = null;
  let totalPages = 0;
  let totalAssets = 0;
  let completedPages = 0;
  let completedAssets = 0;

  // Totals only ever grow: honour a larger caller-supplied total and never
  // let the total fall behind the completed count (start() works from rough
  // estimates, so "5/1" would otherwise be displayed).
  const resolveTotal = (current, requested, completed) => {
    let next = current;
    if (requested && requested > next) next = requested;
    if (completed > next) next = completed;
    return next;
  };

  return {
    /**
     * Start the progress display with initial estimates.
     * @param {number} [estimatedPages=1]
     * @param {number} [estimatedAssets=10]
     */
    start(estimatedPages = 1, estimatedAssets = 10) {
      totalPages = estimatedPages;
      totalAssets = estimatedAssets;
      completedPages = 0;
      completedAssets = 0;

      console.log('');
      pageBar = multibar.create(totalPages, 0, {type: 'pages'});
      assetBar = multibar.create(totalAssets, 0, {type: 'assets'});
      currentBar = multibar.create(100, 0, {
        type: 'current',
        name: 'Initializing...',
      });
    },

    /**
     * Stop the progress display and emit a trailing blank line.
     */
    stop() {
      multibar.stop();
      console.log('');
    },

    /**
     * Record one completed page and show its URL in the status row.
     * @param {string} url - URL of the page that was just processed.
     * @param {?number} [total] - Updated page total; ignored unless larger.
     */
    updatePage(url, total = null) {
      completedPages++;
      const nextTotal = resolveTotal(totalPages, total, completedPages);
      if (nextTotal !== totalPages) {
        totalPages = nextTotal;
        if (pageBar) pageBar.setTotal(totalPages);
      }
      if (pageBar) pageBar.update(completedPages);
      if (currentBar) currentBar.update(0, {name: url});
    },

    /**
     * Record one completed asset; the URL is only displayed in verbose mode.
     * @param {string} url - URL of the asset that was just processed.
     * @param {?number} [total] - Updated asset total; ignored unless larger.
     */
    updateAsset(url, total = null) {
      completedAssets++;
      const nextTotal = resolveTotal(totalAssets, total, completedAssets);
      if (nextTotal !== totalAssets) {
        totalAssets = nextTotal;
        if (assetBar) assetBar.setTotal(totalAssets);
      }
      if (assetBar) assetBar.update(completedAssets);
      if (currentBar && verbose) currentBar.update(0, {name: url});
    },

    /**
     * Increase the asset total estimate.
     * @param {number} count - Number of newly discovered assets.
     */
    addAssets(count) {
      // A missing or non-numeric count would turn the total into NaN.
      if (!Number.isFinite(count)) return;
      totalAssets += count;
      if (assetBar) assetBar.setTotal(totalAssets);
    },

    /**
     * Log a message through the multi-bar (verbose mode only).
     * @param {string} message
     */
    log(message) {
      if (verbose) {
        multibar.log(message + '\n');
      }
    },

    /**
     * Replace the text shown in the "Current:" status row.
     * @param {string} status
     */
    setStatus(status) {
      if (currentBar) currentBar.update(0, {name: status});
    },
  };
}
140
+
141
/**
 * Simple single-line progress for non-TTY environments.
 *
 * Writes an in-place (`\r`) status line to stdout instead of drawing bars.
 * Exposes the same method names as the multi-bar display; `addAssets` and
 * `setStatus` are no-ops here.
 *
 * @param {{quiet?: boolean, verbose?: boolean}} [options]
 *   `quiet` suppresses all output (including `log`); `verbose` disables the
 *   500 ms throttle on page updates and also reports asset updates.
 * @returns {{
 *   start: Function, stop: Function, updatePage: Function,
 *   updateAsset: Function, addAssets: Function, log: Function,
 *   setStatus: Function
 * }} The progress controller.
 */
export function createSimpleProgress(options = {}) {
  const {quiet = false, verbose = false} = options;

  const THROTTLE_MS = 500;
  const MAX_URL_LENGTH = 50;

  let pageCount = 0;
  let assetCount = 0;
  let lastUpdate = Date.now();
  // True while an unterminated "\r" status line is sitting on the terminal.
  let statusLineOpen = false;

  // Rewrite the status line in place; `suffix` is appended after the counters.
  const writeStatus = (suffix = '') => {
    process.stdout.write(
      `\r ${chalk.cyan('Pages:')} ${pageCount} ${chalk.magenta('Assets:')} ${assetCount} ${suffix}`,
    );
    statusLineOpen = true;
  };

  // Terminate the in-place status line so following output starts on a fresh
  // line instead of being appended to the counters.
  const closeStatusLine = () => {
    if (!statusLineOpen) return;
    process.stdout.write('\n');
    statusLineOpen = false;
  };

  return {
    start: () => {
      if (!quiet) console.log(chalk.cyan('\n Starting capture...\n'));
    },
    stop: () => {
      closeStatusLine();
    },
    updatePage: url => {
      pageCount++;
      if (quiet) return;
      if (!verbose && Date.now() - lastUpdate <= THROTTLE_MS) return;

      const text = String(url ?? '');
      const shown = `${text.slice(0, MAX_URL_LENGTH)}${text.length > MAX_URL_LENGTH ? '...' : ''}`;
      writeStatus(`${chalk.dim('Current:')} ${shown} `);
      lastUpdate = Date.now();
    },
    updateAsset: () => {
      assetCount++;
      if (!quiet && verbose) {
        writeStatus();
      }
    },
    addAssets: () => {},
    log: message => {
      // Quiet wins over verbose, matching the multi-bar display.
      if (quiet || !verbose) return;
      closeStatusLine();
      console.log(message);
    },
    setStatus: () => {},
  };
}
180
+
181
/**
 * Pick the progress implementation that suits the current environment.
 *
 * The simple single-line reporter is used when stdout is not a TTY or when
 * `options.simple` is truthy; otherwise the multi-bar display is used. The
 * options object is forwarded unchanged to the chosen factory.
 *
 * @param {{simple?: boolean, quiet?: boolean, verbose?: boolean}} [options]
 * @returns {object} The progress controller created by the chosen factory.
 */
export function createProgress(options = {}) {
  const wantsSimple = !process.stdout.isTTY || options.simple;
  const factory = wantsSimple ? createSimpleProgress : createProgressDisplay;
  return factory(options);
}
@@ -0,0 +1,210 @@
1
+ // @flow
2
+ import fs from 'fs-extra';
3
+ import path from 'path';
4
+ import {urlToPath} from './utils/url.js';
5
+ import {sanitizePath, joinPath} from './utils/path.js';
6
+ import mime from 'mime-types';
7
+
8
/** Matches a trailing `.html` / `.htm` extension (case-insensitive). */
const HTML_EXTENSION_PATTERN = /\.html?$/i;

/** Extension families treated as interchangeable when checking content types. */
const EQUIVALENT_EXTENSION_GROUPS = [
  ['jpg', 'jpeg'],
  ['html', 'htm'],
  ['js', 'mjs', 'cjs'],
];

/**
 * Saves captured pages and resources to disk and remembers where each URL
 * ended up.
 *
 * Instance state:
 * - `outputDir`  – root directory for everything written (default `./site`).
 * - `structure`  – layout mode forwarded to `urlToPath` (default `original`).
 * - `savedFiles` – Map of URL -> path relative to `outputDir`.
 * - `stats`      – running totals `{files, bytes}` of successful saves.
 * - `failures`   – `{url, error}` entries for resources that
 *   `saveResources()` could not write.
 */
export class ResourceSaver {
  /**
   * @param {{output?: string, structure?: string}} [options]
   */
  constructor(options = {}) {
    this.outputDir = options.output || './site';
    this.structure = options.structure || 'original';
    this.savedFiles = new Map(); // URL -> local path
    this.stats = {
      files: 0,
      bytes: 0,
    };
    this.failures = [];
  }

  /**
   * Get the local path for a URL.
   * @param {string} url
   * @returns {string} Path under `outputDir`.
   */
  getLocalPath(url) {
    return joinPath(this.outputDir, this._relativePathFor(url));
  }

  /**
   * Save HTML content.
   * @param {string} url - URL the HTML was captured from.
   * @param {string} html - Markup to write (UTF-8).
   * @param {object} [_options] - Currently unused.
   * @returns {Promise<string>} The path the file was written to.
   */
  async saveHtml(url, html, _options = {}) {
    const relativePath = this._relativePathFor(url);
    const localPath = joinPath(this.outputDir, relativePath);

    await fs.ensureDir(path.dirname(localPath));
    await fs.writeFile(localPath, html, 'utf8');

    // Store the relative path (not full path) for link rewriting
    this._recordSaved(url, relativePath, Buffer.byteLength(html, 'utf8'));

    return localPath;
  }

  /**
   * Save a resource (binary or text).
   * @param {string} url - URL the resource was fetched from.
   * @param {{body: (Buffer|string), contentType?: string, size?: number}} resource
   * @returns {Promise<string>} The path the file was written to.
   */
  async saveResource(url, resource) {
    // Fix extension based on content type if needed (applied to both forms so
    // the stored relative path matches the file actually written).
    const relativePath = this._fixExtension(
      this._relativePathFor(url),
      resource.contentType,
    );
    const localPath = this._fixExtension(
      joinPath(this.outputDir, this._relativePathFor(url)),
      resource.contentType,
    );

    await fs.ensureDir(path.dirname(localPath));
    await fs.writeFile(localPath, resource.body);

    // Store the relative path (not full path) for link rewriting
    this._recordSaved(url, relativePath, this._sizeOf(resource));

    return localPath;
  }

  /**
   * Save multiple resources, continuing past individual failures.
   *
   * Failures do not abort the batch; they are appended to `this.failures`
   * so callers can report them.
   *
   * @param {Iterable<[string, object]>} resources - `[url, resource]` pairs.
   * @returns {Promise<Array<{url: string, localPath: string, size: number}>>}
   *   One entry per resource that was written successfully.
   */
  async saveResources(resources) {
    const saved = [];

    for (const [url, resource] of resources) {
      try {
        const localPath = await this.saveResource(url, resource);
        saved.push({url, localPath, size: resource.size});
      } catch (error) {
        // Best effort: remember the failure and continue with the rest.
        this.failures.push({url, error});
      }
    }

    return saved;
  }

  /**
   * Save a screenshot next to the page's local file, as `.png`.
   * @param {string} url
   * @param {Buffer} screenshot
   * @returns {Promise<string>} The path the screenshot was written to.
   */
  async saveScreenshot(url, screenshot) {
    const screenshotPath = this._companionPath(url, 'png');

    await fs.ensureDir(path.dirname(screenshotPath));
    await fs.writeFile(screenshotPath, screenshot);

    return screenshotPath;
  }

  /**
   * Save a PDF next to the page's local file, as `.pdf`.
   * @param {string} url
   * @param {Buffer} pdf
   * @returns {Promise<string>} The path the PDF was written to.
   */
  async savePdf(url, pdf) {
    const pdfPath = this._companionPath(url, 'pdf');

    await fs.ensureDir(path.dirname(pdfPath));
    await fs.writeFile(pdfPath, pdf);

    return pdfPath;
  }

  /**
   * Get URL to local path mapping.
   * @returns {Map<string, string>} A copy; mutating it does not affect the saver.
   */
  getUrlMap() {
    return new Map(this.savedFiles);
  }

  /**
   * Get relative path from output directory.
   * @param {string} localPath
   * @returns {string}
   */
  getRelativePath(localPath) {
    return path.relative(this.outputDir, localPath);
  }

  /**
   * Sanitized path of a URL relative to the output directory.
   * @param {string} url
   * @returns {string}
   */
  _relativePathFor(url) {
    return sanitizePath(urlToPath(url, this.structure));
  }

  /**
   * Path for a file derived from a page (screenshot, PDF).
   *
   * A trailing `.html`/`.htm` is swapped for the new extension. When the page
   * path has no such extension the new one is appended, so the derived file
   * can never overwrite the page file itself.
   *
   * @param {string} url
   * @param {string} extension - Extension without the leading dot.
   * @returns {string}
   */
  _companionPath(url, extension) {
    const basePath = this.getLocalPath(url);
    return HTML_EXTENSION_PATTERN.test(basePath)
      ? basePath.replace(HTML_EXTENSION_PATTERN, `.${extension}`)
      : `${basePath}.${extension}`;
  }

  /**
   * Byte size of a resource: the reported `size` when it is a usable number,
   * otherwise measured from the body, so `stats.bytes` never becomes NaN.
   * @param {{body?: (Buffer|string), size?: number}} resource
   * @returns {number}
   */
  _sizeOf(resource) {
    if (Number.isFinite(resource.size)) return resource.size;

    const {body} = resource;
    if (body == null) return 0;
    if (typeof body === 'string') return Buffer.byteLength(body);
    return body.byteLength ?? body.length ?? 0;
  }

  /**
   * Register a successful save in the URL map and the statistics.
   * @param {string} url
   * @param {string} relativePath
   * @param {number} bytes
   */
  _recordSaved(url, relativePath, bytes) {
    this.savedFiles.set(url, relativePath);
    this.stats.files++;
    this.stats.bytes += bytes;
  }

  /**
   * Fix file extension based on content type.
   *
   * The path is returned unchanged when the content type is unknown, when the
   * current extension already matches (or belongs to the same family), or
   * when the current extension is a known one. Otherwise the extension
   * expected for the content type is appended.
   *
   * @param {string} filePath
   * @param {string} [contentType] - e.g. `text/css; charset=utf-8`.
   * @returns {string}
   */
  _fixExtension(filePath, contentType) {
    if (!contentType || typeof contentType !== 'string') return filePath;

    const mimeType = contentType.split(';')[0].trim();
    const expectedExt = mime.extension(mimeType);
    if (!expectedExt) return filePath;

    const currentExt = path.extname(filePath).slice(1).toLowerCase();
    if (currentExt === expectedExt) return filePath;

    // Don't change if extension seems correct
    const sameFamily = EQUIVALENT_EXTENSION_GROUPS.some(
      group => group.includes(currentExt) && group.includes(expectedExt),
    );
    if (sameFamily) return filePath;

    // Only fix if current extension is wrong or missing
    if (!currentExt || !isKnownExtension(currentExt)) {
      return `${filePath}.${expectedExt}`;
    }

    return filePath;
  }
}
171
+
172
/**
 * File extensions (lower-case, without the dot) that are trusted as-is, so a
 * file carrying one of them is not renamed to match its content type.
 */
const KNOWN_EXTENSIONS = new Set([
  'html',
  'htm',
  'css',
  'js',
  'mjs',
  // Kept in step with the js/mjs/cjs family used when comparing extensions.
  'cjs',
  'json',
  'xml',
  'png',
  'jpg',
  'jpeg',
  'gif',
  'webp',
  'svg',
  'ico',
  'bmp',
  'woff',
  'woff2',
  'ttf',
  'eot',
  'otf',
  'mp3',
  'mp4',
  'webm',
  'ogg',
  'wav',
  'avi',
  'pdf',
  'zip',
  'tar',
  'gz',
]);

/**
 * Check if extension is known.
 *
 * @param {string} ext - Extension without the leading dot; case-insensitive.
 * @returns {boolean} `true` for a recognised extension, `false` otherwise
 *   (including non-string input).
 */
function isKnownExtension(ext) {
  return typeof ext === 'string' && KNOWN_EXTENSIONS.has(ext.toLowerCase());
}
package/src/robots.js ADDED
@@ -0,0 +1,104 @@
1
+ // @flow
2
+ import robotsParser from 'robots-parser';
3
+
4
/**
 * robots.txt handler.
 *
 * Fetches robots.txt once per origin through a caller-supplied fetch
 * function, caches the parsed result, and answers allow / crawl-delay /
 * sitemap queries from it. The policy is fail-open: whenever robots.txt
 * cannot be fetched, parsed or applied to a URL, the URL is allowed.
 */
export class RobotsHandler {
  /**
   * @param {{ignoreRobots?: boolean, userAgent?: string}} [options]
   *   `ignoreRobots` disables all checks; `userAgent` is the agent name
   *   matched against robots.txt rules (default `Smippo/0.0.1`).
   */
  constructor(options = {}) {
    this.enabled = !options.ignoreRobots;
    this.userAgent = options.userAgent || 'Smippo/0.0.1';
    // robots.txt URL -> parsed robots object, or null when the file is empty/absent
    this.cache = new Map();
  }

  /**
   * Check if a URL is allowed by robots.txt.
   *
   * @param {string} url - Absolute URL to check.
   * @param {function(string): Promise<?string>} fetchFn - Returns the
   *   robots.txt body for the given robots.txt URL (falsy when unavailable).
   * @returns {Promise<boolean>} `false` only when robots.txt explicitly
   *   disallows the URL.
   */
  async isAllowed(url, fetchFn) {
    if (!this.enabled) return true;

    try {
      const robots = await this.getRobots(url, fetchFn);
      if (!robots) return true;

      // robots-parser answers `undefined` when it considers the URL outside
      // the scope of this robots.txt. Treat only an explicit `false` as a
      // denial so that case stays fail-open and callers always get a boolean.
      return robots.isAllowed(url, this.userAgent) !== false;
    } catch {
      // If we can't fetch/parse robots.txt, allow access
      return true;
    }
  }

  /**
   * Get robots.txt parser for a URL.
   *
   * Successful lookups (including "no robots.txt content") are cached per
   * origin. Errors are swallowed and reported as `null` without caching, so
   * a later call retries the fetch.
   *
   * @param {string} url - Any absolute URL on the target origin.
   * @param {function(string): Promise<?string>} fetchFn
   * @returns {Promise<?object>} The parsed robots object, or `null`.
   */
  async getRobots(url, fetchFn) {
    try {
      const robotsUrl = this._robotsUrlFor(url);

      if (this.cache.has(robotsUrl)) {
        return this.cache.get(robotsUrl);
      }

      const robotsContent = await fetchFn(robotsUrl);
      const robots = robotsContent
        ? robotsParser(robotsUrl, robotsContent)
        : null;

      this.cache.set(robotsUrl, robots);
      return robots;
    } catch {
      return null;
    }
  }

  /**
   * Get crawl delay for a domain.
   *
   * Answers from the cache only; robots.txt must already have been loaded
   * via `isAllowed()` / `getRobots()` for this origin.
   *
   * @param {string} url
   * @returns {number} The delay reported by the parser for this user agent,
   *   or 0 when disabled, unknown or unparsable.
   */
  getCrawlDelay(url) {
    if (!this.enabled) return 0;

    try {
      const robots = this.cache.get(this._robotsUrlFor(url));
      if (!robots) return 0;

      return robots.getCrawlDelay(this.userAgent) || 0;
    } catch {
      return 0;
    }
  }

  /**
   * Get sitemap URLs from robots.txt.
   *
   * Answers from the cache only (see `getCrawlDelay`).
   *
   * @param {string} url
   * @returns {string[]} Sitemap URLs, or an empty array when unknown.
   */
  getSitemaps(url) {
    try {
      const robots = this.cache.get(this._robotsUrlFor(url));
      if (!robots) return [];

      return robots.getSitemaps() || [];
    } catch {
      return [];
    }
  }

  /**
   * Clear the cache.
   */
  clearCache() {
    this.cache.clear();
  }

  /**
   * robots.txt URL for the origin of `url`; doubles as the cache key.
   * @param {string} url
   * @returns {string}
   * @throws {TypeError} When `url` is not a valid absolute URL.
   */
  _robotsUrlFor(url) {
    return `${new URL(url).origin}/robots.txt`;
  }
}
@@ -0,0 +1,185 @@
1
+ // @flow
2
+ import {chromium} from 'playwright';
3
+ import fs from 'fs-extra';
4
+ import path from 'path';
5
+ import chalk from 'chalk';
6
+
7
/**
 * Capture a screenshot of a URL.
 * Based on Playwright Screenshots API: https://playwright.dev/docs/screenshots
 *
 * Launches Chromium, opens the URL in a fresh context, captures either the
 * page or a single element, writes the image to disk and always closes the
 * browser afterwards.
 *
 * @param {string} url - Target URL; `https://` is prepended when no
 *   `http://` / `https://` scheme is present.
 * @param {object} [options]
 * @param {string} [options.output] - Output file path. Defaults to
 *   `<hostname>-<timestamp>.<ext>` in the current directory.
 * @param {boolean} [options.fullPage=false] - Capture the full scrollable page.
 * @param {string} [options.format='png'] - `png` or `jpeg` (`jpg` is accepted
 *   as an alias of `jpeg`).
 * @param {number} [options.quality] - JPEG quality; only used for jpeg.
 * @param {{width: number, height: number}} [options.viewport]
 * @param {string} [options.device] - Playwright device descriptor name.
 * @param {string} [options.selector] - Capture only the matching element.
 * @param {string} [options.wait='networkidle'] - `waitUntil` for navigation.
 * @param {number} [options.waitTime=0] - Extra delay in ms after navigation.
 * @param {number} [options.timeout=30000] - Navigation timeout in ms.
 * @param {string} [options.userAgent] - Overrides the default and any
 *   device-provided user agent.
 * @param {boolean} [options.darkMode=false] - Emulate a dark colour scheme.
 * @param {string} [options.scale='device'] - Playwright screenshot `scale`.
 * @param {boolean} [options.omitBackground=false]
 * @param {boolean} [options.verbose=false] - Print step-by-step progress.
 * @param {boolean} [options.quiet=false] - Suppress the summary output.
 * @returns {Promise<{path: string, size: number, url: string}>} The written
 *   file's path and size in bytes, plus the normalized URL.
 */
export async function captureScreenshot(url, options = {}) {
  const {
    output,
    fullPage = false,
    format = 'png',
    quality,
    viewport = {width: 1920, height: 1080},
    device,
    selector,
    wait = 'networkidle',
    waitTime = 0,
    timeout = 30000,
    userAgent,
    darkMode = false,
    scale = 'device',
    omitBackground = false,
    verbose = false,
    quiet = false,
  } = options;

  // Normalize URL
  const hasScheme = url.startsWith('http://') || url.startsWith('https://');
  const targetUrl = hasScheme ? url : `https://${url}`;

  // Playwright only knows "png" and "jpeg"; accept the common "jpg" spelling
  // so it neither fails the capture nor yields a ".png"-named JPEG.
  const imageType = format === 'jpg' ? 'jpeg' : format;

  // Determine output path
  let outputPath = output;
  if (!outputPath) {
    const {hostname} = new URL(targetUrl);
    const timestamp = new Date()
      .toISOString()
      .replace(/[:.]/g, '-')
      .slice(0, 19);
    const ext = imageType === 'jpeg' ? 'jpg' : 'png';
    outputPath = `${hostname}-${timestamp}.${ext}`;
  }

  // Ensure output directory exists
  const outputDir = path.dirname(outputPath);
  if (outputDir && outputDir !== '.') {
    await fs.ensureDir(outputDir);
  }

  if (!quiet) {
    console.log('');
    console.log(chalk.cyan(' 📸 Smippo Screenshot'));
    console.log(chalk.dim(` URL: ${targetUrl}`));
    console.log('');
  }

  const browser = await chromium.launch();

  try {
    // Set up context options
    const contextOptions = {
      viewport,
      userAgent: userAgent || 'Smippo/0.0.1 Screenshot',
    };

    // Device emulation
    if (device) {
      const {devices} = await import('playwright');
      if (devices[device]) {
        Object.assign(contextOptions, devices[device]);
        // The descriptor carries its own user agent; an explicitly requested
        // one must still win.
        if (userAgent) contextOptions.userAgent = userAgent;
        if (!quiet) console.log(chalk.dim(` Device: ${device}`));
      } else {
        console.warn(
          chalk.yellow(
            ` Warning: Unknown device "${device}", using default viewport`,
          ),
        );
      }
    }

    // Dark mode
    if (darkMode) {
      contextOptions.colorScheme = 'dark';
    }

    const context = await browser.newContext(contextOptions);
    const page = await context.newPage();

    // Navigate
    if (verbose) console.log(chalk.dim(` Navigating to ${targetUrl}...`));

    await page.goto(targetUrl, {
      waitUntil: wait,
      timeout,
    });

    // Additional wait time
    if (waitTime > 0) {
      if (verbose) console.log(chalk.dim(` Waiting ${waitTime}ms...`));
      await page.waitForTimeout(waitTime);
    }

    // Screenshot options
    const screenshotOptions = {
      path: outputPath,
      type: imageType,
      fullPage,
      scale,
      omitBackground,
    };

    // JPEG quality (only for jpeg format). Compare against null/undefined
    // rather than truthiness so an explicit quality of 0 is honoured.
    if (imageType === 'jpeg' && quality != null) {
      screenshotOptions.quality = quality;
    }

    // Take screenshot
    if (selector) {
      // Element screenshot
      if (verbose) console.log(chalk.dim(` Capturing element: ${selector}`));
      await page.locator(selector).screenshot(screenshotOptions);
    } else {
      // Page screenshot
      if (verbose) {
        console.log(
          chalk.dim(` Capturing ${fullPage ? 'full page' : 'viewport'}...`),
        );
      }
      await page.screenshot(screenshotOptions);
    }

    // Get file size
    const stats = await fs.stat(outputPath);
    const fileSize = formatFileSize(stats.size);

    if (!quiet) {
      console.log(chalk.green(` ✓ Screenshot saved`));
      console.log(chalk.dim(` File: ${outputPath}`));
      console.log(chalk.dim(` Size: ${fileSize}`));
      if (fullPage) {
        const dimensions = await page.evaluate(() => ({
          // eslint-disable-next-line no-undef
          width: document.documentElement.scrollWidth,
          // eslint-disable-next-line no-undef
          height: document.documentElement.scrollHeight,
        }));
        console.log(
          chalk.dim(` Dimensions: ${dimensions.width}x${dimensions.height}px`),
        );
      }
      console.log('');
    }

    return {
      path: outputPath,
      size: stats.size,
      url: targetUrl,
    };
  } finally {
    // Closing the browser also disposes of the context and page.
    await browser.close();
  }
}
168
+
169
/**
 * Render a byte count as a short human-readable string.
 *
 * Values below 1024 are shown as whole bytes (`B`); values below 1024² are
 * shown in `KB` and everything else in `MB`, both with one decimal place.
 *
 * @param {number} bytes
 * @returns {string}
 */
function formatFileSize(bytes) {
  const KIB = 1024;
  const MIB = KIB * KIB;

  if (bytes < KIB) {
    return `${bytes} B`;
  }

  const [divisor, unit] = bytes < MIB ? [KIB, 'KB'] : [MIB, 'MB'];
  return `${(bytes / divisor).toFixed(1)} ${unit}`;
}
177
+
178
/**
 * Parse a viewport string (e.g., "1920x1080").
 *
 * The separator is matched case-insensitively ("1280X720" works) and
 * surrounding whitespace is ignored. Each dimension is rounded to a whole
 * number; a dimension that is missing, non-numeric, zero or negative falls
 * back to its default (1920 for width, 1080 for height).
 *
 * @param {string} [viewportStr]
 * @returns {{width: number, height: number}}
 */
export function parseViewport(viewportStr) {
  const DEFAULT_WIDTH = 1920;
  const DEFAULT_HEIGHT = 1080;

  if (!viewportStr) return {width: DEFAULT_WIDTH, height: DEFAULT_HEIGHT};

  // Viewport sizes must be positive whole pixel counts.
  const toDimension = (raw, fallback) => {
    const pixels = Math.round(Number(raw));
    return Number.isFinite(pixels) && pixels > 0 ? pixels : fallback;
  };

  // String() keeps non-string input (e.g. a bare number) from throwing.
  const [rawWidth, rawHeight] = String(viewportStr).split(/x/i);

  return {
    width: toDimension(rawWidth, DEFAULT_WIDTH),
    height: toDimension(rawHeight, DEFAULT_HEIGHT),
  };
}