site-mirror 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +101 -101
- package/bin/cli.mjs +286 -286
- package/bin/postinstall.mjs +15 -15
- package/lib/mirror.mjs +511 -463
- package/package.json +46 -46
package/bin/cli.mjs
CHANGED
|
@@ -1,286 +1,286 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* site-mirror CLI
|
|
4
|
-
*
|
|
5
|
-
* Usage:
|
|
6
|
-
* site-mirror init Interactive setup - creates site-mirror.config.json
|
|
7
|
-
* site-mirror run [options] Run the mirror (reads config + CLI overrides)
|
|
8
|
-
* site-mirror serve [port] Serve the offline folder locally
|
|
9
|
-
*
|
|
10
|
-
* Options (for run):
|
|
11
|
-
* --start <url> Start URL (required if not in config)
|
|
12
|
-
* --out <dir> Output directory (default: ./offline)
|
|
13
|
-
* --maxPages <n> Max pages to crawl (0 = unlimited)
|
|
14
|
-
* --maxDepth <n> Max link depth (0 = unlimited)
|
|
15
|
-
* --sameOriginOnly Only crawl same-origin pages (default: true)
|
|
16
|
-
* --seedSitemaps Seed URLs from sitemap.xml (default: false)
|
|
17
|
-
* --singlePage Download only this one page + all its assets (no crawling)
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
import fs from 'node:fs/promises';
|
|
21
|
-
import path from 'node:path';
|
|
22
|
-
import readline from 'node:readline';
|
|
23
|
-
import { fileURLToPath } from 'node:url';
|
|
24
|
-
import { execSync, spawn } from 'node:child_process';
|
|
25
|
-
|
|
26
|
-
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
27
|
-
const CONFIG_FILE = 'site-mirror.config.json';
|
|
28
|
-
|
|
29
|
-
const defaultConfig = {
|
|
30
|
-
start: '',
|
|
31
|
-
out: './offline',
|
|
32
|
-
maxPages: 0,
|
|
33
|
-
maxDepth: 0,
|
|
34
|
-
sameOriginOnly: true,
|
|
35
|
-
seedSitemaps: false,
|
|
36
|
-
singlePage: false,
|
|
37
|
-
userAgent:
|
|
38
|
-
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36',
|
|
39
|
-
};
|
|
40
|
-
|
|
41
|
-
async function loadConfig(cwd) {
|
|
42
|
-
const configPath = path.join(cwd, CONFIG_FILE);
|
|
43
|
-
try {
|
|
44
|
-
const raw = await fs.readFile(configPath, 'utf-8');
|
|
45
|
-
return { ...defaultConfig, ...JSON.parse(raw) };
|
|
46
|
-
} catch {
|
|
47
|
-
return { ...defaultConfig };
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
function parseCliArgs(argv) {
|
|
52
|
-
const args = {};
|
|
53
|
-
for (let i = 0; i < argv.length; i++) {
|
|
54
|
-
const key = argv[i];
|
|
55
|
-
const next = argv[i + 1];
|
|
56
|
-
|
|
57
|
-
if (key === '--start') {
|
|
58
|
-
args.start = next;
|
|
59
|
-
i++;
|
|
60
|
-
} else if (key === '--out') {
|
|
61
|
-
args.out = next;
|
|
62
|
-
i++;
|
|
63
|
-
} else if (key === '--maxPages') {
|
|
64
|
-
args.maxPages = Number(next);
|
|
65
|
-
i++;
|
|
66
|
-
} else if (key === '--maxDepth') {
|
|
67
|
-
args.maxDepth = Number(next);
|
|
68
|
-
i++;
|
|
69
|
-
} else if (key === '--sameOriginOnly') {
|
|
70
|
-
args.sameOriginOnly = next !== 'false' && next !== '0';
|
|
71
|
-
i++;
|
|
72
|
-
} else if (key === '--seedSitemaps') {
|
|
73
|
-
args.seedSitemaps = next === 'true' || next === '1' || next === 'yes';
|
|
74
|
-
i++;
|
|
75
|
-
} else if (key === '--singlePage') {
|
|
76
|
-
args.singlePage = true;
|
|
77
|
-
} else if (key === '--userAgent') {
|
|
78
|
-
args.userAgent = next;
|
|
79
|
-
i++;
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
return args;
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
async function prompt(rl, question, defaultValue) {
|
|
86
|
-
return new Promise((resolve) => {
|
|
87
|
-
const hint = defaultValue !== undefined ? ` (${defaultValue})` : '';
|
|
88
|
-
rl.question(`${question}${hint}: `, (answer) => {
|
|
89
|
-
const trimmed = answer.trim();
|
|
90
|
-
resolve(trimmed === '' && defaultValue !== undefined ? defaultValue : trimmed);
|
|
91
|
-
});
|
|
92
|
-
});
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
async function cmdInit(cwd) {
|
|
96
|
-
const configPath = path.join(cwd, CONFIG_FILE);
|
|
97
|
-
const exists = await fs
|
|
98
|
-
.access(configPath)
|
|
99
|
-
.then(() => true)
|
|
100
|
-
.catch(() => false);
|
|
101
|
-
|
|
102
|
-
if (exists) {
|
|
103
|
-
console.log(`Config already exists: ${configPath}`);
|
|
104
|
-
console.log('Delete it first if you want to reinitialize.');
|
|
105
|
-
return;
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
console.log('');
|
|
109
|
-
console.log('Welcome to site-mirror! Let\'s set up your config.');
|
|
110
|
-
console.log('Press Enter to accept the default value shown in parentheses.');
|
|
111
|
-
console.log('');
|
|
112
|
-
|
|
113
|
-
const rl = readline.createInterface({
|
|
114
|
-
input: process.stdin,
|
|
115
|
-
output: process.stdout,
|
|
116
|
-
});
|
|
117
|
-
|
|
118
|
-
try {
|
|
119
|
-
const start = await prompt(rl, 'Website URL to mirror', 'https://example.com/');
|
|
120
|
-
const out = await prompt(rl, 'Output directory', './offline');
|
|
121
|
-
const singlePageStr = await prompt(rl, 'Single page only? (yes/no)', 'no');
|
|
122
|
-
const singlePage = singlePageStr === 'yes' || singlePageStr === 'y' || singlePageStr === 'true';
|
|
123
|
-
|
|
124
|
-
let maxPages = 0;
|
|
125
|
-
let maxDepth = 0;
|
|
126
|
-
let seedSitemaps = false;
|
|
127
|
-
|
|
128
|
-
if (!singlePage) {
|
|
129
|
-
const maxPagesStr = await prompt(rl, 'Max pages to crawl (0 = unlimited)', '200');
|
|
130
|
-
maxPages = parseInt(maxPagesStr, 10) || 0;
|
|
131
|
-
|
|
132
|
-
const maxDepthStr = await prompt(rl, 'Max link depth (0 = unlimited)', '6');
|
|
133
|
-
maxDepth = parseInt(maxDepthStr, 10) || 0;
|
|
134
|
-
|
|
135
|
-
const seedStr = await prompt(rl, 'Seed from sitemap.xml? (yes/no)', 'no');
|
|
136
|
-
seedSitemaps = seedStr === 'yes' || seedStr === 'y' || seedStr === 'true';
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
const config = {
|
|
140
|
-
start,
|
|
141
|
-
out,
|
|
142
|
-
singlePage,
|
|
143
|
-
maxPages,
|
|
144
|
-
maxDepth,
|
|
145
|
-
sameOriginOnly: true,
|
|
146
|
-
seedSitemaps,
|
|
147
|
-
};
|
|
148
|
-
|
|
149
|
-
await fs.writeFile(configPath, JSON.stringify(config, null, 2) + '\n');
|
|
150
|
-
console.log('');
|
|
151
|
-
console.log(`Created ${CONFIG_FILE}`);
|
|
152
|
-
console.log('');
|
|
153
|
-
console.log('Run "site-mirror run" to start mirroring!');
|
|
154
|
-
} finally {
|
|
155
|
-
rl.close();
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
async function cmdRun(cwd, cliArgs) {
|
|
160
|
-
const config = await loadConfig(cwd);
|
|
161
|
-
const merged = { ...config, ...cliArgs };
|
|
162
|
-
|
|
163
|
-
// Validate required fields
|
|
164
|
-
if (!merged.start) {
|
|
165
|
-
console.error('');
|
|
166
|
-
console.error('Error: No start URL specified.');
|
|
167
|
-
console.error('');
|
|
168
|
-
console.error('You can either:');
|
|
169
|
-
console.error(' 1. Run "site-mirror init" to create a config file');
|
|
170
|
-
console.error(' 2. Pass --start <url> directly, e.g.:');
|
|
171
|
-
console.error(' site-mirror run --start https://example.com/');
|
|
172
|
-
console.error(' site-mirror run --start https://example.com/page --singlePage');
|
|
173
|
-
console.error('');
|
|
174
|
-
process.exit(1);
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
// Validate URL format
|
|
178
|
-
try {
|
|
179
|
-
new URL(merged.start);
|
|
180
|
-
} catch {
|
|
181
|
-
console.error('');
|
|
182
|
-
console.error(`Error: Invalid URL "${merged.start}"`);
|
|
183
|
-
console.error('Please provide a valid URL starting with http:// or https://');
|
|
184
|
-
console.error('');
|
|
185
|
-
process.exit(1);
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
console.log('');
|
|
189
|
-
console.log('Configuration:');
|
|
190
|
-
console.log(` start: ${merged.start}`);
|
|
191
|
-
console.log(` out: ${merged.out}`);
|
|
192
|
-
if (merged.singlePage) {
|
|
193
|
-
console.log(` mode: SINGLE PAGE (no crawling)`);
|
|
194
|
-
} else {
|
|
195
|
-
console.log(` maxPages: ${merged.maxPages || 'unlimited'}`);
|
|
196
|
-
console.log(` maxDepth: ${merged.maxDepth || 'unlimited'}`);
|
|
197
|
-
console.log(` seedSitemaps: ${merged.seedSitemaps}`);
|
|
198
|
-
}
|
|
199
|
-
console.log(` sameOriginOnly: ${merged.sameOriginOnly}`);
|
|
200
|
-
console.log('');
|
|
201
|
-
|
|
202
|
-
// Dynamically import the mirror lib
|
|
203
|
-
const { mirrorSite } = await import('../lib/mirror.mjs');
|
|
204
|
-
await mirrorSite({
|
|
205
|
-
start: merged.start,
|
|
206
|
-
out: path.resolve(cwd, merged.out),
|
|
207
|
-
maxPages: merged.singlePage ? 1 : merged.maxPages,
|
|
208
|
-
maxDepth: merged.singlePage ? 0 : merged.maxDepth,
|
|
209
|
-
sameOriginOnly: merged.sameOriginOnly,
|
|
210
|
-
seedSitemaps: merged.singlePage ? false : merged.seedSitemaps,
|
|
211
|
-
singlePage: merged.singlePage,
|
|
212
|
-
userAgent: merged.userAgent,
|
|
213
|
-
});
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
async function cmdServe(cwd, port) {
|
|
217
|
-
const outDir = path.join(cwd, 'offline');
|
|
218
|
-
const exists = await fs
|
|
219
|
-
.access(outDir)
|
|
220
|
-
.then(() => true)
|
|
221
|
-
.catch(() => false);
|
|
222
|
-
|
|
223
|
-
if (!exists) {
|
|
224
|
-
console.error(`No offline folder found at ${outDir}`);
|
|
225
|
-
console.error('Run "site-mirror run" first to download the site.');
|
|
226
|
-
process.exit(1);
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
console.log(`Serving ${outDir} on http://localhost:${port}/`);
|
|
230
|
-
try {
|
|
231
|
-
execSync(`npx http-server "${outDir}" -p ${port} -c-1`, { stdio: 'inherit' });
|
|
232
|
-
} catch {
|
|
233
|
-
// User closed with Ctrl+C
|
|
234
|
-
}
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
async function main() {
|
|
238
|
-
const cwd = process.cwd();
|
|
239
|
-
const args = process.argv.slice(2);
|
|
240
|
-
const command = args[0];
|
|
241
|
-
|
|
242
|
-
if (!command || command === 'help' || command === '--help' || command === '-h') {
|
|
243
|
-
console.log(`
|
|
244
|
-
site-mirror - Mirror websites for offline browsing
|
|
245
|
-
|
|
246
|
-
Usage:
|
|
247
|
-
site-mirror init Interactive setup - creates site-mirror.config.json
|
|
248
|
-
site-mirror run [options] Mirror the website (reads config + CLI overrides)
|
|
249
|
-
site-mirror serve [port] Serve the offline folder (default port: 8080)
|
|
250
|
-
|
|
251
|
-
Run options:
|
|
252
|
-
--start <url> Start URL (required if not in config)
|
|
253
|
-
--out <dir> Output directory (default: ./offline)
|
|
254
|
-
--maxPages <n> Max pages (0 = unlimited)
|
|
255
|
-
--maxDepth <n> Max depth (0 = unlimited)
|
|
256
|
-
--seedSitemaps true Seed from sitemap.xml
|
|
257
|
-
--singlePage Download only this page + all its assets (no crawling)
|
|
258
|
-
|
|
259
|
-
Examples:
|
|
260
|
-
site-mirror run --start https://example.com/page --singlePage
|
|
261
|
-
site-mirror init
|
|
262
|
-
site-mirror run
|
|
263
|
-
site-mirror serve 3000
|
|
264
|
-
`);
|
|
265
|
-
return;
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
if (command === 'init') {
|
|
269
|
-
await cmdInit(cwd);
|
|
270
|
-
} else if (command === 'run') {
|
|
271
|
-
const cliArgs = parseCliArgs(args.slice(1));
|
|
272
|
-
await cmdRun(cwd, cliArgs);
|
|
273
|
-
} else if (command === 'serve') {
|
|
274
|
-
const port = args[1] || '8080';
|
|
275
|
-
await cmdServe(cwd, port);
|
|
276
|
-
} else {
|
|
277
|
-
// Assume it's run with options directly (no subcommand)
|
|
278
|
-
const cliArgs = parseCliArgs(args);
|
|
279
|
-
await cmdRun(cwd, cliArgs);
|
|
280
|
-
}
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
main().catch((err) => {
|
|
284
|
-
console.error(err);
|
|
285
|
-
process.exit(1);
|
|
286
|
-
});
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* site-mirror CLI
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* site-mirror init Interactive setup - creates site-mirror.config.json
|
|
7
|
+
* site-mirror run [options] Run the mirror (reads config + CLI overrides)
|
|
8
|
+
* site-mirror serve [port] Serve the offline folder locally
|
|
9
|
+
*
|
|
10
|
+
* Options (for run):
|
|
11
|
+
* --start <url> Start URL (required if not in config)
|
|
12
|
+
* --out <dir> Output directory (default: ./offline)
|
|
13
|
+
* --maxPages <n> Max pages to crawl (0 = unlimited)
|
|
14
|
+
* --maxDepth <n> Max link depth (0 = unlimited)
|
|
15
|
+
* --sameOriginOnly Only crawl same-origin pages (default: true)
|
|
16
|
+
* --seedSitemaps Seed URLs from sitemap.xml (default: false)
|
|
17
|
+
* --singlePage Download only this one page + all its assets (no crawling)
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import fs from 'node:fs/promises';
|
|
21
|
+
import path from 'node:path';
|
|
22
|
+
import readline from 'node:readline';
|
|
23
|
+
import { fileURLToPath } from 'node:url';
|
|
24
|
+
import { execSync, spawn } from 'node:child_process';
|
|
25
|
+
|
|
26
|
+
// Directory that contains this script. ES modules have no built-in
// `__dirname`, so it is derived from `import.meta.url`.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// File name of the per-project configuration, looked up in the working directory.
const CONFIG_FILE = 'site-mirror.config.json';

/**
 * Baseline values for every supported option.
 *
 * The object is frozen so that no code path can accidentally mutate the
 * shared defaults; consumers are expected to copy it (e.g. with spread)
 * before layering their own values on top.
 */
const defaultConfig = Object.freeze({
  start: '',
  out: './offline',
  maxPages: 0,
  maxDepth: 0,
  sameOriginOnly: true,
  seedSitemaps: false,
  singlePage: false,
  userAgent:
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36',
});
|
|
40
|
+
|
|
41
|
+
/**
 * Load the configuration for the project located at `cwd`.
 *
 * Reads `CONFIG_FILE` from `cwd` and layers its values on top of
 * `defaultConfig`. A missing file is not an error: a copy of the defaults is
 * returned. Any other problem (unreadable file, malformed JSON, a JSON value
 * that is not an object) is reported instead of being silently replaced by
 * the defaults, because running with unintended settings is worse than
 * stopping.
 *
 * @param {string} cwd - Directory in which to look for the config file.
 * @returns {Promise<object>} A fresh object holding the effective config.
 * @throws {Error} When the file exists but cannot be read or parsed.
 */
async function loadConfig(cwd) {
  const configPath = path.join(cwd, CONFIG_FILE);

  let raw;
  try {
    raw = await fs.readFile(configPath, 'utf-8');
  } catch (err) {
    // No config file yet: this is the normal case before "init" has been run.
    if (err.code === 'ENOENT') {
      return { ...defaultConfig };
    }
    throw new Error(`Could not read ${configPath}: ${err.message}`, { cause: err });
  }

  let parsed;
  try {
    // Strip a UTF-8 byte-order mark, which JSON.parse would otherwise reject.
    parsed = JSON.parse(raw.replace(/^\uFEFF/, ''));
  } catch (err) {
    throw new Error(`Invalid JSON in ${configPath}: ${err.message}`, { cause: err });
  }

  // Spreading an array or a primitive would produce a nonsensical config.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(`${configPath} must contain a JSON object.`);
  }

  return { ...defaultConfig, ...parsed };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Parse the option tokens of the "run" command.
 *
 * Recognised options:
 *   --start <url>, --out <dir>, --userAgent <ua>   string values
 *   --maxPages <n>, --maxDepth <n>                 non-negative integers
 *   --sameOriginOnly [bool], --seedSitemaps [bool] booleans; the value is
 *                                                  optional and a bare flag
 *                                                  means true
 *   --singlePage                                   bare flag
 *
 * Unknown tokens are ignored. Only options that were actually supplied appear
 * in the result, so the object can be spread over a config without erasing
 * values that were not overridden.
 *
 * @param {string[]} argv - Tokens following the command name.
 * @returns {object} Map of option name to parsed value.
 * @throws {Error} When a value is missing or a count is not a non-negative integer.
 */
function parseCliArgs(argv) {
  const args = {};

  for (let i = 0; i < argv.length; i++) {
    const key = argv[i];

    switch (key) {
      case '--start':
      case '--out':
      case '--userAgent':
        args[key.slice(2)] = readCliOptionValue(argv, i);
        i++;
        break;

      case '--maxPages':
      case '--maxDepth':
        args[key.slice(2)] = parseCliCount(key, readCliOptionValue(argv, i));
        i++;
        break;

      case '--sameOriginOnly':
      case '--seedSitemaps': {
        const literal = readCliBooleanLiteral(argv[i + 1]);
        if (literal === undefined) {
          // Bare flag: leave the next token alone so a following option is
          // not swallowed.
          args[key.slice(2)] = true;
        } else {
          args[key.slice(2)] = literal;
          i++;
        }
        break;
      }

      case '--singlePage':
        args.singlePage = true;
        break;

      default:
        // Unknown tokens are skipped.
        break;
    }
  }

  return args;
}

/** Return the token after `index`, or throw if it is absent or is another `--option`. */
function readCliOptionValue(argv, index) {
  const value = argv[index + 1];
  if (typeof value !== 'string' || value.startsWith('--')) {
    throw new Error(`Missing value for ${argv[index]}`);
  }
  return value;
}

/** Convert `raw` to a non-negative integer, or throw an error naming `flag`. */
function parseCliCount(flag, raw) {
  const value = Number(raw);
  // Number('') is 0, so an empty string has to be rejected explicitly.
  if (raw.trim() === '' || !Number.isInteger(value) || value < 0) {
    throw new Error(`${flag} expects a non-negative integer, got "${raw}"`);
  }
  return value;
}

/** Map an explicit boolean word to true/false; return undefined for anything else. */
function readCliBooleanLiteral(token) {
  if (typeof token !== 'string') {
    return undefined;
  }
  const word = token.toLowerCase();
  if (word === 'true' || word === '1' || word === 'yes') {
    return true;
  }
  if (word === 'false' || word === '0' || word === 'no') {
    return false;
  }
  return undefined;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Ask one question on a readline interface.
 *
 * The question is shown as "<question> (<default>): " when a default is
 * given, otherwise as "<question>: ". The answer is trimmed; an empty answer
 * resolves to `defaultValue` when one was supplied.
 *
 * If the interface is closed before an answer arrives (end of input, or the
 * interface being closed elsewhere) the promise rejects. Without this the
 * promise would stay pending forever and the caller would stop silently.
 *
 * @param {import('node:readline').Interface} rl - Interface to ask on.
 * @param {string} question - Text of the question, without trailing colon.
 * @param {string} [defaultValue] - Value used when the answer is empty.
 * @returns {Promise<string>} The trimmed answer or the default.
 */
async function prompt(rl, question, defaultValue) {
  const hasDefault = defaultValue !== undefined;
  const hint = hasDefault ? ` (${defaultValue})` : '';

  return new Promise((resolve, reject) => {
    const onClose = () => {
      reject(new Error(`Input closed before "${question}" was answered.`));
    };
    rl.once('close', onClose);

    rl.question(`${question}${hint}: `, (answer) => {
      // Answered: a later, regular close must not reject anything.
      rl.removeListener('close', onClose);
      const trimmed = answer.trim();
      resolve(trimmed === '' && hasDefault ? defaultValue : trimmed);
    });
  });
}
|
|
94
|
+
|
|
95
|
+
/**
 * "init" command: interactively create the config file in `cwd`.
 *
 * Does nothing except print a notice when the config file already exists.
 * Otherwise asks for the start URL, output directory and crawl limits, then
 * writes the answers as pretty-printed JSON.
 *
 * Compared with a naive implementation this version
 *   - accepts yes/no answers case-insensitively ("Yes", "Y", "TRUE"),
 *   - re-asks for the URL until it is a valid http(s) URL, so "run" does not
 *     fail later on a config that "init" itself produced,
 *   - never stores negative page/depth limits,
 *   - creates the file exclusively, so a file that appears between the
 *     existence check and the write is not overwritten.
 *
 * @param {string} cwd - Directory in which the config file is created.
 * @returns {Promise<void>}
 */
async function cmdInit(cwd) {
  const configPath = path.join(cwd, CONFIG_FILE);
  const exists = await fs
    .access(configPath)
    .then(() => true)
    .catch(() => false);

  if (exists) {
    console.log(`Config already exists: ${configPath}`);
    console.log('Delete it first if you want to reinitialize.');
    return;
  }

  console.log('');
  console.log('Welcome to site-mirror! Let\'s set up your config.');
  console.log('Press Enter to accept the default value shown in parentheses.');
  console.log('');

  // Lenient yes/no interpretation; anything that is not a yes counts as no.
  const isYes = (answer) => ['yes', 'y', 'true'].includes(String(answer).trim().toLowerCase());

  // Non-numeric and negative input both collapse to 0, which means "unlimited".
  const toCount = (answer) => Math.max(0, Number.parseInt(answer, 10) || 0);

  const isHttpUrl = (value) => {
    try {
      const { protocol } = new URL(value);
      return protocol === 'http:' || protocol === 'https:';
    } catch {
      return false;
    }
  };

  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  try {
    let start = await prompt(rl, 'Website URL to mirror', 'https://example.com/');
    while (!isHttpUrl(start)) {
      console.log(`"${start}" is not a valid http:// or https:// URL.`);
      start = await prompt(rl, 'Website URL to mirror', 'https://example.com/');
    }

    const out = await prompt(rl, 'Output directory', './offline');
    const singlePage = isYes(await prompt(rl, 'Single page only? (yes/no)', 'no'));

    let maxPages = 0;
    let maxDepth = 0;
    let seedSitemaps = false;

    // Crawl limits are only meaningful when more than one page is fetched.
    if (!singlePage) {
      maxPages = toCount(await prompt(rl, 'Max pages to crawl (0 = unlimited)', '200'));
      maxDepth = toCount(await prompt(rl, 'Max link depth (0 = unlimited)', '6'));
      seedSitemaps = isYes(await prompt(rl, 'Seed from sitemap.xml? (yes/no)', 'no'));
    }

    const config = {
      start,
      out,
      singlePage,
      maxPages,
      maxDepth,
      sameOriginOnly: true,
      seedSitemaps,
    };

    // 'wx' fails instead of overwriting if the file was created in the meantime.
    await fs.writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, { flag: 'wx' });
    console.log('');
    console.log(`Created ${CONFIG_FILE}`);
    console.log('');
    console.log('Run "site-mirror run" to start mirroring!');
  } finally {
    rl.close();
  }
}
|
|
158
|
+
|
|
159
|
+
/**
 * "run" command: merge config and CLI overrides, validate them, print the
 * effective settings and start the mirror.
 *
 * CLI values take precedence over the config file. Keys whose CLI value is
 * `undefined` are ignored so they cannot erase a value from the config file.
 * The start URL must parse as a URL and use the http: or https: scheme; other
 * schemes (file:, ftp:, javascript:, ...) are rejected, matching the error
 * message shown to the user.
 *
 * In single-page mode the crawl limits are forced to one page, depth 0 and
 * no sitemap seeding, regardless of the configured values.
 *
 * The process exits with status 1 when the start URL is missing or invalid.
 *
 * @param {string} cwd - Working directory; config is read from here and a
 *   relative `out` is resolved against it.
 * @param {object} cliArgs - Overrides as produced by the CLI argument parser.
 * @returns {Promise<void>}
 */
async function cmdRun(cwd, cliArgs) {
  const config = await loadConfig(cwd);
  const overrides = Object.fromEntries(
    Object.entries(cliArgs ?? {}).filter(([, value]) => value !== undefined),
  );
  const merged = { ...config, ...overrides };

  // Validate required fields
  if (!merged.start) {
    console.error('');
    console.error('Error: No start URL specified.');
    console.error('');
    console.error('You can either:');
    console.error(' 1. Run "site-mirror init" to create a config file');
    console.error(' 2. Pass --start <url> directly, e.g.:');
    console.error(' site-mirror run --start https://example.com/');
    console.error(' site-mirror run --start https://example.com/page --singlePage');
    console.error('');
    process.exit(1);
  }

  // Validate URL format and scheme
  let startUrl = null;
  try {
    startUrl = new URL(merged.start);
  } catch {
    startUrl = null;
  }
  const isHttp = startUrl !== null && (startUrl.protocol === 'http:' || startUrl.protocol === 'https:');
  if (!isHttp) {
    console.error('');
    console.error(`Error: Invalid URL "${merged.start}"`);
    console.error('Please provide a valid URL starting with http:// or https://');
    console.error('');
    process.exit(1);
  }

  console.log('');
  console.log('Configuration:');
  console.log(` start: ${merged.start}`);
  console.log(` out: ${merged.out}`);
  if (merged.singlePage) {
    console.log(` mode: SINGLE PAGE (no crawling)`);
  } else {
    console.log(` maxPages: ${merged.maxPages || 'unlimited'}`);
    console.log(` maxDepth: ${merged.maxDepth || 'unlimited'}`);
    console.log(` seedSitemaps: ${merged.seedSitemaps}`);
  }
  console.log(` sameOriginOnly: ${merged.sameOriginOnly}`);
  console.log('');

  // Imported lazily, so the mirror library is only loaded once validation has passed.
  const { mirrorSite } = await import('../lib/mirror.mjs');
  await mirrorSite({
    start: merged.start,
    out: path.resolve(cwd, merged.out),
    maxPages: merged.singlePage ? 1 : merged.maxPages,
    maxDepth: merged.singlePage ? 0 : merged.maxDepth,
    sameOriginOnly: merged.sameOriginOnly,
    seedSitemaps: merged.singlePage ? false : merged.seedSitemaps,
    singlePage: merged.singlePage,
    userAgent: merged.userAgent,
  });
}
|
|
215
|
+
|
|
216
|
+
/**
 * "serve" command: serve the mirrored site over HTTP via `npx http-server`.
 *
 * The directory served is the `out` directory from the project config
 * (default `./offline`), resolved against `cwd`, so a mirror written to a
 * custom output directory can be served too. The port is validated before it
 * is placed into the command line, which gives a clear error for typos and
 * keeps arbitrary text out of the shell command.
 *
 * The process exits with status 1 when the port is invalid or the directory
 * does not exist. The call blocks until the server process ends.
 *
 * @param {string} cwd - Working directory of the project.
 * @param {string|number} port - TCP port to listen on (0-65535).
 * @returns {Promise<void>}
 */
async function cmdServe(cwd, port) {
  const portNumber = Number(port);
  // Number('') is 0, so an empty string has to be rejected explicitly.
  const isValidPort =
    String(port).trim() !== '' &&
    Number.isInteger(portNumber) &&
    portNumber >= 0 &&
    portNumber <= 65535;
  if (!isValidPort) {
    console.error(`Invalid port "${port}". Expected an integer between 0 and 65535.`);
    process.exit(1);
  }

  const { out } = await loadConfig(cwd);
  const outDir = path.resolve(cwd, typeof out === 'string' && out !== '' ? out : 'offline');
  const exists = await fs
    .access(outDir)
    .then(() => true)
    .catch(() => false);

  if (!exists) {
    console.error(`No offline folder found at ${outDir}`);
    console.error('Run "site-mirror run" first to download the site.');
    process.exit(1);
  }

  console.log(`Serving ${outDir} on http://localhost:${portNumber}/`);
  try {
    execSync(`npx http-server "${outDir}" -p ${portNumber} -c-1`, { stdio: 'inherit' });
  } catch (err) {
    // execSync throws when the child exits non-zero or is killed by a signal,
    // e.g. when the server is stopped with Ctrl+C; those cases stay quiet.
    // An error carrying neither an exit status nor a signal means the command
    // never ran at all, which must not be hidden.
    if (err.status == null && err.signal == null) {
      throw err;
    }
  }
}
|
|
236
|
+
|
|
237
|
+
/**
 * CLI entry point: dispatch on the first command-line token.
 *
 *   (none) | help | --help | -h   print usage
 *   init                          interactive config creation
 *   run [options]                 mirror the site
 *   serve [port]                  serve the mirrored folder (default 8080)
 *   --option ...                  shorthand for "run --option ..."
 *
 * Any other bare word is reported as an unknown command and the process
 * exits with status 1. Treating it as "run" would let a typo such as "srve"
 * start a crawl from the config file.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const cwd = process.cwd();
  const args = process.argv.slice(2);
  const command = args[0];

  if (!command || command === 'help' || command === '--help' || command === '-h') {
    console.log(`
site-mirror - Mirror websites for offline browsing

Usage:
  site-mirror init             Interactive setup - creates site-mirror.config.json
  site-mirror run [options]    Mirror the website (reads config + CLI overrides)
  site-mirror serve [port]     Serve the offline folder (default port: 8080)

Run options:
  --start <url>            Start URL (required if not in config)
  --out <dir>              Output directory (default: ./offline)
  --maxPages <n>           Max pages (0 = unlimited)
  --maxDepth <n>           Max depth (0 = unlimited)
  --sameOriginOnly false   Also crawl pages on other origins (default: true)
  --seedSitemaps true      Seed from sitemap.xml
  --singlePage             Download only this page + all its assets (no crawling)
  --userAgent <ua>         User-Agent string to use

Examples:
  site-mirror run --start https://example.com/page --singlePage
  site-mirror init
  site-mirror run
  site-mirror serve 3000
`);
    return;
  }

  if (command === 'init') {
    await cmdInit(cwd);
  } else if (command === 'run') {
    const cliArgs = parseCliArgs(args.slice(1));
    await cmdRun(cwd, cliArgs);
  } else if (command === 'serve') {
    const port = args[1] || '8080';
    await cmdServe(cwd, port);
  } else if (command.startsWith('-')) {
    // Options given without a subcommand: treat the whole line as "run".
    const cliArgs = parseCliArgs(args);
    await cmdRun(cwd, cliArgs);
  } else {
    console.error(`Unknown command "${command}". Run "site-mirror help" for usage.`);
    process.exit(1);
  }
}

// Any uncaught failure is printed and turned into a non-zero exit status.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
package/bin/postinstall.mjs
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* Post-install script to ensure Playwright browsers are available.
|
|
4
|
-
* Runs automatically after `npm install`.
|
|
5
|
-
*/
|
|
6
|
-
import { execSync } from 'node:child_process';
|
|
7
|
-
|
|
8
|
-
console.log('[site-mirror] Checking Playwright browsers...');
|
|
9
|
-
|
|
10
|
-
try {
|
|
11
|
-
execSync('npx playwright install chromium', { stdio: 'inherit' });
|
|
12
|
-
console.log('[site-mirror] Chromium browser ready.');
|
|
13
|
-
} catch (err) {
|
|
14
|
-
console.warn('[site-mirror] Could not auto-install Chromium. Run manually: npx playwright install chromium');
|
|
15
|
-
}
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Post-install script to ensure Playwright browsers are available.
|
|
4
|
+
* Runs automatically after `npm install`.
|
|
5
|
+
*/
|
|
6
|
+
import { execSync } from 'node:child_process';
|
|
7
|
+
|
|
8
|
+
// Single definition of the install command, used both for the automatic
// attempt and for the hint printed when that attempt fails.
const installCommand = 'npx playwright install chromium';

console.log('[site-mirror] Checking Playwright browsers...');

try {
  // Output of the installer is passed straight through to the terminal.
  execSync(installCommand, { stdio: 'inherit' });
  console.log('[site-mirror] Chromium browser ready.');
} catch {
  // The failure is reduced to a warning: the error is not rethrown, so this
  // script still finishes normally and the user is told how to retry.
  console.warn(`[site-mirror] Could not auto-install Chromium. Run manually: ${installCommand}`);
}
|