@opendatalabs/darshana 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/capture.mjs +14 -11
- package/src/config.mjs +10 -4
- package/src/index.mjs +133 -10
package/package.json
CHANGED
package/src/capture.mjs
CHANGED
|
@@ -63,17 +63,20 @@ export async function captureAll(browser, config, urls) {
|
|
|
63
63
|
await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });
|
|
64
64
|
await page.addStyleTag({ content: NEXTJS_HIDE_STYLE }).catch(() => {});
|
|
65
65
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
html.
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
66
|
+
// 'system' = no injection; let the page's own prefers-color-scheme take effect
|
|
67
|
+
if (theme !== 'system') {
|
|
68
|
+
await page.evaluate((t) => {
|
|
69
|
+
const html = document.documentElement;
|
|
70
|
+
html.setAttribute('data-theme', t);
|
|
71
|
+
if (t === 'dark') {
|
|
72
|
+
html.classList.add('dark');
|
|
73
|
+
html.classList.remove('light');
|
|
74
|
+
} else {
|
|
75
|
+
html.classList.add('light');
|
|
76
|
+
html.classList.remove('dark');
|
|
77
|
+
}
|
|
78
|
+
}, theme);
|
|
79
|
+
}
|
|
77
80
|
|
|
78
81
|
if (waitFor) {
|
|
79
82
|
if (waitFor.startsWith('$')) {
|
package/src/config.mjs
CHANGED
|
@@ -19,10 +19,16 @@ export function loadConfig(configPath) {
|
|
|
19
19
|
if (!raw.url) throw new Error('Config missing required field: url');
|
|
20
20
|
if (!raw.start) throw new Error('Config missing required field: start');
|
|
21
21
|
|
|
22
|
+
return buildConfig(raw, configDir);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
// Build a config object from a plain object (used by both loadConfig and CLI --url mode).
|
|
26
|
+
// configDir is used to resolve relative paths; defaults to cwd when not loading from a file.
|
|
27
|
+
export function buildConfig(raw, configDir = process.cwd()) {
|
|
22
28
|
const config = {
|
|
23
29
|
title: raw.title ?? 'Design Review',
|
|
24
30
|
url: raw.url.replace(/\/$/, ''),
|
|
25
|
-
start: raw.start,
|
|
31
|
+
start: raw.start ?? '/',
|
|
26
32
|
public: raw.public ?? false,
|
|
27
33
|
authStorage: raw.authStorage ?? './auth.json',
|
|
28
34
|
authScript: raw.authScript ?? null,
|
|
@@ -37,7 +43,7 @@ export function loadConfig(configPath) {
|
|
|
37
43
|
},
|
|
38
44
|
|
|
39
45
|
capture: {
|
|
40
|
-
themes: raw.capture?.themes ?? ['
|
|
46
|
+
themes: raw.capture?.themes ?? ['system'],
|
|
41
47
|
viewports: raw.capture?.viewports ?? ['desktop'],
|
|
42
48
|
fullPage: raw.capture?.fullPage ?? true,
|
|
43
49
|
delay: raw.capture?.delay ?? 400,
|
|
@@ -50,11 +56,11 @@ export function loadConfig(configPath) {
|
|
|
50
56
|
},
|
|
51
57
|
|
|
52
58
|
pdf: {
|
|
53
|
-
output: raw.pdf?.output ?? './
|
|
59
|
+
output: raw.pdf?.output ?? './darshana-output/review.pdf',
|
|
54
60
|
pageSize: raw.pdf?.pageSize ?? 'A4',
|
|
55
61
|
},
|
|
56
62
|
|
|
57
|
-
outputs: raw.outputs ?? ['pdf'],
|
|
63
|
+
outputs: raw.outputs ?? ['pdf', 'html'],
|
|
58
64
|
outputDir: raw.outputDir ? path.resolve(configDir, raw.outputDir) : null,
|
|
59
65
|
};
|
|
60
66
|
|
package/src/index.mjs
CHANGED
|
@@ -2,33 +2,156 @@
|
|
|
2
2
|
import fs from 'node:fs';
|
|
3
3
|
import path from 'node:path';
|
|
4
4
|
import { chromium } from 'playwright';
|
|
5
|
-
import { loadConfig } from './config.mjs';
|
|
5
|
+
import { loadConfig, buildConfig } from './config.mjs';
|
|
6
6
|
import { ensureAuth } from './auth.mjs';
|
|
7
7
|
import { crawl } from './crawl.mjs';
|
|
8
8
|
import { captureAll } from './capture.mjs';
|
|
9
9
|
import { assemblePdf } from './pdf.mjs';
|
|
10
10
|
import { assembleHtml } from './html.mjs';
|
|
11
11
|
|
|
12
|
+
const USAGE = `
|
|
13
|
+
Usage:
|
|
14
|
+
darshana --url <url> [options] # zero-config mode
|
|
15
|
+
darshana --config <path> [options] # file-based config
|
|
16
|
+
|
|
17
|
+
Options:
|
|
18
|
+
--url <url> Base URL to crawl (required if no --config)
|
|
19
|
+
--config <path> Path to a JSON config file
|
|
20
|
+
--title <string> Review title (default: hostname)
|
|
21
|
+
--start <path> Starting path (default: /)
|
|
22
|
+
--public Skip auth entirely
|
|
23
|
+
--auth-storage <path> Path to saved Playwright storageState (default: ./auth.json)
|
|
24
|
+
--auth-script <path> Path to a headless auth script
|
|
25
|
+
--themes <list> Comma-separated: dark,light,system (default: system)
|
|
26
|
+
--viewports <list> Comma-separated: desktop,mobile (default: desktop)
|
|
27
|
+
--max-depth <n> BFS depth limit (default: 5)
|
|
28
|
+
--max-pages <n> Page cap (default: 100)
|
|
29
|
+
--delay <ms> Wait after page load before capture (default: 400)
|
|
30
|
+
--outputs <list> Comma-separated: pdf,html,images (default: pdf,html)
|
|
31
|
+
--output-dir <path> Directory for output files (default: ./darshana-output)
|
|
32
|
+
--include <regex> Crawl only paths matching this pattern (repeatable)
|
|
33
|
+
--exclude <regex> Skip paths matching this pattern (repeatable)
|
|
34
|
+
--dry-run Discover URLs without capturing
|
|
35
|
+
--route <path> Capture a single route only
|
|
36
|
+
--auth-only Save auth session and exit
|
|
37
|
+
`.trim();
|
|
38
|
+
|
|
12
39
|
function parseArgs(argv) {
|
|
13
|
-
const args = {
|
|
40
|
+
const args = {
|
|
41
|
+
config: null,
|
|
42
|
+
url: null,
|
|
43
|
+
title: null,
|
|
44
|
+
start: null,
|
|
45
|
+
public: false,
|
|
46
|
+
authStorage: null,
|
|
47
|
+
authScript: null,
|
|
48
|
+
themes: null,
|
|
49
|
+
viewports: null,
|
|
50
|
+
maxDepth: null,
|
|
51
|
+
maxPages: null,
|
|
52
|
+
delay: null,
|
|
53
|
+
outputs: null,
|
|
54
|
+
outputDir: null,
|
|
55
|
+
include: [],
|
|
56
|
+
exclude: [],
|
|
57
|
+
dryRun: false,
|
|
58
|
+
route: null,
|
|
59
|
+
authOnly: false,
|
|
60
|
+
};
|
|
61
|
+
|
|
14
62
|
for (let i = 0; i < argv.length; i++) {
|
|
15
|
-
|
|
16
|
-
if (argv[i
|
|
17
|
-
if (
|
|
18
|
-
if (
|
|
63
|
+
const a = argv[i];
|
|
64
|
+
const next = () => { if (!argv[i + 1]) { console.error(`Missing value for ${a}`); process.exit(1); } return argv[++i]; };
|
|
65
|
+
if (a === '--config') { args.config = next(); continue; }
|
|
66
|
+
if (a === '--url') { args.url = next(); continue; }
|
|
67
|
+
if (a === '--title') { args.title = next(); continue; }
|
|
68
|
+
if (a === '--start') { args.start = next(); continue; }
|
|
69
|
+
if (a === '--public') { args.public = true; continue; }
|
|
70
|
+
if (a === '--auth-storage') { args.authStorage = next(); continue; }
|
|
71
|
+
if (a === '--auth-script') { args.authScript = next(); continue; }
|
|
72
|
+
if (a === '--themes') { args.themes = next().split(',').map(s => s.trim()); continue; }
|
|
73
|
+
if (a === '--viewports') { args.viewports = next().split(',').map(s => s.trim()); continue; }
|
|
74
|
+
if (a === '--max-depth') { args.maxDepth = parseInt(next(), 10); continue; }
|
|
75
|
+
if (a === '--max-pages') { args.maxPages = parseInt(next(), 10); continue; }
|
|
76
|
+
if (a === '--delay') { args.delay = parseInt(next(), 10); continue; }
|
|
77
|
+
if (a === '--outputs') { args.outputs = next().split(',').map(s => s.trim()); continue; }
|
|
78
|
+
if (a === '--output-dir') { args.outputDir = next(); continue; }
|
|
79
|
+
if (a === '--include') { args.include.push(next()); continue; }
|
|
80
|
+
if (a === '--exclude') { args.exclude.push(next()); continue; }
|
|
81
|
+
if (a === '--dry-run') { args.dryRun = true; continue; }
|
|
82
|
+
if (a === '--route') { args.route = next(); continue; }
|
|
83
|
+
if (a === '--auth-only') { args.authOnly = true; continue; }
|
|
84
|
+
if (a === '--help' || a === '-h') { console.log(USAGE); process.exit(0); }
|
|
85
|
+
console.error(`Unknown argument: ${a}\n\n${USAGE}`);
|
|
86
|
+
process.exit(1);
|
|
19
87
|
}
|
|
20
88
|
return args;
|
|
21
89
|
}
|
|
22
90
|
|
|
91
|
+
function configFromArgs(args) {
|
|
92
|
+
const url = args.url.replace(/\/$/, '');
|
|
93
|
+
const hostname = new URL(url).hostname;
|
|
94
|
+
const outputDir = args.outputDir ?? './darshana-output';
|
|
95
|
+
|
|
96
|
+
const raw = {
|
|
97
|
+
title: args.title ?? hostname,
|
|
98
|
+
url,
|
|
99
|
+
start: args.start ?? '/',
|
|
100
|
+
public: args.public,
|
|
101
|
+
authStorage: args.authStorage ?? path.join(outputDir, 'auth.json'),
|
|
102
|
+
authScript: args.authScript ?? null,
|
|
103
|
+
crawl: {
|
|
104
|
+
include: args.include,
|
|
105
|
+
exclude: args.exclude,
|
|
106
|
+
maxDepth: args.maxDepth ?? 5,
|
|
107
|
+
maxPages: args.maxPages ?? 100,
|
|
108
|
+
},
|
|
109
|
+
capture: {
|
|
110
|
+
themes: args.themes ?? ['system'],
|
|
111
|
+
viewports: args.viewports ?? ['desktop'],
|
|
112
|
+
delay: args.delay ?? 400,
|
|
113
|
+
},
|
|
114
|
+
outputs: args.outputs ?? ['pdf', 'html'],
|
|
115
|
+
outputDir,
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
return buildConfig(raw, process.cwd());
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
function applyCliOverrides(config, args) {
|
|
122
|
+
if (args.title) config.title = args.title;
|
|
123
|
+
if (args.start) config.start = args.start;
|
|
124
|
+
if (args.public) config.public = true;
|
|
125
|
+
if (args.authStorage) config.authStorage = path.resolve(args.authStorage);
|
|
126
|
+
if (args.authScript) config.authScript = path.resolve(args.authScript);
|
|
127
|
+
if (args.themes) config.capture.themes = args.themes;
|
|
128
|
+
if (args.viewports) config.capture.viewports = args.viewports;
|
|
129
|
+
if (args.maxDepth) config.crawl.maxDepth = args.maxDepth;
|
|
130
|
+
if (args.maxPages) config.crawl.maxPages = args.maxPages;
|
|
131
|
+
if (args.delay) config.capture.delay = args.delay;
|
|
132
|
+
if (args.outputs) config.outputs = args.outputs;
|
|
133
|
+
if (args.outputDir) config.outputDir = path.resolve(args.outputDir);
|
|
134
|
+
if (args.include.length) config.crawl.include = [...config.crawl.include, ...args.include];
|
|
135
|
+
if (args.exclude.length) config.crawl.exclude = [...config.crawl.exclude, ...args.exclude];
|
|
136
|
+
return config;
|
|
137
|
+
}
|
|
138
|
+
|
|
23
139
|
const args = parseArgs(process.argv.slice(2));
|
|
24
140
|
|
|
25
|
-
if (!args.config) {
|
|
26
|
-
console.error('
|
|
141
|
+
if (!args.config && !args.url) {
|
|
142
|
+
console.error('Error: --url or --config is required\n\n' + USAGE);
|
|
27
143
|
process.exit(1);
|
|
28
144
|
}
|
|
29
145
|
|
|
30
146
|
async function main() {
|
|
31
|
-
|
|
147
|
+
let config;
|
|
148
|
+
if (args.config) {
|
|
149
|
+
config = loadConfig(args.config);
|
|
150
|
+
config = applyCliOverrides(config, args);
|
|
151
|
+
} else {
|
|
152
|
+
config = configFromArgs(args);
|
|
153
|
+
}
|
|
154
|
+
|
|
32
155
|
console.log(`[darshana] ${config.title} — ${config.url}`);
|
|
33
156
|
|
|
34
157
|
const storageStatePath = await ensureAuth(config);
|
|
@@ -87,7 +210,7 @@ async function main() {
|
|
|
87
210
|
const outputDir = config.outputDir;
|
|
88
211
|
fs.mkdirSync(outputDir, { recursive: true });
|
|
89
212
|
|
|
90
|
-
const outputs = config.outputs ?? ['pdf'];
|
|
213
|
+
const outputs = config.outputs ?? ['pdf', 'html'];
|
|
91
214
|
|
|
92
215
|
if (outputs.includes('pdf')) {
|
|
93
216
|
await assemblePdf(captures, config, outputDir);
|