@opendatalabs/darshana 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +100 -85
- package/package.json +4 -1
- package/src/capture.mjs +14 -11
- package/src/config.mjs +10 -4
- package/src/index.mjs +133 -10
package/README.md
CHANGED
|
@@ -1,26 +1,62 @@
|
|
|
1
1
|
# darshana
|
|
2
2
|
|
|
3
|
-
Crawl any web app and generate a labeled PDF, HTML viewer, or image set for
|
|
3
|
+
Crawl any web app and generate a labeled PDF, HTML viewer, or image set for design review.
|
|
4
4
|
|
|
5
5
|
*Darśana* — Sanskrit for "the act of seeing clearly."
|
|
6
6
|
|
|
7
|
+
## Try it now
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npx @opendatalabs/darshana --url https://vana.org --public
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Output lands in `./darshana-output/` — a PDF and a self-contained HTML viewer with sidebar nav, filters, and keyboard navigation.
|
|
14
|
+
|
|
15
|
+
For a private app, darshana opens a browser so you can log in, then saves the session:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
npx @opendatalabs/darshana --url https://app.vana.org
|
|
19
|
+
# A browser opens → log in → press Enter → capture begins
|
|
20
|
+
```
|
|
21
|
+
|
|
7
22
|
## Install
|
|
8
23
|
|
|
9
24
|
```bash
|
|
10
25
|
npm install -g @opendatalabs/darshana
|
|
11
|
-
# or use directly:
|
|
12
|
-
npx @opendatalabs/darshana --config review.config.json
|
|
13
26
|
```
|
|
14
27
|
|
|
15
|
-
|
|
28
|
+
Chromium is installed automatically. Or skip the install entirely and use `npx @opendatalabs/darshana`.
|
|
16
29
|
|
|
17
|
-
|
|
18
|
-
|
|
30
|
+
## CLI reference
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
darshana --url <url> [options] # zero-config
|
|
34
|
+
darshana --config <path> [options] # file-based (CLI args override config)
|
|
35
|
+
|
|
36
|
+
--url <url> Base URL to crawl
|
|
37
|
+
--config <path> Path to a JSON config file
|
|
38
|
+
--title <string> Review title (default: hostname)
|
|
39
|
+
--start <path> Starting path (default: /)
|
|
40
|
+
--public Skip auth — use for public sites
|
|
41
|
+
--auth-storage <path> Where to save/load the session (default: ./darshana-output/auth.json)
|
|
42
|
+
--auth-script <path> Headless login script (see Auth below)
|
|
43
|
+
--themes <list> Comma-separated: system,dark,light (default: system)
|
|
44
|
+
--viewports <list> Comma-separated: desktop,mobile (default: desktop)
|
|
45
|
+
--max-depth <n> BFS depth limit (default: 5)
|
|
46
|
+
--max-pages <n> Page cap (default: 100)
|
|
47
|
+
--delay <ms> Wait after page load before capture (default: 400)
|
|
48
|
+
--outputs <list> Comma-separated: pdf,html,images (default: pdf,html)
|
|
49
|
+
--output-dir <path> Output directory (default: ./darshana-output)
|
|
50
|
+
--include <regex> Crawl only paths matching this pattern (repeatable)
|
|
51
|
+
--exclude <regex> Skip paths matching this pattern (repeatable)
|
|
52
|
+
--dry-run Discover URLs without capturing
|
|
53
|
+
--route <path> Capture a single route only
|
|
54
|
+
--auth-only Save auth session and exit
|
|
19
55
|
```
|
|
20
56
|
|
|
21
|
-
##
|
|
57
|
+
## Config file
|
|
22
58
|
|
|
23
|
-
|
|
59
|
+
For complex projects, a JSON config gives you per-route sampling rules and capture overrides. CLI args always override config file values.
|
|
24
60
|
|
|
25
61
|
```json
|
|
26
62
|
{
|
|
@@ -29,103 +65,92 @@ npx playwright install chromium
|
|
|
29
65
|
"start": "/dashboard",
|
|
30
66
|
"public": false,
|
|
31
67
|
"authStorage": "./auth.json",
|
|
68
|
+
"authScript": "./auth.mjs",
|
|
32
69
|
"crawl": {
|
|
33
70
|
"include": ["^/dashboard"],
|
|
34
71
|
"exclude": ["logout", "delete"],
|
|
35
72
|
"maxDepth": 3,
|
|
36
73
|
"maxPages": 50,
|
|
37
|
-
"
|
|
74
|
+
"routes": [
|
|
75
|
+
{ "pattern": "/dashboard/records/:id", "sample": 1, "follow": false },
|
|
76
|
+
{ "pattern": "/dashboard/runs/:id", "sample": 2, "follow": false },
|
|
77
|
+
{ "pattern": "/dashboard/**", "follow": true }
|
|
78
|
+
]
|
|
38
79
|
},
|
|
39
80
|
"capture": {
|
|
40
|
-
"themes": ["dark"],
|
|
81
|
+
"themes": ["dark", "light"],
|
|
41
82
|
"viewports": ["desktop", "mobile"],
|
|
42
|
-
"delay": 400
|
|
83
|
+
"delay": 400,
|
|
84
|
+
"overrides": [
|
|
85
|
+
{ "route": "/dashboard/records/", "delay": 1000 }
|
|
86
|
+
]
|
|
43
87
|
},
|
|
44
88
|
"outputs": ["pdf", "html"],
|
|
45
89
|
"outputDir": "./output"
|
|
46
90
|
}
|
|
47
91
|
```
|
|
48
92
|
|
|
49
|
-
|
|
93
|
+
### Routes DSL
|
|
50
94
|
|
|
51
|
-
|
|
52
|
-
npx darshana --config review.config.json --auth-only
|
|
53
|
-
```
|
|
95
|
+
Without routes, darshana visits every discovered URL. For apps with millions of records or runs, use routes to sample:
|
|
54
96
|
|
|
55
|
-
|
|
97
|
+
| Field | Type | Default | Description |
|
|
98
|
+
|---|---|---|---|
|
|
99
|
+
| `pattern` | string | required | Express-style path using `:param` and `/**` |
|
|
100
|
+
| `sample` | number | unlimited | Max pages to capture matching this pattern |
|
|
101
|
+
| `follow` | boolean | `true` | Whether to BFS-follow links on matching pages |
|
|
56
102
|
|
|
57
|
-
|
|
103
|
+
First match wins.
|
|
58
104
|
|
|
59
|
-
|
|
60
|
-
npx darshana --config review.config.json
|
|
61
|
-
```
|
|
105
|
+
### Config reference
|
|
62
106
|
|
|
63
|
-
|
|
107
|
+
**Top-level**
|
|
64
108
|
|
|
65
109
|
| Field | Type | Default | Description |
|
|
66
110
|
|---|---|---|---|
|
|
67
|
-
| `title` | string |
|
|
68
|
-
| `url` | string | required | Base URL
|
|
69
|
-
| `start` | string |
|
|
70
|
-
| `public` | boolean | `false` | Skip auth
|
|
71
|
-
| `authStorage` | string |
|
|
72
|
-
| `authScript` | string | — |
|
|
73
|
-
| `outputs` | string[] | `["pdf"]` | Any of `"pdf"`, `"html"`, `"images"` |
|
|
74
|
-
| `outputDir` | string |
|
|
111
|
+
| `title` | string | `Design Review` (hostname in `--url` mode) | Cover page and HTML header title |
|
|
112
|
+
| `url` | string | required | Base URL |
|
|
113
|
+
| `start` | string | required in a config file (`/` in `--url` mode) | Path to start crawling from |
|
|
114
|
+
| `public` | boolean | `false` | Skip auth |
|
|
115
|
+
| `authStorage` | string | `./auth.json` | Saved session path |
|
|
116
|
+
| `authScript` | string | — | Headless login script |
|
|
117
|
+
| `outputs` | string[] | `["pdf","html"]` | Any of `"pdf"`, `"html"`, `"images"` |
|
|
118
|
+
| `outputDir` | string | `./darshana-output` | Output directory |
|
|
75
119
|
|
|
76
|
-
|
|
120
|
+
**`crawl`**
|
|
77
121
|
|
|
78
122
|
| Field | Type | Default | Description |
|
|
79
123
|
|---|---|---|---|
|
|
80
|
-
| `include` | string[] | `[]` | Regex patterns —
|
|
81
|
-
| `exclude` | string[] | `[]` | Regex patterns —
|
|
82
|
-
| `maxDepth` | number | `5` | Max BFS depth
|
|
83
|
-
| `maxPages` | number | `100` | Hard cap
|
|
84
|
-
| `extraRoutes` | string[] | `[]` |
|
|
85
|
-
| `routes` | Route[] | `[]` | Per-pattern sampling rules
|
|
124
|
+
| `include` | string[] | `[]` | Regex patterns — pathname must match all |
|
|
125
|
+
| `exclude` | string[] | `[]` | Regex patterns — pathname must not match any |
|
|
126
|
+
| `maxDepth` | number | `5` | Max BFS depth |
|
|
127
|
+
| `maxPages` | number | `100` | Hard page cap |
|
|
128
|
+
| `extraRoutes` | string[] | `[]` | Extra paths to capture (not crawled for links) |
|
|
129
|
+
| `routes` | Route[] | `[]` | Per-pattern sampling rules |
|
|
86
130
|
|
|
87
|
-
|
|
131
|
+
**`capture`**
|
|
88
132
|
|
|
89
133
|
| Field | Type | Default | Description |
|
|
90
134
|
|---|---|---|---|
|
|
91
|
-
| `themes` | string[] | `["
|
|
135
|
+
| `themes` | string[] | `["system"]` | `"system"` (no injection), `"dark"`, `"light"` |
|
|
92
136
|
| `viewports` | string[] | `["desktop"]` | `"desktop"` (1440×900) or `"mobile"` (390×844) |
|
|
93
|
-
| `fullPage` | boolean | `true` | Capture full scrollable
|
|
94
|
-
| `delay` | number | `400` | ms to wait
|
|
95
|
-
| `waitFor` | string | — | CSS selector (prefix `$`) or JS expression to
|
|
96
|
-
| `overrides` | Override[] | `[]` | Per-route capture
|
|
97
|
-
| `contextOptions` | object | `{}` | Passed
|
|
98
|
-
| `launchOptions` | object | `{}` | Passed
|
|
99
|
-
| `playwrightOptions` | object | `{}` | Passed
|
|
100
|
-
| `routeOptions` | object | — | `{ blockPatterns: string[] }` — abort matching
|
|
137
|
+
| `fullPage` | boolean | `true` | Capture full scrollable height |
|
|
138
|
+
| `delay` | number | `400` | ms to wait before capture |
|
|
139
|
+
| `waitFor` | string | — | CSS selector (prefix `$`) or JS expression to await |
|
|
140
|
+
| `overrides` | Override[] | `[]` | Per-route overrides for any capture field |
|
|
141
|
+
| `contextOptions` | object | `{}` | Passed to `browser.newContext()` |
|
|
142
|
+
| `launchOptions` | object | `{}` | Passed to `chromium.launch()` |
|
|
143
|
+
| `playwrightOptions` | object | `{}` | Passed to `page.screenshot()` |
|
|
144
|
+
| `routeOptions` | object | — | `{ blockPatterns: string[] }` — abort matching requests |
|
|
101
145
|
|
|
102
|
-
|
|
146
|
+
## Auth
|
|
103
147
|
|
|
104
|
-
|
|
148
|
+
**Headed handover** (default): darshana opens a Chromium window, you log in, press Enter. The session is saved to `authStorage` and reused for 12 hours.
|
|
105
149
|
|
|
106
|
-
|
|
107
|
-
"routes": [
|
|
108
|
-
{ "pattern": "/dashboard/records/:id", "sample": 1, "follow": false },
|
|
109
|
-
{ "pattern": "/dashboard/runs/:id", "sample": 2, "follow": false },
|
|
110
|
-
{ "pattern": "/dashboard/**", "follow": true }
|
|
111
|
-
]
|
|
112
|
-
```
|
|
113
|
-
|
|
114
|
-
| Field | Type | Default | Description |
|
|
115
|
-
|---|---|---|---|
|
|
116
|
-
| `pattern` | string | required | Path pattern using `:param` and `/**` |
|
|
117
|
-
| `sample` | number | unlimited | Max pages to visit matching this pattern |
|
|
118
|
-
| `follow` | boolean | `true` | Whether to BFS-follow links on matching pages |
|
|
119
|
-
|
|
120
|
-
Patterns are matched in order — first match wins.
|
|
121
|
-
|
|
122
|
-
## Auth options
|
|
123
|
-
|
|
124
|
-
**Headed handover** (default when no `authScript`): darshana opens a browser, you log in manually, press Enter — session is saved to `authStorage`. Sessions are reused for 12 hours.
|
|
125
|
-
|
|
126
|
-
**Headless auth script**: Create a JS file that exports a default function:
|
|
150
|
+
**Headless auth script**: export a default function that receives a `Browser` and returns the path to a saved `storageState`:
|
|
127
151
|
|
|
128
152
|
```javascript
|
|
153
|
+
// auth.mjs
|
|
129
154
|
export default async function login(browser) {
|
|
130
155
|
const context = await browser.newContext();
|
|
131
156
|
const page = await context.newPage();
|
|
@@ -133,29 +158,19 @@ export default async function login(browser) {
|
|
|
133
158
|
await page.fill('#password', process.env.APP_PASSWORD);
|
|
134
159
|
await page.click('button[type="submit"]');
|
|
135
160
|
await page.waitForURL(/\/dashboard/);
|
|
136
|
-
|
|
137
|
-
await context.storageState({ path: storagePath });
|
|
161
|
+
await context.storageState({ path: './auth.json' });
|
|
138
162
|
await context.close();
|
|
139
|
-
return
|
|
163
|
+
return './auth.json';
|
|
140
164
|
}
|
|
141
165
|
```
|
|
142
166
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
## CLI
|
|
146
|
-
|
|
147
|
-
```bash
|
|
148
|
-
darshana --config <path> # run full pipeline
|
|
149
|
-
darshana --config <path> --dry-run # discover URLs without capturing
|
|
150
|
-
darshana --config <path> --route /dashboard # capture one route only
|
|
151
|
-
darshana --config <path> --auth-only # save auth session and exit
|
|
152
|
-
```
|
|
167
|
+
See [examples/auth-example.mjs](examples/auth-example.mjs) for a full example.
|
|
153
168
|
|
|
154
169
|
## Outputs
|
|
155
170
|
|
|
156
|
-
- **`pdf`** —
|
|
157
|
-
- **`html`** —
|
|
158
|
-
- **`images`** — `<outputDir>/images/<viewport>/NNN-slug-theme.png`
|
|
171
|
+
- **`pdf`** — one page per capture, labeled header, cover page
|
|
172
|
+
- **`html`** — self-contained file with sidebar nav, theme/viewport filters, keyboard navigation (↑↓), viewport-correct image sizing
|
|
173
|
+
- **`images`** — `<outputDir>/images/<viewport>/NNN-slug-theme.png`
|
|
159
174
|
|
|
160
175
|
## License
|
|
161
176
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@opendatalabs/darshana",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "Crawl any web app and generate a labeled PDF, HTML viewer, or image set for design review.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -16,6 +16,9 @@
|
|
|
16
16
|
"pdf-lib": "^1.17.1",
|
|
17
17
|
"path-to-regexp": "^8.0.0"
|
|
18
18
|
},
|
|
19
|
+
"scripts": {
|
|
20
|
+
"postinstall": "playwright install chromium --with-deps 2>/dev/null || playwright install chromium"
|
|
21
|
+
},
|
|
19
22
|
"devDependencies": {
|
|
20
23
|
"semantic-release": "^25.0.0",
|
|
21
24
|
"@semantic-release/commit-analyzer": "^13.0.0",
|
package/src/capture.mjs
CHANGED
|
@@ -63,17 +63,20 @@ export async function captureAll(browser, config, urls) {
|
|
|
63
63
|
await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });
|
|
64
64
|
await page.addStyleTag({ content: NEXTJS_HIDE_STYLE }).catch(() => {});
|
|
65
65
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
html.
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
66
|
+
// 'system' = no injection; let the page's own prefers-color-scheme take effect
|
|
67
|
+
if (theme !== 'system') {
|
|
68
|
+
await page.evaluate((t) => {
|
|
69
|
+
const html = document.documentElement;
|
|
70
|
+
html.setAttribute('data-theme', t);
|
|
71
|
+
if (t === 'dark') {
|
|
72
|
+
html.classList.add('dark');
|
|
73
|
+
html.classList.remove('light');
|
|
74
|
+
} else {
|
|
75
|
+
html.classList.add('light');
|
|
76
|
+
html.classList.remove('dark');
|
|
77
|
+
}
|
|
78
|
+
}, theme);
|
|
79
|
+
}
|
|
77
80
|
|
|
78
81
|
if (waitFor) {
|
|
79
82
|
if (waitFor.startsWith('$')) {
|
package/src/config.mjs
CHANGED
|
@@ -19,10 +19,16 @@ export function loadConfig(configPath) {
|
|
|
19
19
|
if (!raw.url) throw new Error('Config missing required field: url');
|
|
20
20
|
if (!raw.start) throw new Error('Config missing required field: start');
|
|
21
21
|
|
|
22
|
+
return buildConfig(raw, configDir);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
// Build a config object from a plain object (used by both loadConfig and CLI --url mode).
|
|
26
|
+
// configDir is used to resolve relative paths; defaults to cwd when not loading from a file.
|
|
27
|
+
export function buildConfig(raw, configDir = process.cwd()) {
|
|
22
28
|
const config = {
|
|
23
29
|
title: raw.title ?? 'Design Review',
|
|
24
30
|
url: raw.url.replace(/\/$/, ''),
|
|
25
|
-
start: raw.start,
|
|
31
|
+
start: raw.start ?? '/',
|
|
26
32
|
public: raw.public ?? false,
|
|
27
33
|
authStorage: raw.authStorage ?? './auth.json',
|
|
28
34
|
authScript: raw.authScript ?? null,
|
|
@@ -37,7 +43,7 @@ export function loadConfig(configPath) {
|
|
|
37
43
|
},
|
|
38
44
|
|
|
39
45
|
capture: {
|
|
40
|
-
themes: raw.capture?.themes ?? ['
|
|
46
|
+
themes: raw.capture?.themes ?? ['system'],
|
|
41
47
|
viewports: raw.capture?.viewports ?? ['desktop'],
|
|
42
48
|
fullPage: raw.capture?.fullPage ?? true,
|
|
43
49
|
delay: raw.capture?.delay ?? 400,
|
|
@@ -50,11 +56,11 @@ export function loadConfig(configPath) {
|
|
|
50
56
|
},
|
|
51
57
|
|
|
52
58
|
pdf: {
|
|
53
|
-
output: raw.pdf?.output ?? './
|
|
59
|
+
output: raw.pdf?.output ?? './darshana-output/review.pdf',
|
|
54
60
|
pageSize: raw.pdf?.pageSize ?? 'A4',
|
|
55
61
|
},
|
|
56
62
|
|
|
57
|
-
outputs: raw.outputs ?? ['pdf'],
|
|
63
|
+
outputs: raw.outputs ?? ['pdf', 'html'],
|
|
58
64
|
outputDir: raw.outputDir ? path.resolve(configDir, raw.outputDir) : null,
|
|
59
65
|
};
|
|
60
66
|
|
package/src/index.mjs
CHANGED
|
@@ -2,33 +2,156 @@
|
|
|
2
2
|
import fs from 'node:fs';
|
|
3
3
|
import path from 'node:path';
|
|
4
4
|
import { chromium } from 'playwright';
|
|
5
|
-
import { loadConfig } from './config.mjs';
|
|
5
|
+
import { loadConfig, buildConfig } from './config.mjs';
|
|
6
6
|
import { ensureAuth } from './auth.mjs';
|
|
7
7
|
import { crawl } from './crawl.mjs';
|
|
8
8
|
import { captureAll } from './capture.mjs';
|
|
9
9
|
import { assemblePdf } from './pdf.mjs';
|
|
10
10
|
import { assembleHtml } from './html.mjs';
|
|
11
11
|
|
|
12
|
+
/**
 * Help text printed for `--help` and appended to argument errors.
 *
 * The `--auth-storage` default is described per mode: zero-config (`--url`)
 * runs keep the session inside the output directory, while config-file runs
 * fall back to `./auth.json`.
 */
const USAGE = `
Usage:
  darshana --url <url> [options]       # zero-config mode
  darshana --config <path> [options]   # file-based config

Options:
  --url <url>            Base URL to crawl (required if no --config)
  --config <path>        Path to a JSON config file
  --title <string>       Review title (default: hostname)
  --start <path>         Starting path (default: /)
  --public               Skip auth entirely
  --auth-storage <path>  Path to saved Playwright storageState (default: <output-dir>/auth.json; ./auth.json with --config)
  --auth-script <path>   Path to a headless auth script
  --themes <list>        Comma-separated: dark,light,system (default: system)
  --viewports <list>     Comma-separated: desktop,mobile (default: desktop)
  --max-depth <n>        BFS depth limit (default: 5)
  --max-pages <n>        Page cap (default: 100)
  --delay <ms>           Wait after page load before capture (default: 400)
  --outputs <list>       Comma-separated: pdf,html,images (default: pdf,html)
  --output-dir <path>    Directory for output files (default: ./darshana-output)
  --include <regex>      Crawl only paths matching this pattern (repeatable)
  --exclude <regex>      Skip paths matching this pattern (repeatable)
  --dry-run              Discover URLs without capturing
  --route <path>         Capture a single route only
  --auth-only            Save auth session and exit
`.trim();
|
|
38
|
+
|
|
12
39
|
/**
 * Parse darshana's command-line flags into a plain options object.
 *
 * Value-less flags (`--public`, `--dry-run`, `--auth-only`) become booleans.
 * Options that were not supplied stay `null` (or `[]` for the repeatable
 * `--include` / `--exclude`) so callers can tell "unset" from a real value.
 *
 * On `--help` / `-h` the usage text is printed and the process exits with 0.
 * On an unknown flag, a missing value, an empty list, or a non-numeric /
 * negative number, an error is printed and the process exits with 1.
 *
 * @param {string[]} argv - Arguments after the script name (e.g. `process.argv.slice(2)`).
 * @returns {object} Parsed options keyed by camelCase flag name.
 */
function parseArgs(argv) {
  const args = {
    config: null,
    url: null,
    title: null,
    start: null,
    public: false,
    authStorage: null,
    authScript: null,
    themes: null,
    viewports: null,
    maxDepth: null,
    maxPages: null,
    delay: null,
    outputs: null,
    outputDir: null,
    include: [],
    exclude: [],
    dryRun: false,
    route: null,
    authOnly: false,
  };

  // Print an error and terminate; process.exit never returns in production.
  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];

    // Consume the value following the current flag.
    const next = () => {
      if (!argv[i + 1]) fail(`Missing value for ${a}`);
      return argv[++i];
    };

    // Comma-separated list; blank entries (e.g. a trailing comma) are dropped.
    const nextList = () => {
      const items = next().split(',').map((s) => s.trim()).filter(Boolean);
      if (items.length === 0) fail(`Missing value for ${a}`);
      return items;
    };

    // Non-negative integer; rejects NaN so it can never leak into the config.
    const nextInt = () => {
      const rawValue = next();
      const value = Number.parseInt(rawValue, 10);
      if (!Number.isInteger(value) || value < 0) {
        fail(`Invalid value for ${a}: expected a non-negative integer, got "${rawValue}"`);
      }
      return value;
    };

    switch (a) {
      case '--config': args.config = next(); break;
      case '--url': args.url = next(); break;
      case '--title': args.title = next(); break;
      case '--start': args.start = next(); break;
      case '--public': args.public = true; break;
      case '--auth-storage': args.authStorage = next(); break;
      case '--auth-script': args.authScript = next(); break;
      case '--themes': args.themes = nextList(); break;
      case '--viewports': args.viewports = nextList(); break;
      case '--max-depth': args.maxDepth = nextInt(); break;
      case '--max-pages': args.maxPages = nextInt(); break;
      case '--delay': args.delay = nextInt(); break;
      case '--outputs': args.outputs = nextList(); break;
      case '--output-dir': args.outputDir = next(); break;
      case '--include': args.include.push(next()); break;
      case '--exclude': args.exclude.push(next()); break;
      case '--dry-run': args.dryRun = true; break;
      case '--route': args.route = next(); break;
      case '--auth-only': args.authOnly = true; break;
      case '--help':
      case '-h':
        console.log(USAGE);
        process.exit(0);
        break;
      default:
        console.error(`Unknown argument: ${a}\n\n${USAGE}`);
        process.exit(1);
    }
  }
  return args;
}
|
|
22
90
|
|
|
91
|
+
/**
 * Build a full config for zero-config (`--url`) mode from parsed CLI args.
 *
 * Unset options fall back to the CLI defaults: title = URL hostname,
 * start = `/`, output directory = `./darshana-output`, and the auth session
 * stored inside the output directory. The resulting plain object is handed
 * to `buildConfig`, with the current working directory as the base for
 * relative paths.
 *
 * @param {object} args - Result of `parseArgs`; `args.url` must be set.
 * @returns {object} Whatever `buildConfig` returns for the assembled object.
 * @throws {TypeError} If `args.url` is not an absolute URL.
 */
function configFromArgs(args) {
  const url = args.url.replace(/\/$/, '');

  // `new URL` throws a bare "Invalid URL" for scheme-less input such as
  // "vana.org"; re-throw with a message that names the offending flag.
  let hostname;
  try {
    ({ hostname } = new URL(url));
  } catch (err) {
    throw new TypeError(
      `Invalid --url "${args.url}": expected an absolute URL such as https://example.com`,
      { cause: err },
    );
  }

  const outputDir = args.outputDir ?? './darshana-output';

  const raw = {
    title: args.title ?? hostname,
    url,
    start: args.start ?? '/',
    public: args.public,
    authStorage: args.authStorage ?? path.join(outputDir, 'auth.json'),
    authScript: args.authScript ?? null,
    crawl: {
      include: args.include,
      exclude: args.exclude,
      maxDepth: args.maxDepth ?? 5,
      maxPages: args.maxPages ?? 100,
    },
    capture: {
      themes: args.themes ?? ['system'],
      viewports: args.viewports ?? ['desktop'],
      delay: args.delay ?? 400,
    },
    outputs: args.outputs ?? ['pdf', 'html'],
    outputDir,
  };

  return buildConfig(raw, process.cwd());
}
|
|
120
|
+
|
|
121
|
+
/**
 * Layer explicitly supplied CLI options on top of a config loaded from file.
 *
 * The config object is mutated in place and also returned. Options left
 * unset by the CLI (`null`, or empty `include` / `exclude` lists) leave the
 * file's value untouched. `--include` / `--exclude` are appended to the
 * file's patterns rather than replacing them. `--public` can only switch
 * auth off; its absence never re-enables auth.
 *
 * @param {object} config - Config from `loadConfig`; needs `crawl` and `capture` sub-objects.
 * @param {object} args - Result of `parseArgs`.
 * @returns {object} The same `config` object, after overrides.
 */
function applyCliOverrides(config, args) {
  if (args.title) config.title = args.title;
  if (args.start) config.start = args.start;
  if (args.public) config.public = true;
  if (args.authStorage) config.authStorage = path.resolve(args.authStorage);
  if (args.authScript) config.authScript = path.resolve(args.authScript);
  if (args.themes) config.capture.themes = args.themes;
  if (args.viewports) config.capture.viewports = args.viewports;

  // Numeric options: 0 is a legitimate value (e.g. `--delay 0`), so test for
  // "is an integer" instead of truthiness. null (unset) and NaN still fall
  // through and leave the config file's value in place.
  if (Number.isInteger(args.maxDepth)) config.crawl.maxDepth = args.maxDepth;
  if (Number.isInteger(args.maxPages)) config.crawl.maxPages = args.maxPages;
  if (Number.isInteger(args.delay)) config.capture.delay = args.delay;

  if (args.outputs) config.outputs = args.outputs;
  if (args.outputDir) config.outputDir = path.resolve(args.outputDir);
  if (args.include.length) config.crawl.include = [...config.crawl.include, ...args.include];
  if (args.exclude.length) config.crawl.exclude = [...config.crawl.exclude, ...args.exclude];
  return config;
}
|
|
138
|
+
|
|
23
139
|
const args = parseArgs(process.argv.slice(2));
|
|
24
140
|
|
|
25
|
-
if (!args.config) {
|
|
26
|
-
console.error('
|
|
141
|
+
if (!args.config && !args.url) {
|
|
142
|
+
console.error('Error: --url or --config is required\n\n' + USAGE);
|
|
27
143
|
process.exit(1);
|
|
28
144
|
}
|
|
29
145
|
|
|
30
146
|
async function main() {
|
|
31
|
-
|
|
147
|
+
let config;
|
|
148
|
+
if (args.config) {
|
|
149
|
+
config = loadConfig(args.config);
|
|
150
|
+
config = applyCliOverrides(config, args);
|
|
151
|
+
} else {
|
|
152
|
+
config = configFromArgs(args);
|
|
153
|
+
}
|
|
154
|
+
|
|
32
155
|
console.log(`[darshana] ${config.title} — ${config.url}`);
|
|
33
156
|
|
|
34
157
|
const storageStatePath = await ensureAuth(config);
|
|
@@ -87,7 +210,7 @@ async function main() {
|
|
|
87
210
|
const outputDir = config.outputDir;
|
|
88
211
|
fs.mkdirSync(outputDir, { recursive: true });
|
|
89
212
|
|
|
90
|
-
const outputs = config.outputs ?? ['pdf'];
|
|
213
|
+
const outputs = config.outputs ?? ['pdf', 'html'];
|
|
91
214
|
|
|
92
215
|
if (outputs.includes('pdf')) {
|
|
93
216
|
await assemblePdf(captures, config, outputDir);
|