vault-fetch 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ja.md +9 -4
- package/README.md +9 -4
- package/dist/cli.js +163 -24
- package/dist/cli.js.map +1 -1
- package/package.json +4 -3
package/README.ja.md
CHANGED
|
@@ -4,11 +4,12 @@ Obsidian Clipper では取得できない、JavaScript レンダリングや認
|
|
|
4
4
|
|
|
5
5
|
## 特徴
|
|
6
6
|
|
|
7
|
-
-
|
|
7
|
+
- [CloakBrowser](https://github.com/CloakHQ/CloakBrowser)(ソースレベルのボット対策パッチ適用済みステルス Chromium)による JS レンダリング後のページ取得
|
|
8
8
|
- **PDF から Markdown への変換**(Content-Type で自動判定)
|
|
9
9
|
- Readability.js による記事本文の抽出(広告・ナビゲーション除去)、`--raw` モードでフルページ変換も可能
|
|
10
10
|
- リソースブロッキング(画像・フォント・メディア)による高速フェッチ
|
|
11
|
-
-
|
|
11
|
+
- ボット検出回避(CDP リークパッチ、フィンガープリントランダム化、自動化シグナル除去)
|
|
12
|
+
- `--proxy` オプションまたは `VAULT_FETCH_PROXY` 環境変数による HTTP/HTTPS プロキシ対応
|
|
12
13
|
- Obsidian Clipper 互換のフロントマター(title, source, author, published, created, description, tags)
|
|
13
14
|
- セッション管理(`storageState`)によるログイン済みページの取得
|
|
14
15
|
- 設定の 3 層解決(CLI オプション > 環境変数 > 設定ファイル)
|
|
@@ -19,8 +20,7 @@ Obsidian Clipper では取得できない、JavaScript レンダリングや認
|
|
|
19
20
|
# グローバルインストール
|
|
20
21
|
npm install -g vault-fetch
|
|
21
22
|
|
|
22
|
-
#
|
|
23
|
-
npx playwright install chromium
|
|
23
|
+
# CloakBrowser のステルス Chromium バイナリは初回実行時に自動ダウンロードされます(約200MB、~/.cloakbrowser/ にキャッシュ)
|
|
24
24
|
```
|
|
25
25
|
|
|
26
26
|
## 使い方
|
|
@@ -57,6 +57,9 @@ vault-fetch fetch https://example.com/article --dest ~/Documents/Obsidian/Clippi
|
|
|
57
57
|
|
|
58
58
|
# PDF を取得して Markdown に変換(自動判定)
|
|
59
59
|
vault-fetch fetch https://example.com/report.pdf --dest ~/Documents/Obsidian/Clippings
|
|
60
|
+
|
|
61
|
+
# プロキシ経由でフェッチ
|
|
62
|
+
vault-fetch fetch https://example.com/article --dest ~/Documents/Obsidian/Clippings --proxy http://proxy:8080
|
|
60
63
|
```
|
|
61
64
|
|
|
62
65
|
### PDF 対応
|
|
@@ -97,6 +100,7 @@ vault-fetch login https://note.com
|
|
|
97
100
|
| `--no-block-images` | 画像リクエストのブロックを無効化 |
|
|
98
101
|
| `--no-block-fonts` | フォントリクエストのブロックを無効化 |
|
|
99
102
|
| `--no-block-media` | メディアリクエストのブロックを無効化 |
|
|
103
|
+
| `--proxy <url>` | HTTP/HTTPS プロキシ URL(例: `http://host:port`) |
|
|
100
104
|
|
|
101
105
|
## 設定
|
|
102
106
|
|
|
@@ -121,6 +125,7 @@ timeout: 30
|
|
|
121
125
|
|---|---|
|
|
122
126
|
| `VAULT_FETCH_DEST` | 保存先ディレクトリ |
|
|
123
127
|
| `VAULT_FETCH_TIMEOUT` | タイムアウト秒数 |
|
|
128
|
+
| `VAULT_FETCH_PROXY` | HTTP/HTTPS プロキシ URL |
|
|
124
129
|
|
|
125
130
|
### 優先順位
|
|
126
131
|
|
package/README.md
CHANGED
|
@@ -4,11 +4,12 @@ A CLI tool that uses Playwright to fetch web pages and PDF files — pages that
|
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
|
-
- Page fetching with JS rendering via
|
|
7
|
+
- Page fetching with JS rendering via [CloakBrowser](https://github.com/CloakHQ/CloakBrowser) (stealth Chromium with source-level anti-bot patches)
|
|
8
8
|
- **PDF to Markdown conversion** (auto-detected via Content-Type)
|
|
9
9
|
- Article content extraction using Readability.js (removes ads and navigation), with `--raw` mode for full-page conversion
|
|
10
10
|
- Resource blocking (images, fonts, media) for faster fetching
|
|
11
|
-
-
|
|
11
|
+
- Anti-bot detection evasion (CDP leak patching, fingerprint randomization, automation signal removal)
|
|
12
|
+
- HTTP/HTTPS proxy support via `--proxy` option or `VAULT_FETCH_PROXY` environment variable
|
|
12
13
|
- Obsidian Clipper-compatible frontmatter (title, source, author, published, created, description, tags)
|
|
13
14
|
- Session management (`storageState`) for fetching authenticated pages
|
|
14
15
|
- 3-layer configuration resolution (CLI options > environment variables > config file)
|
|
@@ -19,8 +20,7 @@ A CLI tool that uses Playwright to fetch web pages and PDF files — pages that
|
|
|
19
20
|
# Global install
|
|
20
21
|
npm install -g vault-fetch
|
|
21
22
|
|
|
22
|
-
#
|
|
23
|
-
npx playwright install chromium
|
|
23
|
+
# CloakBrowser's stealth Chromium binary is downloaded automatically on first run (~200MB, cached at ~/.cloakbrowser/)
|
|
24
24
|
```
|
|
25
25
|
|
|
26
26
|
## Usage
|
|
@@ -57,6 +57,9 @@ vault-fetch fetch https://example.com/article --dest ~/Documents/Obsidian/Clippi
|
|
|
57
57
|
|
|
58
58
|
# Fetch a PDF and convert to Markdown (auto-detected)
|
|
59
59
|
vault-fetch fetch https://example.com/report.pdf --dest ~/Documents/Obsidian/Clippings
|
|
60
|
+
|
|
61
|
+
# Fetch via proxy
|
|
62
|
+
vault-fetch fetch https://example.com/article --dest ~/Documents/Obsidian/Clippings --proxy http://proxy:8080
|
|
60
63
|
```
|
|
61
64
|
|
|
62
65
|
### PDF Support
|
|
@@ -97,6 +100,7 @@ Subsequent `fetch` commands will automatically use the saved session for that do
|
|
|
97
100
|
| `--no-block-images` | Disable image request blocking |
|
|
98
101
|
| `--no-block-fonts` | Disable font request blocking |
|
|
99
102
|
| `--no-block-media` | Disable media request blocking |
|
|
103
|
+
| `--proxy <url>` | HTTP/HTTPS proxy URL (e.g. `http://host:port`) |
|
|
100
104
|
|
|
101
105
|
## Configuration
|
|
102
106
|
|
|
@@ -121,6 +125,7 @@ timeout: 30
|
|
|
121
125
|
|---|---|
|
|
122
126
|
| `VAULT_FETCH_DEST` | Destination directory |
|
|
123
127
|
| `VAULT_FETCH_TIMEOUT` | Timeout in seconds |
|
|
128
|
+
| `VAULT_FETCH_PROXY` | HTTP/HTTPS proxy URL |
|
|
124
129
|
|
|
125
130
|
### Priority
|
|
126
131
|
|
package/dist/cli.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
// src/cli.ts
|
|
4
4
|
import { Command } from "commander";
|
|
5
5
|
import { once } from "events";
|
|
6
|
-
import { existsSync as
|
|
6
|
+
import { existsSync as existsSync3 } from "fs";
|
|
7
7
|
import { homedir as homedir3 } from "os";
|
|
8
8
|
import { join as join3 } from "path";
|
|
9
9
|
|
|
@@ -12,6 +12,43 @@ import { readFileSync } from "fs";
|
|
|
12
12
|
import { homedir } from "os";
|
|
13
13
|
import { resolve } from "path";
|
|
14
14
|
import yaml from "js-yaml";
|
|
15
|
+
|
|
16
|
+
// src/types.ts
|
|
17
|
+
/**
 * Frontmatter keys that may not be supplied through `--field`.
 *
 * parseFields() rejects any entry whose key appears in this list. The list is
 * frozen so that no caller can accidentally add or remove a reserved key at
 * runtime.
 */
var RESERVED_FRONTMATTER_KEYS = Object.freeze([
  "title",
  "source",
  "author",
  "published",
  "created",
  "description",
  "tags"
]);
|
|
26
|
+
|
|
27
|
+
// src/config.ts
|
|
28
|
+
/**
 * Parse repeatable `--field key=value` CLI entries into a frontmatter map.
 *
 * Each entry is split on its first "="; the key is trimmed and the remainder
 * is handed to the YAML parser, so the value may be any YAML the parser
 * accepts. When the same key is given more than once, the last entry wins.
 *
 * @param {string[] | undefined | null} raw - Entries collected from the CLI.
 *   A missing list is treated as "no custom fields".
 * @returns {Record<string, unknown>} Parsed values keyed by trimmed field name.
 * @throws {Error} If an entry has no "=", has an empty key, uses a reserved
 *   or prototype-related key, or carries a value that is not valid YAML.
 */
function parseFields(raw) {
  const result = {};
  for (const entry of raw ?? []) {
    const eq = entry.indexOf("=");
    if (eq === -1) {
      throw new Error(`Invalid --field "${entry}": expected key=value`);
    }
    const key = entry.slice(0, eq).trim();
    const rawValue = entry.slice(eq + 1);
    if (key.length === 0) {
      throw new Error(`Invalid --field "${entry}": key must not be empty`);
    }
    if (RESERVED_FRONTMATTER_KEYS.includes(key)) {
      throw new Error(
        `--field key "${key}" is reserved. title/tags can be set via --title/--tag; the others (source, author, published, created, description) are extracted automatically.`
      );
    }
    // Assigning these names on a plain object would touch its prototype chain.
    if (key === "__proto__" || key === "constructor" || key === "prototype") {
      throw new Error(`--field key "${key}" is not allowed`);
    }
    let value;
    try {
      // NOTE(review): this parses user-supplied CLI text as YAML. js-yaml v4's
      // default schema does not build arbitrary JS objects; confirm the
      // bundled js-yaml version before relying on that.
      value = yaml.load(rawValue);
    } catch (err) {
      // Name the offending entry; a bare parser error gives no hint which
      // --field caused it.
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(
        `Invalid --field "${entry}": value is not valid YAML (${reason})`,
        { cause: err }
      );
    }
    // An empty value ("key=") parses to undefined; store null so the key is
    // kept as an explicit empty value instead of an undefined entry.
    result[key] = value === undefined ? null : value;
  }
  return result;
}
|
|
15
52
|
var DEFAULT_TIMEOUT = 30;
|
|
16
53
|
var DEFAULT_WAIT_UNTIL = "networkidle";
|
|
17
54
|
var REQUIRED_TAG = "clippings";
|
|
@@ -21,6 +58,35 @@ function expandTilde(filePath) {
|
|
|
21
58
|
}
|
|
22
59
|
return filePath;
|
|
23
60
|
}
|
|
61
|
+
/**
 * Proxy URL schemes accepted by validateProxyUrl(), written the way
 * `URL.protocol` reports them (lower case, trailing colon). Frozen so the
 * allow-list cannot be widened by accident at runtime.
 */
var SUPPORTED_PROXY_SCHEMES = Object.freeze(["http:", "https:"]);
|
|
62
|
+
/**
 * Render a proxy URL in a form that is safe to show in error messages.
 *
 * @param {string} proxy - Proxy URL as supplied by the user.
 * @returns {string} The re-serialized URL with both username and password
 *   replaced by "***" when either one is present, or "<invalid URL>" when the
 *   input cannot be parsed as a URL.
 */
function sanitizeProxyUrl(proxy) {
  const MASK = "***";
  let url;
  try {
    url = new URL(proxy);
  } catch {
    return "<invalid URL>";
  }
  const hasCredentials = Boolean(url.username) || Boolean(url.password);
  if (hasCredentials) {
    // Mask both parts even if only one was given, so nothing leaks.
    url.username = MASK;
    url.password = MASK;
  }
  return url.toString();
}
|
|
74
|
+
/**
 * Check that a proxy setting is a parseable URL with a supported scheme.
 *
 * @param {string} proxy - Proxy URL to check.
 * @returns {void}
 * @throws {Error} If the value is not a valid URL, or if its scheme is not
 *   listed in SUPPORTED_PROXY_SCHEMES. Messages embed the sanitized URL
 *   rather than the raw input.
 */
function validateProxyUrl(proxy) {
  let url;
  try {
    url = new URL(proxy);
  } catch {
    throw new Error(`Invalid proxy URL: ${sanitizeProxyUrl(proxy)}`);
  }
  const { protocol } = url;
  if (SUPPORTED_PROXY_SCHEMES.includes(protocol)) {
    return;
  }
  throw new Error(
    `Unsupported proxy scheme: "${protocol}" in ${sanitizeProxyUrl(proxy)}. Only HTTP and HTTPS proxies are supported.`
  );
}
|
|
24
90
|
var VALID_WAIT_UNTIL = ["load", "domcontentloaded", "networkidle"];
|
|
25
91
|
function validateWaitUntil(value) {
|
|
26
92
|
if (!VALID_WAIT_UNTIL.includes(value)) {
|
|
@@ -56,6 +122,13 @@ function loadConfigFile(configPath) {
|
|
|
56
122
|
}
|
|
57
123
|
return config;
|
|
58
124
|
}
|
|
125
|
+
/**
 * Pick the proxy to use from the CLI option and the environment variable.
 *
 * The CLI value takes precedence over the environment value. A blank value
 * (empty or whitespace-only string) means "no proxy", so an empty
 * `VAULT_FETCH_PROXY=` in the environment no longer fails validation, and
 * `--proxy ""` can be used to switch the proxy off for a single run.
 *
 * @param {string | undefined | null} cliProxy - Value of `--proxy`, if given.
 * @param {string | undefined | null} envProxy - Value of `VAULT_FETCH_PROXY`, if set.
 * @returns {string | null} The validated proxy URL, or null when none is configured.
 * @throws {Error} Whatever validateProxyUrl() throws for a non-blank value
 *   that is not an acceptable proxy URL.
 */
function resolveProxy(cliProxy, envProxy) {
  const chosen = cliProxy ?? envProxy ?? null;
  if (chosen === null) {
    return null;
  }
  if (typeof chosen === "string" && chosen.trim() === "") {
    return null;
  }
  validateProxyUrl(chosen);
  return chosen;
}
|
|
59
132
|
function resolveConfig(cliOptions, configPath) {
|
|
60
133
|
let fileConfig = {};
|
|
61
134
|
if (configPath) {
|
|
@@ -63,6 +136,7 @@ function resolveConfig(cliOptions, configPath) {
|
|
|
63
136
|
}
|
|
64
137
|
const envDest = process.env.VAULT_FETCH_DEST;
|
|
65
138
|
const envTimeout = process.env.VAULT_FETCH_TIMEOUT;
|
|
139
|
+
const envProxy = process.env.VAULT_FETCH_PROXY;
|
|
66
140
|
const dest = cliOptions.dest ?? envDest ?? fileConfig.dest;
|
|
67
141
|
if (dest === void 0) {
|
|
68
142
|
throw new Error(
|
|
@@ -102,12 +176,14 @@ function resolveConfig(cliOptions, configPath) {
|
|
|
102
176
|
blockImages: cliOptions.blockImages ?? true,
|
|
103
177
|
blockFonts: cliOptions.blockFonts ?? true,
|
|
104
178
|
blockMedia: cliOptions.blockMedia ?? true,
|
|
105
|
-
raw: cliOptions.raw ?? false
|
|
179
|
+
raw: cliOptions.raw ?? false,
|
|
180
|
+
proxy: resolveProxy(cliOptions.proxy, envProxy),
|
|
181
|
+
fields: cliOptions.fields ?? {}
|
|
106
182
|
};
|
|
107
183
|
}
|
|
108
184
|
|
|
109
185
|
// src/fetcher.ts
|
|
110
|
-
import {
|
|
186
|
+
import { launch } from "cloakbrowser";
|
|
111
187
|
|
|
112
188
|
// src/session.ts
|
|
113
189
|
import { existsSync, mkdirSync } from "fs";
|
|
@@ -137,7 +213,6 @@ function ensureSessionDir(sessionsDir) {
|
|
|
137
213
|
}
|
|
138
214
|
|
|
139
215
|
// src/fetcher.ts
|
|
140
|
-
var CHROME_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36";
|
|
141
216
|
function isPdfContentType(contentType) {
|
|
142
217
|
return contentType.toLowerCase().includes("application/pdf");
|
|
143
218
|
}
|
|
@@ -172,13 +247,12 @@ async function downloadPdf(context, url, timeoutMs) {
|
|
|
172
247
|
return { pdfBuffer, finalUrl };
|
|
173
248
|
}
|
|
174
249
|
async function fetchPage(url, config, sessionsDir) {
|
|
175
|
-
const browser = await
|
|
176
|
-
headless: !config.headed
|
|
250
|
+
const browser = await launch({
|
|
251
|
+
headless: !config.headed,
|
|
252
|
+
...config.proxy !== null && { proxy: config.proxy }
|
|
177
253
|
});
|
|
178
254
|
try {
|
|
179
|
-
const contextOptions = {
|
|
180
|
-
userAgent: CHROME_USER_AGENT
|
|
181
|
-
};
|
|
255
|
+
const contextOptions = {};
|
|
182
256
|
if (!config.noSession && sessionExists(url, sessionsDir)) {
|
|
183
257
|
const sessionPath = getSessionPath(url, sessionsDir);
|
|
184
258
|
contextOptions.storageState = sessionPath;
|
|
@@ -346,7 +420,7 @@ function convertToMarkdown(html) {
|
|
|
346
420
|
}
|
|
347
421
|
|
|
348
422
|
// src/writer.ts
|
|
349
|
-
import { writeFileSync } from "fs";
|
|
423
|
+
import { writeFileSync, readFileSync as readFileSync2, existsSync as existsSync2 } from "fs";
|
|
350
424
|
import { join as join2 } from "path";
|
|
351
425
|
import yaml2 from "js-yaml";
|
|
352
426
|
var UNSAFE_CHARS = /[/\\:*?"<>|]/g;
|
|
@@ -357,7 +431,7 @@ function sanitizeFilename(title) {
|
|
|
357
431
|
const base = sanitized.slice(0, MAX_FILENAME_LENGTH) || "Untitled";
|
|
358
432
|
return `${base}.md`;
|
|
359
433
|
}
|
|
360
|
-
function buildFrontmatter(metadata, tags) {
|
|
434
|
+
function buildFrontmatter(metadata, tags, fields = {}) {
|
|
361
435
|
const data = {
|
|
362
436
|
title: metadata.title,
|
|
363
437
|
source: metadata.source
|
|
@@ -373,6 +447,9 @@ function buildFrontmatter(metadata, tags) {
|
|
|
373
447
|
data.description = metadata.description;
|
|
374
448
|
}
|
|
375
449
|
data.tags = tags;
|
|
450
|
+
for (const [key, value] of Object.entries(fields)) {
|
|
451
|
+
data[key] = value;
|
|
452
|
+
}
|
|
376
453
|
const yamlStr = yaml2.dump(data, {
|
|
377
454
|
quotingType: '"',
|
|
378
455
|
forceQuotes: false,
|
|
@@ -382,10 +459,54 @@ function buildFrontmatter(metadata, tags) {
|
|
|
382
459
|
return `---
|
|
383
460
|
${yamlStr}---`;
|
|
384
461
|
}
|
|
385
|
-
function
|
|
462
|
+
/**
 * Read the `source` value from the YAML frontmatter of an existing note.
 *
 * This is a best-effort lookup: any reason the value cannot be determined
 * (file missing or unreadable, path is a directory, no frontmatter, malformed
 * YAML, `source` absent or not a string) yields null instead of an error.
 *
 * A leading byte-order mark is ignored and CRLF line endings are accepted, so
 * notes saved by Windows editors are still recognised.
 *
 * @param {string} filePath - Path of the Markdown file to inspect.
 * @returns {string | null} The frontmatter `source` string, or null.
 */
function readSource(filePath) {
  let content;
  try {
    content = readFileSync2(filePath, "utf-8");
  } catch {
    // Missing, unreadable, or not a regular file: the source is unknown.
    return null;
  }
  // Normalise so the delimiter checks below only have to deal with "\n".
  const text = content.replace(/^\uFEFF/, "").replace(/\r\n/g, "\n");
  if (!text.startsWith("---\n")) {
    return null;
  }
  // Search from the newline that ends the opening delimiter, so an empty
  // frontmatter block ("---\n---") is found as well.
  const end = text.indexOf("\n---", 3);
  if (end === -1) {
    return null;
  }
  const frontmatter = text.slice(4, end);
  let parsed;
  try {
    // NOTE(review): parses file content as YAML. js-yaml v4's default schema
    // does not build arbitrary JS objects; confirm the bundled version.
    parsed = yaml2.load(frontmatter);
  } catch {
    return null;
  }
  if (parsed === null || typeof parsed !== "object") {
    return null;
  }
  const source = parsed.source;
  return typeof source === "string" ? source : null;
}
|
|
487
|
+
/**
 * Choose the file path a fetched page should be written to.
 *
 * Candidates are tried in order: `<base>.md`, `<base>-2.md`, `<base>-3.md`, …
 * The first candidate that either does not exist yet, or already holds a note
 * whose frontmatter `source` equals `source`, is returned. Re-fetching the
 * same URL therefore updates its note, while a different page with the same
 * title gets a numbered sibling.
 *
 * An existing file is only reused when `source` is a string. Without that
 * guard a missing source (null) would compare equal to the null that
 * readSource() returns for files without frontmatter, and an unrelated file
 * would be selected for overwriting.
 *
 * @param {string} dest - Destination directory.
 * @param {string} baseFilename - File name, with or without the ".md" suffix.
 * @param {string} source - Source URL of the page being saved.
 * @returns {string} Path of the file to write.
 */
function resolveTargetPath(dest, baseFilename, source) {
  const ext = ".md";
  const base = baseFilename.endsWith(ext) ? baseFilename.slice(0, -ext.length) : baseFilename;
  const holdsSameSource = (candidate) =>
    typeof source === "string" && readSource(candidate) === source;
  // n === 1 is the unsuffixed name; numbered names start at "-2".
  for (let n = 1; ; n += 1) {
    const suffix = n === 1 ? "" : `-${n}`;
    const candidate = join2(dest, `${base}${suffix}${ext}`);
    if (!existsSync2(candidate) || holdsSameSource(candidate)) {
      return candidate;
    }
  }
}
|
|
506
|
+
function writeMarkdownFile(dest, metadata, markdownContent, tags, fields = {}) {
|
|
386
507
|
const filename = sanitizeFilename(metadata.title);
|
|
387
|
-
const filePath =
|
|
388
|
-
const frontmatter = buildFrontmatter(metadata, tags);
|
|
508
|
+
const filePath = resolveTargetPath(dest, filename, metadata.source);
|
|
509
|
+
const frontmatter = buildFrontmatter(metadata, tags, fields);
|
|
389
510
|
const fullContent = `${frontmatter}
|
|
390
511
|
|
|
391
512
|
${markdownContent}
|
|
@@ -399,16 +520,24 @@ var CONFIG_PATH = join3(homedir3(), ".config", "vault-fetch", "config.yaml");
|
|
|
399
520
|
// Root CLI program. The string passed to .version() is what
// `vault-fetch --version` prints, so it has to match the released package
// version; this build is published as 0.5.0 but still reported 0.4.0.
var program = new Command();
program.name("vault-fetch").description(
  "Fetch JS-rendered web pages and save as Markdown to Obsidian Vault"
).version("0.5.0");
|
|
403
524
|
program.command("fetch").description("Fetch a page and save as Markdown").argument("<url>", "URL to fetch").option("--dest <path>", "Destination directory").option("--headed", "Run browser in headed mode").option("--selector <css>", "CSS selector to extract").option("--timeout <seconds>", "Timeout in seconds", parseInt).option("--tag <name>", "Add tag (repeatable)", (val, acc) => {
|
|
404
525
|
acc.push(val);
|
|
405
526
|
return acc;
|
|
406
527
|
}, []).option(
|
|
407
528
|
"--wait-until <event>",
|
|
408
529
|
"Wait condition: load, domcontentloaded, networkidle"
|
|
409
|
-
).option("--skip-session", "Do not use saved session").option("--dry-run", "Output to stdout instead of saving").option("--no-block-images", "Do not block image requests").option("--no-block-fonts", "Do not block font requests").option("--no-block-media", "Do not block media requests").option("--raw", "Convert full page HTML without Readability extraction").option("--title <text>", "Override the page title for the output filename").
|
|
530
|
+
).option("--skip-session", "Do not use saved session").option("--dry-run", "Output to stdout instead of saving").option("--no-block-images", "Do not block image requests").option("--no-block-fonts", "Do not block font requests").option("--no-block-media", "Do not block media requests").option("--raw", "Convert full page HTML without Readability extraction").option("--title <text>", "Override the page title for the output filename").option(
|
|
531
|
+
"--field <kv>",
|
|
532
|
+
"Add custom frontmatter field key=value (repeatable)",
|
|
533
|
+
(val, acc) => {
|
|
534
|
+
acc.push(val);
|
|
535
|
+
return acc;
|
|
536
|
+
},
|
|
537
|
+
[]
|
|
538
|
+
).option("--proxy <url>", "HTTP/HTTPS proxy URL (e.g. http://host:port)").action(async (url, options) => {
|
|
410
539
|
try {
|
|
411
|
-
const configPath =
|
|
540
|
+
const configPath = existsSync3(CONFIG_PATH) ? CONFIG_PATH : void 0;
|
|
412
541
|
const config = resolveConfig(
|
|
413
542
|
{
|
|
414
543
|
dest: options.dest,
|
|
@@ -423,14 +552,16 @@ program.command("fetch").description("Fetch a page and save as Markdown").argume
|
|
|
423
552
|
blockImages: options.blockImages,
|
|
424
553
|
blockFonts: options.blockFonts,
|
|
425
554
|
blockMedia: options.blockMedia,
|
|
426
|
-
raw: options.raw
|
|
555
|
+
raw: options.raw,
|
|
556
|
+
proxy: options.proxy,
|
|
557
|
+
fields: parseFields(options.field)
|
|
427
558
|
},
|
|
428
559
|
configPath
|
|
429
560
|
);
|
|
430
561
|
if (config.raw && config.selector) {
|
|
431
562
|
throw new Error("--raw and --selector cannot be used together.");
|
|
432
563
|
}
|
|
433
|
-
if (!config.dryRun && !
|
|
564
|
+
if (!config.dryRun && !existsSync3(config.dest)) {
|
|
434
565
|
throw new Error(`Destination directory does not exist: ${config.dest}`);
|
|
435
566
|
}
|
|
436
567
|
const sessionsDir = getSessionDir();
|
|
@@ -466,7 +597,7 @@ program.command("fetch").description("Fetch a page and save as Markdown").argume
|
|
|
466
597
|
metadata = { ...metadata, title: config.title };
|
|
467
598
|
}
|
|
468
599
|
if (config.dryRun) {
|
|
469
|
-
const frontmatter = buildFrontmatter(metadata, config.tags);
|
|
600
|
+
const frontmatter = buildFrontmatter(metadata, config.tags, config.fields);
|
|
470
601
|
process.stdout.write(`${frontmatter}
|
|
471
602
|
|
|
472
603
|
${markdown}
|
|
@@ -476,7 +607,8 @@ ${markdown}
|
|
|
476
607
|
config.dest,
|
|
477
608
|
metadata,
|
|
478
609
|
markdown,
|
|
479
|
-
config.tags
|
|
610
|
+
config.tags,
|
|
611
|
+
config.fields
|
|
480
612
|
);
|
|
481
613
|
console.error(`Saved: ${filePath}`);
|
|
482
614
|
}
|
|
@@ -486,12 +618,19 @@ ${markdown}
|
|
|
486
618
|
process.exit(1);
|
|
487
619
|
}
|
|
488
620
|
});
|
|
489
|
-
program.command("login").description("Login to a site and save session").argument("<url>", "URL to login").option("--timeout <seconds>", "Login timeout in seconds", parseInt).action(async (url, options) => {
|
|
490
|
-
const {
|
|
621
|
+
program.command("login").description("Login to a site and save session").argument("<url>", "URL to login").option("--timeout <seconds>", "Login timeout in seconds", parseInt).option("--proxy <url>", "HTTP/HTTPS proxy URL (e.g. http://host:port)").action(async (url, options) => {
|
|
622
|
+
const { launch: launch2 } = await import("cloakbrowser");
|
|
491
623
|
const sessionsDir = getSessionDir();
|
|
492
624
|
ensureSessionDir(sessionsDir);
|
|
493
625
|
const timeoutSec = options.timeout ?? 300;
|
|
494
|
-
const
|
|
626
|
+
const proxyUrl = resolveProxy(
|
|
627
|
+
options.proxy,
|
|
628
|
+
process.env.VAULT_FETCH_PROXY
|
|
629
|
+
);
|
|
630
|
+
const browser = await launch2({
|
|
631
|
+
headless: false,
|
|
632
|
+
...proxyUrl !== null && { proxy: proxyUrl }
|
|
633
|
+
});
|
|
495
634
|
try {
|
|
496
635
|
const context = await browser.newContext();
|
|
497
636
|
const page = await context.newPage();
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/cli.ts","../src/config.ts","../src/fetcher.ts","../src/session.ts","../src/extractor.ts","../src/converter.ts","../src/writer.ts"],"sourcesContent":["import { Command } from \"commander\";\nimport { once } from \"node:events\";\nimport { existsSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport { join } from \"node:path\";\nimport { resolveConfig } from \"./config.js\";\nimport { fetchPage } from \"./fetcher.js\";\nimport { extract, extractMetadata } from \"./extractor.js\";\nimport { convertToMarkdown } from \"./converter.js\";\nimport { writeMarkdownFile, buildFrontmatter } from \"./writer.js\";\nimport {\n getSessionDir,\n getSessionPath,\n ensureSessionDir,\n} from \"./session.js\";\nimport type { Metadata, WaitUntilOption } from \"./types.js\";\n\nconst CONFIG_PATH = join(homedir(), \".config\", \"vault-fetch\", \"config.yaml\");\n\nconst program = new Command();\n\nprogram\n .name(\"vault-fetch\")\n .description(\n \"Fetch JS-rendered web pages and save as Markdown to Obsidian Vault\",\n )\n .version(\"0.3.0\");\n\nprogram\n .command(\"fetch\")\n .description(\"Fetch a page and save as Markdown\")\n .argument(\"<url>\", \"URL to fetch\")\n .option(\"--dest <path>\", \"Destination directory\")\n .option(\"--headed\", \"Run browser in headed mode\")\n .option(\"--selector <css>\", \"CSS selector to extract\")\n .option(\"--timeout <seconds>\", \"Timeout in seconds\", parseInt)\n .option(\"--tag <name>\", \"Add tag (repeatable)\", (val: string, acc: string[]) => {\n acc.push(val);\n return acc;\n }, [] as string[])\n .option(\n \"--wait-until <event>\",\n \"Wait condition: load, domcontentloaded, networkidle\",\n )\n .option(\"--skip-session\", \"Do not use saved session\")\n .option(\"--dry-run\", \"Output to stdout instead of saving\")\n .option(\"--no-block-images\", \"Do not block image requests\")\n .option(\"--no-block-fonts\", \"Do not block font requests\")\n .option(\"--no-block-media\", \"Do not block 
media requests\")\n .option(\"--raw\", \"Convert full page HTML without Readability extraction\")\n .option(\"--title <text>\", \"Override the page title for the output filename\")\n .action(async (url: string, options: Record<string, unknown>) => {\n try {\n const configPath = existsSync(CONFIG_PATH) ? CONFIG_PATH : undefined;\n const config = resolveConfig(\n {\n dest: options.dest as string | undefined,\n tags: options.tag as string[] | undefined,\n timeout: options.timeout as number | undefined,\n waitUntil: options.waitUntil as WaitUntilOption | undefined,\n headed: options.headed as boolean | undefined,\n selector: options.selector as string | undefined,\n title: options.title as string | undefined,\n noSession: options.skipSession as boolean | undefined,\n dryRun: options.dryRun as boolean | undefined,\n blockImages: options.blockImages as boolean | undefined,\n blockFonts: options.blockFonts as boolean | undefined,\n blockMedia: options.blockMedia as boolean | undefined,\n raw: options.raw as boolean | undefined,\n },\n configPath,\n );\n\n if (config.raw && config.selector) {\n throw new Error(\"--raw and --selector cannot be used together.\");\n }\n\n // Validate dest directory exists\n if (!config.dryRun && !existsSync(config.dest)) {\n throw new Error(`Destination directory does not exist: ${config.dest}`);\n }\n\n const sessionsDir = getSessionDir();\n const fetchResult = await fetchPage(url, config, sessionsDir);\n\n let markdown: string;\n let metadata: Metadata;\n\n if (fetchResult.kind === \"pdf\") {\n if (config.selector) {\n throw new Error(\"--selector cannot be used with PDF URLs.\");\n }\n if (config.raw) {\n throw new Error(\"--raw cannot be used with PDF URLs.\");\n }\n const { convertPdfToMarkdown } = await import(\"./pdf-converter.js\");\n const pdfResult = await convertPdfToMarkdown(\n fetchResult.pdfBuffer,\n fetchResult.finalUrl,\n );\n markdown = pdfResult.markdown;\n metadata = pdfResult.metadata;\n } else if (config.selector) {\n // 
--selector mode: skip Readability, extract metadata from full page\n metadata = extractMetadata(fetchResult.fullHtml, fetchResult.finalUrl);\n markdown = convertToMarkdown(fetchResult.html);\n } else if (config.raw) {\n // --raw mode: skip Readability, convert full page HTML directly\n metadata = extractMetadata(fetchResult.fullHtml, fetchResult.finalUrl);\n markdown = convertToMarkdown(fetchResult.fullHtml);\n } else {\n const result = extract(fetchResult.html, fetchResult.finalUrl);\n metadata = result.metadata;\n markdown = convertToMarkdown(result.content);\n }\n\n if (config.title !== null) {\n metadata = { ...metadata, title: config.title };\n }\n\n if (config.dryRun) {\n const frontmatter = buildFrontmatter(metadata, config.tags);\n process.stdout.write(`${frontmatter}\\n\\n${markdown}\\n`);\n } else {\n const filePath = writeMarkdownFile(\n config.dest,\n metadata,\n markdown,\n config.tags,\n );\n console.error(`Saved: ${filePath}`);\n }\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n console.error(`Error: ${message}`);\n process.exit(1);\n }\n });\n\nprogram\n .command(\"login\")\n .description(\"Login to a site and save session\")\n .argument(\"<url>\", \"URL to login\")\n .option(\"--timeout <seconds>\", \"Login timeout in seconds\", parseInt)\n .action(async (url: string, options: Record<string, unknown>) => {\n const { chromium } = await import(\"playwright\");\n const sessionsDir = getSessionDir();\n ensureSessionDir(sessionsDir);\n\n const timeoutSec = (options.timeout as number | undefined) ?? 300;\n const browser = await chromium.launch({ headless: false });\n\n try {\n const context = await browser.newContext();\n const page = await context.newPage();\n\n await page.goto(url, { waitUntil: \"networkidle\", timeout: timeoutSec * 1000 });\n\n console.error(\"Browser opened. 
Log in manually, then press Enter here to save session.\");\n\n process.stdin.resume();\n await once(process.stdin, \"data\");\n process.stdin.pause();\n process.stdin.unref();\n\n const sessionPath = getSessionPath(url, sessionsDir);\n await context.storageState({ path: sessionPath });\n console.error(`Session saved: ${sessionPath}`);\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n console.error(`Error: ${message}`);\n process.exit(1);\n } finally {\n await browser.close();\n }\n });\n\nprogram.parse();\n","import { readFileSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport { resolve } from \"node:path\";\nimport yaml from \"js-yaml\";\nimport type { ResolvedConfig, WaitUntilOption } from \"./types.js\";\n\nconst DEFAULT_TIMEOUT = 30;\nconst DEFAULT_WAIT_UNTIL: WaitUntilOption = \"networkidle\";\nconst REQUIRED_TAG = \"clippings\";\n\ninterface FileConfig {\n dest?: string;\n tags?: string[];\n timeout?: number;\n waitUntil?: WaitUntilOption;\n}\n\ninterface CliOptions {\n dest?: string;\n tags?: string[];\n timeout?: number;\n waitUntil?: WaitUntilOption;\n headed?: boolean;\n selector?: string;\n title?: string;\n noSession?: boolean;\n dryRun?: boolean;\n blockImages?: boolean;\n blockFonts?: boolean;\n blockMedia?: boolean;\n raw?: boolean;\n}\n\nfunction expandTilde(filePath: string): string {\n if (filePath.startsWith(\"~/\")) {\n return resolve(homedir(), filePath.slice(2));\n }\n return filePath;\n}\n\nconst VALID_WAIT_UNTIL: readonly string[] = [\"load\", \"domcontentloaded\", \"networkidle\"];\n\nfunction validateWaitUntil(value: string): WaitUntilOption {\n if (!VALID_WAIT_UNTIL.includes(value)) {\n throw new Error(\n `Invalid waitUntil value: \"${value}\". 
Must be one of: ${VALID_WAIT_UNTIL.join(\", \")}`,\n );\n }\n return value as WaitUntilOption;\n}\n\nfunction loadConfigFile(configPath: string): FileConfig {\n const content = readFileSync(configPath, \"utf-8\");\n const parsed = yaml.load(content);\n if (parsed === null || typeof parsed !== \"object\") {\n throw new Error(`Invalid config file: ${configPath}`);\n }\n const config = parsed as Record<string, unknown>;\n\n if (config.timeout !== undefined && typeof config.timeout !== \"number\") {\n throw new Error(`Invalid timeout in config file: expected number, got ${typeof config.timeout}`);\n }\n if (config.dest !== undefined && typeof config.dest !== \"string\") {\n throw new Error(`Invalid dest in config file: expected string, got ${typeof config.dest}`);\n }\n if (config.waitUntil !== undefined) {\n if (typeof config.waitUntil !== \"string\") {\n throw new Error(`Invalid waitUntil in config file: expected string, got ${typeof config.waitUntil}`);\n }\n validateWaitUntil(config.waitUntil);\n }\n if (config.tags !== undefined) {\n if (!Array.isArray(config.tags) || !config.tags.every((t: unknown) => typeof t === \"string\")) {\n throw new Error(\"Invalid tags in config file: expected array of strings\");\n }\n }\n\n return config as FileConfig;\n}\n\nexport function resolveConfig(\n cliOptions: CliOptions,\n configPath: string | undefined,\n): ResolvedConfig {\n // Layer 1: Config file\n let fileConfig: FileConfig = {};\n if (configPath) {\n fileConfig = loadConfigFile(configPath);\n }\n\n // Layer 2: Environment variables\n const envDest = process.env.VAULT_FETCH_DEST;\n const envTimeout = process.env.VAULT_FETCH_TIMEOUT;\n\n // Resolve each field: CLI > env > file > default\n const dest = cliOptions.dest ?? envDest ?? fileConfig.dest;\n if (dest === undefined) {\n throw new Error(\n \"dest is required. 
Set via --dest, VAULT_FETCH_DEST, or config file.\",\n );\n }\n\n let timeout: number;\n if (cliOptions.timeout !== undefined) {\n timeout = cliOptions.timeout;\n } else if (envTimeout !== undefined) {\n const parsed = Number(envTimeout);\n if (Number.isNaN(parsed)) {\n throw new Error(`Invalid VAULT_FETCH_TIMEOUT value: ${envTimeout}`);\n }\n timeout = parsed;\n } else {\n timeout = fileConfig.timeout ?? DEFAULT_TIMEOUT;\n }\n\n const rawWaitUntil = cliOptions.waitUntil ?? fileConfig.waitUntil ?? DEFAULT_WAIT_UNTIL;\n const waitUntil = validateWaitUntil(rawWaitUntil);\n\n // Merge tags: file tags + CLI tags + always clippings\n const allTags = [\n ...(fileConfig.tags ?? []),\n ...(cliOptions.tags ?? []),\n REQUIRED_TAG,\n ];\n const tags = [...new Set(allTags)];\n\n return {\n dest: expandTilde(dest),\n tags,\n timeout,\n waitUntil,\n headed: cliOptions.headed ?? false,\n selector: cliOptions.selector ?? null,\n title: cliOptions.title ?? null,\n noSession: cliOptions.noSession ?? false,\n dryRun: cliOptions.dryRun ?? false,\n blockImages: cliOptions.blockImages ?? true,\n blockFonts: cliOptions.blockFonts ?? true,\n blockMedia: cliOptions.blockMedia ?? true,\n raw: cliOptions.raw ?? 
false,\n };\n}\n","import { chromium, type BrowserContext } from \"playwright\";\nimport type { FetchResult, ResolvedConfig } from \"./types.js\";\nimport { getSessionPath, sessionExists } from \"./session.js\";\n\nexport const CHROME_USER_AGENT =\n \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) \" +\n \"AppleWebKit/537.36 (KHTML, like Gecko) \" +\n \"Chrome/134.0.0.0 Safari/537.36\";\n\ninterface BlockingOptions {\n blockImages: boolean;\n blockFonts: boolean;\n blockMedia: boolean;\n}\n\nexport function isPdfContentType(contentType: string): boolean {\n return contentType.toLowerCase().includes(\"application/pdf\");\n}\n\nexport function buildBlockedResourceTypes(options: BlockingOptions): Set<string> {\n const blocked = new Set<string>();\n if (options.blockImages) blocked.add(\"image\");\n if (options.blockFonts) blocked.add(\"font\");\n if (options.blockMedia) blocked.add(\"media\");\n return blocked;\n}\n\nconst PDF_MAGIC_BYTES = \"%PDF\";\n\nexport function validatePdfBuffer(pdfBuffer: Buffer, sourceUrl: string): void {\n if (pdfBuffer.length === 0) {\n throw new Error(`Empty PDF response received from ${sourceUrl}`);\n }\n const header = pdfBuffer.subarray(0, PDF_MAGIC_BYTES.length).toString(\"ascii\");\n if (!header.startsWith(PDF_MAGIC_BYTES)) {\n throw new Error(\n `Response Content-Type is application/pdf but body is not valid PDF data from ${sourceUrl}`,\n );\n }\n}\n\nasync function downloadPdf(\n context: BrowserContext,\n url: string,\n timeoutMs: number,\n): Promise<{ pdfBuffer: Buffer; finalUrl: string }> {\n const apiResponse = await context.request.get(url, { timeout: timeoutMs });\n const status = apiResponse.status();\n if (status >= 400) {\n throw new Error(`HTTP ${status} received when downloading PDF from ${url}`);\n }\n const pdfBuffer = Buffer.from(await apiResponse.body());\n const finalUrl = apiResponse.url();\n validatePdfBuffer(pdfBuffer, finalUrl);\n return { pdfBuffer, finalUrl };\n}\n\nexport async function fetchPage(\n url: string,\n 
config: ResolvedConfig,\n sessionsDir: string,\n): Promise<FetchResult> {\n const browser = await chromium.launch({\n headless: !config.headed,\n });\n\n try {\n const contextOptions: Parameters<typeof browser.newContext>[0] = {\n userAgent: CHROME_USER_AGENT,\n };\n\n // Load session if available and not disabled\n if (!config.noSession && sessionExists(url, sessionsDir)) {\n const sessionPath = getSessionPath(url, sessionsDir);\n contextOptions.storageState = sessionPath;\n }\n\n const context: BrowserContext = await browser.newContext(contextOptions);\n const page = await context.newPage();\n\n // Block specified resource types for faster loading\n const blockedTypes = buildBlockedResourceTypes(config);\n if (blockedTypes.size > 0) {\n await page.route(\"**/*\", async (route) => {\n if (blockedTypes.has(route.request().resourceType())) {\n await route.abort();\n } else {\n await route.continue();\n }\n });\n }\n\n const timeoutMs = config.timeout * 1000;\n\n // page.goto throws \"Download is starting\" when the server returns\n // Content-Disposition: attachment (common for PDF downloads).\n // Catch this and download the PDF via the context's HTTP client.\n let response;\n try {\n response = await page.goto(url, {\n waitUntil: config.waitUntil,\n timeout: timeoutMs,\n });\n } catch (error) {\n if (\n error instanceof Error &&\n error.message.includes(\"Download is starting\")\n ) {\n const result = await downloadPdf(context, url, timeoutMs);\n await context.close();\n return { kind: \"pdf\", pdfBuffer: result.pdfBuffer, url, finalUrl: result.finalUrl };\n }\n throw error;\n }\n\n if (!response) {\n throw new Error(`No response received from ${url}`);\n }\n\n const status = response.status();\n if (status >= 400) {\n throw new Error(`HTTP ${status} received from ${response.url()}`);\n }\n\n const finalUrl = response.url();\n const contentType = response.headers()[\"content-type\"] ?? 
\"\";\n\n // Inline PDF (Content-Disposition: inline or absent).\n // Try response.body() first; fall back to context.request if the\n // browser returned its PDF viewer HTML instead of the actual bytes.\n if (isPdfContentType(contentType)) {\n const body = await response.body();\n try {\n validatePdfBuffer(body, finalUrl);\n await context.close();\n return { kind: \"pdf\", pdfBuffer: body, url, finalUrl };\n } catch {\n // response.body() returned PDF viewer HTML; re-download via API\n const result = await downloadPdf(context, finalUrl, timeoutMs);\n await context.close();\n return { kind: \"pdf\", pdfBuffer: result.pdfBuffer, url, finalUrl: result.finalUrl };\n }\n }\n\n const fullHtml = await page.content();\n let html: string;\n\n if (config.selector) {\n const element = await page.$(config.selector);\n if (!element) {\n throw new Error(`Selector not found: ${config.selector}`);\n }\n html = await element.innerHTML();\n } else {\n html = fullHtml;\n }\n\n await context.close();\n\n return { kind: \"html\", html, fullHtml, url, finalUrl };\n } finally {\n await browser.close();\n }\n}\n","import { existsSync, mkdirSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { homedir } from \"node:os\";\n\nconst CONFIG_DIR = join(homedir(), \".config\", \"vault-fetch\");\nconst SESSIONS_DIR = join(CONFIG_DIR, \"sessions\");\n\nexport function getSessionDir(): string {\n return SESSIONS_DIR;\n}\n\nfunction extractDomain(url: string): string {\n const parsed = new URL(url);\n return parsed.hostname ?? 
\"\";\n}\n\nexport function getSessionPath(url: string, sessionsDir: string): string {\n const domain = extractDomain(url);\n return join(sessionsDir, `${domain}.json`);\n}\n\nexport function sessionExists(url: string, sessionsDir: string): boolean {\n const sessionPath = getSessionPath(url, sessionsDir);\n return existsSync(sessionPath);\n}\n\nexport function ensureSessionDir(sessionsDir: string): void {\n if (!existsSync(sessionsDir)) {\n mkdirSync(sessionsDir, { recursive: true });\n }\n}\n","import { Readability } from \"@mozilla/readability\";\nimport { JSDOM } from \"jsdom\";\nimport type { Metadata } from \"./types.js\";\n\nfunction getMetaContent(doc: Document, selector: string): string | null {\n const el = doc.querySelector(selector);\n return el?.getAttribute(\"content\") ?? null;\n}\n\nfunction formatAuthor(raw: string): string {\n return `[[${raw.trim()}]]`;\n}\n\nfunction extractPublishedDate(doc: Document): string | null {\n const published =\n getMetaContent(doc, 'meta[property=\"article:published_time\"]') ??\n getMetaContent(doc, 'meta[name=\"datePublished\"]');\n\n if (!published) {\n const jsonLd = doc.querySelector('script[type=\"application/ld+json\"]');\n if (jsonLd?.textContent) {\n try {\n const data = JSON.parse(jsonLd.textContent) as Record<string, unknown>;\n if (typeof data.datePublished === \"string\") {\n return data.datePublished.split(\"T\")[0];\n }\n } catch {\n // JSON-LD parse failed\n }\n }\n return null;\n }\n\n return published.split(\"T\")[0];\n}\n\nfunction extractAuthors(\n doc: Document,\n readabilityByline: string | null,\n): string[] {\n const articleAuthors = doc.querySelectorAll('meta[property=\"article:author\"]');\n if (articleAuthors.length > 0) {\n return Array.from(articleAuthors)\n .map((el) => el.getAttribute(\"content\"))\n .filter((v): v is string => v !== null)\n .map(formatAuthor);\n }\n\n const ogAuthor = getMetaContent(doc, 'meta[property=\"og:author\"]');\n if (ogAuthor) {\n return 
[formatAuthor(ogAuthor)];\n }\n\n if (readabilityByline) {\n return [formatAuthor(readabilityByline)];\n }\n\n return [];\n}\n\nexport interface ExtractResult {\n metadata: Metadata;\n content: string;\n}\n\ninterface ReadabilityArticle {\n title: string;\n byline: string | null;\n excerpt: string;\n content: string;\n}\n\nfunction buildMetadata(\n doc: Document,\n article: ReadabilityArticle | null,\n finalUrl: string,\n): Metadata {\n const title = article?.title ?? doc.title;\n const authors = extractAuthors(doc, article?.byline ?? null);\n const published = extractPublishedDate(doc);\n\n const description =\n getMetaContent(doc, 'meta[property=\"og:description\"]') ??\n getMetaContent(doc, 'meta[name=\"description\"]') ??\n (article?.excerpt ?? null);\n\n const today = new Date().toISOString().split(\"T\")[0];\n\n return {\n title,\n source: finalUrl,\n author: authors,\n published,\n created: today,\n description,\n };\n}\n\nfunction parseWithReadability(html: string, url: string): ReadabilityArticle | null {\n const dom = new JSDOM(html, { url });\n const reader = new Readability(dom.window.document);\n return reader.parse() as ReadabilityArticle | null;\n}\n\nexport function extract(html: string, finalUrl: string): ExtractResult {\n const metaDom = new JSDOM(html, { url: finalUrl });\n const doc = metaDom.window.document;\n\n const article = parseWithReadability(html, finalUrl);\n\n if (!article) {\n throw new Error(\n \"Readability failed to extract content from the page. 
\" +\n \"Try --raw to convert the full page, or --selector <css> to target specific content.\",\n );\n }\n\n if (!article.content) {\n throw new Error(\"Readability returned empty content for the page\");\n }\n\n return {\n metadata: buildMetadata(doc, article, finalUrl),\n content: article.content,\n };\n}\n\nexport function extractMetadata(html: string, finalUrl: string): Metadata {\n const metaDom = new JSDOM(html, { url: finalUrl });\n const doc = metaDom.window.document;\n\n const article = parseWithReadability(html, finalUrl);\n\n return buildMetadata(doc, article, finalUrl);\n}\n","import TurndownService from \"turndown\";\n\nexport function convertToMarkdown(html: string): string {\n const turndown = new TurndownService({\n headingStyle: \"atx\",\n codeBlockStyle: \"fenced\",\n bulletListMarker: \"-\",\n });\n\n return turndown.turndown(html);\n}\n","import { writeFileSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport yaml from \"js-yaml\";\nimport type { Metadata } from \"./types.js\";\n\nconst UNSAFE_CHARS = /[/\\\\:*?\"<>|]/g;\nconst CONTROL_CHARS = /[\\x00-\\x1f\\x7f]/g;\nconst MAX_FILENAME_LENGTH = 200;\n\nexport function sanitizeFilename(title: string): string {\n const sanitized = title\n .replace(CONTROL_CHARS, \"\")\n .replace(UNSAFE_CHARS, \"\")\n .replace(/\\s+/g, \" \")\n .trim();\n const base = sanitized.slice(0, MAX_FILENAME_LENGTH) || \"Untitled\";\n return `${base}.md`;\n}\n\nexport function buildFrontmatter(metadata: Metadata, tags: string[]): string {\n const data: Record<string, unknown> = {\n title: metadata.title,\n source: metadata.source,\n };\n\n if (metadata.author.length > 0) {\n data.author = metadata.author;\n }\n\n if (metadata.published) {\n data.published = metadata.published;\n }\n\n data.created = metadata.created;\n\n if (metadata.description) {\n data.description = metadata.description;\n }\n\n data.tags = tags;\n\n const yamlStr = yaml.dump(data, {\n quotingType: '\"',\n forceQuotes: false,\n lineWidth: 
-1,\n sortKeys: false,\n });\n\n return `---\\n${yamlStr}---`;\n}\n\nexport function writeMarkdownFile(\n dest: string,\n metadata: Metadata,\n markdownContent: string,\n tags: string[],\n): string {\n const filename = sanitizeFilename(metadata.title);\n const filePath = join(dest, filename);\n const frontmatter = buildFrontmatter(metadata, tags);\n const fullContent = `${frontmatter}\\n\\n${markdownContent}\\n`;\n\n writeFileSync(filePath, fullContent, \"utf-8\");\n\n return filePath;\n}\n"],"mappings":";;;AAAA,SAAS,eAAe;AACxB,SAAS,YAAY;AACrB,SAAS,cAAAA,mBAAkB;AAC3B,SAAS,WAAAC,gBAAe;AACxB,SAAS,QAAAC,aAAY;;;ACJrB,SAAS,oBAAoB;AAC7B,SAAS,eAAe;AACxB,SAAS,eAAe;AACxB,OAAO,UAAU;AAGjB,IAAM,kBAAkB;AACxB,IAAM,qBAAsC;AAC5C,IAAM,eAAe;AAyBrB,SAAS,YAAY,UAA0B;AAC7C,MAAI,SAAS,WAAW,IAAI,GAAG;AAC7B,WAAO,QAAQ,QAAQ,GAAG,SAAS,MAAM,CAAC,CAAC;AAAA,EAC7C;AACA,SAAO;AACT;AAEA,IAAM,mBAAsC,CAAC,QAAQ,oBAAoB,aAAa;AAEtF,SAAS,kBAAkB,OAAgC;AACzD,MAAI,CAAC,iBAAiB,SAAS,KAAK,GAAG;AACrC,UAAM,IAAI;AAAA,MACR,6BAA6B,KAAK,sBAAsB,iBAAiB,KAAK,IAAI,CAAC;AAAA,IACrF;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,eAAe,YAAgC;AACtD,QAAM,UAAU,aAAa,YAAY,OAAO;AAChD,QAAM,SAAS,KAAK,KAAK,OAAO;AAChC,MAAI,WAAW,QAAQ,OAAO,WAAW,UAAU;AACjD,UAAM,IAAI,MAAM,wBAAwB,UAAU,EAAE;AAAA,EACtD;AACA,QAAM,SAAS;AAEf,MAAI,OAAO,YAAY,UAAa,OAAO,OAAO,YAAY,UAAU;AACtE,UAAM,IAAI,MAAM,wDAAwD,OAAO,OAAO,OAAO,EAAE;AAAA,EACjG;AACA,MAAI,OAAO,SAAS,UAAa,OAAO,OAAO,SAAS,UAAU;AAChE,UAAM,IAAI,MAAM,qDAAqD,OAAO,OAAO,IAAI,EAAE;AAAA,EAC3F;AACA,MAAI,OAAO,cAAc,QAAW;AAClC,QAAI,OAAO,OAAO,cAAc,UAAU;AACxC,YAAM,IAAI,MAAM,0DAA0D,OAAO,OAAO,SAAS,EAAE;AAAA,IACrG;AACA,sBAAkB,OAAO,SAAS;AAAA,EACpC;AACA,MAAI,OAAO,SAAS,QAAW;AAC7B,QAAI,CAAC,MAAM,QAAQ,OAAO,IAAI,KAAK,CAAC,OAAO,KAAK,MAAM,CAAC,MAAe,OAAO,MAAM,QAAQ,GAAG;AAC5F,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAAA,EACF;AAEA,SAAO;AACT;AAEO,SAAS,cACd,YACA,YACgB;AAEhB,MAAI,aAAyB,CAAC;AAC9B,MAAI,YAAY;AACd,iBAAa,eAAe,UAAU;AAAA,EACxC;AAGA,QAAM,UAAU,QAAQ,IAAI;AAC5B,QAAM,aAAa,QAAQ,IAAI;AAG/B,QAAM,OAAO,WAAW,QAAQ,WAAW,WAAW;AACtD,MAAI,SAAS,QAAW;AACtB,UAAM,IAA
I;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,MAAI;AACJ,MAAI,WAAW,YAAY,QAAW;AACpC,cAAU,WAAW;AAAA,EACvB,WAAW,eAAe,QAAW;AACnC,UAAM,SAAS,OAAO,UAAU;AAChC,QAAI,OAAO,MAAM,MAAM,GAAG;AACxB,YAAM,IAAI,MAAM,sCAAsC,UAAU,EAAE;AAAA,IACpE;AACA,cAAU;AAAA,EACZ,OAAO;AACL,cAAU,WAAW,WAAW;AAAA,EAClC;AAEA,QAAM,eAAe,WAAW,aAAa,WAAW,aAAa;AACrE,QAAM,YAAY,kBAAkB,YAAY;AAGhD,QAAM,UAAU;AAAA,IACd,GAAI,WAAW,QAAQ,CAAC;AAAA,IACxB,GAAI,WAAW,QAAQ,CAAC;AAAA,IACxB;AAAA,EACF;AACA,QAAM,OAAO,CAAC,GAAG,IAAI,IAAI,OAAO,CAAC;AAEjC,SAAO;AAAA,IACL,MAAM,YAAY,IAAI;AAAA,IACtB;AAAA,IACA;AAAA,IACA;AAAA,IACA,QAAQ,WAAW,UAAU;AAAA,IAC7B,UAAU,WAAW,YAAY;AAAA,IACjC,OAAO,WAAW,SAAS;AAAA,IAC3B,WAAW,WAAW,aAAa;AAAA,IACnC,QAAQ,WAAW,UAAU;AAAA,IAC7B,aAAa,WAAW,eAAe;AAAA,IACvC,YAAY,WAAW,cAAc;AAAA,IACrC,YAAY,WAAW,cAAc;AAAA,IACrC,KAAK,WAAW,OAAO;AAAA,EACzB;AACF;;;AC7IA,SAAS,gBAAqC;;;ACA9C,SAAS,YAAY,iBAAiB;AACtC,SAAS,YAAY;AACrB,SAAS,WAAAC,gBAAe;AAExB,IAAM,aAAa,KAAKA,SAAQ,GAAG,WAAW,aAAa;AAC3D,IAAM,eAAe,KAAK,YAAY,UAAU;AAEzC,SAAS,gBAAwB;AACtC,SAAO;AACT;AAEA,SAAS,cAAc,KAAqB;AAC1C,QAAM,SAAS,IAAI,IAAI,GAAG;AAC1B,SAAO,OAAO,YAAY;AAC5B;AAEO,SAAS,eAAe,KAAa,aAA6B;AACvE,QAAM,SAAS,cAAc,GAAG;AAChC,SAAO,KAAK,aAAa,GAAG,MAAM,OAAO;AAC3C;AAEO,SAAS,cAAc,KAAa,aAA8B;AACvE,QAAM,cAAc,eAAe,KAAK,WAAW;AACnD,SAAO,WAAW,WAAW;AAC/B;AAEO,SAAS,iBAAiB,aAA2B;AAC1D,MAAI,CAAC,WAAW,WAAW,GAAG;AAC5B,cAAU,aAAa,EAAE,WAAW,KAAK,CAAC;AAAA,EAC5C;AACF;;;AD1BO,IAAM,oBACX;AAUK,SAAS,iBAAiB,aAA8B;AAC7D,SAAO,YAAY,YAAY,EAAE,SAAS,iBAAiB;AAC7D;AAEO,SAAS,0BAA0B,SAAuC;AAC/E,QAAM,UAAU,oBAAI,IAAY;AAChC,MAAI,QAAQ,YAAa,SAAQ,IAAI,OAAO;AAC5C,MAAI,QAAQ,WAAY,SAAQ,IAAI,MAAM;AAC1C,MAAI,QAAQ,WAAY,SAAQ,IAAI,OAAO;AAC3C,SAAO;AACT;AAEA,IAAM,kBAAkB;AAEjB,SAAS,kBAAkB,WAAmB,WAAyB;AAC5E,MAAI,UAAU,WAAW,GAAG;AAC1B,UAAM,IAAI,MAAM,oCAAoC,SAAS,EAAE;AAAA,EACjE;AACA,QAAM,SAAS,UAAU,SAAS,GAAG,gBAAgB,MAAM,EAAE,SAAS,OAAO;AAC7E,MAAI,CAAC,OAAO,WAAW,eAAe,GAAG;AACvC,UAAM,IAAI;AAAA,MACR,gFAAgF,SAAS;AAAA,IAC3F;AAAA,EACF;AACF;AAEA,eAAe,YACb,SACA,KACA,WACkD;AAClD,QAAM,cAAc,MAAM,QAAQ,QAAQ,IAAI,KAAK,EAAE,SAAS,UAAU,CAAC;AACzE,QAAM,SAAS,YAAY,O
AAO;AAClC,MAAI,UAAU,KAAK;AACjB,UAAM,IAAI,MAAM,QAAQ,MAAM,uCAAuC,GAAG,EAAE;AAAA,EAC5E;AACA,QAAM,YAAY,OAAO,KAAK,MAAM,YAAY,KAAK,CAAC;AACtD,QAAM,WAAW,YAAY,IAAI;AACjC,oBAAkB,WAAW,QAAQ;AACrC,SAAO,EAAE,WAAW,SAAS;AAC/B;AAEA,eAAsB,UACpB,KACA,QACA,aACsB;AACtB,QAAM,UAAU,MAAM,SAAS,OAAO;AAAA,IACpC,UAAU,CAAC,OAAO;AAAA,EACpB,CAAC;AAED,MAAI;AACF,UAAM,iBAA2D;AAAA,MAC/D,WAAW;AAAA,IACb;AAGA,QAAI,CAAC,OAAO,aAAa,cAAc,KAAK,WAAW,GAAG;AACxD,YAAM,cAAc,eAAe,KAAK,WAAW;AACnD,qBAAe,eAAe;AAAA,IAChC;AAEA,UAAM,UAA0B,MAAM,QAAQ,WAAW,cAAc;AACvE,UAAM,OAAO,MAAM,QAAQ,QAAQ;AAGnC,UAAM,eAAe,0BAA0B,MAAM;AACrD,QAAI,aAAa,OAAO,GAAG;AACzB,YAAM,KAAK,MAAM,QAAQ,OAAO,UAAU;AACxC,YAAI,aAAa,IAAI,MAAM,QAAQ,EAAE,aAAa,CAAC,GAAG;AACpD,gBAAM,MAAM,MAAM;AAAA,QACpB,OAAO;AACL,gBAAM,MAAM,SAAS;AAAA,QACvB;AAAA,MACF,CAAC;AAAA,IACH;AAEA,UAAM,YAAY,OAAO,UAAU;AAKnC,QAAI;AACJ,QAAI;AACF,iBAAW,MAAM,KAAK,KAAK,KAAK;AAAA,QAC9B,WAAW,OAAO;AAAA,QAClB,SAAS;AAAA,MACX,CAAC;AAAA,IACH,SAAS,OAAO;AACd,UACE,iBAAiB,SACjB,MAAM,QAAQ,SAAS,sBAAsB,GAC7C;AACA,cAAM,SAAS,MAAM,YAAY,SAAS,KAAK,SAAS;AACxD,cAAM,QAAQ,MAAM;AACpB,eAAO,EAAE,MAAM,OAAO,WAAW,OAAO,WAAW,KAAK,UAAU,OAAO,SAAS;AAAA,MACpF;AACA,YAAM;AAAA,IACR;AAEA,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,6BAA6B,GAAG,EAAE;AAAA,IACpD;AAEA,UAAM,SAAS,SAAS,OAAO;AAC/B,QAAI,UAAU,KAAK;AACjB,YAAM,IAAI,MAAM,QAAQ,MAAM,kBAAkB,SAAS,IAAI,CAAC,EAAE;AAAA,IAClE;AAEA,UAAM,WAAW,SAAS,IAAI;AAC9B,UAAM,cAAc,SAAS,QAAQ,EAAE,cAAc,KAAK;AAK1D,QAAI,iBAAiB,WAAW,GAAG;AACjC,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,UAAI;AACF,0BAAkB,MAAM,QAAQ;AAChC,cAAM,QAAQ,MAAM;AACpB,eAAO,EAAE,MAAM,OAAO,WAAW,MAAM,KAAK,SAAS;AAAA,MACvD,QAAQ;AAEN,cAAM,SAAS,MAAM,YAAY,SAAS,UAAU,SAAS;AAC7D,cAAM,QAAQ,MAAM;AACpB,eAAO,EAAE,MAAM,OAAO,WAAW,OAAO,WAAW,KAAK,UAAU,OAAO,SAAS;AAAA,MACpF;AAAA,IACF;AAEA,UAAM,WAAW,MAAM,KAAK,QAAQ;AACpC,QAAI;AAEJ,QAAI,OAAO,UAAU;AACnB,YAAM,UAAU,MAAM,KAAK,EAAE,OAAO,QAAQ;AAC5C,UAAI,CAAC,SAAS;AACZ,cAAM,IAAI,MAAM,uBAAuB,OAAO,QAAQ,EAAE;AAAA,MAC1D;AACA,aAAO,MAAM,QAAQ,UAAU;AAAA,IACjC,OAAO;AACL,aAAO;AAAA,IACT;AAEA,UAAM,QAAQ,MAAM;AAEpB,WAAO,EAAE,MAAM,QAAQ,MAAM,UAAU,KAAK,SAAS;A
AAA,EACvD,UAAE;AACA,UAAM,QAAQ,MAAM;AAAA,EACtB;AACF;;;AEnKA,SAAS,mBAAmB;AAC5B,SAAS,aAAa;AAGtB,SAAS,eAAe,KAAe,UAAiC;AACtE,QAAM,KAAK,IAAI,cAAc,QAAQ;AACrC,SAAO,IAAI,aAAa,SAAS,KAAK;AACxC;AAEA,SAAS,aAAa,KAAqB;AACzC,SAAO,KAAK,IAAI,KAAK,CAAC;AACxB;AAEA,SAAS,qBAAqB,KAA8B;AAC1D,QAAM,YACJ,eAAe,KAAK,yCAAyC,KAC7D,eAAe,KAAK,4BAA4B;AAElD,MAAI,CAAC,WAAW;AACd,UAAM,SAAS,IAAI,cAAc,oCAAoC;AACrE,QAAI,QAAQ,aAAa;AACvB,UAAI;AACF,cAAM,OAAO,KAAK,MAAM,OAAO,WAAW;AAC1C,YAAI,OAAO,KAAK,kBAAkB,UAAU;AAC1C,iBAAO,KAAK,cAAc,MAAM,GAAG,EAAE,CAAC;AAAA,QACxC;AAAA,MACF,QAAQ;AAAA,MAER;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAEA,SAAO,UAAU,MAAM,GAAG,EAAE,CAAC;AAC/B;AAEA,SAAS,eACP,KACA,mBACU;AACV,QAAM,iBAAiB,IAAI,iBAAiB,iCAAiC;AAC7E,MAAI,eAAe,SAAS,GAAG;AAC7B,WAAO,MAAM,KAAK,cAAc,EAC7B,IAAI,CAAC,OAAO,GAAG,aAAa,SAAS,CAAC,EACtC,OAAO,CAAC,MAAmB,MAAM,IAAI,EACrC,IAAI,YAAY;AAAA,EACrB;AAEA,QAAM,WAAW,eAAe,KAAK,4BAA4B;AACjE,MAAI,UAAU;AACZ,WAAO,CAAC,aAAa,QAAQ,CAAC;AAAA,EAChC;AAEA,MAAI,mBAAmB;AACrB,WAAO,CAAC,aAAa,iBAAiB,CAAC;AAAA,EACzC;AAEA,SAAO,CAAC;AACV;AAcA,SAAS,cACP,KACA,SACA,UACU;AACV,QAAM,QAAQ,SAAS,SAAS,IAAI;AACpC,QAAM,UAAU,eAAe,KAAK,SAAS,UAAU,IAAI;AAC3D,QAAM,YAAY,qBAAqB,GAAG;AAE1C,QAAM,cACJ,eAAe,KAAK,iCAAiC,KACrD,eAAe,KAAK,0BAA0B,MAC7C,SAAS,WAAW;AAEvB,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE,CAAC;AAEnD,SAAO;AAAA,IACL;AAAA,IACA,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR;AAAA,IACA,SAAS;AAAA,IACT;AAAA,EACF;AACF;AAEA,SAAS,qBAAqB,MAAc,KAAwC;AAClF,QAAM,MAAM,IAAI,MAAM,MAAM,EAAE,IAAI,CAAC;AACnC,QAAM,SAAS,IAAI,YAAY,IAAI,OAAO,QAAQ;AAClD,SAAO,OAAO,MAAM;AACtB;AAEO,SAAS,QAAQ,MAAc,UAAiC;AACrE,QAAM,UAAU,IAAI,MAAM,MAAM,EAAE,KAAK,SAAS,CAAC;AACjD,QAAM,MAAM,QAAQ,OAAO;AAE3B,QAAM,UAAU,qBAAqB,MAAM,QAAQ;AAEnD,MAAI,CAAC,SAAS;AACZ,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AAEA,MAAI,CAAC,QAAQ,SAAS;AACpB,UAAM,IAAI,MAAM,iDAAiD;AAAA,EACnE;AAEA,SAAO;AAAA,IACL,UAAU,cAAc,KAAK,SAAS,QAAQ;AAAA,IAC9C,SAAS,QAAQ;AAAA,EACnB;AACF;AAEO,SAAS,gBAAgB,MAAc,UAA4B;AACxE,QAAM,UAAU,IAAI,MAAM,MAAM,EAAE,KAAK,SAAS,CAAC;AACjD,QAAM,MAAM,QAAQ,OAAO;AAE3B,QAAM,UAAU,qBAAqB,MAAM,QAAQ;AAE
nD,SAAO,cAAc,KAAK,SAAS,QAAQ;AAC7C;;;ACtIA,OAAO,qBAAqB;AAErB,SAAS,kBAAkB,MAAsB;AACtD,QAAM,WAAW,IAAI,gBAAgB;AAAA,IACnC,cAAc;AAAA,IACd,gBAAgB;AAAA,IAChB,kBAAkB;AAAA,EACpB,CAAC;AAED,SAAO,SAAS,SAAS,IAAI;AAC/B;;;ACVA,SAAS,qBAAqB;AAC9B,SAAS,QAAAC,aAAY;AACrB,OAAOC,WAAU;AAGjB,IAAM,eAAe;AACrB,IAAM,gBAAgB;AACtB,IAAM,sBAAsB;AAErB,SAAS,iBAAiB,OAAuB;AACtD,QAAM,YAAY,MACf,QAAQ,eAAe,EAAE,EACzB,QAAQ,cAAc,EAAE,EACxB,QAAQ,QAAQ,GAAG,EACnB,KAAK;AACR,QAAM,OAAO,UAAU,MAAM,GAAG,mBAAmB,KAAK;AACxD,SAAO,GAAG,IAAI;AAChB;AAEO,SAAS,iBAAiB,UAAoB,MAAwB;AAC3E,QAAM,OAAgC;AAAA,IACpC,OAAO,SAAS;AAAA,IAChB,QAAQ,SAAS;AAAA,EACnB;AAEA,MAAI,SAAS,OAAO,SAAS,GAAG;AAC9B,SAAK,SAAS,SAAS;AAAA,EACzB;AAEA,MAAI,SAAS,WAAW;AACtB,SAAK,YAAY,SAAS;AAAA,EAC5B;AAEA,OAAK,UAAU,SAAS;AAExB,MAAI,SAAS,aAAa;AACxB,SAAK,cAAc,SAAS;AAAA,EAC9B;AAEA,OAAK,OAAO;AAEZ,QAAM,UAAUA,MAAK,KAAK,MAAM;AAAA,IAC9B,aAAa;AAAA,IACb,aAAa;AAAA,IACb,WAAW;AAAA,IACX,UAAU;AAAA,EACZ,CAAC;AAED,SAAO;AAAA,EAAQ,OAAO;AACxB;AAEO,SAAS,kBACd,MACA,UACA,iBACA,MACQ;AACR,QAAM,WAAW,iBAAiB,SAAS,KAAK;AAChD,QAAM,WAAWD,MAAK,MAAM,QAAQ;AACpC,QAAM,cAAc,iBAAiB,UAAU,IAAI;AACnD,QAAM,cAAc,GAAG,WAAW;AAAA;AAAA,EAAO,eAAe;AAAA;AAExD,gBAAc,UAAU,aAAa,OAAO;AAE5C,SAAO;AACT;;;ANhDA,IAAM,cAAcE,MAAKC,SAAQ,GAAG,WAAW,eAAe,aAAa;AAE3E,IAAM,UAAU,IAAI,QAAQ;AAE5B,QACG,KAAK,aAAa,EAClB;AAAA,EACC;AACF,EACC,QAAQ,OAAO;AAElB,QACG,QAAQ,OAAO,EACf,YAAY,mCAAmC,EAC/C,SAAS,SAAS,cAAc,EAChC,OAAO,iBAAiB,uBAAuB,EAC/C,OAAO,YAAY,4BAA4B,EAC/C,OAAO,oBAAoB,yBAAyB,EACpD,OAAO,uBAAuB,sBAAsB,QAAQ,EAC5D,OAAO,gBAAgB,wBAAwB,CAAC,KAAa,QAAkB;AAC9E,MAAI,KAAK,GAAG;AACZ,SAAO;AACT,GAAG,CAAC,CAAa,EAChB;AAAA,EACC;AAAA,EACA;AACF,EACC,OAAO,kBAAkB,0BAA0B,EACnD,OAAO,aAAa,oCAAoC,EACxD,OAAO,qBAAqB,6BAA6B,EACzD,OAAO,oBAAoB,4BAA4B,EACvD,OAAO,oBAAoB,6BAA6B,EACxD,OAAO,SAAS,uDAAuD,EACvE,OAAO,kBAAkB,iDAAiD,EAC1E,OAAO,OAAO,KAAa,YAAqC;AAC/D,MAAI;AACF,UAAM,aAAaC,YAAW,WAAW,IAAI,cAAc;AAC3D,UAAM,SAAS;AAAA,MACb;AAAA,QACE,MAAM,QAAQ;AAAA,QACd,MAAM,QAAQ;AAAA,QACd,SAAS,QAAQ;AAAA,QACjB,WAAW,QAAQ;AAAA,QACnB,QAAQ,QAAQ;AAAA,QAChB,UAAU,QAAQ;AAAA,QAClB,OAAO,QAAQ;AAAA,
QACf,WAAW,QAAQ;AAAA,QACnB,QAAQ,QAAQ;AAAA,QAChB,aAAa,QAAQ;AAAA,QACrB,YAAY,QAAQ;AAAA,QACpB,YAAY,QAAQ;AAAA,QACpB,KAAK,QAAQ;AAAA,MACf;AAAA,MACA;AAAA,IACF;AAEA,QAAI,OAAO,OAAO,OAAO,UAAU;AACjC,YAAM,IAAI,MAAM,+CAA+C;AAAA,IACjE;AAGA,QAAI,CAAC,OAAO,UAAU,CAACA,YAAW,OAAO,IAAI,GAAG;AAC9C,YAAM,IAAI,MAAM,yCAAyC,OAAO,IAAI,EAAE;AAAA,IACxE;AAEA,UAAM,cAAc,cAAc;AAClC,UAAM,cAAc,MAAM,UAAU,KAAK,QAAQ,WAAW;AAE5D,QAAI;AACJ,QAAI;AAEJ,QAAI,YAAY,SAAS,OAAO;AAC9B,UAAI,OAAO,UAAU;AACnB,cAAM,IAAI,MAAM,0CAA0C;AAAA,MAC5D;AACA,UAAI,OAAO,KAAK;AACd,cAAM,IAAI,MAAM,qCAAqC;AAAA,MACvD;AACA,YAAM,EAAE,qBAAqB,IAAI,MAAM,OAAO,6BAAoB;AAClE,YAAM,YAAY,MAAM;AAAA,QACtB,YAAY;AAAA,QACZ,YAAY;AAAA,MACd;AACA,iBAAW,UAAU;AACrB,iBAAW,UAAU;AAAA,IACvB,WAAW,OAAO,UAAU;AAE1B,iBAAW,gBAAgB,YAAY,UAAU,YAAY,QAAQ;AACrE,iBAAW,kBAAkB,YAAY,IAAI;AAAA,IAC/C,WAAW,OAAO,KAAK;AAErB,iBAAW,gBAAgB,YAAY,UAAU,YAAY,QAAQ;AACrE,iBAAW,kBAAkB,YAAY,QAAQ;AAAA,IACnD,OAAO;AACL,YAAM,SAAS,QAAQ,YAAY,MAAM,YAAY,QAAQ;AAC7D,iBAAW,OAAO;AAClB,iBAAW,kBAAkB,OAAO,OAAO;AAAA,IAC7C;AAEA,QAAI,OAAO,UAAU,MAAM;AACzB,iBAAW,EAAE,GAAG,UAAU,OAAO,OAAO,MAAM;AAAA,IAChD;AAEA,QAAI,OAAO,QAAQ;AACjB,YAAM,cAAc,iBAAiB,UAAU,OAAO,IAAI;AAC1D,cAAQ,OAAO,MAAM,GAAG,WAAW;AAAA;AAAA,EAAO,QAAQ;AAAA,CAAI;AAAA,IACxD,OAAO;AACL,YAAM,WAAW;AAAA,QACf,OAAO;AAAA,QACP;AAAA,QACA;AAAA,QACA,OAAO;AAAA,MACT;AACA,cAAQ,MAAM,UAAU,QAAQ,EAAE;AAAA,IACpC;AAAA,EACF,SAAS,OAAO;AACd,UAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AACrE,YAAQ,MAAM,UAAU,OAAO,EAAE;AACjC,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF,CAAC;AAEH,QACG,QAAQ,OAAO,EACf,YAAY,kCAAkC,EAC9C,SAAS,SAAS,cAAc,EAChC,OAAO,uBAAuB,4BAA4B,QAAQ,EAClE,OAAO,OAAO,KAAa,YAAqC;AAC/D,QAAM,EAAE,UAAAC,UAAS,IAAI,MAAM,OAAO,YAAY;AAC9C,QAAM,cAAc,cAAc;AAClC,mBAAiB,WAAW;AAE5B,QAAM,aAAc,QAAQ,WAAkC;AAC9D,QAAM,UAAU,MAAMA,UAAS,OAAO,EAAE,UAAU,MAAM,CAAC;AAEzD,MAAI;AACF,UAAM,UAAU,MAAM,QAAQ,WAAW;AACzC,UAAM,OAAO,MAAM,QAAQ,QAAQ;AAEnC,UAAM,KAAK,KAAK,KAAK,EAAE,WAAW,eAAe,SAAS,aAAa,IAAK,CAAC;AAE7E,YAAQ,MAAM,yEAAyE;AAEvF,YAAQ,MAAM,OAAO;AACrB,UAAM,KAAK,QAAQ,OAAO,MAAM;AAChC,YAAQ,MAAM,MAAM;AACpB,YAAQ,MAAM,MAAM;AAEpB,UAAM,c
AAc,eAAe,KAAK,WAAW;AACnD,UAAM,QAAQ,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,YAAQ,MAAM,kBAAkB,WAAW,EAAE;AAAA,EAC/C,SAAS,OAAO;AACd,UAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AACrE,YAAQ,MAAM,UAAU,OAAO,EAAE;AACjC,YAAQ,KAAK,CAAC;AAAA,EAChB,UAAE;AACA,UAAM,QAAQ,MAAM;AAAA,EACtB;AACF,CAAC;AAEH,QAAQ,MAAM;","names":["existsSync","homedir","join","homedir","join","yaml","join","homedir","existsSync","chromium"]}
|
|
1
|
+
{"version":3,"sources":["../src/cli.ts","../src/config.ts","../src/types.ts","../src/fetcher.ts","../src/session.ts","../src/extractor.ts","../src/converter.ts","../src/writer.ts"],"sourcesContent":["import { Command } from \"commander\";\nimport { once } from \"node:events\";\nimport { existsSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport { join } from \"node:path\";\nimport { resolveConfig, resolveProxy, parseFields } from \"./config.js\";\nimport { fetchPage } from \"./fetcher.js\";\nimport { extract, extractMetadata } from \"./extractor.js\";\nimport { convertToMarkdown } from \"./converter.js\";\nimport { writeMarkdownFile, buildFrontmatter } from \"./writer.js\";\nimport {\n getSessionDir,\n getSessionPath,\n ensureSessionDir,\n} from \"./session.js\";\nimport type { Metadata, WaitUntilOption } from \"./types.js\";\n\nconst CONFIG_PATH = join(homedir(), \".config\", \"vault-fetch\", \"config.yaml\");\n\nconst program = new Command();\n\nprogram\n .name(\"vault-fetch\")\n .description(\n \"Fetch JS-rendered web pages and save as Markdown to Obsidian Vault\",\n )\n .version(\"0.4.0\");\n\nprogram\n .command(\"fetch\")\n .description(\"Fetch a page and save as Markdown\")\n .argument(\"<url>\", \"URL to fetch\")\n .option(\"--dest <path>\", \"Destination directory\")\n .option(\"--headed\", \"Run browser in headed mode\")\n .option(\"--selector <css>\", \"CSS selector to extract\")\n .option(\"--timeout <seconds>\", \"Timeout in seconds\", parseInt)\n .option(\"--tag <name>\", \"Add tag (repeatable)\", (val: string, acc: string[]) => {\n acc.push(val);\n return acc;\n }, [] as string[])\n .option(\n \"--wait-until <event>\",\n \"Wait condition: load, domcontentloaded, networkidle\",\n )\n .option(\"--skip-session\", \"Do not use saved session\")\n .option(\"--dry-run\", \"Output to stdout instead of saving\")\n .option(\"--no-block-images\", \"Do not block image requests\")\n .option(\"--no-block-fonts\", \"Do not block font requests\")\n 
.option(\"--no-block-media\", \"Do not block media requests\")\n .option(\"--raw\", \"Convert full page HTML without Readability extraction\")\n .option(\"--title <text>\", \"Override the page title for the output filename\")\n .option(\n \"--field <kv>\",\n \"Add custom frontmatter field key=value (repeatable)\",\n (val: string, acc: string[]) => {\n acc.push(val);\n return acc;\n },\n [] as string[],\n )\n .option(\"--proxy <url>\", \"HTTP/HTTPS proxy URL (e.g. http://host:port)\")\n .action(async (url: string, options: Record<string, unknown>) => {\n try {\n const configPath = existsSync(CONFIG_PATH) ? CONFIG_PATH : undefined;\n const config = resolveConfig(\n {\n dest: options.dest as string | undefined,\n tags: options.tag as string[] | undefined,\n timeout: options.timeout as number | undefined,\n waitUntil: options.waitUntil as WaitUntilOption | undefined,\n headed: options.headed as boolean | undefined,\n selector: options.selector as string | undefined,\n title: options.title as string | undefined,\n noSession: options.skipSession as boolean | undefined,\n dryRun: options.dryRun as boolean | undefined,\n blockImages: options.blockImages as boolean | undefined,\n blockFonts: options.blockFonts as boolean | undefined,\n blockMedia: options.blockMedia as boolean | undefined,\n raw: options.raw as boolean | undefined,\n proxy: options.proxy as string | undefined,\n fields: parseFields(options.field as string[]),\n },\n configPath,\n );\n\n if (config.raw && config.selector) {\n throw new Error(\"--raw and --selector cannot be used together.\");\n }\n\n // Validate dest directory exists\n if (!config.dryRun && !existsSync(config.dest)) {\n throw new Error(`Destination directory does not exist: ${config.dest}`);\n }\n\n const sessionsDir = getSessionDir();\n const fetchResult = await fetchPage(url, config, sessionsDir);\n\n let markdown: string;\n let metadata: Metadata;\n\n if (fetchResult.kind === \"pdf\") {\n if (config.selector) {\n throw new 
Error(\"--selector cannot be used with PDF URLs.\");\n }\n if (config.raw) {\n throw new Error(\"--raw cannot be used with PDF URLs.\");\n }\n const { convertPdfToMarkdown } = await import(\"./pdf-converter.js\");\n const pdfResult = await convertPdfToMarkdown(\n fetchResult.pdfBuffer,\n fetchResult.finalUrl,\n );\n markdown = pdfResult.markdown;\n metadata = pdfResult.metadata;\n } else if (config.selector) {\n // --selector mode: skip Readability, extract metadata from full page\n metadata = extractMetadata(fetchResult.fullHtml, fetchResult.finalUrl);\n markdown = convertToMarkdown(fetchResult.html);\n } else if (config.raw) {\n // --raw mode: skip Readability, convert full page HTML directly\n metadata = extractMetadata(fetchResult.fullHtml, fetchResult.finalUrl);\n markdown = convertToMarkdown(fetchResult.fullHtml);\n } else {\n const result = extract(fetchResult.html, fetchResult.finalUrl);\n metadata = result.metadata;\n markdown = convertToMarkdown(result.content);\n }\n\n if (config.title !== null) {\n metadata = { ...metadata, title: config.title };\n }\n\n if (config.dryRun) {\n const frontmatter = buildFrontmatter(metadata, config.tags, config.fields);\n process.stdout.write(`${frontmatter}\\n\\n${markdown}\\n`);\n } else {\n const filePath = writeMarkdownFile(\n config.dest,\n metadata,\n markdown,\n config.tags,\n config.fields,\n );\n console.error(`Saved: ${filePath}`);\n }\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n console.error(`Error: ${message}`);\n process.exit(1);\n }\n });\n\nprogram\n .command(\"login\")\n .description(\"Login to a site and save session\")\n .argument(\"<url>\", \"URL to login\")\n .option(\"--timeout <seconds>\", \"Login timeout in seconds\", parseInt)\n .option(\"--proxy <url>\", \"HTTP/HTTPS proxy URL (e.g. 
http://host:port)\")\n .action(async (url: string, options: Record<string, unknown>) => {\n const { launch } = await import(\"cloakbrowser\");\n const sessionsDir = getSessionDir();\n ensureSessionDir(sessionsDir);\n\n const timeoutSec = (options.timeout as number | undefined) ?? 300;\n const proxyUrl = resolveProxy(\n options.proxy as string | undefined,\n process.env.VAULT_FETCH_PROXY,\n );\n const browser = await launch({\n headless: false,\n ...(proxyUrl !== null && { proxy: proxyUrl }),\n });\n\n try {\n const context = await browser.newContext();\n const page = await context.newPage();\n\n await page.goto(url, { waitUntil: \"networkidle\", timeout: timeoutSec * 1000 });\n\n console.error(\"Browser opened. Log in manually, then press Enter here to save session.\");\n\n process.stdin.resume();\n await once(process.stdin, \"data\");\n process.stdin.pause();\n process.stdin.unref();\n\n const sessionPath = getSessionPath(url, sessionsDir);\n await context.storageState({ path: sessionPath });\n console.error(`Session saved: ${sessionPath}`);\n } catch (error) {\n const message = error instanceof Error ? 
error.message : String(error);\n console.error(`Error: ${message}`);\n process.exit(1);\n } finally {\n await browser.close();\n }\n });\n\nprogram.parse();\n","import { readFileSync } from \"node:fs\";\nimport { homedir } from \"node:os\";\nimport { resolve } from \"node:path\";\nimport yaml from \"js-yaml\";\nimport { RESERVED_FRONTMATTER_KEYS } from \"./types.js\";\nimport type { ResolvedConfig, WaitUntilOption } from \"./types.js\";\n\nexport { RESERVED_FRONTMATTER_KEYS };\n\nexport function parseFields(raw: string[]): Record<string, unknown> {\n const result: Record<string, unknown> = {};\n for (const entry of raw) {\n const eq = entry.indexOf(\"=\");\n if (eq === -1) {\n throw new Error(`Invalid --field \"${entry}\": expected key=value`);\n }\n const key = entry.slice(0, eq).trim();\n const rawValue = entry.slice(eq + 1);\n if (key.length === 0) {\n throw new Error(`Invalid --field \"${entry}\": key must not be empty`);\n }\n if (\n (RESERVED_FRONTMATTER_KEYS as readonly string[]).includes(key)\n ) {\n throw new Error(\n `--field key \"${key}\" is reserved. 
title/tags can be set via --title/--tag; ` +\n `the others (source, author, published, created, description) are extracted automatically.`,\n );\n }\n if (key === \"__proto__\" || key === \"constructor\" || key === \"prototype\") {\n throw new Error(`--field key \"${key}\" is not allowed`);\n }\n result[key] = yaml.load(rawValue);\n }\n return result;\n}\n\nconst DEFAULT_TIMEOUT = 30;\nconst DEFAULT_WAIT_UNTIL: WaitUntilOption = \"networkidle\";\nconst REQUIRED_TAG = \"clippings\";\n\ninterface FileConfig {\n dest?: string;\n tags?: string[];\n timeout?: number;\n waitUntil?: WaitUntilOption;\n}\n\ninterface CliOptions {\n dest?: string;\n tags?: string[];\n timeout?: number;\n waitUntil?: WaitUntilOption;\n headed?: boolean;\n selector?: string;\n title?: string;\n noSession?: boolean;\n dryRun?: boolean;\n blockImages?: boolean;\n blockFonts?: boolean;\n blockMedia?: boolean;\n raw?: boolean;\n proxy?: string;\n fields?: Record<string, unknown>;\n}\n\nfunction expandTilde(filePath: string): string {\n if (filePath.startsWith(\"~/\")) {\n return resolve(homedir(), filePath.slice(2));\n }\n return filePath;\n}\n\nconst SUPPORTED_PROXY_SCHEMES = [\"http:\", \"https:\"] as const;\n\nfunction sanitizeProxyUrl(proxy: string): string {\n try {\n const parsed = new URL(proxy);\n if (parsed.username || parsed.password) {\n parsed.username = \"***\";\n parsed.password = \"***\";\n }\n return parsed.toString();\n } catch {\n return \"<invalid URL>\";\n }\n}\n\nfunction validateProxyUrl(proxy: string): void {\n let parsed: URL;\n try {\n parsed = new URL(proxy);\n } catch {\n throw new Error(`Invalid proxy URL: ${sanitizeProxyUrl(proxy)}`);\n }\n const scheme = parsed.protocol;\n if (\n !SUPPORTED_PROXY_SCHEMES.includes(\n scheme as (typeof SUPPORTED_PROXY_SCHEMES)[number],\n )\n ) {\n throw new Error(\n `Unsupported proxy scheme: \"${scheme}\" in ${sanitizeProxyUrl(proxy)}. 
Only HTTP and HTTPS proxies are supported.`,\n );\n }\n}\n\nconst VALID_WAIT_UNTIL: readonly string[] = [\"load\", \"domcontentloaded\", \"networkidle\"];\n\nfunction validateWaitUntil(value: string): WaitUntilOption {\n if (!VALID_WAIT_UNTIL.includes(value)) {\n throw new Error(\n `Invalid waitUntil value: \"${value}\". Must be one of: ${VALID_WAIT_UNTIL.join(\", \")}`,\n );\n }\n return value as WaitUntilOption;\n}\n\nfunction loadConfigFile(configPath: string): FileConfig {\n const content = readFileSync(configPath, \"utf-8\");\n const parsed = yaml.load(content);\n if (parsed === null || typeof parsed !== \"object\") {\n throw new Error(`Invalid config file: ${configPath}`);\n }\n const config = parsed as Record<string, unknown>;\n\n if (config.timeout !== undefined && typeof config.timeout !== \"number\") {\n throw new Error(`Invalid timeout in config file: expected number, got ${typeof config.timeout}`);\n }\n if (config.dest !== undefined && typeof config.dest !== \"string\") {\n throw new Error(`Invalid dest in config file: expected string, got ${typeof config.dest}`);\n }\n if (config.waitUntil !== undefined) {\n if (typeof config.waitUntil !== \"string\") {\n throw new Error(`Invalid waitUntil in config file: expected string, got ${typeof config.waitUntil}`);\n }\n validateWaitUntil(config.waitUntil);\n }\n if (config.tags !== undefined) {\n if (!Array.isArray(config.tags) || !config.tags.every((t: unknown) => typeof t === \"string\")) {\n throw new Error(\"Invalid tags in config file: expected array of strings\");\n }\n }\n\n return config as FileConfig;\n}\n\nexport function resolveProxy(\n cliProxy: string | undefined,\n envProxy: string | undefined,\n): string | null {\n const proxy = cliProxy ?? envProxy ?? 
null;\n if (proxy !== null) {\n validateProxyUrl(proxy);\n }\n return proxy;\n}\n\nexport function resolveConfig(\n cliOptions: CliOptions,\n configPath: string | undefined,\n): ResolvedConfig {\n // Layer 1: Config file\n let fileConfig: FileConfig = {};\n if (configPath) {\n fileConfig = loadConfigFile(configPath);\n }\n\n // Layer 2: Environment variables\n const envDest = process.env.VAULT_FETCH_DEST;\n const envTimeout = process.env.VAULT_FETCH_TIMEOUT;\n const envProxy = process.env.VAULT_FETCH_PROXY;\n\n // Resolve each field: CLI > env > file > default\n const dest = cliOptions.dest ?? envDest ?? fileConfig.dest;\n if (dest === undefined) {\n throw new Error(\n \"dest is required. Set via --dest, VAULT_FETCH_DEST, or config file.\",\n );\n }\n\n let timeout: number;\n if (cliOptions.timeout !== undefined) {\n timeout = cliOptions.timeout;\n } else if (envTimeout !== undefined) {\n const parsed = Number(envTimeout);\n if (Number.isNaN(parsed)) {\n throw new Error(`Invalid VAULT_FETCH_TIMEOUT value: ${envTimeout}`);\n }\n timeout = parsed;\n } else {\n timeout = fileConfig.timeout ?? DEFAULT_TIMEOUT;\n }\n\n const rawWaitUntil = cliOptions.waitUntil ?? fileConfig.waitUntil ?? DEFAULT_WAIT_UNTIL;\n const waitUntil = validateWaitUntil(rawWaitUntil);\n\n // Merge tags: file tags + CLI tags + always clippings\n const allTags = [\n ...(fileConfig.tags ?? []),\n ...(cliOptions.tags ?? []),\n REQUIRED_TAG,\n ];\n const tags = [...new Set(allTags)];\n\n return {\n dest: expandTilde(dest),\n tags,\n timeout,\n waitUntil,\n headed: cliOptions.headed ?? false,\n selector: cliOptions.selector ?? null,\n title: cliOptions.title ?? null,\n noSession: cliOptions.noSession ?? false,\n dryRun: cliOptions.dryRun ?? false,\n blockImages: cliOptions.blockImages ?? true,\n blockFonts: cliOptions.blockFonts ?? true,\n blockMedia: cliOptions.blockMedia ?? true,\n raw: cliOptions.raw ?? false,\n proxy: resolveProxy(cliOptions.proxy, envProxy),\n fields: cliOptions.fields ?? 
{},\n };\n}\n","export const RESERVED_FRONTMATTER_KEYS = [\n \"title\",\n \"source\",\n \"author\",\n \"published\",\n \"created\",\n \"description\",\n \"tags\",\n] as const;\n\nexport interface Metadata {\n title: string;\n source: string;\n author: string[];\n published: string | null;\n created: string;\n description: string | null;\n}\n\nexport interface HtmlFetchResult {\n kind: \"html\";\n html: string;\n fullHtml: string;\n url: string;\n finalUrl: string;\n}\n\nexport interface PdfFetchResult {\n kind: \"pdf\";\n pdfBuffer: Buffer;\n url: string;\n finalUrl: string;\n}\n\nexport type FetchResult = HtmlFetchResult | PdfFetchResult;\n\nexport type WaitUntilOption = \"load\" | \"domcontentloaded\" | \"networkidle\";\n\nexport interface ResolvedConfig {\n dest: string;\n tags: string[];\n timeout: number;\n waitUntil: WaitUntilOption;\n headed: boolean;\n selector: string | null;\n title: string | null;\n noSession: boolean;\n dryRun: boolean;\n blockImages: boolean;\n blockFonts: boolean;\n blockMedia: boolean;\n raw: boolean;\n proxy: string | null;\n fields: Record<string, unknown>;\n}\n","import { launch } from \"cloakbrowser\";\nimport type { BrowserContext } from \"playwright-core\";\nimport type { FetchResult, ResolvedConfig } from \"./types.js\";\nimport { getSessionPath, sessionExists } from \"./session.js\";\n\ninterface BlockingOptions {\n blockImages: boolean;\n blockFonts: boolean;\n blockMedia: boolean;\n}\n\nexport function isPdfContentType(contentType: string): boolean {\n return contentType.toLowerCase().includes(\"application/pdf\");\n}\n\nexport function buildBlockedResourceTypes(options: BlockingOptions): Set<string> {\n const blocked = new Set<string>();\n if (options.blockImages) blocked.add(\"image\");\n if (options.blockFonts) blocked.add(\"font\");\n if (options.blockMedia) blocked.add(\"media\");\n return blocked;\n}\n\nconst PDF_MAGIC_BYTES = \"%PDF\";\n\nexport function validatePdfBuffer(pdfBuffer: Buffer, sourceUrl: string): void 
{\n if (pdfBuffer.length === 0) {\n throw new Error(`Empty PDF response received from ${sourceUrl}`);\n }\n const header = pdfBuffer.subarray(0, PDF_MAGIC_BYTES.length).toString(\"ascii\");\n if (!header.startsWith(PDF_MAGIC_BYTES)) {\n throw new Error(\n `Response Content-Type is application/pdf but body is not valid PDF data from ${sourceUrl}`,\n );\n }\n}\n\nasync function downloadPdf(\n context: BrowserContext,\n url: string,\n timeoutMs: number,\n): Promise<{ pdfBuffer: Buffer; finalUrl: string }> {\n const apiResponse = await context.request.get(url, { timeout: timeoutMs });\n const status = apiResponse.status();\n if (status >= 400) {\n throw new Error(`HTTP ${status} received when downloading PDF from ${url}`);\n }\n const pdfBuffer = Buffer.from(await apiResponse.body());\n const finalUrl = apiResponse.url();\n validatePdfBuffer(pdfBuffer, finalUrl);\n return { pdfBuffer, finalUrl };\n}\n\nexport async function fetchPage(\n url: string,\n config: ResolvedConfig,\n sessionsDir: string,\n): Promise<FetchResult> {\n const browser = await launch({\n headless: !config.headed,\n ...(config.proxy !== null && { proxy: config.proxy }),\n });\n\n try {\n const contextOptions: Parameters<typeof browser.newContext>[0] = {};\n\n // Load session if available and not disabled\n if (!config.noSession && sessionExists(url, sessionsDir)) {\n const sessionPath = getSessionPath(url, sessionsDir);\n contextOptions.storageState = sessionPath;\n }\n\n const context: BrowserContext = await browser.newContext(contextOptions);\n const page = await context.newPage();\n\n // Block specified resource types for faster loading\n const blockedTypes = buildBlockedResourceTypes(config);\n if (blockedTypes.size > 0) {\n await page.route(\"**/*\", async (route) => {\n if (blockedTypes.has(route.request().resourceType())) {\n await route.abort();\n } else {\n await route.continue();\n }\n });\n }\n\n const timeoutMs = config.timeout * 1000;\n\n // page.goto throws \"Download is starting\" 
when the server returns\n // Content-Disposition: attachment (common for PDF downloads).\n // Catch this and download the PDF via the context's HTTP client.\n let response;\n try {\n response = await page.goto(url, {\n waitUntil: config.waitUntil,\n timeout: timeoutMs,\n });\n } catch (error) {\n if (\n error instanceof Error &&\n error.message.includes(\"Download is starting\")\n ) {\n const result = await downloadPdf(context, url, timeoutMs);\n await context.close();\n return { kind: \"pdf\", pdfBuffer: result.pdfBuffer, url, finalUrl: result.finalUrl };\n }\n throw error;\n }\n\n if (!response) {\n throw new Error(`No response received from ${url}`);\n }\n\n const status = response.status();\n if (status >= 400) {\n throw new Error(`HTTP ${status} received from ${response.url()}`);\n }\n\n const finalUrl = response.url();\n const contentType = response.headers()[\"content-type\"] ?? \"\";\n\n // Inline PDF (Content-Disposition: inline or absent).\n // Try response.body() first; fall back to context.request if the\n // browser returned its PDF viewer HTML instead of the actual bytes.\n if (isPdfContentType(contentType)) {\n const body = await response.body();\n try {\n validatePdfBuffer(body, finalUrl);\n await context.close();\n return { kind: \"pdf\", pdfBuffer: body, url, finalUrl };\n } catch {\n // response.body() returned PDF viewer HTML; re-download via API\n const result = await downloadPdf(context, finalUrl, timeoutMs);\n await context.close();\n return { kind: \"pdf\", pdfBuffer: result.pdfBuffer, url, finalUrl: result.finalUrl };\n }\n }\n\n const fullHtml = await page.content();\n let html: string;\n\n if (config.selector) {\n const element = await page.$(config.selector);\n if (!element) {\n throw new Error(`Selector not found: ${config.selector}`);\n }\n html = await element.innerHTML();\n } else {\n html = fullHtml;\n }\n\n await context.close();\n\n return { kind: \"html\", html, fullHtml, url, finalUrl };\n } finally {\n await browser.close();\n 
}\n}\n","import { existsSync, mkdirSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { homedir } from \"node:os\";\n\nconst CONFIG_DIR = join(homedir(), \".config\", \"vault-fetch\");\nconst SESSIONS_DIR = join(CONFIG_DIR, \"sessions\");\n\nexport function getSessionDir(): string {\n return SESSIONS_DIR;\n}\n\nfunction extractDomain(url: string): string {\n const parsed = new URL(url);\n return parsed.hostname ?? \"\";\n}\n\nexport function getSessionPath(url: string, sessionsDir: string): string {\n const domain = extractDomain(url);\n return join(sessionsDir, `${domain}.json`);\n}\n\nexport function sessionExists(url: string, sessionsDir: string): boolean {\n const sessionPath = getSessionPath(url, sessionsDir);\n return existsSync(sessionPath);\n}\n\nexport function ensureSessionDir(sessionsDir: string): void {\n if (!existsSync(sessionsDir)) {\n mkdirSync(sessionsDir, { recursive: true });\n }\n}\n","import { Readability } from \"@mozilla/readability\";\nimport { JSDOM } from \"jsdom\";\nimport type { Metadata } from \"./types.js\";\n\nfunction getMetaContent(doc: Document, selector: string): string | null {\n const el = doc.querySelector(selector);\n return el?.getAttribute(\"content\") ?? 
null;\n}\n\nfunction formatAuthor(raw: string): string {\n return `[[${raw.trim()}]]`;\n}\n\nfunction extractPublishedDate(doc: Document): string | null {\n const published =\n getMetaContent(doc, 'meta[property=\"article:published_time\"]') ??\n getMetaContent(doc, 'meta[name=\"datePublished\"]');\n\n if (!published) {\n const jsonLd = doc.querySelector('script[type=\"application/ld+json\"]');\n if (jsonLd?.textContent) {\n try {\n const data = JSON.parse(jsonLd.textContent) as Record<string, unknown>;\n if (typeof data.datePublished === \"string\") {\n return data.datePublished.split(\"T\")[0];\n }\n } catch {\n // JSON-LD parse failed\n }\n }\n return null;\n }\n\n return published.split(\"T\")[0];\n}\n\nfunction extractAuthors(\n doc: Document,\n readabilityByline: string | null,\n): string[] {\n const articleAuthors = doc.querySelectorAll('meta[property=\"article:author\"]');\n if (articleAuthors.length > 0) {\n return Array.from(articleAuthors)\n .map((el) => el.getAttribute(\"content\"))\n .filter((v): v is string => v !== null)\n .map(formatAuthor);\n }\n\n const ogAuthor = getMetaContent(doc, 'meta[property=\"og:author\"]');\n if (ogAuthor) {\n return [formatAuthor(ogAuthor)];\n }\n\n if (readabilityByline) {\n return [formatAuthor(readabilityByline)];\n }\n\n return [];\n}\n\nexport interface ExtractResult {\n metadata: Metadata;\n content: string;\n}\n\ninterface ReadabilityArticle {\n title: string;\n byline: string | null;\n excerpt: string;\n content: string;\n}\n\nfunction buildMetadata(\n doc: Document,\n article: ReadabilityArticle | null,\n finalUrl: string,\n): Metadata {\n const title = article?.title ?? doc.title;\n const authors = extractAuthors(doc, article?.byline ?? null);\n const published = extractPublishedDate(doc);\n\n const description =\n getMetaContent(doc, 'meta[property=\"og:description\"]') ??\n getMetaContent(doc, 'meta[name=\"description\"]') ??\n (article?.excerpt ?? 
null);\n\n const today = new Date().toISOString().split(\"T\")[0];\n\n return {\n title,\n source: finalUrl,\n author: authors,\n published,\n created: today,\n description,\n };\n}\n\nfunction parseWithReadability(html: string, url: string): ReadabilityArticle | null {\n const dom = new JSDOM(html, { url });\n const reader = new Readability(dom.window.document);\n return reader.parse() as ReadabilityArticle | null;\n}\n\nexport function extract(html: string, finalUrl: string): ExtractResult {\n const metaDom = new JSDOM(html, { url: finalUrl });\n const doc = metaDom.window.document;\n\n const article = parseWithReadability(html, finalUrl);\n\n if (!article) {\n throw new Error(\n \"Readability failed to extract content from the page. \" +\n \"Try --raw to convert the full page, or --selector <css> to target specific content.\",\n );\n }\n\n if (!article.content) {\n throw new Error(\"Readability returned empty content for the page\");\n }\n\n return {\n metadata: buildMetadata(doc, article, finalUrl),\n content: article.content,\n };\n}\n\nexport function extractMetadata(html: string, finalUrl: string): Metadata {\n const metaDom = new JSDOM(html, { url: finalUrl });\n const doc = metaDom.window.document;\n\n const article = parseWithReadability(html, finalUrl);\n\n return buildMetadata(doc, article, finalUrl);\n}\n","import TurndownService from \"turndown\";\n\nexport function convertToMarkdown(html: string): string {\n const turndown = new TurndownService({\n headingStyle: \"atx\",\n codeBlockStyle: \"fenced\",\n bulletListMarker: \"-\",\n });\n\n return turndown.turndown(html);\n}\n","import { writeFileSync, readFileSync, existsSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport yaml from \"js-yaml\";\nimport type { Metadata } from \"./types.js\";\n\nconst UNSAFE_CHARS = /[/\\\\:*?\"<>|]/g;\nconst CONTROL_CHARS = /[\\x00-\\x1f\\x7f]/g;\nconst MAX_FILENAME_LENGTH = 200;\n\nexport function sanitizeFilename(title: string): string {\n const 
sanitized = title\n .replace(CONTROL_CHARS, \"\")\n .replace(UNSAFE_CHARS, \"\")\n .replace(/\\s+/g, \" \")\n .trim();\n const base = sanitized.slice(0, MAX_FILENAME_LENGTH) || \"Untitled\";\n return `${base}.md`;\n}\n\nexport function buildFrontmatter(\n metadata: Metadata,\n tags: string[],\n fields: Record<string, unknown> = {},\n): string {\n // Fixed-schema keys: these MUST stay in sync with RESERVED_FRONTMATTER_KEYS in src/types.ts\n // (title, source, author, published, created, description, tags)\n const data: Record<string, unknown> = {\n title: metadata.title,\n source: metadata.source,\n };\n\n if (metadata.author.length > 0) {\n data.author = metadata.author;\n }\n\n if (metadata.published) {\n data.published = metadata.published;\n }\n\n data.created = metadata.created;\n\n if (metadata.description) {\n data.description = metadata.description;\n }\n\n data.tags = tags;\n\n for (const [key, value] of Object.entries(fields)) {\n data[key] = value;\n }\n\n const yamlStr = yaml.dump(data, {\n quotingType: '\"',\n forceQuotes: false,\n lineWidth: -1,\n sortKeys: false,\n });\n\n return `---\\n${yamlStr}---`;\n}\n\nfunction readSource(filePath: string): string | null {\n if (!existsSync(filePath)) {\n return null;\n }\n const content = readFileSync(filePath, \"utf-8\");\n if (!content.startsWith(\"---\\n\")) {\n return null;\n }\n const end = content.indexOf(\"\\n---\", 4);\n if (end === -1) {\n return null;\n }\n const frontmatter = content.slice(4, end);\n let parsed: unknown;\n try {\n parsed = yaml.load(frontmatter);\n } catch {\n return null;\n }\n if (parsed === null || typeof parsed !== \"object\") {\n return null;\n }\n const source = (parsed as Record<string, unknown>).source;\n return typeof source === \"string\" ? source : null;\n}\n\nfunction resolveTargetPath(\n dest: string,\n baseFilename: string,\n source: string,\n): string {\n const ext = \".md\";\n const base = baseFilename.endsWith(ext)\n ? 
baseFilename.slice(0, -ext.length)\n : baseFilename;\n\n let candidate = join(dest, `${base}${ext}`);\n if (!existsSync(candidate)) {\n return candidate;\n }\n if (readSource(candidate) === source) {\n return candidate; // 同一ソース: 上書き\n }\n let n = 2;\n for (;;) {\n candidate = join(dest, `${base}-${n}${ext}`);\n if (!existsSync(candidate) || readSource(candidate) === source) {\n return candidate;\n }\n n += 1;\n }\n}\n\nexport function writeMarkdownFile(\n dest: string,\n metadata: Metadata,\n markdownContent: string,\n tags: string[],\n fields: Record<string, unknown> = {},\n): string {\n const filename = sanitizeFilename(metadata.title);\n const filePath = resolveTargetPath(dest, filename, metadata.source);\n const frontmatter = buildFrontmatter(metadata, tags, fields);\n const fullContent = `${frontmatter}\\n\\n${markdownContent}\\n`;\n\n writeFileSync(filePath, fullContent, \"utf-8\");\n\n return filePath;\n}\n"],"mappings":";;;AAAA,SAAS,eAAe;AACxB,SAAS,YAAY;AACrB,SAAS,cAAAA,mBAAkB;AAC3B,SAAS,WAAAC,gBAAe;AACxB,SAAS,QAAAC,aAAY;;;ACJrB,SAAS,oBAAoB;AAC7B,SAAS,eAAe;AACxB,SAAS,eAAe;AACxB,OAAO,UAAU;;;ACHV,IAAM,4BAA4B;AAAA,EACvC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;;;ADCO,SAAS,YAAY,KAAwC;AAClE,QAAM,SAAkC,CAAC;AACzC,aAAW,SAAS,KAAK;AACvB,UAAM,KAAK,MAAM,QAAQ,GAAG;AAC5B,QAAI,OAAO,IAAI;AACb,YAAM,IAAI,MAAM,oBAAoB,KAAK,uBAAuB;AAAA,IAClE;AACA,UAAM,MAAM,MAAM,MAAM,GAAG,EAAE,EAAE,KAAK;AACpC,UAAM,WAAW,MAAM,MAAM,KAAK,CAAC;AACnC,QAAI,IAAI,WAAW,GAAG;AACpB,YAAM,IAAI,MAAM,oBAAoB,KAAK,0BAA0B;AAAA,IACrE;AACA,QACG,0BAAgD,SAAS,GAAG,GAC7D;AACA,YAAM,IAAI;AAAA,QACR,gBAAgB,GAAG;AAAA,MAErB;AAAA,IACF;AACA,QAAI,QAAQ,eAAe,QAAQ,iBAAiB,QAAQ,aAAa;AACvE,YAAM,IAAI,MAAM,gBAAgB,GAAG,kBAAkB;AAAA,IACvD;AACA,WAAO,GAAG,IAAI,KAAK,KAAK,QAAQ;AAAA,EAClC;AACA,SAAO;AACT;AAEA,IAAM,kBAAkB;AACxB,IAAM,qBAAsC;AAC5C,IAAM,eAAe;AA2BrB,SAAS,YAAY,UAA0B;AAC7C,MAAI,SAAS,WAAW,IAAI,GAAG;AAC7B,WAAO,QAAQ,QAAQ,GAAG,SAAS,MAAM,CAAC,CAAC;AAAA,EAC7C;AACA,SAAO;AACT;AAEA,IAAM,0BAA0B,CAAC,SAAS,QAAQ;AAElD,SAAS,
iBAAiB,OAAuB;AAC/C,MAAI;AACF,UAAM,SAAS,IAAI,IAAI,KAAK;AAC5B,QAAI,OAAO,YAAY,OAAO,UAAU;AACtC,aAAO,WAAW;AAClB,aAAO,WAAW;AAAA,IACpB;AACA,WAAO,OAAO,SAAS;AAAA,EACzB,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,SAAS,iBAAiB,OAAqB;AAC7C,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,KAAK;AAAA,EACxB,QAAQ;AACN,UAAM,IAAI,MAAM,sBAAsB,iBAAiB,KAAK,CAAC,EAAE;AAAA,EACjE;AACA,QAAM,SAAS,OAAO;AACtB,MACE,CAAC,wBAAwB;AAAA,IACvB;AAAA,EACF,GACA;AACA,UAAM,IAAI;AAAA,MACR,8BAA8B,MAAM,QAAQ,iBAAiB,KAAK,CAAC;AAAA,IACrE;AAAA,EACF;AACF;AAEA,IAAM,mBAAsC,CAAC,QAAQ,oBAAoB,aAAa;AAEtF,SAAS,kBAAkB,OAAgC;AACzD,MAAI,CAAC,iBAAiB,SAAS,KAAK,GAAG;AACrC,UAAM,IAAI;AAAA,MACR,6BAA6B,KAAK,sBAAsB,iBAAiB,KAAK,IAAI,CAAC;AAAA,IACrF;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,eAAe,YAAgC;AACtD,QAAM,UAAU,aAAa,YAAY,OAAO;AAChD,QAAM,SAAS,KAAK,KAAK,OAAO;AAChC,MAAI,WAAW,QAAQ,OAAO,WAAW,UAAU;AACjD,UAAM,IAAI,MAAM,wBAAwB,UAAU,EAAE;AAAA,EACtD;AACA,QAAM,SAAS;AAEf,MAAI,OAAO,YAAY,UAAa,OAAO,OAAO,YAAY,UAAU;AACtE,UAAM,IAAI,MAAM,wDAAwD,OAAO,OAAO,OAAO,EAAE;AAAA,EACjG;AACA,MAAI,OAAO,SAAS,UAAa,OAAO,OAAO,SAAS,UAAU;AAChE,UAAM,IAAI,MAAM,qDAAqD,OAAO,OAAO,IAAI,EAAE;AAAA,EAC3F;AACA,MAAI,OAAO,cAAc,QAAW;AAClC,QAAI,OAAO,OAAO,cAAc,UAAU;AACxC,YAAM,IAAI,MAAM,0DAA0D,OAAO,OAAO,SAAS,EAAE;AAAA,IACrG;AACA,sBAAkB,OAAO,SAAS;AAAA,EACpC;AACA,MAAI,OAAO,SAAS,QAAW;AAC7B,QAAI,CAAC,MAAM,QAAQ,OAAO,IAAI,KAAK,CAAC,OAAO,KAAK,MAAM,CAAC,MAAe,OAAO,MAAM,QAAQ,GAAG;AAC5F,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAAA,EACF;AAEA,SAAO;AACT;AAEO,SAAS,aACd,UACA,UACe;AACf,QAAM,QAAQ,YAAY,YAAY;AACtC,MAAI,UAAU,MAAM;AAClB,qBAAiB,KAAK;AAAA,EACxB;AACA,SAAO;AACT;AAEO,SAAS,cACd,YACA,YACgB;AAEhB,MAAI,aAAyB,CAAC;AAC9B,MAAI,YAAY;AACd,iBAAa,eAAe,UAAU;AAAA,EACxC;AAGA,QAAM,UAAU,QAAQ,IAAI;AAC5B,QAAM,aAAa,QAAQ,IAAI;AAC/B,QAAM,WAAW,QAAQ,IAAI;AAG7B,QAAM,OAAO,WAAW,QAAQ,WAAW,WAAW;AACtD,MAAI,SAAS,QAAW;AACtB,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,MAAI;AACJ,MAAI,WAAW,YAAY,QAAW;AACpC,cAAU,WAAW;AAAA,EACvB,WAAW,eAAe,QAAW;AACnC,UAAM,SAAS,OAAO,UAAU;AAChC,QAAI,OAAO,MAAM,MAAM,GAAG;AACxB,YAAM,IAAI,MAAM,sCAAsC,UAAU,EAAE;AAAA,IACpE;AACA,cAAU;AAAA,
EACZ,OAAO;AACL,cAAU,WAAW,WAAW;AAAA,EAClC;AAEA,QAAM,eAAe,WAAW,aAAa,WAAW,aAAa;AACrE,QAAM,YAAY,kBAAkB,YAAY;AAGhD,QAAM,UAAU;AAAA,IACd,GAAI,WAAW,QAAQ,CAAC;AAAA,IACxB,GAAI,WAAW,QAAQ,CAAC;AAAA,IACxB;AAAA,EACF;AACA,QAAM,OAAO,CAAC,GAAG,IAAI,IAAI,OAAO,CAAC;AAEjC,SAAO;AAAA,IACL,MAAM,YAAY,IAAI;AAAA,IACtB;AAAA,IACA;AAAA,IACA;AAAA,IACA,QAAQ,WAAW,UAAU;AAAA,IAC7B,UAAU,WAAW,YAAY;AAAA,IACjC,OAAO,WAAW,SAAS;AAAA,IAC3B,WAAW,WAAW,aAAa;AAAA,IACnC,QAAQ,WAAW,UAAU;AAAA,IAC7B,aAAa,WAAW,eAAe;AAAA,IACvC,YAAY,WAAW,cAAc;AAAA,IACrC,YAAY,WAAW,cAAc;AAAA,IACrC,KAAK,WAAW,OAAO;AAAA,IACvB,OAAO,aAAa,WAAW,OAAO,QAAQ;AAAA,IAC9C,QAAQ,WAAW,UAAU,CAAC;AAAA,EAChC;AACF;;;AE9NA,SAAS,cAAc;;;ACAvB,SAAS,YAAY,iBAAiB;AACtC,SAAS,YAAY;AACrB,SAAS,WAAAC,gBAAe;AAExB,IAAM,aAAa,KAAKA,SAAQ,GAAG,WAAW,aAAa;AAC3D,IAAM,eAAe,KAAK,YAAY,UAAU;AAEzC,SAAS,gBAAwB;AACtC,SAAO;AACT;AAEA,SAAS,cAAc,KAAqB;AAC1C,QAAM,SAAS,IAAI,IAAI,GAAG;AAC1B,SAAO,OAAO,YAAY;AAC5B;AAEO,SAAS,eAAe,KAAa,aAA6B;AACvE,QAAM,SAAS,cAAc,GAAG;AAChC,SAAO,KAAK,aAAa,GAAG,MAAM,OAAO;AAC3C;AAEO,SAAS,cAAc,KAAa,aAA8B;AACvE,QAAM,cAAc,eAAe,KAAK,WAAW;AACnD,SAAO,WAAW,WAAW;AAC/B;AAEO,SAAS,iBAAiB,aAA2B;AAC1D,MAAI,CAAC,WAAW,WAAW,GAAG;AAC5B,cAAU,aAAa,EAAE,WAAW,KAAK,CAAC;AAAA,EAC5C;AACF;;;ADnBO,SAAS,iBAAiB,aAA8B;AAC7D,SAAO,YAAY,YAAY,EAAE,SAAS,iBAAiB;AAC7D;AAEO,SAAS,0BAA0B,SAAuC;AAC/E,QAAM,UAAU,oBAAI,IAAY;AAChC,MAAI,QAAQ,YAAa,SAAQ,IAAI,OAAO;AAC5C,MAAI,QAAQ,WAAY,SAAQ,IAAI,MAAM;AAC1C,MAAI,QAAQ,WAAY,SAAQ,IAAI,OAAO;AAC3C,SAAO;AACT;AAEA,IAAM,kBAAkB;AAEjB,SAAS,kBAAkB,WAAmB,WAAyB;AAC5E,MAAI,UAAU,WAAW,GAAG;AAC1B,UAAM,IAAI,MAAM,oCAAoC,SAAS,EAAE;AAAA,EACjE;AACA,QAAM,SAAS,UAAU,SAAS,GAAG,gBAAgB,MAAM,EAAE,SAAS,OAAO;AAC7E,MAAI,CAAC,OAAO,WAAW,eAAe,GAAG;AACvC,UAAM,IAAI;AAAA,MACR,gFAAgF,SAAS;AAAA,IAC3F;AAAA,EACF;AACF;AAEA,eAAe,YACb,SACA,KACA,WACkD;AAClD,QAAM,cAAc,MAAM,QAAQ,QAAQ,IAAI,KAAK,EAAE,SAAS,UAAU,CAAC;AACzE,QAAM,SAAS,YAAY,OAAO;AAClC,MAAI,UAAU,KAAK;AACjB,UAAM,IAAI,MAAM,QAAQ,MAAM,uCAAuC,GAAG,EAAE;AAAA,EAC5E;AACA,QAAM,YAAY,OAAO,KAAK,MAAM,YAAY,KAAK,CAAC;AACtD,QAAM,WAAW,YAAY,IAAI;AACjC,oBAAkB,WAAW,QAAQ;AAC
rC,SAAO,EAAE,WAAW,SAAS;AAC/B;AAEA,eAAsB,UACpB,KACA,QACA,aACsB;AACtB,QAAM,UAAU,MAAM,OAAO;AAAA,IAC3B,UAAU,CAAC,OAAO;AAAA,IAClB,GAAI,OAAO,UAAU,QAAQ,EAAE,OAAO,OAAO,MAAM;AAAA,EACrD,CAAC;AAED,MAAI;AACF,UAAM,iBAA2D,CAAC;AAGlE,QAAI,CAAC,OAAO,aAAa,cAAc,KAAK,WAAW,GAAG;AACxD,YAAM,cAAc,eAAe,KAAK,WAAW;AACnD,qBAAe,eAAe;AAAA,IAChC;AAEA,UAAM,UAA0B,MAAM,QAAQ,WAAW,cAAc;AACvE,UAAM,OAAO,MAAM,QAAQ,QAAQ;AAGnC,UAAM,eAAe,0BAA0B,MAAM;AACrD,QAAI,aAAa,OAAO,GAAG;AACzB,YAAM,KAAK,MAAM,QAAQ,OAAO,UAAU;AACxC,YAAI,aAAa,IAAI,MAAM,QAAQ,EAAE,aAAa,CAAC,GAAG;AACpD,gBAAM,MAAM,MAAM;AAAA,QACpB,OAAO;AACL,gBAAM,MAAM,SAAS;AAAA,QACvB;AAAA,MACF,CAAC;AAAA,IACH;AAEA,UAAM,YAAY,OAAO,UAAU;AAKnC,QAAI;AACJ,QAAI;AACF,iBAAW,MAAM,KAAK,KAAK,KAAK;AAAA,QAC9B,WAAW,OAAO;AAAA,QAClB,SAAS;AAAA,MACX,CAAC;AAAA,IACH,SAAS,OAAO;AACd,UACE,iBAAiB,SACjB,MAAM,QAAQ,SAAS,sBAAsB,GAC7C;AACA,cAAM,SAAS,MAAM,YAAY,SAAS,KAAK,SAAS;AACxD,cAAM,QAAQ,MAAM;AACpB,eAAO,EAAE,MAAM,OAAO,WAAW,OAAO,WAAW,KAAK,UAAU,OAAO,SAAS;AAAA,MACpF;AACA,YAAM;AAAA,IACR;AAEA,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,6BAA6B,GAAG,EAAE;AAAA,IACpD;AAEA,UAAM,SAAS,SAAS,OAAO;AAC/B,QAAI,UAAU,KAAK;AACjB,YAAM,IAAI,MAAM,QAAQ,MAAM,kBAAkB,SAAS,IAAI,CAAC,EAAE;AAAA,IAClE;AAEA,UAAM,WAAW,SAAS,IAAI;AAC9B,UAAM,cAAc,SAAS,QAAQ,EAAE,cAAc,KAAK;AAK1D,QAAI,iBAAiB,WAAW,GAAG;AACjC,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,UAAI;AACF,0BAAkB,MAAM,QAAQ;AAChC,cAAM,QAAQ,MAAM;AACpB,eAAO,EAAE,MAAM,OAAO,WAAW,MAAM,KAAK,SAAS;AAAA,MACvD,QAAQ;AAEN,cAAM,SAAS,MAAM,YAAY,SAAS,UAAU,SAAS;AAC7D,cAAM,QAAQ,MAAM;AACpB,eAAO,EAAE,MAAM,OAAO,WAAW,OAAO,WAAW,KAAK,UAAU,OAAO,SAAS;AAAA,MACpF;AAAA,IACF;AAEA,UAAM,WAAW,MAAM,KAAK,QAAQ;AACpC,QAAI;AAEJ,QAAI,OAAO,UAAU;AACnB,YAAM,UAAU,MAAM,KAAK,EAAE,OAAO,QAAQ;AAC5C,UAAI,CAAC,SAAS;AACZ,cAAM,IAAI,MAAM,uBAAuB,OAAO,QAAQ,EAAE;AAAA,MAC1D;AACA,aAAO,MAAM,QAAQ,UAAU;AAAA,IACjC,OAAO;AACL,aAAO;AAAA,IACT;AAEA,UAAM,QAAQ,MAAM;AAEpB,WAAO,EAAE,MAAM,QAAQ,MAAM,UAAU,KAAK,SAAS;AAAA,EACvD,UAAE;AACA,UAAM,QAAQ,MAAM;AAAA,EACtB;AACF;;;AE9JA,SAAS,mBAAmB;AAC5B,SAAS,aAAa;AAGtB,SAAS,eAAe,KAAe,UAAiC;AACtE,QAAM,KAAK,IAAI,cAAc,QAAQ;AACrC,SAAO
,IAAI,aAAa,SAAS,KAAK;AACxC;AAEA,SAAS,aAAa,KAAqB;AACzC,SAAO,KAAK,IAAI,KAAK,CAAC;AACxB;AAEA,SAAS,qBAAqB,KAA8B;AAC1D,QAAM,YACJ,eAAe,KAAK,yCAAyC,KAC7D,eAAe,KAAK,4BAA4B;AAElD,MAAI,CAAC,WAAW;AACd,UAAM,SAAS,IAAI,cAAc,oCAAoC;AACrE,QAAI,QAAQ,aAAa;AACvB,UAAI;AACF,cAAM,OAAO,KAAK,MAAM,OAAO,WAAW;AAC1C,YAAI,OAAO,KAAK,kBAAkB,UAAU;AAC1C,iBAAO,KAAK,cAAc,MAAM,GAAG,EAAE,CAAC;AAAA,QACxC;AAAA,MACF,QAAQ;AAAA,MAER;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAEA,SAAO,UAAU,MAAM,GAAG,EAAE,CAAC;AAC/B;AAEA,SAAS,eACP,KACA,mBACU;AACV,QAAM,iBAAiB,IAAI,iBAAiB,iCAAiC;AAC7E,MAAI,eAAe,SAAS,GAAG;AAC7B,WAAO,MAAM,KAAK,cAAc,EAC7B,IAAI,CAAC,OAAO,GAAG,aAAa,SAAS,CAAC,EACtC,OAAO,CAAC,MAAmB,MAAM,IAAI,EACrC,IAAI,YAAY;AAAA,EACrB;AAEA,QAAM,WAAW,eAAe,KAAK,4BAA4B;AACjE,MAAI,UAAU;AACZ,WAAO,CAAC,aAAa,QAAQ,CAAC;AAAA,EAChC;AAEA,MAAI,mBAAmB;AACrB,WAAO,CAAC,aAAa,iBAAiB,CAAC;AAAA,EACzC;AAEA,SAAO,CAAC;AACV;AAcA,SAAS,cACP,KACA,SACA,UACU;AACV,QAAM,QAAQ,SAAS,SAAS,IAAI;AACpC,QAAM,UAAU,eAAe,KAAK,SAAS,UAAU,IAAI;AAC3D,QAAM,YAAY,qBAAqB,GAAG;AAE1C,QAAM,cACJ,eAAe,KAAK,iCAAiC,KACrD,eAAe,KAAK,0BAA0B,MAC7C,SAAS,WAAW;AAEvB,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE,CAAC;AAEnD,SAAO;AAAA,IACL;AAAA,IACA,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR;AAAA,IACA,SAAS;AAAA,IACT;AAAA,EACF;AACF;AAEA,SAAS,qBAAqB,MAAc,KAAwC;AAClF,QAAM,MAAM,IAAI,MAAM,MAAM,EAAE,IAAI,CAAC;AACnC,QAAM,SAAS,IAAI,YAAY,IAAI,OAAO,QAAQ;AAClD,SAAO,OAAO,MAAM;AACtB;AAEO,SAAS,QAAQ,MAAc,UAAiC;AACrE,QAAM,UAAU,IAAI,MAAM,MAAM,EAAE,KAAK,SAAS,CAAC;AACjD,QAAM,MAAM,QAAQ,OAAO;AAE3B,QAAM,UAAU,qBAAqB,MAAM,QAAQ;AAEnD,MAAI,CAAC,SAAS;AACZ,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AAEA,MAAI,CAAC,QAAQ,SAAS;AACpB,UAAM,IAAI,MAAM,iDAAiD;AAAA,EACnE;AAEA,SAAO;AAAA,IACL,UAAU,cAAc,KAAK,SAAS,QAAQ;AAAA,IAC9C,SAAS,QAAQ;AAAA,EACnB;AACF;AAEO,SAAS,gBAAgB,MAAc,UAA4B;AACxE,QAAM,UAAU,IAAI,MAAM,MAAM,EAAE,KAAK,SAAS,CAAC;AACjD,QAAM,MAAM,QAAQ,OAAO;AAE3B,QAAM,UAAU,qBAAqB,MAAM,QAAQ;AAEnD,SAAO,cAAc,KAAK,SAAS,QAAQ;AAC7C;;;ACtIA,OAAO,qBAAqB;AAErB,SAAS,kBAAkB,MAAsB;AACtD,QAAM,WAAW,IAAI,gBAAgB;AAAA,IACnC,cAAc;AAAA,IACd,gBAAgB;AAAA,IAChB,kBAAk
B;AAAA,EACpB,CAAC;AAED,SAAO,SAAS,SAAS,IAAI;AAC/B;;;ACVA,SAAS,eAAe,gBAAAC,eAAc,cAAAC,mBAAkB;AACxD,SAAS,QAAAC,aAAY;AACrB,OAAOC,WAAU;AAGjB,IAAM,eAAe;AACrB,IAAM,gBAAgB;AACtB,IAAM,sBAAsB;AAErB,SAAS,iBAAiB,OAAuB;AACtD,QAAM,YAAY,MACf,QAAQ,eAAe,EAAE,EACzB,QAAQ,cAAc,EAAE,EACxB,QAAQ,QAAQ,GAAG,EACnB,KAAK;AACR,QAAM,OAAO,UAAU,MAAM,GAAG,mBAAmB,KAAK;AACxD,SAAO,GAAG,IAAI;AAChB;AAEO,SAAS,iBACd,UACA,MACA,SAAkC,CAAC,GAC3B;AAGR,QAAM,OAAgC;AAAA,IACpC,OAAO,SAAS;AAAA,IAChB,QAAQ,SAAS;AAAA,EACnB;AAEA,MAAI,SAAS,OAAO,SAAS,GAAG;AAC9B,SAAK,SAAS,SAAS;AAAA,EACzB;AAEA,MAAI,SAAS,WAAW;AACtB,SAAK,YAAY,SAAS;AAAA,EAC5B;AAEA,OAAK,UAAU,SAAS;AAExB,MAAI,SAAS,aAAa;AACxB,SAAK,cAAc,SAAS;AAAA,EAC9B;AAEA,OAAK,OAAO;AAEZ,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,MAAM,GAAG;AACjD,SAAK,GAAG,IAAI;AAAA,EACd;AAEA,QAAM,UAAUA,MAAK,KAAK,MAAM;AAAA,IAC9B,aAAa;AAAA,IACb,aAAa;AAAA,IACb,WAAW;AAAA,IACX,UAAU;AAAA,EACZ,CAAC;AAED,SAAO;AAAA,EAAQ,OAAO;AACxB;AAEA,SAAS,WAAW,UAAiC;AACnD,MAAI,CAACF,YAAW,QAAQ,GAAG;AACzB,WAAO;AAAA,EACT;AACA,QAAM,UAAUD,cAAa,UAAU,OAAO;AAC9C,MAAI,CAAC,QAAQ,WAAW,OAAO,GAAG;AAChC,WAAO;AAAA,EACT;AACA,QAAM,MAAM,QAAQ,QAAQ,SAAS,CAAC;AACtC,MAAI,QAAQ,IAAI;AACd,WAAO;AAAA,EACT;AACA,QAAM,cAAc,QAAQ,MAAM,GAAG,GAAG;AACxC,MAAI;AACJ,MAAI;AACF,aAASG,MAAK,KAAK,WAAW;AAAA,EAChC,QAAQ;AACN,WAAO;AAAA,EACT;AACA,MAAI,WAAW,QAAQ,OAAO,WAAW,UAAU;AACjD,WAAO;AAAA,EACT;AACA,QAAM,SAAU,OAAmC;AACnD,SAAO,OAAO,WAAW,WAAW,SAAS;AAC/C;AAEA,SAAS,kBACP,MACA,cACA,QACQ;AACR,QAAM,MAAM;AACZ,QAAM,OAAO,aAAa,SAAS,GAAG,IAClC,aAAa,MAAM,GAAG,CAAC,IAAI,MAAM,IACjC;AAEJ,MAAI,YAAYD,MAAK,MAAM,GAAG,IAAI,GAAG,GAAG,EAAE;AAC1C,MAAI,CAACD,YAAW,SAAS,GAAG;AAC1B,WAAO;AAAA,EACT;AACA,MAAI,WAAW,SAAS,MAAM,QAAQ;AACpC,WAAO;AAAA,EACT;AACA,MAAI,IAAI;AACR,aAAS;AACP,gBAAYC,MAAK,MAAM,GAAG,IAAI,IAAI,CAAC,GAAG,GAAG,EAAE;AAC3C,QAAI,CAACD,YAAW,SAAS,KAAK,WAAW,SAAS,MAAM,QAAQ;AAC9D,aAAO;AAAA,IACT;AACA,SAAK;AAAA,EACP;AACF;AAEO,SAAS,kBACd,MACA,UACA,iBACA,MACA,SAAkC,CAAC,GAC3B;AACR,QAAM,WAAW,iBAAiB,SAAS,KAAK;AAChD,QAAM,WAAW,kBAAkB,MAAM,UAAU,SAAS,MAAM;AAClE,QAAM,cAAc,iBAAiB,UAAU,MAAM,MAAM;AAC3D,QAAM,cA
Ac,GAAG,WAAW;AAAA;AAAA,EAAO,eAAe;AAAA;AAExD,gBAAc,UAAU,aAAa,OAAO;AAE5C,SAAO;AACT;;;APhHA,IAAM,cAAcG,MAAKC,SAAQ,GAAG,WAAW,eAAe,aAAa;AAE3E,IAAM,UAAU,IAAI,QAAQ;AAE5B,QACG,KAAK,aAAa,EAClB;AAAA,EACC;AACF,EACC,QAAQ,OAAO;AAElB,QACG,QAAQ,OAAO,EACf,YAAY,mCAAmC,EAC/C,SAAS,SAAS,cAAc,EAChC,OAAO,iBAAiB,uBAAuB,EAC/C,OAAO,YAAY,4BAA4B,EAC/C,OAAO,oBAAoB,yBAAyB,EACpD,OAAO,uBAAuB,sBAAsB,QAAQ,EAC5D,OAAO,gBAAgB,wBAAwB,CAAC,KAAa,QAAkB;AAC9E,MAAI,KAAK,GAAG;AACZ,SAAO;AACT,GAAG,CAAC,CAAa,EAChB;AAAA,EACC;AAAA,EACA;AACF,EACC,OAAO,kBAAkB,0BAA0B,EACnD,OAAO,aAAa,oCAAoC,EACxD,OAAO,qBAAqB,6BAA6B,EACzD,OAAO,oBAAoB,4BAA4B,EACvD,OAAO,oBAAoB,6BAA6B,EACxD,OAAO,SAAS,uDAAuD,EACvE,OAAO,kBAAkB,iDAAiD,EAC1E;AAAA,EACC;AAAA,EACA;AAAA,EACA,CAAC,KAAa,QAAkB;AAC9B,QAAI,KAAK,GAAG;AACZ,WAAO;AAAA,EACT;AAAA,EACA,CAAC;AACH,EACC,OAAO,iBAAiB,8CAA8C,EACtE,OAAO,OAAO,KAAa,YAAqC;AAC/D,MAAI;AACF,UAAM,aAAaC,YAAW,WAAW,IAAI,cAAc;AAC3D,UAAM,SAAS;AAAA,MACb;AAAA,QACE,MAAM,QAAQ;AAAA,QACd,MAAM,QAAQ;AAAA,QACd,SAAS,QAAQ;AAAA,QACjB,WAAW,QAAQ;AAAA,QACnB,QAAQ,QAAQ;AAAA,QAChB,UAAU,QAAQ;AAAA,QAClB,OAAO,QAAQ;AAAA,QACf,WAAW,QAAQ;AAAA,QACnB,QAAQ,QAAQ;AAAA,QAChB,aAAa,QAAQ;AAAA,QACrB,YAAY,QAAQ;AAAA,QACpB,YAAY,QAAQ;AAAA,QACpB,KAAK,QAAQ;AAAA,QACb,OAAO,QAAQ;AAAA,QACf,QAAQ,YAAY,QAAQ,KAAiB;AAAA,MAC/C;AAAA,MACA;AAAA,IACF;AAEA,QAAI,OAAO,OAAO,OAAO,UAAU;AACjC,YAAM,IAAI,MAAM,+CAA+C;AAAA,IACjE;AAGA,QAAI,CAAC,OAAO,UAAU,CAACA,YAAW,OAAO,IAAI,GAAG;AAC9C,YAAM,IAAI,MAAM,yCAAyC,OAAO,IAAI,EAAE;AAAA,IACxE;AAEA,UAAM,cAAc,cAAc;AAClC,UAAM,cAAc,MAAM,UAAU,KAAK,QAAQ,WAAW;AAE5D,QAAI;AACJ,QAAI;AAEJ,QAAI,YAAY,SAAS,OAAO;AAC9B,UAAI,OAAO,UAAU;AACnB,cAAM,IAAI,MAAM,0CAA0C;AAAA,MAC5D;AACA,UAAI,OAAO,KAAK;AACd,cAAM,IAAI,MAAM,qCAAqC;AAAA,MACvD;AACA,YAAM,EAAE,qBAAqB,IAAI,MAAM,OAAO,6BAAoB;AAClE,YAAM,YAAY,MAAM;AAAA,QACtB,YAAY;AAAA,QACZ,YAAY;AAAA,MACd;AACA,iBAAW,UAAU;AACrB,iBAAW,UAAU;AAAA,IACvB,WAAW,OAAO,UAAU;AAE1B,iBAAW,gBAAgB,YAAY,UAAU,YAAY,QAAQ;AACrE,iBAAW,kBAAkB,YAAY,IAAI;AAAA,IAC/C,WAAW,OAAO,KAAK;AAErB,iBAAW,gBAAgB,YAAY,UAAU,YAAY,QAAQ;AACrE,iBAAW,kBAAkB,YAAY,QAAQ;AAAA,IACnD,OAAO;A
ACL,YAAM,SAAS,QAAQ,YAAY,MAAM,YAAY,QAAQ;AAC7D,iBAAW,OAAO;AAClB,iBAAW,kBAAkB,OAAO,OAAO;AAAA,IAC7C;AAEA,QAAI,OAAO,UAAU,MAAM;AACzB,iBAAW,EAAE,GAAG,UAAU,OAAO,OAAO,MAAM;AAAA,IAChD;AAEA,QAAI,OAAO,QAAQ;AACjB,YAAM,cAAc,iBAAiB,UAAU,OAAO,MAAM,OAAO,MAAM;AACzE,cAAQ,OAAO,MAAM,GAAG,WAAW;AAAA;AAAA,EAAO,QAAQ;AAAA,CAAI;AAAA,IACxD,OAAO;AACL,YAAM,WAAW;AAAA,QACf,OAAO;AAAA,QACP;AAAA,QACA;AAAA,QACA,OAAO;AAAA,QACP,OAAO;AAAA,MACT;AACA,cAAQ,MAAM,UAAU,QAAQ,EAAE;AAAA,IACpC;AAAA,EACF,SAAS,OAAO;AACd,UAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AACrE,YAAQ,MAAM,UAAU,OAAO,EAAE;AACjC,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF,CAAC;AAEH,QACG,QAAQ,OAAO,EACf,YAAY,kCAAkC,EAC9C,SAAS,SAAS,cAAc,EAChC,OAAO,uBAAuB,4BAA4B,QAAQ,EAClE,OAAO,iBAAiB,8CAA8C,EACtE,OAAO,OAAO,KAAa,YAAqC;AAC/D,QAAM,EAAE,QAAAC,QAAO,IAAI,MAAM,OAAO,cAAc;AAC9C,QAAM,cAAc,cAAc;AAClC,mBAAiB,WAAW;AAE5B,QAAM,aAAc,QAAQ,WAAkC;AAC9D,QAAM,WAAW;AAAA,IACf,QAAQ;AAAA,IACR,QAAQ,IAAI;AAAA,EACd;AACA,QAAM,UAAU,MAAMA,QAAO;AAAA,IAC3B,UAAU;AAAA,IACV,GAAI,aAAa,QAAQ,EAAE,OAAO,SAAS;AAAA,EAC7C,CAAC;AAED,MAAI;AACF,UAAM,UAAU,MAAM,QAAQ,WAAW;AACzC,UAAM,OAAO,MAAM,QAAQ,QAAQ;AAEnC,UAAM,KAAK,KAAK,KAAK,EAAE,WAAW,eAAe,SAAS,aAAa,IAAK,CAAC;AAE7E,YAAQ,MAAM,yEAAyE;AAEvF,YAAQ,MAAM,OAAO;AACrB,UAAM,KAAK,QAAQ,OAAO,MAAM;AAChC,YAAQ,MAAM,MAAM;AACpB,YAAQ,MAAM,MAAM;AAEpB,UAAM,cAAc,eAAe,KAAK,WAAW;AACnD,UAAM,QAAQ,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,YAAQ,MAAM,kBAAkB,WAAW,EAAE;AAAA,EAC/C,SAAS,OAAO;AACd,UAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AACrE,YAAQ,MAAM,UAAU,OAAO,EAAE;AACjC,YAAQ,KAAK,CAAC;AAAA,EAChB,UAAE;AACA,UAAM,QAAQ,MAAM;AAAA,EACtB;AACF,CAAC;AAEH,QAAQ,MAAM;","names":["existsSync","homedir","join","homedir","readFileSync","existsSync","join","yaml","join","homedir","existsSync","launch"]}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vault-fetch",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "Fetch JS-rendered web pages with
|
|
3
|
+
"version": "0.5.0",
|
|
4
|
+
"description": "Fetch JS-rendered web pages with CloakBrowser (stealth Chromium) and save as Markdown to Obsidian Vault",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"author": "driller",
|
|
@@ -36,10 +36,11 @@
|
|
|
36
36
|
"dependencies": {
|
|
37
37
|
"@mozilla/readability": "^0.6.0",
|
|
38
38
|
"@opendocsg/pdf2md": "^0.2.5",
|
|
39
|
+
"cloakbrowser": "0.3.18",
|
|
39
40
|
"commander": "^14.0.3",
|
|
40
41
|
"js-yaml": "^4.1.1",
|
|
41
42
|
"jsdom": "^29.0.1",
|
|
42
|
-
"playwright": "^1.58.2",
|
|
43
|
+
"playwright-core": "^1.58.2",
|
|
43
44
|
"turndown": "^7.2.2"
|
|
44
45
|
},
|
|
45
46
|
"devDependencies": {
|