md-fetch 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +212 -0
- package/LICENSE +21 -0
- package/README.md +449 -0
- package/README.zh-CN.md +449 -0
- package/dist/cli.d.ts +27 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +158 -0
- package/dist/cli.js.map +1 -0
- package/dist/constants.d.ts +9 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +15 -0
- package/dist/constants.js.map +1 -0
- package/dist/core/browser.d.ts +23 -0
- package/dist/core/browser.d.ts.map +1 -0
- package/dist/core/browser.js +125 -0
- package/dist/core/browser.js.map +1 -0
- package/dist/core/converter.d.ts +18 -0
- package/dist/core/converter.d.ts.map +1 -0
- package/dist/core/converter.js +74 -0
- package/dist/core/converter.js.map +1 -0
- package/dist/core/extractor.d.ts +28 -0
- package/dist/core/extractor.d.ts.map +1 -0
- package/dist/core/extractor.js +151 -0
- package/dist/core/extractor.js.map +1 -0
- package/dist/core/fetcher.d.ts +24 -0
- package/dist/core/fetcher.d.ts.map +1 -0
- package/dist/core/fetcher.js +111 -0
- package/dist/core/fetcher.js.map +1 -0
- package/dist/core/processor.d.ts +22 -0
- package/dist/core/processor.d.ts.map +1 -0
- package/dist/core/processor.js +104 -0
- package/dist/core/processor.js.map +1 -0
- package/dist/core/screenshotter.d.ts +31 -0
- package/dist/core/screenshotter.d.ts.map +1 -0
- package/dist/core/screenshotter.js +222 -0
- package/dist/core/screenshotter.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +14 -0
- package/dist/index.js.map +1 -0
- package/dist/screen-cli.d.ts +26 -0
- package/dist/screen-cli.d.ts.map +1 -0
- package/dist/screen-cli.js +196 -0
- package/dist/screen-cli.js.map +1 -0
- package/dist/screen.d.ts +3 -0
- package/dist/screen.d.ts.map +1 -0
- package/dist/screen.js +14 -0
- package/dist/screen.js.map +1 -0
- package/dist/types/index.d.ts +151 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +42 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/filename-sanitizer.d.ts +38 -0
- package/dist/utils/filename-sanitizer.d.ts.map +1 -0
- package/dist/utils/filename-sanitizer.js +79 -0
- package/dist/utils/filename-sanitizer.js.map +1 -0
- package/dist/utils/frontmatter.d.ts +6 -0
- package/dist/utils/frontmatter.d.ts.map +1 -0
- package/dist/utils/frontmatter.js +65 -0
- package/dist/utils/frontmatter.js.map +1 -0
- package/package.json +56 -0
- package/skills/md-fetch/SKILL.md +133 -0
- package/skills/md-fetch/references/cli-reference.md +257 -0
- package/src/cli.ts +169 -0
- package/src/constants.ts +17 -0
- package/src/core/browser.ts +161 -0
- package/src/core/converter.ts +82 -0
- package/src/core/extractor.ts +172 -0
- package/src/core/fetcher.ts +143 -0
- package/src/core/processor.ts +124 -0
- package/src/core/screenshotter.ts +289 -0
- package/src/index.ts +15 -0
- package/src/screen-cli.ts +216 -0
- package/src/screen.ts +15 -0
- package/src/types/index.ts +227 -0
- package/src/utils/filename-sanitizer.ts +88 -0
- package/src/utils/frontmatter.ts +81 -0
- package/tsconfig.json +20 -0
package/dist/screen.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { ScreenCLI } from './screen-cli.js';
|
|
3
|
+
/**
 * Entry point of the screenshot executable.
 *
 * Creates a ScreenCLI, passes it the raw process arguments and waits for it
 * to finish. Any rejection is reported as a single "Fatal error" line on
 * stderr and the process exits with status 1.
 */
async function main() {
    try {
        const cli = new ScreenCLI();
        await cli.run(process.argv);
    }
    catch (error) {
        // JavaScript allows throwing any value. Only Error instances are
        // guaranteed to have `.message`; reading it from `null`/`undefined`
        // would throw again inside this handler, and other values would
        // print "undefined". Fall back to the string form of the value.
        const reason = error instanceof Error ? error.message : String(error);
        console.error('Fatal error:', reason);
        process.exit(1);
    }
}
|
|
13
|
+
main();
|
|
14
|
+
//# sourceMappingURL=screen.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"screen.js","sourceRoot":"","sources":["../src/screen.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAE5C,KAAK,UAAU,IAAI;IACjB,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,SAAS,EAAE,CAAC;QAC5B,MAAM,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,cAAc,EAAG,KAAe,CAAC,OAAO,CAAC,CAAC;QACxD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC"}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import type { PuppeteerLifeCycleEvent } from 'puppeteer-core';
|
|
2
|
+
/**
 * Optional settings for a fetch. Every field may be omitted.
 */
export interface FetchOptions {
    /** Header name to header value pairs. */
    headers?: Record<string, string>;
    /** Proxy, given as a string (presumably a proxy URL; confirm against the fetcher). */
    proxy?: string;
    /** Timeout as a number. NOTE(review): the unit is not visible here; confirm. */
    timeout?: number;
    /** User-Agent string. */
    userAgent?: string;
}
|
|
8
|
+
/**
 * Optional browser settings. Every field may be omitted.
 */
export interface BrowserOptions {
    /** Path to the browser executable. */
    executablePath?: string;
    /** Lifecycle event to wait for; the type is imported from `puppeteer-core`. */
    waitUntil?: PuppeteerLifeCycleEvent;
    /** Timeout as a number. NOTE(review): the unit is not visible here; confirm. */
    timeout?: number;
    /** User-Agent string. */
    userAgent?: string;
    /** Proxy, given as a string. */
    proxy?: string;
    /** Headless flag. */
    headless?: boolean;
}
|
|
16
|
+
/**
 * Settings for content extraction.
 */
export interface ExtractOptions {
    /** Required flag selecting readability-based extraction. */
    useReadability: boolean;
    /** Optional selector string (presumably a CSS selector; confirm against the extractor). */
    selector?: string;
}
|
|
20
|
+
/**
 * Metadata describing a page. `url` is the only required field; all other
 * fields are optional strings, except `keywords`, which is a string array.
 */
export interface PageMetadata {
    /** Page URL (required). */
    url: string;
    title?: string;
    description?: string;
    author?: string;
    /** Publication time as a string. NOTE(review): format is not constrained by the type. */
    publishedTime?: string;
    /** Modification time as a string. NOTE(review): format is not constrained by the type. */
    modifiedTime?: string;
    siteName?: string;
    keywords?: string[];
    /** Image reference as a string (presumably a URL; confirm against the extractor). */
    image?: string;
    /** Language as a string (presumably a language tag; confirm). */
    lang?: string;
}
|
|
32
|
+
/**
 * Extracted content paired with the metadata of the page it came from.
 */
export interface ExtractedContent {
    /** Extracted content as a string. NOTE(review): HTML vs. text is not visible here. */
    content: string;
    /** Metadata of the page. */
    metadata: PageMetadata;
}
|
|
36
|
+
/**
 * Optional Markdown conversion settings. Every field may be omitted.
 *
 * NOTE(review): the field names and allowed values match Turndown options of
 * the same names; presumably they are passed through to it. Confirm in the
 * converter.
 */
export interface ConversionOptions {
    /** Heading style: `'atx'` or `'setext'`. */
    headingStyle?: 'atx' | 'setext';
    /** Code block style: `'fenced'` or `'indented'`. */
    codeBlockStyle?: 'fenced' | 'indented';
    /** Bullet list marker: `'-'`, `'+'` or `'*'`. */
    bulletListMarker?: '-' | '+' | '*';
}
|
|
41
|
+
/**
 * Options for processing a page. `useBrowser`, `useReadability` and
 * `fetchOptions` are required; the remaining fields are optional.
 */
export interface ProcessOptions {
    /** Required flag selecting the browser-based path. */
    useBrowser: boolean;
    /** Required flag selecting readability-based extraction. */
    useReadability: boolean;
    /** Optional selector string. */
    selector?: string;
    /** Fetch settings (required, though each of its fields is optional). */
    fetchOptions: FetchOptions;
    /** Browser settings. */
    browserOptions?: BrowserOptions;
    /** Markdown conversion settings. */
    conversionOptions?: ConversionOptions;
    /** Verbose flag. */
    verbose?: boolean;
}
|
|
50
|
+
/**
 * Outcome for one URL. `url` and `success` are always present.
 *
 * NOTE(review): presumably `markdown` is set when `success` is true and
 * `error` when it is false; the type does not enforce this. Confirm against
 * the processor.
 */
export interface FetchResult {
    url: string;
    markdown?: string;
    error?: Error;
    success: boolean;
}
|
|
56
|
+
/**
 * Options bag for the command line. Every field may be omitted.
 *
 * NOTE(review): several names correspond to flags listed in the packaged
 * skill document (`--output`, `--browser`, `--no-readability`, `--selector`,
 * `--wait-until`, `--proxy`, `--verbose`); the exact flag-to-field mapping
 * lives in the CLI module and should be confirmed there.
 */
export interface CLIOptions {
    output?: string;
    browser?: boolean;
    browserPath?: string;
    readability?: boolean;
    selector?: string;
    file?: string;
    /** Header entries as raw strings. NOTE(review): entry format is not visible here. */
    header?: string[];
    proxy?: string;
    timeout?: number;
    config?: string;
    userAgent?: string;
    /** Plain string here, whereas `BrowserOptions.waitUntil` uses the Puppeteer lifecycle type. */
    waitUntil?: string;
    concurrent?: number;
    verbose?: boolean;
}
|
|
72
|
+
/**
 * Configuration object with four optional sections: `browser`, `fetch`,
 * `conversion` and `defaults`. Every section and every field may be omitted.
 */
export interface Config {
    /** Browser section. */
    browser?: {
        executablePath?: string;
        /** Plain string here, whereas `BrowserOptions.waitUntil` uses the Puppeteer lifecycle type. */
        waitUntil?: string;
    };
    /** Fetch section. */
    fetch?: {
        /** Timeout as a number. NOTE(review): the unit is not visible here; confirm. */
        timeout?: number;
        headers?: Record<string, string>;
        proxy?: string;
    };
    /** Conversion section; carries the same three fields as `ConversionOptions`. */
    conversion?: {
        headingStyle?: 'atx' | 'setext';
        codeBlockStyle?: 'fenced' | 'indented';
        bulletListMarker?: '-' | '+' | '*';
    };
    /** Default values section. */
    defaults?: {
        useReadability?: boolean;
        concurrent?: number;
    };
}
|
|
92
|
+
/**
 * Error subclass that records the `url` involved and, optionally, a
 * `statusCode`. Both `statusCode` and `message` may be omitted when
 * constructing it.
 */
export declare class FetchError extends Error {
    url: string;
    statusCode?: number | undefined;
    constructor(url: string, statusCode?: number | undefined, message?: string);
}
|
|
97
|
+
/**
 * Error subclass that records the `url` involved alongside the message.
 */
export declare class BrowserError extends Error {
    url: string;
    constructor(url: string, message: string);
}
|
|
101
|
+
/**
 * Error subclass that records the `url` involved alongside the message.
 */
export declare class ExtractionError extends Error {
    url: string;
    constructor(url: string, message: string);
}
|
|
105
|
+
/**
 * Error subclass constructed from a message only; it adds no extra fields.
 */
export declare class ValidationError extends Error {
    constructor(message: string);
}
|
|
108
|
+
/**
 * Resolved settings for taking a screenshot. Capture mode, viewport
 * dimensions, output directory, format and browser options are required;
 * the rest are optional.
 */
export interface ScreenshotOptions {
    /** Full-page flag. */
    fullPage: boolean;
    /** Width as a number (the packaged skill document describes the CLI width in pixels). */
    width: number;
    /** Height as a number (the packaged skill document describes the CLI height in pixels). */
    height: number;
    /** Device scale factor. */
    deviceScaleFactor: number;
    /** Output directory. */
    outputDir: string;
    /** Image format: `'png'`, `'jpeg'` or `'webp'`. */
    format: 'png' | 'jpeg' | 'webp';
    /** Quality as a number. NOTE(review): range and applicable formats are not visible here. */
    quality?: number;
    /** Browser settings. */
    browserOptions: BrowserOptions;
    /** Delay as a number (the packaged skill document gives the CLI `--delay` in ms; confirm the same unit applies here). */
    delay?: number;
    /** Selector string. */
    selector?: string;
    /** List of selector strings for elements to hide. */
    hideSelectors?: string[];
    /** Verbose flag. */
    verbose?: boolean;
}
|
|
122
|
+
/**
 * Options bag for the screenshot command line. Every field may be omitted.
 *
 * NOTE(review): several names correspond to flags listed in the packaged
 * skill document (`--viewport`, `--width`, `--height`, `--scale`,
 * `--selector`, `--hide`, `--format`, `--delay`, `--output`); the exact
 * flag-to-field mapping lives in the screenshot CLI module and should be
 * confirmed there.
 */
export interface ScreenshotCLIOptions {
    fullPage?: boolean;
    viewport?: boolean;
    width?: number;
    height?: number;
    scale?: number;
    output?: string;
    /** Plain string here, whereas `ScreenshotOptions.format` is a three-value union. */
    format?: string;
    quality?: number;
    browserPath?: string;
    waitUntil?: string;
    timeout?: number;
    userAgent?: string;
    proxy?: string;
    delay?: number;
    selector?: string;
    /** Single string here, whereas `ScreenshotOptions.hideSelectors` is a string array. */
    hide?: string;
    verbose?: boolean;
}
|
|
141
|
+
/**
 * Outcome for one URL. `url` and `success` are always present.
 *
 * NOTE(review): presumably `filepath` is set when `success` is true and
 * `error` when it is false; the type does not enforce this. Confirm against
 * the screenshotter.
 */
export interface ScreenshotResult {
    url: string;
    filepath?: string;
    success: boolean;
    error?: Error;
}
|
|
147
|
+
/**
 * Error subclass that records the `url` involved alongside the message.
 */
export declare class ScreenshotError extends Error {
    url: string;
    constructor(url: string, message: string);
}
|
|
151
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,gBAAgB,CAAC;AAG9D,MAAM,WAAW,YAAY;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAGD,MAAM,WAAW,cAAc;IAC7B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,uBAAuB,CAAC;IACpC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAGD,MAAM,WAAW,cAAc;IAC7B,cAAc,EAAE,OAAO,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAGD,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,YAAY,CAAC;CACxB;AAGD,MAAM,WAAW,iBAAiB;IAChC,YAAY,CAAC,EAAE,KAAK,GAAG,QAAQ,CAAC;IAChC,cAAc,CAAC,EAAE,QAAQ,GAAG,UAAU,CAAC;IACvC,gBAAgB,CAAC,EAAE,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;CACpC;AAGD,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,OAAO,CAAC;IACpB,cAAc,EAAE,OAAO,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,YAAY,CAAC;IAC3B,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IACtC,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAGD,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,OAAO,EAAE,OAAO,CAAC;CAClB;AAGD,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAA
C;IACpB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAGD,MAAM,WAAW,MAAM;IACrB,OAAO,CAAC,EAAE;QACR,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,KAAK,CAAC,EAAE;QACN,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,CAAC;IACF,UAAU,CAAC,EAAE;QACX,YAAY,CAAC,EAAE,KAAK,GAAG,QAAQ,CAAC;QAChC,cAAc,CAAC,EAAE,QAAQ,GAAG,UAAU,CAAC;QACvC,gBAAgB,CAAC,EAAE,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;KACpC,CAAC;IACF,QAAQ,CAAC,EAAE;QACT,cAAc,CAAC,EAAE,OAAO,CAAC;QACzB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;CACH;AAGD,qBAAa,UAAW,SAAQ,KAAK;IAE1B,GAAG,EAAE,MAAM;IACX,UAAU,CAAC,EAAE,MAAM;gBADnB,GAAG,EAAE,MAAM,EACX,UAAU,CAAC,EAAE,MAAM,YAAA,EAC1B,OAAO,CAAC,EAAE,MAAM;CAKnB;AAED,qBAAa,YAAa,SAAQ,KAAK;IAE5B,GAAG,EAAE,MAAM;gBAAX,GAAG,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM;CAKlB;AAED,qBAAa,eAAgB,SAAQ,KAAK;IAE/B,GAAG,EAAE,MAAM;gBAAX,GAAG,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM;CAKlB;AAED,qBAAa,eAAgB,SAAQ,KAAK;gBAC5B,OAAO,EAAE,MAAM;CAI5B;AAKD,MAAM,WAAW,iBAAiB;IAEhC,QAAQ,EAAE,OAAO,CAAC;IAGlB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,MAAM,CAAC;IAG1B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,KAAK,GAAG,MAAM,GAAG,MAAM,CAAC;IAChC,OAAO,CAAC,EAAE,MAAM,CAAC;IAGjB,cAAc,EAAE,cAAc,CAAC;IAG/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IAGzB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAGD,MAAM,WAAW,oBAAoB;IAEnC,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IAGf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IAGjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IAGf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IAGd,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAGD,MAAM,WAAW,gBAAgB;IAC/B,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAA
O,CAAC;IACjB,KAAK,CAAC,EAAE,KAAK,CAAC;CACf;AAED,qBAAa,eAAgB,SAAQ,KAAK;IAE/B,GAAG,EAAE,MAAM;gBAAX,GAAG,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM;CAKlB"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
// 自定义错误类型
|
|
2
|
+
/**
 * Error that remembers which URL was involved and, when known, a status code.
 *
 * When `message` is falsy (omitted or empty) the message defaults to
 * "Failed to fetch <url>". `name` is set to 'FetchError'.
 */
export class FetchError extends Error {
    url;
    statusCode;
    constructor(url, statusCode, message) {
        super(message ? message : `Failed to fetch ${url}`);
        // Same three assignments as before, in the same order.
        Object.assign(this, { url, statusCode, name: 'FetchError' });
    }
}
|
|
12
|
+
/**
 * Error that remembers which URL was involved.
 * `name` is set to 'BrowserError'.
 */
export class BrowserError extends Error {
    url;
    constructor(url, message) {
        super(message);
        Object.assign(this, { url, name: 'BrowserError' });
    }
}
|
|
20
|
+
/**
 * Error that remembers which URL was involved.
 * `name` is set to 'ExtractionError'.
 */
export class ExtractionError extends Error {
    url;
    constructor(url, message) {
        super(message);
        Object.assign(this, { url, name: 'ExtractionError' });
    }
}
|
|
28
|
+
/**
 * Error built from a message only; it adds no extra fields.
 * `name` is set to 'ValidationError'.
 */
export class ValidationError extends Error {
    constructor(message) {
        super(message);
        this.name = 'ValidationError';
    }
}
|
|
34
|
+
/**
 * Error that remembers which URL was involved.
 * `name` is set to 'ScreenshotError'.
 */
export class ScreenshotError extends Error {
    url;
    constructor(url, message) {
        super(message);
        Object.assign(this, { url, name: 'ScreenshotError' });
    }
}
|
|
42
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAgHA,UAAU;AACV,MAAM,OAAO,UAAW,SAAQ,KAAK;IAE1B;IACA;IAFT,YACS,GAAW,EACX,UAAmB,EAC1B,OAAgB;QAEhB,KAAK,CAAC,OAAO,IAAI,mBAAmB,GAAG,EAAE,CAAC,CAAC;QAJpC,QAAG,GAAH,GAAG,CAAQ;QACX,eAAU,GAAV,UAAU,CAAS;QAI1B,IAAI,CAAC,IAAI,GAAG,YAAY,CAAC;IAC3B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,KAAK;IAE5B;IADT,YACS,GAAW,EAClB,OAAe;QAEf,KAAK,CAAC,OAAO,CAAC,CAAC;QAHR,QAAG,GAAH,GAAG,CAAQ;QAIlB,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,eAAgB,SAAQ,KAAK;IAE/B;IADT,YACS,GAAW,EAClB,OAAe;QAEf,KAAK,CAAC,OAAO,CAAC,CAAC;QAHR,QAAG,GAAH,GAAG,CAAQ;QAIlB,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAC;IAChC,CAAC;CACF;AAED,MAAM,OAAO,eAAgB,SAAQ,KAAK;IACxC,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAC;IAChC,CAAC;CACF;AAqED,MAAM,OAAO,eAAgB,SAAQ,KAAK;IAE/B;IADT,YACS,GAAW,EAClB,OAAe;QAEf,KAAK,CAAC,OAAO,CAAC,CAAC;QAHR,QAAG,GAAH,GAAG,CAAQ;QAIlB,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAC;IAChC,CAAC;CACF"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 文件名安全化工具
|
|
3
|
+
* 用于从 URL 生成安全的文件名
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Generate a safe filename from a URL.
 * Format: <first 50 characters of the sanitized URL>_<year month day hour minute second>.png
 *
 * @param url - Original URL
 * @param extension - File extension (default 'png')
 * @returns The safe filename
 *
 * @example
 * generateScreenshotFilename('https://github.com/user/repo/issues/123')
 * // => 'github.com_user_repo_issues_123_20251229143025.png'
 */
export declare function generateScreenshotFilename(url: string, extension?: string): string;
|
|
18
|
+
/**
 * Convert a URL into a safe filename prefix.
 * - Keeps the domain name
 * - Replaces illegal characters with underscores
 * - Truncates to the first 50 characters
 *
 * @param url - Original URL
 * @returns The sanitized filename prefix
 */
export declare function sanitizeUrlForFilename(url: string): string;
|
|
28
|
+
/**
 * Generate a timestamp string.
 * Format: YYYYMMDDHHMMSS
 *
 * @returns The timestamp string
 *
 * @example
 * generateTimestamp() // => '20251229143025'
 */
export declare function generateTimestamp(): string;
|
|
38
|
+
//# sourceMappingURL=filename-sanitizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"filename-sanitizer.d.ts","sourceRoot":"","sources":["../../src/utils/filename-sanitizer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;;;;;;;;;GAWG;AACH,wBAAgB,0BAA0B,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,GAAE,MAAc,GAAG,MAAM,CAKzF;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAgC1D;AAED;;;;;;;;GAQG;AACH,wBAAgB,iBAAiB,IAAI,MAAM,CAW1C"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 文件名安全化工具
|
|
3
|
+
* 用于从 URL 生成安全的文件名
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Build a screenshot filename of the form
 * `<sanitized URL prefix>_<timestamp>.<extension>`.
 *
 * The prefix comes from sanitizeUrlForFilename(url) and the timestamp from
 * generateTimestamp(); the extension is appended as given.
 *
 * @param url - Original URL
 * @param extension - File extension without the dot (default 'png')
 * @returns The assembled filename
 *
 * @example
 * generateScreenshotFilename('https://github.com/user/repo/issues/123')
 * // => 'github.com_user_repo_issues_123_20251229143025.png'
 */
export function generateScreenshotFilename(url, extension = 'png') {
    const stem = [sanitizeUrlForFilename(url), generateTimestamp()].join('_');
    return `${stem}.${extension}`;
}
|
|
22
|
+
/**
 * Turn a URL into a filename-safe prefix of at most 50 characters.
 *
 * The prefix is built from host name + path + query string. Characters that
 * are illegal in filenames become underscores, runs of dots and runs of
 * underscores are collapsed, and leading/trailing dots and underscores are
 * removed, both before and after truncation.
 *
 * @param url - Original URL
 * @returns The sanitized prefix, or 'screenshot' when the URL cannot be
 *          parsed or nothing is left after cleaning
 */
export function sanitizeUrlForFilename(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        // Not a parseable URL: use the generic name.
        return 'screenshot';
    }
    const { hostname, pathname, search } = parsed;
    const combined = hostname + pathname + search;
    // Illegal filename characters (including control characters) -> underscore.
    const withoutIllegal = combined.replace(/[<>:"/\\|?*\x00-\x1F]/g, '_');
    // Runs of dots -> one dot (single dots in the host name are untouched).
    const singleDots = withoutIllegal.replace(/\.{2,}/g, '.');
    // Whitespace runs -> underscore.
    const noWhitespace = singleDots.replace(/\s+/g, '_');
    // Runs of slashes/underscores -> one underscore.
    const collapsed = noWhitespace.replace(/[/_]+/g, '_');
    // Strip dots/underscores at both ends.
    const trimmed = collapsed.replace(/^[._]+|[._]+$/g, '');
    // Keep 50 characters, then strip any dot/underscore the cut left at the end.
    const prefix = trimmed.slice(0, 50).replace(/[._]+$/, '');
    return prefix || 'screenshot';
}
|
|
60
|
+
/**
 * Format the current local date and time as a 14-digit string.
 * Layout: YYYYMMDDHHMMSS, with every part after the year zero-padded to two
 * digits.
 *
 * @returns The timestamp string
 *
 * @example
 * generateTimestamp() // => '20251229143025'
 */
export function generateTimestamp() {
    const now = new Date();
    const twoDigitParts = [
        now.getMonth() + 1, // getMonth() is zero-based
        now.getDate(),
        now.getHours(),
        now.getMinutes(),
        now.getSeconds(),
    ];
    const tail = twoDigitParts.map((part) => String(part).padStart(2, '0')).join('');
    return `${now.getFullYear()}${tail}`;
}
|
|
79
|
+
//# sourceMappingURL=filename-sanitizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"filename-sanitizer.js","sourceRoot":"","sources":["../../src/utils/filename-sanitizer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,0BAA0B,CAAC,GAAW,EAAE,YAAoB,KAAK;IAC/E,MAAM,QAAQ,GAAG,sBAAsB,CAAC,GAAG,CAAC,CAAC;IAC7C,MAAM,SAAS,GAAG,iBAAiB,EAAE,CAAC;IAEtC,OAAO,GAAG,QAAQ,IAAI,SAAS,IAAI,SAAS,EAAE,CAAC;AACjD,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,sBAAsB,CAAC,GAAW;IAChD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAE5B,4BAA4B;QAC5B,IAAI,QAAQ,GAAG,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC;QAEjE,QAAQ;QACR,MAAM,QAAQ,GAAG,QAAQ;YACvB,gBAAgB;aACf,OAAO,CAAC,wBAAwB,EAAE,GAAG,CAAC;YACvC,uBAAuB;aACtB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;YACxB,WAAW;aACV,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;YACrB,qBAAqB;aACpB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;YACvB,gBAAgB;aACf,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC;QAEjC,WAAW;QACX,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAExC,aAAa;QACb,MAAM,OAAO,GAAG,SAAS,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAEhD,gBAAgB;QAChB,OAAO,OAAO,IAAI,YAAY,CAAC;IACjC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,iBAAiB;QACjB,OAAO,YAAY,CAAC;IACtB,CAAC;AACH,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,iBAAiB;IAC/B,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;IAEvB,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;IAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC1D,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACtD,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC1D,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAE1D,OAAO,GAAG,IAAI,GAAG,KAAK,GAAG,GAAG,GAAG,KAAK,GAAG,OAAO,GAAG,OAAO,EAAE,CAAC;AAC7D,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"frontmatter.d.ts","sourceRoot":"","sources":["../../src/utils/frontmatter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAEtD;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,YAAY,GAAG,MAAM,CAmDlE"}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
 * Generate YAML frontmatter from page metadata.
 *
 * Emits one `key: value` line per populated field in a fixed order (title,
 * url, description, author, siteName, publishedTime, modifiedTime, keywords,
 * image, lang), wrapped in `---` delimiters and followed by a blank line.
 *
 * Every value is written as a single-line YAML scalar. Free-text fields
 * (title, description, author, siteName, keywords) are quoted conservatively
 * via escapeYamlString. The remaining fields are written verbatim unless
 * that would be ambiguous or unsafe, in which case they are double-quoted
 * too. Metadata comes from arbitrary web pages, so a value containing a line
 * break must never be able to start a new frontmatter key.
 *
 * @param metadata - Page metadata; falsy fields are skipped
 * @returns The frontmatter block, ending with a newline
 */
export function generateFrontmatter(metadata) {
    const lines = ['---'];
    // Add fields in a logical order
    if (metadata.title) {
        lines.push(`title: ${escapeYamlString(metadata.title)}`);
    }
    if (metadata.url) {
        lines.push(`url: ${escapeYamlValue(metadata.url)}`);
    }
    if (metadata.description) {
        lines.push(`description: ${escapeYamlString(metadata.description)}`);
    }
    if (metadata.author) {
        lines.push(`author: ${escapeYamlString(metadata.author)}`);
    }
    if (metadata.siteName) {
        lines.push(`siteName: ${escapeYamlString(metadata.siteName)}`);
    }
    if (metadata.publishedTime) {
        lines.push(`publishedTime: ${escapeYamlValue(metadata.publishedTime)}`);
    }
    if (metadata.modifiedTime) {
        lines.push(`modifiedTime: ${escapeYamlValue(metadata.modifiedTime)}`);
    }
    if (metadata.keywords && metadata.keywords.length > 0) {
        lines.push('keywords:');
        metadata.keywords.forEach(keyword => {
            lines.push(`  - ${escapeYamlString(keyword)}`);
        });
    }
    if (metadata.image) {
        lines.push(`image: ${escapeYamlValue(metadata.image)}`);
    }
    if (metadata.lang) {
        lines.push(`lang: ${escapeYamlValue(metadata.lang)}`);
    }
    lines.push('---');
    lines.push(''); // Add blank line after frontmatter
    return lines.join('\n');
}
/**
 * Quote a free-text value when needed.
 *
 * Quotes whenever the text contains any of `: # [ ] { } " '` (the original,
 * deliberately conservative rule) or is otherwise ambiguous as a plain
 * scalar; returns the text unchanged in all other cases.
 */
function escapeYamlString(str) {
    const text = String(str);
    if (/[:#\[\]{}"']/.test(text) || isAmbiguousPlainScalar(text)) {
        return toDoubleQuoted(text);
    }
    return text;
}
/**
 * Quote a machine-style value (URL, timestamp, language tag) only when
 * writing it verbatim would be ambiguous or unsafe, so ordinary values such
 * as `https://example.com` keep their unquoted form.
 */
function escapeYamlValue(value) {
    const text = String(value);
    return isAmbiguousPlainScalar(text) ? toDoubleQuoted(text) : text;
}
/**
 * Tell whether `text` cannot be written safely as an unquoted YAML scalar.
 */
function isAmbiguousPlainScalar(text) {
    return (text === '' ||
        // Leading/trailing whitespace would be dropped by the parser.
        /^\s|\s$/.test(text) ||
        // Line breaks and other control characters (tab is harmless).
        /[\x00-\x08\x0A-\x1F\x7F\x85\u2028\u2029]/.test(text) ||
        // YAML indicator characters at the start of a scalar.
        /^[-?:,\[\]{}#&*!|>'"%@`]/.test(text) ||
        // ": " starts a mapping value, " #" starts a comment.
        /: | #|:$/.test(text) ||
        // Words some parsers resolve to booleans/null (e.g. lang "no").
        /^(?:true|false|yes|no|on|off|null|~)$/i.test(text) ||
        // Text that would be read back as a number.
        !Number.isNaN(Number(text)));
}
/**
 * Wrap `text` in double quotes, escaping backslashes, double quotes, line
 * breaks and remaining control characters so the result stays on one line.
 */
function toDoubleQuoted(text) {
    const escaped = text
        .replace(/\\/g, '\\\\')
        .replace(/"/g, '\\"')
        .replace(/\n/g, '\\n')
        .replace(/\r/g, '\\r')
        .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\x85\u2028\u2029]/g, (ch) => {
        const code = ch.charCodeAt(0);
        return code <= 0xff
            ? `\\x${code.toString(16).padStart(2, '0')}`
            : `\\u${code.toString(16).padStart(4, '0')}`;
    });
    return `"${escaped}"`;
}
|
|
65
|
+
//# sourceMappingURL=frontmatter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"frontmatter.js","sourceRoot":"","sources":["../../src/utils/frontmatter.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,QAAsB;IACxD,MAAM,KAAK,GAAa,CAAC,KAAK,CAAC,CAAC;IAEhC,gCAAgC;IAChC,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,UAAU,gBAAgB,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC3D,CAAC;IAED,IAAI,QAAQ,CAAC,GAAG,EAAE,CAAC;QACjB,KAAK,CAAC,IAAI,CAAC,QAAQ,QAAQ,CAAC,GAAG,EAAE,CAAC,CAAC;IACrC,CAAC;IAED,IAAI,QAAQ,CAAC,WAAW,EAAE,CAAC;QACzB,KAAK,CAAC,IAAI,CAAC,gBAAgB,gBAAgB,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;IACvE,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;QACpB,KAAK,CAAC,IAAI,CAAC,WAAW,gBAAgB,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC,aAAa,gBAAgB,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IACjE,CAAC;IAED,IAAI,QAAQ,CAAC,aAAa,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,aAAa,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,IAAI,QAAQ,CAAC,YAAY,EAAE,CAAC;QAC1B,KAAK,CAAC,IAAI,CAAC,iBAAiB,QAAQ,CAAC,YAAY,EAAE,CAAC,CAAC;IACvD,CAAC;IAED,IAAI,QAAQ,CAAC,QAAQ,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtD,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACxB,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;YAClC,KAAK,CAAC,IAAI,CAAC,OAAO,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC;IAED,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,UAAU,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;QAClB,KAAK,CAAC,IAAI,CAAC,SAAS,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,mCAAmC;IAEnD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,GAAW;IACnC,kDAAkD;IAClD,IACE,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC;QAClB,GAAG,CAAC,QAA
Q,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC;QACnB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EACjB,CAAC;QACD,+CAA+C;QAC/C,OAAO,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,GAAG,CAAC;IAChE,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "md-fetch",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Convert web pages to clean Markdown using fetch, readability, and turndown",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "github",
|
|
8
|
+
"url": "https://github.com/fairjm/md-fetch"
|
|
9
|
+
},
|
|
10
|
+
"bin": {
|
|
11
|
+
"md-fetch": "./dist/index.js",
|
|
12
|
+
"md-fetch-screen": "./dist/screen.js"
|
|
13
|
+
},
|
|
14
|
+
"exports": {
|
|
15
|
+
".": "./dist/index.js"
|
|
16
|
+
},
|
|
17
|
+
"scripts": {
|
|
18
|
+
"build": "tsc",
|
|
19
|
+
"dev": "tsx src/index.ts",
|
|
20
|
+
"test": "vitest",
|
|
21
|
+
"prepublishOnly": "pnpm run build"
|
|
22
|
+
},
|
|
23
|
+
"keywords": [
|
|
24
|
+
"markdown",
|
|
25
|
+
"cli",
|
|
26
|
+
"fetch",
|
|
27
|
+
"readability",
|
|
28
|
+
"web-scraping",
|
|
29
|
+
"html-to-markdown"
|
|
30
|
+
],
|
|
31
|
+
"author": {
|
|
32
|
+
"name": "fairjm",
|
|
33
|
+
"url": "https://bingowith.me"
|
|
34
|
+
},
|
|
35
|
+
"license": "MIT",
|
|
36
|
+
"engines": {
|
|
37
|
+
"node": ">=18.0.0"
|
|
38
|
+
},
|
|
39
|
+
"packageManager": "pnpm@9.0.0",
|
|
40
|
+
"dependencies": {
|
|
41
|
+
"@mozilla/readability": "^0.5.0",
|
|
42
|
+
"commander": "^12.0.0",
|
|
43
|
+
"jsdom": "^24.0.0",
|
|
44
|
+
"puppeteer-core": "^22.0.0",
|
|
45
|
+
"turndown": "^7.2.0",
|
|
46
|
+
"undici": "^7.16.0"
|
|
47
|
+
},
|
|
48
|
+
"devDependencies": {
|
|
49
|
+
"@types/jsdom": "^27.0.0",
|
|
50
|
+
"@types/node": "^20.0.0",
|
|
51
|
+
"@types/turndown": "^5.0.0",
|
|
52
|
+
"tsx": "^4.7.0",
|
|
53
|
+
"typescript": "^5.3.0",
|
|
54
|
+
"vitest": "^1.2.0"
|
|
55
|
+
}
|
|
56
|
+
}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: md-fetch
|
|
3
|
+
description: 'Convert web pages to Markdown or take screenshots. Use when: (1) Converting web pages to Markdown with YAML frontmatter, (2) Taking screenshots of web pages, (3) Processing SPA/dynamic content, (4) Batch processing URLs. Triggers on "convert webpage", "save article", "fetch content", "take screenshot", or when mentioning md-fetch.'
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# md-fetch
|
|
7
|
+
|
|
8
|
+
Convert web pages to Markdown or take high-quality screenshots.
|
|
9
|
+
|
|
10
|
+
## Two Tools
|
|
11
|
+
|
|
12
|
+
- **md-fetch** - Convert web pages to Markdown with YAML frontmatter
|
|
13
|
+
- **md-fetch-screen** - Take screenshots
|
|
14
|
+
|
|
15
|
+
## Common Usage
|
|
16
|
+
|
|
17
|
+
### Convert to Markdown
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Basic usage
|
|
21
|
+
md-fetch https://example.com -o article.md
|
|
22
|
+
|
|
23
|
+
# Browser mode for SPA/dynamic pages
|
|
24
|
+
md-fetch -b https://react-app.com -o app.md
|
|
25
|
+
|
|
26
|
+
# Custom CSS selector
|
|
27
|
+
md-fetch https://example.com -s "article.content" -o content.md
|
|
28
|
+
|
|
29
|
+
# Disable readability (keep full content)
|
|
30
|
+
md-fetch https://example.com -R -o full.md
|
|
31
|
+
|
|
32
|
+
# Multiple URLs
|
|
33
|
+
md-fetch https://site1.com https://site2.com
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Take Screenshots
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Full page screenshot
|
|
40
|
+
md-fetch-screen https://example.com
|
|
41
|
+
|
|
42
|
+
# Viewport-only (1440x900)
|
|
43
|
+
md-fetch-screen https://example.com --viewport -W 1440 -H 900
|
|
44
|
+
|
|
45
|
+
# High-DPI (2x scale for Retina)
|
|
46
|
+
md-fetch-screen https://example.com --scale 2
|
|
47
|
+
|
|
48
|
+
# Screenshot specific element
|
|
49
|
+
md-fetch-screen https://example.com --selector "#main"
|
|
50
|
+
|
|
51
|
+
# Hide unwanted elements
|
|
52
|
+
md-fetch-screen https://example.com --hide ".ad,.popup"
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Key Parameters
|
|
56
|
+
|
|
57
|
+
### md-fetch
|
|
58
|
+
- `-o, --output <file>` - Save to file (otherwise stdout)
|
|
59
|
+
- `-b, --browser` - Use headless browser for SPA pages
|
|
60
|
+
- `-R, --no-readability` - Keep full HTML content
|
|
61
|
+
- `-s, --selector <css>` - Custom CSS selector
|
|
62
|
+
- `--wait-until <event>` - Browser wait: `load|networkidle0|networkidle2`
|
|
63
|
+
- `--proxy <url>` - Proxy server
|
|
64
|
+
- `--verbose` - Verbose logging
|
|
65
|
+
|
|
66
|
+
### md-fetch-screen
|
|
67
|
+
- `--viewport` - Viewport-only screenshot (vs full page)
|
|
68
|
+
- `-W, --width <pixels>` - Viewport width (default: 1920)
|
|
69
|
+
- `-H, --height <pixels>` - Viewport height (default: 1080)
|
|
70
|
+
- `--scale <1|2|3>` - Device scale factor for high-DPI
|
|
71
|
+
- `--selector <css>` - Screenshot specific element
|
|
72
|
+
- `--hide <selectors>` - Hide elements (comma-separated)
|
|
73
|
+
- `--format <png|jpeg|webp>` - Image format
|
|
74
|
+
- `--delay <ms>` - Delay before screenshot
|
|
75
|
+
- `--output <dir>` - Output directory (default: current)
|
|
76
|
+
|
|
77
|
+
## Output Format
|
|
78
|
+
|
|
79
|
+
**Markdown** includes YAML frontmatter:
|
|
80
|
+
```markdown
|
|
81
|
+
---
|
|
82
|
+
title: "Page Title"
|
|
83
|
+
url: https://example.com
|
|
84
|
+
author: "Author Name"
|
|
85
|
+
publishedTime: 2024-01-01T00:00:00Z
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
# Content here...
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
**Screenshots** named as: `domain_path_timestamp.png`
|
|
92
|
+
|
|
93
|
+
## Common Scenarios
|
|
94
|
+
|
|
95
|
+
**SPA pages (React/Vue/Angular):**
|
|
96
|
+
```bash
|
|
97
|
+
md-fetch -b https://app.com --wait-until networkidle0 -o app.md
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**Specific content section:**
|
|
101
|
+
```bash
|
|
102
|
+
md-fetch https://blog.com/post -s "article.post-content" -o post.md
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
**High-quality screenshot without ads:**
|
|
106
|
+
```bash
|
|
107
|
+
md-fetch-screen https://site.com --scale 2 --hide ".ad,.banner"
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
**Using proxy:**
|
|
111
|
+
```bash
|
|
112
|
+
export HTTPS_PROXY=http://proxy.example.com:8080
|
|
113
|
+
md-fetch https://example.com -o output.md
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**Batch processing:**
|
|
117
|
+
```bash
|
|
118
|
+
for url in url1 url2 url3; do
|
|
119
|
+
md-fetch "$url" -o "${url##*/}.md"
|
|
120
|
+
done
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Reference
|
|
124
|
+
|
|
125
|
+
For complete CLI options and advanced usage, see [cli-reference.md](references/cli-reference.md).
|
|
126
|
+
|
|
127
|
+
## Tips
|
|
128
|
+
|
|
129
|
+
- Use `-b` (browser mode) only for SPA/dynamic pages
|
|
130
|
+
- Test CSS selectors in browser DevTools first
|
|
131
|
+
- `networkidle0` waits longest but is most reliable
|
|
132
|
+
- Add `--verbose` for debugging
|
|
133
|
+
- Screenshots auto-retry 3x with exponential backoff
|