md-fetch 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/AGENTS.md +212 -0
  2. package/LICENSE +21 -0
  3. package/README.md +449 -0
  4. package/README.zh-CN.md +449 -0
  5. package/dist/cli.d.ts +27 -0
  6. package/dist/cli.d.ts.map +1 -0
  7. package/dist/cli.js +158 -0
  8. package/dist/cli.js.map +1 -0
  9. package/dist/constants.d.ts +9 -0
  10. package/dist/constants.d.ts.map +1 -0
  11. package/dist/constants.js +15 -0
  12. package/dist/constants.js.map +1 -0
  13. package/dist/core/browser.d.ts +23 -0
  14. package/dist/core/browser.d.ts.map +1 -0
  15. package/dist/core/browser.js +125 -0
  16. package/dist/core/browser.js.map +1 -0
  17. package/dist/core/converter.d.ts +18 -0
  18. package/dist/core/converter.d.ts.map +1 -0
  19. package/dist/core/converter.js +74 -0
  20. package/dist/core/converter.js.map +1 -0
  21. package/dist/core/extractor.d.ts +28 -0
  22. package/dist/core/extractor.d.ts.map +1 -0
  23. package/dist/core/extractor.js +151 -0
  24. package/dist/core/extractor.js.map +1 -0
  25. package/dist/core/fetcher.d.ts +24 -0
  26. package/dist/core/fetcher.d.ts.map +1 -0
  27. package/dist/core/fetcher.js +111 -0
  28. package/dist/core/fetcher.js.map +1 -0
  29. package/dist/core/processor.d.ts +22 -0
  30. package/dist/core/processor.d.ts.map +1 -0
  31. package/dist/core/processor.js +104 -0
  32. package/dist/core/processor.js.map +1 -0
  33. package/dist/core/screenshotter.d.ts +31 -0
  34. package/dist/core/screenshotter.d.ts.map +1 -0
  35. package/dist/core/screenshotter.js +222 -0
  36. package/dist/core/screenshotter.js.map +1 -0
  37. package/dist/index.d.ts +3 -0
  38. package/dist/index.d.ts.map +1 -0
  39. package/dist/index.js +14 -0
  40. package/dist/index.js.map +1 -0
  41. package/dist/screen-cli.d.ts +26 -0
  42. package/dist/screen-cli.d.ts.map +1 -0
  43. package/dist/screen-cli.js +196 -0
  44. package/dist/screen-cli.js.map +1 -0
  45. package/dist/screen.d.ts +3 -0
  46. package/dist/screen.d.ts.map +1 -0
  47. package/dist/screen.js +14 -0
  48. package/dist/screen.js.map +1 -0
  49. package/dist/types/index.d.ts +151 -0
  50. package/dist/types/index.d.ts.map +1 -0
  51. package/dist/types/index.js +42 -0
  52. package/dist/types/index.js.map +1 -0
  53. package/dist/utils/filename-sanitizer.d.ts +38 -0
  54. package/dist/utils/filename-sanitizer.d.ts.map +1 -0
  55. package/dist/utils/filename-sanitizer.js +79 -0
  56. package/dist/utils/filename-sanitizer.js.map +1 -0
  57. package/dist/utils/frontmatter.d.ts +6 -0
  58. package/dist/utils/frontmatter.d.ts.map +1 -0
  59. package/dist/utils/frontmatter.js +65 -0
  60. package/dist/utils/frontmatter.js.map +1 -0
  61. package/package.json +56 -0
  62. package/skills/md-fetch/SKILL.md +133 -0
  63. package/skills/md-fetch/references/cli-reference.md +257 -0
  64. package/src/cli.ts +169 -0
  65. package/src/constants.ts +17 -0
  66. package/src/core/browser.ts +161 -0
  67. package/src/core/converter.ts +82 -0
  68. package/src/core/extractor.ts +172 -0
  69. package/src/core/fetcher.ts +143 -0
  70. package/src/core/processor.ts +124 -0
  71. package/src/core/screenshotter.ts +289 -0
  72. package/src/index.ts +15 -0
  73. package/src/screen-cli.ts +216 -0
  74. package/src/screen.ts +15 -0
  75. package/src/types/index.ts +227 -0
  76. package/src/utils/filename-sanitizer.ts +88 -0
  77. package/src/utils/frontmatter.ts +81 -0
  78. package/tsconfig.json +20 -0
package/dist/screen.js ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env node
2
+ import { ScreenCLI } from './screen-cli.js';
3
/**
 * Entry point of the screenshot command-line tool.
 *
 * Creates a ScreenCLI, passes it the raw process arguments and waits for it
 * to finish. Any exception or rejection is reported on stderr and the
 * process exits with status 1.
 */
async function main() {
    try {
        const cli = new ScreenCLI();
        await cli.run(process.argv);
    }
    catch (error) {
        // JavaScript allows throwing any value; only Error instances are
        // guaranteed to have a message. Fall back to String() so a thrown
        // string or object is not reported as "undefined".
        const reason = error instanceof Error ? error.message : String(error);
        console.error('Fatal error:', reason);
        process.exit(1);
    }
}
main();
14
+ //# sourceMappingURL=screen.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"screen.js","sourceRoot":"","sources":["../src/screen.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAE5C,KAAK,UAAU,IAAI;IACjB,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,SAAS,EAAE,CAAC;QAC5B,MAAM,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,cAAc,EAAG,KAAe,CAAC,OAAO,CAAC,CAAC;QACxD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC"}
@@ -0,0 +1,151 @@
1
+ import type { PuppeteerLifeCycleEvent } from 'puppeteer-core';
2
+ export interface FetchOptions {
3
+ headers?: Record<string, string>;
4
+ proxy?: string;
5
+ timeout?: number;
6
+ userAgent?: string;
7
+ }
8
+ export interface BrowserOptions {
9
+ executablePath?: string;
10
+ waitUntil?: PuppeteerLifeCycleEvent;
11
+ timeout?: number;
12
+ userAgent?: string;
13
+ proxy?: string;
14
+ headless?: boolean;
15
+ }
16
+ export interface ExtractOptions {
17
+ useReadability: boolean;
18
+ selector?: string;
19
+ }
20
+ export interface PageMetadata {
21
+ url: string;
22
+ title?: string;
23
+ description?: string;
24
+ author?: string;
25
+ publishedTime?: string;
26
+ modifiedTime?: string;
27
+ siteName?: string;
28
+ keywords?: string[];
29
+ image?: string;
30
+ lang?: string;
31
+ }
32
+ export interface ExtractedContent {
33
+ content: string;
34
+ metadata: PageMetadata;
35
+ }
36
+ export interface ConversionOptions {
37
+ headingStyle?: 'atx' | 'setext';
38
+ codeBlockStyle?: 'fenced' | 'indented';
39
+ bulletListMarker?: '-' | '+' | '*';
40
+ }
41
+ export interface ProcessOptions {
42
+ useBrowser: boolean;
43
+ useReadability: boolean;
44
+ selector?: string;
45
+ fetchOptions: FetchOptions;
46
+ browserOptions?: BrowserOptions;
47
+ conversionOptions?: ConversionOptions;
48
+ verbose?: boolean;
49
+ }
50
+ export interface FetchResult {
51
+ url: string;
52
+ markdown?: string;
53
+ error?: Error;
54
+ success: boolean;
55
+ }
56
+ export interface CLIOptions {
57
+ output?: string;
58
+ browser?: boolean;
59
+ browserPath?: string;
60
+ readability?: boolean;
61
+ selector?: string;
62
+ file?: string;
63
+ header?: string[];
64
+ proxy?: string;
65
+ timeout?: number;
66
+ config?: string;
67
+ userAgent?: string;
68
+ waitUntil?: string;
69
+ concurrent?: number;
70
+ verbose?: boolean;
71
+ }
72
+ export interface Config {
73
+ browser?: {
74
+ executablePath?: string;
75
+ waitUntil?: string;
76
+ };
77
+ fetch?: {
78
+ timeout?: number;
79
+ headers?: Record<string, string>;
80
+ proxy?: string;
81
+ };
82
+ conversion?: {
83
+ headingStyle?: 'atx' | 'setext';
84
+ codeBlockStyle?: 'fenced' | 'indented';
85
+ bulletListMarker?: '-' | '+' | '*';
86
+ };
87
+ defaults?: {
88
+ useReadability?: boolean;
89
+ concurrent?: number;
90
+ };
91
+ }
92
+ export declare class FetchError extends Error {
93
+ url: string;
94
+ statusCode?: number | undefined;
95
+ constructor(url: string, statusCode?: number | undefined, message?: string);
96
+ }
97
+ export declare class BrowserError extends Error {
98
+ url: string;
99
+ constructor(url: string, message: string);
100
+ }
101
+ export declare class ExtractionError extends Error {
102
+ url: string;
103
+ constructor(url: string, message: string);
104
+ }
105
+ export declare class ValidationError extends Error {
106
+ constructor(message: string);
107
+ }
108
+ export interface ScreenshotOptions {
109
+ fullPage: boolean;
110
+ width: number;
111
+ height: number;
112
+ deviceScaleFactor: number;
113
+ outputDir: string;
114
+ format: 'png' | 'jpeg' | 'webp';
115
+ quality?: number;
116
+ browserOptions: BrowserOptions;
117
+ delay?: number;
118
+ selector?: string;
119
+ hideSelectors?: string[];
120
+ verbose?: boolean;
121
+ }
122
+ export interface ScreenshotCLIOptions {
123
+ fullPage?: boolean;
124
+ viewport?: boolean;
125
+ width?: number;
126
+ height?: number;
127
+ scale?: number;
128
+ output?: string;
129
+ format?: string;
130
+ quality?: number;
131
+ browserPath?: string;
132
+ waitUntil?: string;
133
+ timeout?: number;
134
+ userAgent?: string;
135
+ proxy?: string;
136
+ delay?: number;
137
+ selector?: string;
138
+ hide?: string;
139
+ verbose?: boolean;
140
+ }
141
+ export interface ScreenshotResult {
142
+ url: string;
143
+ filepath?: string;
144
+ success: boolean;
145
+ error?: Error;
146
+ }
147
+ export declare class ScreenshotError extends Error {
148
+ url: string;
149
+ constructor(url: string, message: string);
150
+ }
151
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,gBAAgB,CAAC;AAG9D,MAAM,WAAW,YAAY;IAC3B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAGD,MAAM,WAAW,cAAc;IAC7B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,uBAAuB,CAAC;IACpC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAGD,MAAM,WAAW,cAAc;IAC7B,cAAc,EAAE,OAAO,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAGD,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,YAAY,CAAC;CACxB;AAGD,MAAM,WAAW,iBAAiB;IAChC,YAAY,CAAC,EAAE,KAAK,GAAG,QAAQ,CAAC;IAChC,cAAc,CAAC,EAAE,QAAQ,GAAG,UAAU,CAAC;IACvC,gBAAgB,CAAC,EAAE,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;CACpC;AAGD,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,OAAO,CAAC;IACpB,cAAc,EAAE,OAAO,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,YAAY,CAAC;IAC3B,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IACtC,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAGD,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,OAAO,EAAE,OAAO,CAAC;CAClB;AAGD,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,C
AAC;IACpB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAGD,MAAM,WAAW,MAAM;IACrB,OAAO,CAAC,EAAE;QACR,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,KAAK,CAAC,EAAE;QACN,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,CAAC;IACF,UAAU,CAAC,EAAE;QACX,YAAY,CAAC,EAAE,KAAK,GAAG,QAAQ,CAAC;QAChC,cAAc,CAAC,EAAE,QAAQ,GAAG,UAAU,CAAC;QACvC,gBAAgB,CAAC,EAAE,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;KACpC,CAAC;IACF,QAAQ,CAAC,EAAE;QACT,cAAc,CAAC,EAAE,OAAO,CAAC;QACzB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;CACH;AAGD,qBAAa,UAAW,SAAQ,KAAK;IAE1B,GAAG,EAAE,MAAM;IACX,UAAU,CAAC,EAAE,MAAM;gBADnB,GAAG,EAAE,MAAM,EACX,UAAU,CAAC,EAAE,MAAM,YAAA,EAC1B,OAAO,CAAC,EAAE,MAAM;CAKnB;AAED,qBAAa,YAAa,SAAQ,KAAK;IAE5B,GAAG,EAAE,MAAM;gBAAX,GAAG,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM;CAKlB;AAED,qBAAa,eAAgB,SAAQ,KAAK;IAE/B,GAAG,EAAE,MAAM;gBAAX,GAAG,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM;CAKlB;AAED,qBAAa,eAAgB,SAAQ,KAAK;gBAC5B,OAAO,EAAE,MAAM;CAI5B;AAKD,MAAM,WAAW,iBAAiB;IAEhC,QAAQ,EAAE,OAAO,CAAC;IAGlB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,MAAM,CAAC;IAG1B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,KAAK,GAAG,MAAM,GAAG,MAAM,CAAC;IAChC,OAAO,CAAC,EAAE,MAAM,CAAC;IAGjB,cAAc,EAAE,cAAc,CAAC;IAG/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IAGzB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAGD,MAAM,WAAW,oBAAoB;IAEnC,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IAGf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IAGjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IAGf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IAGd,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAGD,MAAM,WAAW,gBAAgB;IAC/B,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,O
AAO,CAAC;IACjB,KAAK,CAAC,EAAE,KAAK,CAAC;CACf;AAED,qBAAa,eAAgB,SAAQ,KAAK;IAE/B,GAAG,EAAE,MAAM;gBAAX,GAAG,EAAE,MAAM,EAClB,OAAO,EAAE,MAAM;CAKlB"}
@@ -0,0 +1,42 @@
1
+ // 自定义错误类型
2
/**
 * Error describing a failed fetch of a URL.
 *
 * Instances expose the `url` and `statusCode` passed to the constructor
 * (the status code is presumably the HTTP status; it may be undefined).
 * When no message is supplied the message defaults to
 * "Failed to fetch <url>".
 */
export class FetchError extends Error {
    url;
    statusCode;
    constructor(url, statusCode, message) {
        super(message ? message : `Failed to fetch ${url}`);
        // Attach the context fields and the class-specific error name.
        Object.assign(this, { url, statusCode, name: 'FetchError' });
    }
}
12
/**
 * Error type named "BrowserError" that carries the URL it relates to.
 *
 * The `url` argument is stored on the instance; `message` is forwarded to
 * the base Error unchanged.
 */
export class BrowserError extends Error {
    url;
    constructor(url, message) {
        super(message);
        // Attach the URL and the class-specific error name.
        Object.assign(this, { url, name: 'BrowserError' });
    }
}
20
/**
 * Error type named "ExtractionError" that carries the URL it relates to.
 *
 * The `url` argument is stored on the instance; `message` is forwarded to
 * the base Error unchanged.
 */
export class ExtractionError extends Error {
    url;
    constructor(url, message) {
        super(message);
        // Attach the URL and the class-specific error name.
        Object.assign(this, { url, name: 'ExtractionError' });
    }
}
28
/**
 * Error type named "ValidationError".
 *
 * Carries only a message; unlike the other error classes in this module it
 * stores no URL.
 */
export class ValidationError extends Error {
    constructor(message) {
        super(message);
        // Override the inherited "Error" name with the class-specific one.
        Object.assign(this, { name: 'ValidationError' });
    }
}
34
/**
 * Error type named "ScreenshotError" that carries the URL it relates to.
 *
 * The `url` argument is stored on the instance; `message` is forwarded to
 * the base Error unchanged.
 */
export class ScreenshotError extends Error {
    url;
    constructor(url, message) {
        super(message);
        // Attach the URL and the class-specific error name.
        Object.assign(this, { url, name: 'ScreenshotError' });
    }
}
42
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAgHA,UAAU;AACV,MAAM,OAAO,UAAW,SAAQ,KAAK;IAE1B;IACA;IAFT,YACS,GAAW,EACX,UAAmB,EAC1B,OAAgB;QAEhB,KAAK,CAAC,OAAO,IAAI,mBAAmB,GAAG,EAAE,CAAC,CAAC;QAJpC,QAAG,GAAH,GAAG,CAAQ;QACX,eAAU,GAAV,UAAU,CAAS;QAI1B,IAAI,CAAC,IAAI,GAAG,YAAY,CAAC;IAC3B,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,KAAK;IAE5B;IADT,YACS,GAAW,EAClB,OAAe;QAEf,KAAK,CAAC,OAAO,CAAC,CAAC;QAHR,QAAG,GAAH,GAAG,CAAQ;QAIlB,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,eAAgB,SAAQ,KAAK;IAE/B;IADT,YACS,GAAW,EAClB,OAAe;QAEf,KAAK,CAAC,OAAO,CAAC,CAAC;QAHR,QAAG,GAAH,GAAG,CAAQ;QAIlB,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAC;IAChC,CAAC;CACF;AAED,MAAM,OAAO,eAAgB,SAAQ,KAAK;IACxC,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAC;IAChC,CAAC;CACF;AAqED,MAAM,OAAO,eAAgB,SAAQ,KAAK;IAE/B;IADT,YACS,GAAW,EAClB,OAAe;QAEf,KAAK,CAAC,OAAO,CAAC,CAAC;QAHR,QAAG,GAAH,GAAG,CAAQ;QAIlB,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAC;IAChC,CAAC;CACF"}
@@ -0,0 +1,38 @@
1
+ /**
2
+ * 文件名安全化工具
3
+ * 用于从 URL 生成安全的文件名
4
+ */
5
+ /**
6
+ * 从 URL 生成安全的文件名
7
+ * 格式: <URL安全化前50字符>_<年月日时分秒>.png
8
+ *
9
+ * @param url - 原始URL
10
+ * @param extension - 文件扩展名(默认 'png')
11
+ * @returns 安全的文件名
12
+ *
13
+ * @example
14
+ * generateScreenshotFilename('https://github.com/user/repo/issues/123')
15
+ * // => 'github.com_user_repo_issues_123_20251229143025.png'
16
+ */
17
+ export declare function generateScreenshotFilename(url: string, extension?: string): string;
18
+ /**
19
+ * 将 URL 转换为安全的文件名前缀
20
+ * - 保留域名
21
+ * - 替换非法字符为下划线
22
+ * - 截取前50个字符
23
+ *
24
+ * @param url - 原始URL
25
+ * @returns 安全化后的文件名前缀
26
+ */
27
+ export declare function sanitizeUrlForFilename(url: string): string;
28
+ /**
29
+ * 生成时间戳字符串
30
+ * 格式: YYYYMMDDHHMMSS
31
+ *
32
+ * @returns 时间戳字符串
33
+ *
34
+ * @example
35
+ * generateTimestamp() // => '20251229143025'
36
+ */
37
+ export declare function generateTimestamp(): string;
38
+ //# sourceMappingURL=filename-sanitizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"filename-sanitizer.d.ts","sourceRoot":"","sources":["../../src/utils/filename-sanitizer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;;;;;;;;;GAWG;AACH,wBAAgB,0BAA0B,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,GAAE,MAAc,GAAG,MAAM,CAKzF;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAgC1D;AAED;;;;;;;;GAQG;AACH,wBAAgB,iBAAiB,IAAI,MAAM,CAW1C"}
@@ -0,0 +1,79 @@
1
+ /**
2
+ * 文件名安全化工具
3
+ * 用于从 URL 生成安全的文件名
4
+ */
5
/**
 * Build a screenshot file name from a URL.
 *
 * The result has the shape `<sanitized URL prefix>_<timestamp>.<extension>`,
 * where the prefix comes from sanitizeUrlForFilename and the timestamp from
 * generateTimestamp.
 *
 * @param url - The original URL.
 * @param extension - File extension without the dot (defaults to 'png').
 * @returns The generated file name.
 *
 * @example
 * generateScreenshotFilename('https://github.com/user/repo/issues/123')
 * // => 'github.com_user_repo_issues_123_20251229143025.png'
 */
export function generateScreenshotFilename(url, extension = 'png') {
    const stem = [sanitizeUrlForFilename(url), generateTimestamp()].join('_');
    return `${stem}.${extension}`;
}
22
/**
 * Turn a URL into a file-name-safe prefix.
 *
 * The host name, path and query string are concatenated, characters that
 * are illegal in file names are replaced by underscores, and the result is
 * cut to at most 50 characters. The dots of the host name are kept.
 *
 * @param url - The original URL.
 * @returns The sanitized prefix, or 'screenshot' when the URL cannot be
 *          parsed or nothing is left after sanitizing.
 */
export function sanitizeUrlForFilename(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        // Unparseable URL: fall back to the default name.
        return 'screenshot';
    }
    // Rewrite rules, applied strictly in this order.
    const rules = [
        // Characters that are illegal in file names become underscores.
        [/[<>:"/\\|?*\x00-\x1F]/g, '_'],
        // Runs of dots collapse to one dot.
        [/\.{2,}/g, '.'],
        // Whitespace becomes an underscore.
        [/\s+/g, '_'],
        // Runs of slashes/underscores collapse to one underscore.
        [/[/_]+/g, '_'],
        // Strip leading and trailing dots/underscores.
        [/^[._]+|[._]+$/g, ''],
    ];
    const raw = `${parsed.hostname}${parsed.pathname}${parsed.search}`;
    const sanitized = rules.reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), raw);
    // Keep the first 50 characters; the cut may expose a trailing dot or
    // underscore, so strip those again.
    const prefix = sanitized.slice(0, 50).replace(/[._]+$/, '');
    return prefix === '' ? 'screenshot' : prefix;
}
60
/**
 * Produce a compact timestamp for the current local time.
 *
 * Format: YYYYMMDDHHMMSS (month, day, hours, minutes and seconds are
 * zero-padded to two digits).
 *
 * @returns The timestamp string.
 *
 * @example
 * generateTimestamp() // => '20251229143025'
 */
export function generateTimestamp() {
    const current = new Date();
    const twoDigits = (value) => String(value).padStart(2, '0');
    return [
        String(current.getFullYear()),
        twoDigits(current.getMonth() + 1), // getMonth() is zero-based
        twoDigits(current.getDate()),
        twoDigits(current.getHours()),
        twoDigits(current.getMinutes()),
        twoDigits(current.getSeconds()),
    ].join('');
}
79
+ //# sourceMappingURL=filename-sanitizer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"filename-sanitizer.js","sourceRoot":"","sources":["../../src/utils/filename-sanitizer.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,0BAA0B,CAAC,GAAW,EAAE,YAAoB,KAAK;IAC/E,MAAM,QAAQ,GAAG,sBAAsB,CAAC,GAAG,CAAC,CAAC;IAC7C,MAAM,SAAS,GAAG,iBAAiB,EAAE,CAAC;IAEtC,OAAO,GAAG,QAAQ,IAAI,SAAS,IAAI,SAAS,EAAE,CAAC;AACjD,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,sBAAsB,CAAC,GAAW;IAChD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAE5B,4BAA4B;QAC5B,IAAI,QAAQ,GAAG,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC;QAEjE,QAAQ;QACR,MAAM,QAAQ,GAAG,QAAQ;YACvB,gBAAgB;aACf,OAAO,CAAC,wBAAwB,EAAE,GAAG,CAAC;YACvC,uBAAuB;aACtB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;YACxB,WAAW;aACV,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;YACrB,qBAAqB;aACpB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;YACvB,gBAAgB;aACf,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC;QAEjC,WAAW;QACX,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAExC,aAAa;QACb,MAAM,OAAO,GAAG,SAAS,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAEhD,gBAAgB;QAChB,OAAO,OAAO,IAAI,YAAY,CAAC;IACjC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,iBAAiB;QACjB,OAAO,YAAY,CAAC;IACtB,CAAC;AACH,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,iBAAiB;IAC/B,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;IAEvB,MAAM,IAAI,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;IAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC1D,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACtD,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC1D,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAE1D,OAAO,GAAG,IAAI,GAAG,KAAK,GAAG,GAAG,GAAG,KAAK,GAAG,OAAO,GAAG,OAAO,EAAE,CAAC;AAC7D,CAAC"}
@@ -0,0 +1,6 @@
1
+ import type { PageMetadata } from '../types/index.js';
2
+ /**
3
+ * Generate YAML frontmatter from page metadata
4
+ */
5
+ export declare function generateFrontmatter(metadata: PageMetadata): string;
6
+ //# sourceMappingURL=frontmatter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"frontmatter.d.ts","sourceRoot":"","sources":["../../src/utils/frontmatter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAEtD;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,YAAY,GAAG,MAAM,CAmDlE"}
@@ -0,0 +1,65 @@
1
/**
 * Format a value for the fields that are emitted without the free-text
 * escaper (url, publishedTime, modifiedTime, image, lang).
 *
 * Values that are safe as a YAML plain scalar are returned unchanged, so
 * ordinary URLs and ISO timestamps keep their unquoted form. Values that
 * contain line breaks or control characters, a ": " or " #" sequence, a
 * leading YAML indicator, or leading/trailing whitespace are emitted as a
 * double-quoted scalar with backslashes, quotes and control characters
 * escaped. This keeps a single value from spilling onto extra lines or
 * closing the frontmatter early.
 */
function quoteRawYamlValue(value) {
    const text = String(value);
    const isUnsafe = /[\x00-\x1F\x7F]/.test(text) // line breaks / control characters
        || /:(?:\s|$)/.test(text) // would read as a nested "key: value"
        || /\s#/.test(text) // would start a YAML comment
        || /^[\s\-?!&*|>@`%#\[\]{}"',]/.test(text) // leading indicator or whitespace
        || /\s$/.test(text); // trailing whitespace is lost in plain scalars
    if (!isUnsafe) {
        return text;
    }
    const named = { '\n': '\\n', '\r': '\\r', '\t': '\\t' };
    const escaped = text
        .replace(/\\/g, '\\\\')
        .replace(/"/g, '\\"')
        .replace(/[\x00-\x1F\x7F]/g, (ch) => named[ch] ?? `\\x${ch.charCodeAt(0).toString(16).padStart(2, '0')}`);
    return `"${escaped}"`;
}
/**
 * Generate YAML frontmatter from page metadata.
 *
 * Fields are written in a fixed order (title, url, description, author,
 * siteName, publishedTime, modifiedTime, keywords, image, lang); empty or
 * missing fields are skipped. Free-text fields go through escapeYamlString.
 * The remaining fields are written unquoted when that is safe and quoted
 * otherwise. Metadata presumably originates from the fetched page, so no
 * value is trusted to be a single well-formed line.
 *
 * @param metadata - Page metadata (see PageMetadata).
 * @returns The frontmatter block: opening and closing `---` lines followed
 *          by one trailing newline.
 */
export function generateFrontmatter(metadata) {
    const lines = ['---'];
    // Add fields in a logical order
    if (metadata.title) {
        lines.push(`title: ${escapeYamlString(metadata.title)}`);
    }
    if (metadata.url) {
        lines.push(`url: ${quoteRawYamlValue(metadata.url)}`);
    }
    if (metadata.description) {
        lines.push(`description: ${escapeYamlString(metadata.description)}`);
    }
    if (metadata.author) {
        lines.push(`author: ${escapeYamlString(metadata.author)}`);
    }
    if (metadata.siteName) {
        lines.push(`siteName: ${escapeYamlString(metadata.siteName)}`);
    }
    if (metadata.publishedTime) {
        lines.push(`publishedTime: ${quoteRawYamlValue(metadata.publishedTime)}`);
    }
    if (metadata.modifiedTime) {
        lines.push(`modifiedTime: ${quoteRawYamlValue(metadata.modifiedTime)}`);
    }
    if (metadata.keywords && metadata.keywords.length > 0) {
        lines.push('keywords:');
        for (const keyword of metadata.keywords) {
            lines.push(`  - ${escapeYamlString(keyword)}`);
        }
    }
    if (metadata.image) {
        lines.push(`image: ${quoteRawYamlValue(metadata.image)}`);
    }
    if (metadata.lang) {
        lines.push(`lang: ${quoteRawYamlValue(metadata.lang)}`);
    }
    lines.push('---');
    lines.push(''); // Add blank line after frontmatter
    return lines.join('\n');
}
44
/**
 * Render a string as a single-line YAML scalar.
 *
 * The string is returned unchanged when it is safe as a plain scalar.
 * Otherwise it is wrapped in double quotes with backslashes, double quotes
 * and control characters (including line breaks) escaped, so the value
 * always occupies exactly one line.
 *
 * Quoting is applied when the string:
 * - is empty,
 * - contains `:`, `#`, `[`, `]`, `{`, `}`, a quote character, or any
 *   control character such as a newline or tab,
 * - starts or ends with whitespace,
 * - starts with a YAML indicator ("- ", "? ", `!`, `&`, `*`, `|`, `>`,
 *   `@`, a backtick or `%`),
 * - would be read as a non-string (null, boolean or number).
 *
 * @param str - The text to emit.
 * @returns The plain or double-quoted YAML representation.
 */
function escapeYamlString(str) {
    const mustQuote = str === ''
        || /[:#\[\]{}"']/.test(str) // indicator characters anywhere
        || /[\x00-\x1F\x7F]/.test(str) // line breaks and other control characters
        || /^\s|\s$/.test(str) // leading/trailing whitespace is not preserved unquoted
        || /^[-?](?:\s|$)/.test(str) // sequence entry / complex key marker
        || /^[!&*|>@`%]/.test(str) // tag, anchor, alias, block scalar, reserved, directive
        || /^(?:~|null|true|false|yes|no|on|off)$/i.test(str) // null / boolean look-alikes
        || /^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:e[-+]?\d+)?$/i.test(str) // decimal numbers
        || /^0[xo][0-9a-f]+$/i.test(str) // hex / octal numbers
        || /^[-+]?\.(?:inf|nan)$/i.test(str); // special floats
    if (!mustQuote) {
        return str;
    }
    const named = { '\n': '\\n', '\r': '\\r', '\t': '\\t' };
    // Backslashes first, so the escapes added afterwards are not doubled.
    const escaped = str
        .replace(/\\/g, '\\\\')
        .replace(/"/g, '\\"')
        .replace(/[\x00-\x1F\x7F]/g, (ch) => named[ch] ?? `\\x${ch.charCodeAt(0).toString(16).padStart(2, '0')}`);
    return `"${escaped}"`;
}
65
+ //# sourceMappingURL=frontmatter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"frontmatter.js","sourceRoot":"","sources":["../../src/utils/frontmatter.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,QAAsB;IACxD,MAAM,KAAK,GAAa,CAAC,KAAK,CAAC,CAAC;IAEhC,gCAAgC;IAChC,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,UAAU,gBAAgB,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC3D,CAAC;IAED,IAAI,QAAQ,CAAC,GAAG,EAAE,CAAC;QACjB,KAAK,CAAC,IAAI,CAAC,QAAQ,QAAQ,CAAC,GAAG,EAAE,CAAC,CAAC;IACrC,CAAC;IAED,IAAI,QAAQ,CAAC,WAAW,EAAE,CAAC;QACzB,KAAK,CAAC,IAAI,CAAC,gBAAgB,gBAAgB,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;IACvE,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;QACpB,KAAK,CAAC,IAAI,CAAC,WAAW,gBAAgB,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC,aAAa,gBAAgB,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IACjE,CAAC;IAED,IAAI,QAAQ,CAAC,aAAa,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,CAAC,aAAa,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,IAAI,QAAQ,CAAC,YAAY,EAAE,CAAC;QAC1B,KAAK,CAAC,IAAI,CAAC,iBAAiB,QAAQ,CAAC,YAAY,EAAE,CAAC,CAAC;IACvD,CAAC;IAED,IAAI,QAAQ,CAAC,QAAQ,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtD,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACxB,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;YAClC,KAAK,CAAC,IAAI,CAAC,OAAO,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC;IAED,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,UAAU,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;QAClB,KAAK,CAAC,IAAI,CAAC,SAAS,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,mCAAmC;IAEnD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,GAAW;IACnC,kDAAkD;IAClD,IACE,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC;QAClB,GAAG,CAAC,Q
AAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC;QACjB,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC;QACnB,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EACjB,CAAC;QACD,+CAA+C;QAC/C,OAAO,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,GAAG,CAAC;IAChE,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
package/package.json ADDED
@@ -0,0 +1,56 @@
1
+ {
2
+ "name": "md-fetch",
3
+ "version": "1.0.0",
4
+ "description": "Convert web pages to clean Markdown using fetch, readability, and turndown",
5
+ "type": "module",
6
+ "repository": {
7
+ "type": "github",
8
+ "url": "https://github.com/fairjm/md-fetch"
9
+ },
10
+ "bin": {
11
+ "md-fetch": "./dist/index.js",
12
+ "md-fetch-screen": "./dist/screen.js"
13
+ },
14
+ "exports": {
15
+ ".": "./dist/index.js"
16
+ },
17
+ "scripts": {
18
+ "build": "tsc",
19
+ "dev": "tsx src/index.ts",
20
+ "test": "vitest",
21
+ "prepublishOnly": "pnpm run build"
22
+ },
23
+ "keywords": [
24
+ "markdown",
25
+ "cli",
26
+ "fetch",
27
+ "readability",
28
+ "web-scraping",
29
+ "html-to-markdown"
30
+ ],
31
+ "author": {
32
+ "name": "fairjm",
33
+ "url": "https://bingowith.me"
34
+ },
35
+ "license": "MIT",
36
+ "engines": {
37
+ "node": ">=18.0.0"
38
+ },
39
+ "packageManager": "pnpm@9.0.0",
40
+ "dependencies": {
41
+ "@mozilla/readability": "^0.5.0",
42
+ "commander": "^12.0.0",
43
+ "jsdom": "^24.0.0",
44
+ "puppeteer-core": "^22.0.0",
45
+ "turndown": "^7.2.0",
46
+ "undici": "^7.16.0"
47
+ },
48
+ "devDependencies": {
49
+ "@types/jsdom": "^27.0.0",
50
+ "@types/node": "^20.0.0",
51
+ "@types/turndown": "^5.0.0",
52
+ "tsx": "^4.7.0",
53
+ "typescript": "^5.3.0",
54
+ "vitest": "^1.2.0"
55
+ }
56
+ }
@@ -0,0 +1,133 @@
1
+ ---
2
+ name: md-fetch
3
+ description: 'Convert web pages to Markdown or take screenshots. Use when: (1) Converting web pages to Markdown with YAML frontmatter, (2) Taking screenshots of web pages, (3) Processing SPA/dynamic content, (4) Batch processing URLs. Triggers on "convert webpage", "save article", "fetch content", "take screenshot", or when mentioning md-fetch.'
4
+ ---
5
+
6
+ # md-fetch
7
+
8
+ Convert web pages to Markdown or take high-quality screenshots.
9
+
10
+ ## Two Tools
11
+
12
+ - **md-fetch** - Convert web pages to Markdown with YAML frontmatter
13
+ - **md-fetch-screen** - Take screenshots
14
+
15
+ ## Common Usage
16
+
17
+ ### Convert to Markdown
18
+
19
+ ```bash
20
+ # Basic usage
21
+ md-fetch https://example.com -o article.md
22
+
23
+ # Browser mode for SPA/dynamic pages
24
+ md-fetch -b https://react-app.com -o app.md
25
+
26
+ # Custom CSS selector
27
+ md-fetch https://example.com -s "article.content" -o content.md
28
+
29
+ # Disable readability (keep full content)
30
+ md-fetch https://example.com -R -o full.md
31
+
32
+ # Multiple URLs
33
+ md-fetch https://site1.com https://site2.com
34
+ ```
35
+
36
+ ### Take Screenshots
37
+
38
+ ```bash
39
+ # Full page screenshot
40
+ md-fetch-screen https://example.com
41
+
42
+ # Viewport-only (1440x900)
43
+ md-fetch-screen https://example.com --viewport -W 1440 -H 900
44
+
45
+ # High-DPI (2x scale for Retina)
46
+ md-fetch-screen https://example.com --scale 2
47
+
48
+ # Screenshot specific element
49
+ md-fetch-screen https://example.com --selector "#main"
50
+
51
+ # Hide unwanted elements
52
+ md-fetch-screen https://example.com --hide ".ad,.popup"
53
+ ```
54
+
55
+ ## Key Parameters
56
+
57
+ ### md-fetch
58
+ - `-o, --output <file>` - Save to file (otherwise stdout)
59
+ - `-b, --browser` - Use headless browser for SPA pages
60
+ - `-R, --no-readability` - Keep full HTML content
61
+ - `-s, --selector <css>` - Custom CSS selector
62
+ - `--wait-until <event>` - Browser wait: `load|networkidle0|networkidle2`
63
+ - `--proxy <url>` - Proxy server
64
+ - `--verbose` - Verbose logging
65
+
66
+ ### md-fetch-screen
67
+ - `--viewport` - Viewport-only screenshot (vs full page)
68
+ - `-W, --width <pixels>` - Viewport width (default: 1920)
69
+ - `-H, --height <pixels>` - Viewport height (default: 1080)
70
+ - `--scale <1|2|3>` - Device scale factor for high-DPI
71
+ - `--selector <css>` - Screenshot specific element
72
+ - `--hide <selectors>` - Hide elements (comma-separated)
73
+ - `--format <png|jpeg|webp>` - Image format
74
+ - `--delay <ms>` - Delay before screenshot
75
+ - `--output <dir>` - Output directory (default: current)
76
+
77
+ ## Output Format
78
+
79
+ **Markdown** includes YAML frontmatter:
80
+ ```markdown
81
+ ---
82
+ title: "Page Title"
83
+ url: https://example.com
84
+ author: "Author Name"
85
+ publishedTime: 2024-01-01T00:00:00Z
86
+ ---
87
+
88
+ # Content here...
89
+ ```
90
+
91
+ **Screenshots** named as: `domain_path_timestamp.png`
92
+
93
+ ## Common Scenarios
94
+
95
+ **SPA pages (React/Vue/Angular):**
96
+ ```bash
97
+ md-fetch -b https://app.com --wait-until networkidle0 -o app.md
98
+ ```
99
+
100
+ **Specific content section:**
101
+ ```bash
102
+ md-fetch https://blog.com/post -s "article.post-content" -o post.md
103
+ ```
104
+
105
+ **High-quality screenshot without ads:**
106
+ ```bash
107
+ md-fetch-screen https://site.com --scale 2 --hide ".ad,.banner"
108
+ ```
109
+
110
+ **Using proxy:**
111
+ ```bash
112
+ export HTTPS_PROXY=http://proxy.example.com:8080
113
+ md-fetch https://example.com -o output.md
114
+ ```
115
+
116
+ **Batch processing:**
117
+ ```bash
118
+ for url in url1 url2 url3; do
119
+ md-fetch "$url" -o "${url##*/}.md"
120
+ done
121
+ ```
122
+
123
+ ## Reference
124
+
125
+ For complete CLI options and advanced usage, see [cli-reference.md](references/cli-reference.md).
126
+
127
+ ## Tips
128
+
129
+ - Use `-b` (browser mode) only for SPA/dynamic pages
130
+ - Test CSS selectors in browser DevTools first
131
+ - `networkidle0` waits longest but is most reliable
132
+ - Add `--verbose` for debugging
133
+ - Screenshots auto-retry 3x with exponential backoff