@lyhue1991/webfetch-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,118 @@
1
+ # webfetch-cli
2
+
3
+ 网页内容抓取工具,支持将网页转换为 Markdown、文本或 HTML 格式。可作为 CLI 工具或 OpenCode SKILL 使用。
4
+
5
+ ## 安装
6
+
7
+ ### 方式一:作为 SKILL 安装(推荐)
8
+
9
+ ```bash
10
+ npx skills add lyhue1991/webfetch-cli
11
+ ```
12
+
13
+ ### 方式二:作为 CLI 工具安装
14
+
15
+ ```bash
16
+ # 使用 npx 直接运行
17
+ npx @lyhue1991/webfetch-cli https://example.com
18
+
19
+ # 或全局安装
20
+ npm install -g @lyhue1991/webfetch-cli
21
+ webfetch https://example.com
22
+ ```
23
+
24
+ ## 使用方法
25
+
26
+ ```bash
27
+ webfetch <url> [options]
28
+ ```
29
+
30
+ ### 参数说明
31
+
32
+ | 参数 | 简写 | 说明 | 默认值 |
33
+ |------|------|------|--------|
34
+ | `--format` | `-f` | 输出格式:markdown, text, html | markdown |
35
+ | `--timeout` | `-t` | 超时时间(秒),最大 120 | 30 |
36
+ | `--output` | `-o` | 保存到指定文件 | - |
37
+ | `--quiet` | `-q` | 静默模式,仅输出内容 | false |
38
+ | `--proxy` | | 代理服务器地址 | - |
39
+ | `--insecure` | | 跳过 TLS 证书验证 | false |
40
+ | `--help` | `-h` | 显示帮助 | - |
41
+ | `--version` | `-v` | 显示版本 | - |
42
+
43
+ ### 示例
44
+
45
+ ```bash
46
+ # 抓取并转换为 Markdown(默认)
47
+ webfetch https://example.com
48
+
49
+ # 输出纯文本
50
+ webfetch https://example.com -f text
51
+
52
+ # 保存到文件
53
+ webfetch https://example.com -o output.md
54
+
55
+ # 静默模式(适合管道)
56
+ webfetch https://example.com -q > content.md
57
+
58
+ # 自定义超时
59
+ webfetch https://example.com --timeout 60
60
+
61
+ # 使用代理
62
+ webfetch https://example.com --proxy http://proxy:8080
63
+
64
+ # 跳过证书验证(自签名证书场景)
65
+ webfetch https://example.com --insecure
66
+
67
+ # 抓取微信公众号文章
68
+ webfetch "https://mp.weixin.qq.com/s/xxx" -o article.md --insecure
69
+ ```
70
+
71
+ ## 代理支持
72
+
73
+ webfetch 自动读取环境变量代理配置:
74
+
75
+ - `HTTP_PROXY` / `http_proxy` - HTTP 请求代理
76
+ - `HTTPS_PROXY` / `https_proxy` - HTTPS 请求代理
77
+ - `NO_PROXY` / `no_proxy` - 跳过代理的主机列表
78
+
79
+ ```bash
80
+ # 通过环境变量设置代理
81
+ export HTTPS_PROXY=http://proxy.example.com:8080
82
+ webfetch https://example.com
83
+
84
+ # 命令行覆盖代理
85
+ webfetch https://example.com --proxy http://custom-proxy:8080
86
+
87
+ # 禁用本次请求的代理
88
+ webfetch https://example.com --proxy ""
89
+
90
+ # 处理代理证书问题
91
+ webfetch https://example.com --insecure
92
+ ```
93
+
94
+ ## 功能特性
95
+
96
+ - **多格式输出** - 转换 HTML 为 Markdown(默认)、纯文本或保留原始 HTML
97
+ - **智能内容处理** - 自动检测 Content-Type,正确处理非 HTML 响应
98
+ - **反爬虫策略** - Chrome UA 伪装,Cloudflare 挑战自动重试
99
+ - **安全限制** - 最大 5MB 响应限制,可配置超时
100
+ - **清洁输出** - 移除 script、style 等非内容元素
101
+ - **代理支持** - 自动环境变量检测,支持手动覆盖
102
+
103
+ ## 退出码
104
+
105
+ | 代码 | 说明 |
106
+ |------|------|
107
+ | 0 | 成功 |
108
+ | 1 | 用户错误(无效 URL、参数错误) |
109
+ | 2 | 网络错误(超时、DNS 解析失败) |
110
+ | 3 | 服务器错误(4xx、5xx 响应) |
111
+
112
+ ## 系统要求
113
+
114
+ - Node.js >= 18.0.0
115
+
116
+ ## 许可证
117
+
118
+ MIT
@@ -0,0 +1,4 @@
1
+ import { WebfetchOptions, WebfetchResult } from './types.js';
2
+ export declare function validateUrl(url: string): void;
3
+ export declare function webfetch(options: WebfetchOptions): Promise<WebfetchResult>;
4
+ //# sourceMappingURL=fetch.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch.d.ts","sourceRoot":"","sources":["../src/fetch.ts"],"names":[],"mappings":"AACA,OAAO,EACL,eAAe,EACf,cAAc,EAGf,MAAM,YAAY,CAAA;AAoBnB,wBAAgB,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAI7C;AAED,wBAAsB,QAAQ,CAAC,OAAO,EAAE,eAAe,GAAG,OAAO,CAAC,cAAc,CAAC,CA+FhF"}
package/dist/fetch.js ADDED
@@ -0,0 +1,110 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.validateUrl = validateUrl;
4
+ exports.webfetch = webfetch;
5
+ const undici_1 = require("undici");
6
+ const types_js_1 = require("./types.js");
7
+ const proxy_js_1 = require("./proxy.js");
8
+ const CHROME_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36';
9
+ const HONEST_USER_AGENT = 'webfetch-cli';
10
/**
 * Build the `Accept` request header for the requested output format.
 *
 * Each known format ranks its own MIME type highest and lists the other
 * textual types as lower-priority fallbacks. Any other value gets a generic
 * browser-like header.
 *
 * @param {string} format - Requested output format ('markdown', 'text' or 'html').
 * @returns {string} The value to send in the `Accept` header.
 */
function buildAcceptHeader(format) {
    // A Map (rather than a plain object) so that names such as "toString"
    // can never resolve to an inherited property.
    const acceptByFormat = new Map([
        ['markdown', 'text/markdown;q=1.0, text/x-markdown;q=0.9, text/plain;q=0.8, text/html;q=0.7, */*;q=0.1'],
        ['text', 'text/plain;q=1.0, text/markdown;q=0.9, text/html;q=0.8, */*;q=0.1'],
        ['html', 'text/html;q=1.0, application/xhtml+xml;q=0.9, text/plain;q=0.8, text/markdown;q=0.7, */*;q=0.1'],
    ]);
    const preferred = acceptByFormat.get(format);
    if (preferred !== undefined) {
        return preferred;
    }
    return 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
}
22
/**
 * Ensure the URL uses a scheme this tool can fetch.
 *
 * Only a prefix check is performed; the rest of the URL is not parsed here.
 *
 * @param {string} url - URL supplied by the caller.
 * @returns {void}
 * @throws {Error} When the URL starts with neither "http://" nor "https://".
 */
function validateUrl(url) {
    const hasAllowedScheme = ['http://', 'https://'].some((prefix) => url.startsWith(prefix));
    if (hasAllowedScheme) {
        return;
    }
    throw new Error('URL must start with http:// or https://');
}
27
/**
 * Flatten an error and its `cause` chain into one searchable string.
 *
 * undici's fetch typically rejects with a generic "fetch failed" error and
 * carries the real reason (message and `code`, e.g. ENOTFOUND) on `cause`,
 * so classification has to look at the whole chain, not only the top message.
 *
 * @param {unknown} error - The rejected value.
 * @returns {string} All messages and string codes found, joined with " | ".
 */
function collectErrorText(error) {
    const parts = [];
    let current = error;
    // Bounded walk so that a cyclic `cause` chain cannot loop forever.
    for (let depth = 0; current != null && depth < 5; depth += 1) {
        if (typeof current.message === 'string') {
            parts.push(current.message);
        }
        if (typeof current.code === 'string') {
            parts.push(current.code);
        }
        current = current.cause;
    }
    return parts.join(' | ');
}
/**
 * Translate a low-level fetch/stream failure into the user-facing error.
 *
 * @param {unknown} error - Value the request or body read rejected with.
 * @param {string} url - URL that was being fetched.
 * @param {number} timeout - Timeout in seconds, as requested by the caller.
 * @returns {unknown} An Error with a descriptive message, or the original
 *   value unchanged when it is not an Error.
 */
function toFetchError(error, url, timeout) {
    // Read before any instanceof narrowing; may be undefined.
    const cause = error != null ? error.cause : undefined;
    if (error != null && error.name === 'AbortError') {
        return new Error(`Request timed out after ${timeout} seconds`);
    }
    if (!(error instanceof Error)) {
        return error;
    }
    const detail = collectErrorText(error);
    const causeMessage = cause != null && typeof cause.message === 'string' ? cause.message : '';
    const summary = causeMessage && causeMessage !== error.message
        ? `${error.message}: ${causeMessage}`
        : error.message;
    if (detail.includes('getaddrinfo') || detail.includes('ENOTFOUND')) {
        const urlObj = new URL(url);
        return new Error(`Failed to resolve host: ${urlObj.hostname}`);
    }
    if (detail.includes('ECONNREFUSED')) {
        return new Error(`Connection refused: ${url}`);
    }
    if (['certificate', 'SSL', 'TLS', 'CERT'].some((marker) => detail.includes(marker))) {
        return new Error(`SSL/TLS certificate error. Try using --insecure flag to skip certificate verification`);
    }
    if (detail.includes('Proxy')) {
        return new Error(`Proxy error: ${summary}`);
    }
    return new Error(`Network error: ${summary}`);
}
/**
 * Start an abort timer.
 *
 * @param {number} ms - Milliseconds until the signal is aborted.
 * @returns {{ signal: AbortSignal, cancel: () => void }} The signal to pass
 *   to fetch and a function that disarms the timer.
 */
function startDeadline(ms) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), ms);
    return { signal: controller.signal, cancel: () => clearTimeout(timer) };
}
/**
 * Best-effort release of a response body that will not be read.
 *
 * @param {Response} response - Response whose body should be dropped.
 * @returns {Promise<void>}
 */
async function discardBody(response) {
    try {
        if (response.body) {
            await response.body.cancel();
        }
    }
    catch {
        // Nothing useful can be done if cancelling fails; the caller is
        // already about to report a more relevant error or retry.
    }
}
/**
 * Read a response body incrementally, giving up once it exceeds `limit`.
 *
 * @param {Response} response - Response to read.
 * @param {number} limit - Maximum number of bytes to accept.
 * @returns {Promise<Uint8Array | null>} The body bytes, or `null` when the
 *   body is larger than `limit` (the remaining download is cancelled).
 */
async function readBodyWithLimit(response, limit) {
    if (!response.body) {
        return new Uint8Array(0);
    }
    const reader = response.body.getReader();
    const chunks = [];
    let received = 0;
    for (;;) {
        const { done, value } = await reader.read();
        if (done) {
            break;
        }
        received += value.byteLength;
        if (received > limit) {
            try {
                await reader.cancel();
            }
            catch {
                // Ignore: the size error reported by the caller is what matters.
            }
            return null;
        }
        chunks.push(value);
    }
    const merged = new Uint8Array(received);
    let offset = 0;
    for (const chunk of chunks) {
        merged.set(chunk, offset);
        offset += chunk.byteLength;
    }
    return merged;
}
/**
 * Issue one request and map any failure to a user-facing error.
 *
 * @param {string} url - URL to fetch.
 * @param {Record<string, string>} headers - Request headers.
 * @param {AbortSignal} signal - Signal that aborts the request on timeout.
 * @param {unknown} dispatcher - undici dispatcher, or undefined for the default.
 * @param {number} timeout - Timeout in seconds (used only for the error text).
 * @returns {Promise<Response>}
 */
async function sendRequest(url, headers, signal, dispatcher, timeout) {
    try {
        return await (0, undici_1.fetch)(url, { headers, signal, dispatcher });
    }
    catch (error) {
        throw toFetchError(error, url, timeout);
    }
}
/**
 * Fetch a URL and return its body as text together with its content type.
 *
 * The request is sent with a Chrome user agent. If the server answers with a
 * Cloudflare challenge (403 plus `cf-mitigated: challenge`), it is retried
 * once with the tool's own user agent and a fresh timeout. The timeout covers
 * the body download as well as the wait for headers, and the body is read
 * incrementally so an oversized response is abandoned as soon as the limit is
 * crossed.
 *
 * @param {{ url: string, format: string, timeout: number, proxy?: string, insecure?: boolean }} options
 *   `timeout` is in seconds and is capped at MAX_TIMEOUT.
 * @returns {Promise<{ content: string, contentType: string, url: string }>}
 * @throws {Error} On an invalid scheme, timeout, network/TLS/proxy failure,
 *   a non-2xx status, or a body larger than MAX_RESPONSE_SIZE.
 */
async function webfetch(options) {
    const { url, format, timeout, proxy: manualProxy, insecure } = options;
    validateUrl(url);
    const actualTimeout = Math.min(timeout, types_js_1.MAX_TIMEOUT) * 1000;
    // Configure proxy (done before arming the timer so a failure here cannot
    // leave a pending timeout behind).
    const envProxyConfig = (0, proxy_js_1.detectProxyConfig)(url);
    const proxyConfig = (0, proxy_js_1.mergeProxyConfig)(envProxyConfig, manualProxy, insecure);
    const dispatcher = (0, proxy_js_1.createProxyAgent)(proxyConfig, url);
    const headers = {
        'User-Agent': CHROME_USER_AGENT,
        Accept: buildAcceptHeader(format),
        'Accept-Language': 'en-US,en;q=0.9',
    };
    let deadline = startDeadline(actualTimeout);
    try {
        let response = await sendRequest(url, headers, deadline.signal, dispatcher, timeout);
        // Cloudflare challenge detection - retry with honest UA
        if (response.status === 403 && response.headers.get('cf-mitigated') === 'challenge') {
            await discardBody(response);
            deadline.cancel();
            deadline = startDeadline(actualTimeout);
            headers['User-Agent'] = HONEST_USER_AGENT;
            response = await sendRequest(url, headers, deadline.signal, dispatcher, timeout);
        }
        if (!response.ok) {
            await discardBody(response);
            throw new Error(`Request failed with status code: ${response.status}`);
        }
        // Cheap early rejection when the server declares the size up front.
        const contentLength = response.headers.get('content-length');
        if (contentLength && Number.parseInt(contentLength, 10) > types_js_1.MAX_RESPONSE_SIZE) {
            await discardBody(response);
            throw new Error('Response too large (exceeds 5MB limit)');
        }
        let bytes;
        try {
            bytes = await readBodyWithLimit(response, types_js_1.MAX_RESPONSE_SIZE);
        }
        catch (error) {
            // The abort signal is still armed, so a stalled download surfaces
            // here as a timeout.
            throw toFetchError(error, url, timeout);
        }
        if (bytes === null) {
            throw new Error('Response too large (exceeds 5MB limit)');
        }
        const contentType = response.headers.get('content-type') || 'text/plain';
        return {
            content: new TextDecoder().decode(bytes),
            contentType,
            url,
        };
    }
    finally {
        deadline.cancel();
    }
}
110
+ //# sourceMappingURL=fetch.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch.js","sourceRoot":"","sources":["../src/fetch.ts"],"names":[],"mappings":";;AA0BA,kCAIC;AAED,4BA+FC;AA/HD,mCAA6C;AAC7C,yCAKmB;AACnB,yCAAkF;AAElF,MAAM,iBAAiB,GACrB,iHAAiH,CAAA;AACnH,MAAM,iBAAiB,GAAG,cAAc,CAAA;AAExC,SAAS,iBAAiB,CAAC,MAAc;IACvC,QAAQ,MAAM,EAAE,CAAC;QACf,KAAK,UAAU;YACb,OAAO,0FAA0F,CAAA;QACnG,KAAK,MAAM;YACT,OAAO,mEAAmE,CAAA;QAC5E,KAAK,MAAM;YACT,OAAO,gGAAgG,CAAA;QACzG;YACE,OAAO,iEAAiE,CAAA;IAC5E,CAAC;AACH,CAAC;AAED,SAAgB,WAAW,CAAC,GAAW;IACrC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC9D,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAA;IAC5D,CAAC;AACH,CAAC;AAEM,KAAK,UAAU,QAAQ,CAAC,OAAwB;IACrD,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAA;IAEtE,WAAW,CAAC,GAAG,CAAC,CAAA;IAEhB,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,sBAAW,CAAC,GAAG,IAAI,CAAA;IAC3D,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAA;IACxC,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,aAAa,CAAC,CAAA;IAErE,kBAAkB;IAClB,MAAM,cAAc,GAAG,IAAA,4BAAiB,EAAC,GAAG,CAAC,CAAA;IAC7C,MAAM,WAAW,GAAG,IAAA,2BAAgB,EAAC,cAAc,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAA;IAC3E,MAAM,UAAU,GAAG,IAAA,2BAAgB,EAAC,WAAW,EAAE,GAAG,CAAC,CAAA;IAErD,MAAM,OAAO,GAA2B;QACtC,YAAY,EAAE,iBAAiB;QAC/B,MAAM,EAAE,iBAAiB,CAAC,MAAM,CAAC;QACjC,iBAAiB,EAAE,gBAAgB;KACpC,CAAA;IAED,IAAI,QAAkB,CAAA;IACtB,IAAI,CAAC;QACH,QAAQ,GAAG,MAAM,IAAA,cAAW,EAAC,GAAG,EAAE;YAChC,OAAO;YACP,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,UAAU;SACX,CAAC,CAAA;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,YAAY,CAAC,SAAS,CAAC,CAAA;QACvB,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YAC1D,MAAM,IAAI,KAAK,CAAC,2BAA2B,OAAO,UAAU,CAAC,CAAA;QAC/D,CAAC;QACD,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;gBACjF,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;gBAC3B,MAAM,IAAI,KAAK,CAAC,2BAA2B,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAA;YAC/D,CAAC;YACD,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,EAAE,CAAC;gBAC3C,MAAM,
IAAI,KAAK,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAA;YAC/C,CAAC;YACD,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC5G,MAAM,IAAI,KAAK,CACb,uFAAuF,CACxF,CAAA;YACH,CAAC;YACD,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACpC,MAAM,IAAI,KAAK,CAAC,gBAAgB,KAAK,CAAC,OAAO,EAAE,CAAC,CAAA;YAClD,CAAC;YACD,MAAM,IAAI,KAAK,CAAC,kBAAkB,KAAK,CAAC,OAAO,EAAE,CAAC,CAAA;QACpD,CAAC;QACD,MAAM,KAAK,CAAA;IACb,CAAC;IAED,YAAY,CAAC,SAAS,CAAC,CAAA;IAEvB,wDAAwD;IACxD,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,KAAK,WAAW,EAAE,CAAC;QACpF,MAAM,eAAe,GAAG,IAAI,eAAe,EAAE,CAAA;QAC7C,MAAM,cAAc,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,eAAe,CAAC,KAAK,EAAE,EAAE,aAAa,CAAC,CAAA;QAE/E,OAAO,CAAC,YAAY,CAAC,GAAG,iBAAiB,CAAA;QACzC,IAAI,CAAC;YACH,QAAQ,GAAG,MAAM,IAAA,cAAW,EAAC,GAAG,EAAE;gBAChC,OAAO;gBACP,MAAM,EAAE,eAAe,CAAC,MAAM;gBAC9B,UAAU;aACX,CAAC,CAAA;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,YAAY,CAAC,cAAc,CAAC,CAAA;YAC5B,MAAM,KAAK,CAAA;QACb,CAAC;QACD,YAAY,CAAC,cAAc,CAAC,CAAA;IAC9B,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,oCAAoC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAA;IACxE,CAAC;IAED,MAAM,aAAa,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAA;IAC5D,IAAI,aAAa,IAAI,QAAQ,CAAC,aAAa,CAAC,GAAG,4BAAiB,EAAE,CAAC;QACjE,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAA;IAC3D,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAA;IAChD,IAAI,WAAW,CAAC,UAAU,GAAG,4BAAiB,EAAE,CAAC;QAC/C,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAA;IAC3D,CAAC;IAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,YAAY,CAAA;IAExE,OAAO;QACL,OAAO,EAAE,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,WAAW,CAAC;QAC9C,WAAW;QACX,GAAG;KACJ,CAAA;AACH,CAAC"}
@@ -0,0 +1,6 @@
1
+ import { OutputFormat, WebfetchResult } from './types.js';
2
+ export declare function htmlToMarkdown(html: string): string;
3
+ export declare function htmlToText(html: string): string;
4
+ export declare function shouldConvert(contentType: string): boolean;
5
+ export declare function convertResult(result: WebfetchResult, format: OutputFormat): string;
6
+ //# sourceMappingURL=format.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"format.d.ts","sourceRoot":"","sources":["../src/format.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,YAAY,CAAA;AAczD,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAED,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAQ/C;AAED,wBAAgB,aAAa,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAG1D;AAED,wBAAgB,aAAa,CAAC,MAAM,EAAE,cAAc,EAAE,MAAM,EAAE,YAAY,GAAG,MAAM,CAkBlF"}
package/dist/format.js ADDED
@@ -0,0 +1,90 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ var __importDefault = (this && this.__importDefault) || function (mod) {
36
+ return (mod && mod.__esModule) ? mod : { "default": mod };
37
+ };
38
+ Object.defineProperty(exports, "__esModule", { value: true });
39
+ exports.htmlToMarkdown = htmlToMarkdown;
40
+ exports.htmlToText = htmlToText;
41
+ exports.shouldConvert = shouldConvert;
42
+ exports.convertResult = convertResult;
43
+ const turndown_1 = __importDefault(require("turndown"));
44
+ const cheerio = __importStar(require("cheerio"));
45
/**
 * Create the Turndown converter used for HTML-to-Markdown output.
 *
 * Configured for ATX headings, `---` rules, `-` bullets, fenced code blocks
 * and `*` emphasis; script, style, meta and link elements are dropped.
 *
 * @returns {object} A configured Turndown service instance.
 */
function createTurndownService() {
    const markdownStyle = {
        headingStyle: 'atx',
        hr: '---',
        bulletListMarker: '-',
        codeBlockStyle: 'fenced',
        emDelimiter: '*',
    };
    const service = new turndown_1.default(markdownStyle);
    const nonContentTags = ['script', 'style', 'meta', 'link'];
    service.remove(nonContentTags);
    return service;
}
56
/**
 * Convert an HTML document or fragment to Markdown.
 *
 * A fresh converter is created for every call.
 *
 * @param {string} html - HTML source.
 * @returns {string} Markdown rendering of the input.
 */
function htmlToMarkdown(html) {
    return createTurndownService().turndown(html);
}
60
/**
 * Extract the visible text of an HTML document.
 *
 * Script, style, noscript, iframe, object and embed elements are removed,
 * then the text of `<body>` is taken and every run of whitespace is
 * collapsed to a single space.
 *
 * @param {string} html - HTML source.
 * @returns {string} Plain text on a single line, trimmed at both ends.
 */
function htmlToText(html) {
    const dom = cheerio.load(html);
    dom('script, style, noscript, iframe, object, embed').remove();
    const bodyText = dom('body').text();
    const collapsed = bodyText.replace(/\s+/g, ' ');
    return collapsed.trim();
}
69
/**
 * Decide whether a response body is HTML and therefore needs conversion.
 *
 * Only the media type (the part before the first ";") is inspected, compared
 * case-insensitively.
 *
 * @param {string} contentType - Value of the Content-Type header.
 * @returns {boolean} True for text/html and application/xhtml+xml.
 */
function shouldConvert(contentType) {
    const separator = contentType.indexOf(';');
    const mediaType = separator === -1 ? contentType : contentType.slice(0, separator);
    const mime = mediaType.trim().toLowerCase();
    if (mime.includes('text/html')) {
        return true;
    }
    return mime.includes('application/xhtml+xml');
}
73
/**
 * Render a fetch result in the requested output format.
 *
 * Non-HTML responses are returned untouched. HTML is converted for the
 * 'markdown' and 'text' formats and passed through for 'html' or any other
 * value.
 *
 * @param {{ content: string, contentType: string }} result - Fetched body and its content type.
 * @param {string} format - Requested output format.
 * @returns {string} The text to output.
 */
function convertResult(result, format) {
    const { content, contentType } = result;
    if (shouldConvert(contentType)) {
        if (format === 'markdown') {
            return htmlToMarkdown(content);
        }
        if (format === 'text') {
            return htmlToText(content);
        }
    }
    // Not HTML, or HTML requested as-is.
    return content;
}
90
+ //# sourceMappingURL=format.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"format.js","sourceRoot":"","sources":["../src/format.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAgBA,wCAGC;AAED,gCAQC;AAED,sCAGC;AAED,sCAkBC;AAtDD,wDAAsC;AACtC,iDAAkC;AAGlC,SAAS,qBAAqB;IAC5B,MAAM,eAAe,GAAG,IAAI,kBAAe,CAAC;QAC1C,YAAY,EAAE,KAAK;QACnB,EAAE,EAAE,KAAK;QACT,gBAAgB,EAAE,GAAG;QACrB,cAAc,EAAE,QAAQ;QACxB,WAAW,EAAE,GAAG;KACjB,CAAC,CAAA;IACF,eAAe,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;IAC3D,OAAO,eAAe,CAAA;AACxB,CAAC;AAED,SAAgB,cAAc,CAAC,IAAY;IACzC,MAAM,eAAe,GAAG,qBAAqB,EAAE,CAAA;IAC/C,OAAO,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAA;AACvC,CAAC;AAED,SAAgB,UAAU,CAAC,IAAY;IACrC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAC5B,mCAAmC;IACnC,CAAC,CAAC,gDAAgD,CAAC,CAAC,MAAM,EAAE,CAAA;IAC5D,2CAA2C;IAC3C,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAA;IAC7B,0DAA0D;IAC1D,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;AACzC,CAAC;AAED,SAAgB,aAAa,CAAC,WAAmB;IAC/C,MAAM,IAAI,GAAG,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,CAAA;IAClE,OAAO,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,uBAAuB,CAAC,CAAA;AAC7E,CAAC;AAED,SAAgB,aAAa,CAAC,MAAsB,EAAE,MAAoB;IACxE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,GAAG,MAAM,CAAA;IAEvC,4BAA4B;IAC5B,IAAI,CAAC,aAAa,CAAC,WAAW,CAAC,EAAE,CAAC;QAChC,OAAO,OAAO,CAAA;IAChB,CAAC;IAED,QAAQ,MAAM,EAAE,CAAC;QACf,KAAK,UAAU;YACb,OAAO,cAAc,CAAC,OAAO,CAAC,CAAA;QAChC,KAAK,MAAM;YACT,OAAO,UAAU,CAAC,OAAO,CAAC,CAAA;QAC5B,KAAK,MAAM;YACT,OAAO,OAAO,CAAA;QAChB;YACE,OAAO,OAAO,CAAA;IAClB,CAAC;AACH,CAAC"}
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
package/dist/index.js ADDED
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ const commander_1 = require("commander");
5
+ const zod_1 = require("zod");
6
+ const fetch_js_1 = require("./fetch.js");
7
+ const format_js_1 = require("./format.js");
8
+ const types_js_1 = require("./types.js");
9
+ const fs_1 = require("fs");
10
+ const formatSchema = zod_1.z.enum(['markdown', 'text', 'html']);
11
+ const program = new commander_1.Command()
12
+ .name('webfetch')
13
+ .description('Fetch and convert web content to markdown, text, or html')
14
+ .version('0.1.0')
15
+ .argument('<url>', 'URL to fetch')
16
+ .option('-f, --format <format>', 'Output format: markdown, text, html', 'markdown')
17
+ .option('-t, --timeout <seconds>', 'Timeout in seconds (max 120)', String(types_js_1.DEFAULT_TIMEOUT))
18
+ .option('-o, --output <file>', 'Write output to file')
19
+ .option('-q, --quiet', 'Only output content, no metadata', false)
20
+ .option('--proxy <url>', 'Proxy server URL (e.g., http://proxy:8080)')
21
+ .option('--insecure', 'Skip TLS certificate verification', false)
22
+ .addHelpText('after', `
23
+ Examples:
24
+ $ webfetch https://example.com
25
+ $ webfetch https://example.com -f text
26
+ $ webfetch https://example.com -o output.md
27
+ $ webfetch https://example.com --quiet > output.md
28
+
29
+ Proxy:
30
+ $ webfetch https://example.com --proxy http://proxy:8080
31
+ $ webfetch https://example.com --insecure
32
+
33
+ Environment variables:
34
+ HTTP_PROXY, HTTPS_PROXY - Proxy server URL
35
+ NO_PROXY - Comma-separated hosts to bypass proxy
36
+ `);
37
/**
 * Map an error message to the process exit code.
 *
 * Classification is by message text because the fetch layer reports failures
 * as plain Error objects with descriptive messages.
 *
 * @param {string} message - Message of the error being reported.
 * @returns {number} One of the EXIT_CODES values.
 */
function exitCodeForMessage(message) {
    // Refused connections and generic network failures are network errors
    // too, not user errors.
    const networkMarkers = ['timed out', 'resolve host', 'Connection refused', 'Network error'];
    if (networkMarkers.some((marker) => message.includes(marker))) {
        return types_js_1.EXIT_CODES.NETWORK_ERROR;
    }
    if (message.includes('status code')) {
        return types_js_1.EXIT_CODES.SERVER_ERROR;
    }
    return types_js_1.EXIT_CODES.USER_ERROR;
}
/**
 * CLI entry point: parse arguments, fetch the URL, convert the body and
 * write it to stdout or to the `--output` file.
 *
 * Always terminates the process via `process.exit` with one of the
 * EXIT_CODES values; errors are reported on stderr.
 *
 * @returns {Promise<void>}
 */
async function main() {
    try {
        program.parse(process.argv);
        const options = program.opts();
        const url = program.args[0];
        if (!url) {
            console.error('Error: URL is required');
            process.exit(types_js_1.EXIT_CODES.USER_ERROR);
        }
        // Validate and parse options
        const format = formatSchema.parse(options.format);
        const requestedTimeout = Number.parseInt(options.timeout, 10);
        if (Number.isNaN(requestedTimeout)) {
            // Without this guard NaN would flow into setTimeout and the
            // request would be reported as an immediate timeout.
            console.error(`Error: Invalid timeout "${options.timeout}". Must be a number of seconds`);
            process.exit(types_js_1.EXIT_CODES.USER_ERROR);
        }
        const timeout = Math.min(Math.max(1, requestedTimeout), types_js_1.MAX_TIMEOUT);
        const quiet = options.quiet;
        const outputFile = options.output;
        const proxy = options.proxy;
        const insecure = options.insecure;
        // Fetch the URL
        const result = await (0, fetch_js_1.webfetch)({ url, format, timeout, proxy, insecure });
        // Convert to requested format
        const output = (0, format_js_1.convertResult)(result, format);
        // Output
        if (outputFile) {
            (0, fs_1.writeFileSync)(outputFile, output);
            if (!quiet) {
                console.log(`Saved to ${outputFile}`);
            }
        }
        else {
            if (quiet) {
                console.log(output);
            }
            else {
                console.log(`URL: ${result.url}`);
                console.log(`Content-Type: ${result.contentType}`);
                console.log('---');
                console.log(output);
            }
        }
        process.exit(types_js_1.EXIT_CODES.SUCCESS);
    }
    catch (error) {
        if (error instanceof zod_1.z.ZodError) {
            console.error(`Error: Invalid format. Must be one of: markdown, text, html`);
            process.exit(types_js_1.EXIT_CODES.USER_ERROR);
        }
        if (error instanceof Error) {
            console.error(`Error: ${error.message}`);
            if (error.cause) {
                console.error(`Cause: ${error.cause}`);
            }
            // Determine exit code based on error type
            process.exit(exitCodeForMessage(error.message));
        }
        console.error('An unexpected error occurred');
        process.exit(types_js_1.EXIT_CODES.USER_ERROR);
    }
}
100
+ main();
101
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAEA,yCAAmC;AACnC,6BAAuB;AACvB,yCAAqC;AACrC,2CAA2C;AAC3C,yCAImB;AACnB,2BAAkC;AAElC,MAAM,YAAY,GAAG,OAAC,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;AAEzD,MAAM,OAAO,GAAG,IAAI,mBAAO,EAAE;KAC1B,IAAI,CAAC,UAAU,CAAC;KAChB,WAAW,CAAC,0DAA0D,CAAC;KACvE,OAAO,CAAC,OAAO,CAAC;KAChB,QAAQ,CAAC,OAAO,EAAE,cAAc,CAAC;KACjC,MAAM,CAAC,uBAAuB,EAAE,qCAAqC,EAAE,UAAU,CAAC;KAClF,MAAM,CAAC,yBAAyB,EAAE,8BAA8B,EAAE,MAAM,CAAC,0BAAe,CAAC,CAAC;KAC1F,MAAM,CAAC,qBAAqB,EAAE,sBAAsB,CAAC;KACrD,MAAM,CAAC,aAAa,EAAE,kCAAkC,EAAE,KAAK,CAAC;KAChE,MAAM,CAAC,eAAe,EAAE,4CAA4C,CAAC;KACrE,MAAM,CAAC,YAAY,EAAE,mCAAmC,EAAE,KAAK,CAAC;KAChE,WAAW,CACV,OAAO,EACP;;;;;;;;;;;;;;CAcH,CACE,CAAA;AAEH,KAAK,UAAU,IAAI;IACjB,IAAI,CAAC;QACH,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QAC3B,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAA;QAC9B,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAE3B,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAA;YACvC,OAAO,CAAC,IAAI,CAAC,qBAAU,CAAC,UAAU,CAAC,CAAA;QACrC,CAAC;QAED,6BAA6B;QAC7B,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAiB,CAAA;QACjE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;QACrE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAgB,CAAA;QACtC,MAAM,UAAU,GAAG,OAAO,CAAC,MAA4B,CAAA;QACvD,MAAM,KAAK,GAAG,OAAO,CAAC,KAA2B,CAAA;QACjD,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAmB,CAAA;QAE5C,gBAAgB;QAChB,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAQ,EAAC,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAA;QAExE,8BAA8B;QAC9B,MAAM,MAAM,GAAG,IAAA,yBAAa,EAAC,MAAM,EAAE,MAAM,CAAC,CAAA;QAE5C,SAAS;QACT,IAAI,UAAU,EAAE,CAAC;YACf,IAAA,kBAAa,EAAC,UAAU,EAAE,MAAM,CAAC,CAAA;YACjC,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,OAAO,CAAC,GAAG,CAAC,YAAY,UAAU,EAAE,CAAC,CAAA;YACvC,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,KAAK,EAAE,CAAC;gBACV,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;YACrB,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,GAAG,CAAC,QAAQ,MAAM,CAAC,GAAG,EAAE,CAAC,CAAA
;gBACjC,OAAO,CAAC,GAAG,CAAC,iBAAiB,MAAM,CAAC,WAAW,EAAE,CAAC,CAAA;gBAClD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;gBAClB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;YACrB,CAAC;QACH,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,qBAAU,CAAC,OAAO,CAAC,CAAA;IAClC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,OAAC,CAAC,QAAQ,EAAE,CAAC;YAChC,OAAO,CAAC,KAAK,CAAC,6DAA6D,CAAC,CAAA;YAC5E,OAAO,CAAC,IAAI,CAAC,qBAAU,CAAC,UAAU,CAAC,CAAA;QACrC,CAAC;QACD,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,UAAU,KAAK,CAAC,OAAO,EAAE,CAAC,CAAA;YACxC,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;gBAChB,OAAO,CAAC,KAAK,CAAC,UAAU,KAAK,CAAC,KAAK,EAAE,CAAC,CAAA;YACxC,CAAC;YACD,0CAA0C;YAC1C,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,EAAE,CAAC;gBAClF,OAAO,CAAC,IAAI,CAAC,qBAAU,CAAC,aAAa,CAAC,CAAA;YACxC,CAAC;YACD,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;gBAC1C,OAAO,CAAC,IAAI,CAAC,qBAAU,CAAC,YAAY,CAAC,CAAA;YACvC,CAAC;YACD,OAAO,CAAC,IAAI,CAAC,qBAAU,CAAC,UAAU,CAAC,CAAA;QACrC,CAAC;QACD,OAAO,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAA;QAC7C,OAAO,CAAC,IAAI,CAAC,qBAAU,CAAC,UAAU,CAAC,CAAA;IACrC,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAA"}
@@ -0,0 +1,12 @@
1
+ import type { Dispatcher } from 'undici';
2
+ export interface ProxyConfig {
3
+ proxyUrl: string | null;
4
+ noProxy: string[];
5
+ insecure: boolean;
6
+ }
7
+ export declare function getProxyFromUrl(url: string): ProxyConfig;
8
+ export declare function detectProxyConfig(targetUrl: string): ProxyConfig;
9
+ export declare function shouldBypassProxy(targetUrl: string, noProxyList: string[]): boolean;
10
+ export declare function createProxyAgent(config: ProxyConfig, targetUrl: string): Dispatcher | undefined;
11
+ export declare function mergeProxyConfig(envConfig: ProxyConfig, manualProxy?: string, insecure?: boolean): ProxyConfig;
12
+ //# sourceMappingURL=proxy.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"proxy.d.ts","sourceRoot":"","sources":["../src/proxy.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAA;AAExC,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAA;IACvB,OAAO,EAAE,MAAM,EAAE,CAAA;IACjB,QAAQ,EAAE,OAAO,CAAA;CAClB;AAWD,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,WAAW,CAYxD;AAED,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,GAAG,WAAW,CAgChE;AAED,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,GAAG,OAAO,CAqBnF;AAED,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAoB/F;AAED,wBAAgB,gBAAgB,CAC9B,SAAS,EAAE,WAAW,EACtB,WAAW,CAAC,EAAE,MAAM,EACpB,QAAQ,CAAC,EAAE,OAAO,GACjB,WAAW,CAMb"}
package/dist/proxy.js ADDED
@@ -0,0 +1,110 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getProxyFromUrl = getProxyFromUrl;
4
+ exports.detectProxyConfig = detectProxyConfig;
5
+ exports.shouldBypassProxy = shouldBypassProxy;
6
+ exports.createProxyAgent = createProxyAgent;
7
+ exports.mergeProxyConfig = mergeProxyConfig;
8
+ const undici_1 = require("undici");
9
+ const PROXY_ENV_VARS = [
10
+ 'HTTPS_PROXY',
11
+ 'https_proxy',
12
+ 'HTTP_PROXY',
13
+ 'http_proxy',
14
+ ];
15
+ const NO_PROXY_ENV_VARS = ['NO_PROXY', 'no_proxy'];
16
/**
 * Build a proxy configuration from an explicit proxy URL.
 *
 * The URL is only checked for parseability; the original string is stored
 * unchanged.
 *
 * @param {string} url - Proxy URL.
 * @returns {{ proxyUrl: string, noProxy: string[], insecure: boolean }}
 *   Configuration with an empty bypass list and TLS verification enabled.
 * @throws {Error} When `url` cannot be parsed as a URL.
 */
function getProxyFromUrl(url) {
    let parseable = true;
    try {
        // Constructed purely for validation; the parsed value is not needed.
        new URL(url);
    }
    catch {
        parseable = false;
    }
    if (!parseable) {
        throw new Error(`Invalid proxy URL: ${url}`);
    }
    return { proxyUrl: url, noProxy: [], insecure: false };
}
30
/**
 * Read the proxy configuration from environment variables.
 *
 * For https targets the HTTPS proxy variables are preferred and the HTTP
 * ones are the fallback; for every other target the order is reversed.
 * Upper-case variable names win over lower-case ones. The bypass list is
 * taken from the first non-empty NO_PROXY variable, split on commas.
 *
 * @param {string} targetUrl - URL that is about to be fetched.
 * @returns {{ proxyUrl: string | null, noProxy: string[], insecure: boolean }}
 *   `insecure` is always false here; it is set by the caller's flag.
 */
function detectProxyConfig(targetUrl) {
    const env = process.env;
    const httpsProxy = env.HTTPS_PROXY || env.https_proxy;
    const httpProxy = env.HTTP_PROXY || env.http_proxy;
    const candidates = targetUrl.startsWith('https://')
        ? [httpsProxy, httpProxy]
        : [httpProxy, httpsProxy];
    const proxyUrl = candidates.find(Boolean) || null;
    // Parse NO_PROXY
    let rawNoProxy = '';
    for (const name of NO_PROXY_ENV_VARS) {
        const value = env[name];
        if (value) {
            rawNoProxy = value;
            break;
        }
    }
    const noProxy = [];
    for (const entry of rawNoProxy.split(',')) {
        const host = entry.trim();
        if (host) {
            noProxy.push(host);
        }
    }
    return { proxyUrl, noProxy, insecure: false };
}
58
/**
 * Decide whether a target URL is excluded from proxying by a NO_PROXY list.
 *
 * A pattern matches when it is:
 *   - `*` (bypass the proxy for every host),
 *   - exactly the target hostname,
 *   - `.example.com` or `*.example.com` and the hostname ends with `.example.com`,
 *   - `example.com` and the hostname is a subdomain of it.
 * Comparison is case-insensitive, because `URL` lower-cases hostnames while
 * NO_PROXY entries are written by hand.
 *
 * @param {string} targetUrl - URL that is about to be fetched.
 * @param {string[]} noProxyList - Host patterns from NO_PROXY.
 * @returns {boolean} True when the request should go direct. An unparsable
 *   URL or a malformed list yields false.
 */
function shouldBypassProxy(targetUrl, noProxyList) {
    if (noProxyList.length === 0)
        return false;
    try {
        const hostname = new URL(targetUrl).hostname.toLowerCase();
        return noProxyList.some((rawPattern) => {
            const pattern = rawPattern.trim().toLowerCase();
            if (pattern === '')
                return false;
            // Bare wildcard: no host is proxied.
            if (pattern === '*')
                return true;
            // Exact match
            if (hostname === pattern)
                return true;
            // Wildcard match (e.g., .example.com or *.example.com)
            if (pattern.startsWith('.'))
                return hostname.endsWith(pattern);
            if (pattern.startsWith('*.'))
                return hostname.endsWith(pattern.slice(1));
            // Simple suffix match: "example.com" also covers its subdomains
            return hostname.endsWith('.' + pattern);
        });
    }
    catch {
        return false;
    }
}
83
/**
 * Create the undici dispatcher for a request.
 *
 * - Proxy configured and not bypassed: a ProxyAgent for that proxy.
 * - No proxy (or target bypassed) and `insecure` set: a plain Agent that
 *   skips TLS certificate verification, so the insecure flag also works for
 *   direct connections.
 * - Otherwise: `undefined`, meaning the default dispatcher is used.
 *
 * @param {{ proxyUrl: string | null, noProxy: string[], insecure: boolean }} config
 *   Effective proxy configuration.
 * @param {string} targetUrl - URL that is about to be fetched.
 * @returns {object | undefined} An undici dispatcher, or undefined.
 */
function createProxyAgent(config, targetUrl) {
    // A fresh options object per use, so the agents never share mutable state.
    const insecureTls = () => (config.insecure ? { rejectUnauthorized: false } : undefined);
    const goesThroughProxy = Boolean(config.proxyUrl) && !shouldBypassProxy(targetUrl, config.noProxy);
    if (!goesThroughProxy) {
        const connect = insecureTls();
        return connect ? new undici_1.Agent({ connect }) : undefined;
    }
    return new undici_1.ProxyAgent({
        uri: config.proxyUrl,
        requestTls: insecureTls(),
        proxyTls: insecureTls(),
    });
}
103
/**
 * Combine the environment proxy configuration with command-line overrides.
 *
 * A manual proxy, when given, replaces the environment proxy; an empty
 * string disables proxying. The bypass list always comes from the
 * environment. `insecure` falls back to the environment value only when it
 * is null or undefined.
 *
 * @param {{ proxyUrl: string | null, noProxy: string[], insecure: boolean }} envConfig
 *   Configuration detected from the environment.
 * @param {string} [manualProxy] - Proxy URL from the command line.
 * @param {boolean} [insecure] - Whether TLS verification should be skipped.
 * @returns {{ proxyUrl: string | null, noProxy: string[], insecure: boolean }}
 */
function mergeProxyConfig(envConfig, manualProxy, insecure) {
    let proxyUrl = envConfig.proxyUrl;
    if (manualProxy !== undefined) {
        proxyUrl = manualProxy ? manualProxy : null;
    }
    const skipVerification = insecure === undefined || insecure === null
        ? envConfig.insecure
        : insecure;
    return {
        proxyUrl,
        noProxy: envConfig.noProxy,
        insecure: skipVerification,
    };
}
110
+ //# sourceMappingURL=proxy.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"proxy.js","sourceRoot":"","sources":["../src/proxy.ts"],"names":[],"mappings":";;AAkBA,0CAYC;AAED,8CAgCC;AAED,8CAqBC;AAED,4CAoBC;AAED,4CAUC;AAzHD,mCAAmC;AASnC,MAAM,cAAc,GAAG;IACrB,aAAa;IACb,aAAa;IACb,YAAY;IACZ,YAAY;CACb,CAAA;AAED,MAAM,iBAAiB,GAAG,CAAC,UAAU,EAAE,UAAU,CAAC,CAAA;AAElD,SAAgB,eAAe,CAAC,GAAW;IACzC,kBAAkB;IAClB,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC7B,OAAO;YACL,QAAQ,EAAE,GAAG;YACb,OAAO,EAAE,EAAE;YACX,QAAQ,EAAE,KAAK;SAChB,CAAA;IACH,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,sBAAsB,GAAG,EAAE,CAAC,CAAA;IAC9C,CAAC;AACH,CAAC;AAED,SAAgB,iBAAiB,CAAC,SAAiB;IACjD,2DAA2D;IAC3D,MAAM,OAAO,GAAG,SAAS,CAAC,UAAU,CAAC,UAAU,CAAC,CAAA;IAEhD,IAAI,QAAQ,GAAkB,IAAI,CAAA;IAElC,IAAI,OAAO,EAAE,CAAC;QACZ,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,IAAI,CAAA;IACvE,CAAC;IAED,oDAAoD;IACpD,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,QAAQ;YACN,OAAO,CAAC,GAAG,CAAC,UAAU;gBACtB,OAAO,CAAC,GAAG,CAAC,UAAU;gBACtB,OAAO,CAAC,GAAG,CAAC,WAAW;gBACvB,OAAO,CAAC,GAAG,CAAC,WAAW;gBACvB,IAAI,CAAA;IACR,CAAC;IAED,iBAAiB;IACjB,MAAM,UAAU,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;IACnF,MAAM,OAAO,GAAG,UAAU;SACvB,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,OAAO,CAAC,CAAA;IAElB,OAAO;QACL,QAAQ;QACR,OAAO;QACP,QAAQ,EAAE,KAAK;KAChB,CAAA;AACH,CAAC;AAED,SAAgB,iBAAiB,CAAC,SAAiB,EAAE,WAAqB;IACxE,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAA;IAE1C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAA;QACjC,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAA;QAEhC,KAAK,MAAM,OAAO,IAAI,WAAW,EAAE,CAAC;YAClC,cAAc;YACd,IAAI,QAAQ,KAAK,OAAO;gBAAE,OAAO,IAAI,CAAA;YACrC,uCAAuC;YACvC,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC;gBAAE,OAAO,IAAI,CAAA;YACtE,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAAE,OAAO,IAAI,CAAA;YAChF,sBAAsB;YACtB,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG
,GAAG,OAAO,CAAC;gBAAE,OAAO,IAAI,CAAA;QACnD,CAAC;QAED,OAAO,KAAK,CAAA;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAA;IACd,CAAC;AACH,CAAC;AAED,SAAgB,gBAAgB,CAAC,MAAmB,EAAE,SAAiB;IACrE,IAAI,CAAC,MAAM,CAAC,QAAQ;QAAE,OAAO,SAAS,CAAA;IAEtC,IAAI,iBAAiB,CAAC,SAAS,EAAE,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;QACjD,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,OAAO,IAAI,mBAAU,CAAC;QACpB,GAAG,EAAE,MAAM,CAAC,QAAQ;QACpB,UAAU,EAAE,MAAM,CAAC,QAAQ;YACzB,CAAC,CAAC;gBACE,kBAAkB,EAAE,KAAK;aAC1B;YACH,CAAC,CAAC,SAAS;QACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACvB,CAAC,CAAC;gBACE,kBAAkB,EAAE,KAAK;aAC1B;YACH,CAAC,CAAC,SAAS;KACd,CAAC,CAAA;AACJ,CAAC;AAED,SAAgB,gBAAgB,CAC9B,SAAsB,EACtB,WAAoB,EACpB,QAAkB;IAElB,OAAO;QACL,QAAQ,EAAE,WAAW,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,WAAW,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,QAAQ;QAChF,OAAO,EAAE,SAAS,CAAC,OAAO;QAC1B,QAAQ,EAAE,QAAQ,IAAI,SAAS,CAAC,QAAQ;KACzC,CAAA;AACH,CAAC"}
@@ -0,0 +1,23 @@
1
+ export type OutputFormat = 'markdown' | 'text' | 'html';
2
+ export interface WebfetchOptions {
3
+ url: string;
4
+ format: OutputFormat;
5
+ timeout: number;
6
+ proxy?: string;
7
+ insecure?: boolean;
8
+ }
9
+ export interface WebfetchResult {
10
+ content: string;
11
+ contentType: string;
12
+ url: string;
13
+ }
14
+ export declare const DEFAULT_TIMEOUT = 30;
15
+ export declare const MAX_TIMEOUT = 120;
16
+ export declare const MAX_RESPONSE_SIZE: number;
17
+ export declare const EXIT_CODES: {
18
+ readonly SUCCESS: 0;
19
+ readonly USER_ERROR: 1;
20
+ readonly NETWORK_ERROR: 2;
21
+ readonly SERVER_ERROR: 3;
22
+ };
23
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,YAAY,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,CAAA;AAEvD,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,EAAE,YAAY,CAAA;IACpB,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,QAAQ,CAAC,EAAE,OAAO,CAAA;CACnB;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,MAAM,CAAA;IACf,WAAW,EAAE,MAAM,CAAA;IACnB,GAAG,EAAE,MAAM,CAAA;CACZ;AAED,eAAO,MAAM,eAAe,KAAK,CAAA;AACjC,eAAO,MAAM,WAAW,MAAM,CAAA;AAC9B,eAAO,MAAM,iBAAiB,QAAkB,CAAA;AAEhD,eAAO,MAAM,UAAU;;;;;CAKb,CAAA"}
package/dist/types.js ADDED
@@ -0,0 +1,13 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.EXIT_CODES = exports.MAX_RESPONSE_SIZE = exports.MAX_TIMEOUT = exports.DEFAULT_TIMEOUT = void 0;
4
+ exports.DEFAULT_TIMEOUT = 30;
5
+ exports.MAX_TIMEOUT = 120;
6
+ exports.MAX_RESPONSE_SIZE = 5 * 1024 * 1024; // 5MB
7
+ exports.EXIT_CODES = {
8
+ SUCCESS: 0,
9
+ USER_ERROR: 1,
10
+ NETWORK_ERROR: 2,
11
+ SERVER_ERROR: 3,
12
+ };
13
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":";;;AAgBa,QAAA,eAAe,GAAG,EAAE,CAAA;AACpB,QAAA,WAAW,GAAG,GAAG,CAAA;AACjB,QAAA,iBAAiB,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,CAAA,CAAC,MAAM;AAE1C,QAAA,UAAU,GAAG;IACxB,OAAO,EAAE,CAAC;IACV,UAAU,EAAE,CAAC;IACb,aAAa,EAAE,CAAC;IAChB,YAAY,EAAE,CAAC;CACP,CAAA"}
package/package.json ADDED
@@ -0,0 +1,61 @@
1
+ {
2
+ "name": "@lyhue1991/webfetch-cli",
3
+ "version": "0.1.0",
4
+ "description": "Fetch and convert web content to markdown, text, or html from the command line",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "bin": {
8
+ "webfetch": "dist/index.js"
9
+ },
10
+ "scripts": {
11
+ "build": "tsc",
12
+ "dev": "tsx src/index.ts",
13
+ "start": "node dist/index.js",
14
+ "test": "vitest",
15
+ "test:run": "vitest run",
16
+ "typecheck": "tsc --noEmit"
17
+ },
18
+ "keywords": [
19
+ "webfetch",
20
+ "cli",
21
+ "fetch",
22
+ "markdown",
23
+ "html",
24
+ "web",
25
+ "scraper",
26
+ "converter"
27
+ ],
28
+ "author": "",
29
+ "license": "MIT",
30
+ "publishConfig": {
31
+ "access": "public"
32
+ },
33
+ "engines": {
34
+ "node": ">=18.0.0"
35
+ },
36
+ "files": [
37
+ "dist"
38
+ ],
39
+ "repository": {
40
+ "type": "git",
41
+ "url": ""
42
+ },
43
+ "bugs": {
44
+ "url": ""
45
+ },
46
+ "homepage": "",
47
+ "devDependencies": {
48
+ "@types/node": "^25.5.0",
49
+ "@types/turndown": "^5.0.6",
50
+ "tsx": "^4.21.0",
51
+ "typescript": "^5.9.3",
52
+ "vitest": "^3.2.4"
53
+ },
54
+ "dependencies": {
55
+ "cheerio": "^1.2.0",
56
+ "commander": "^14.0.3",
57
+ "turndown": "^7.2.2",
58
+ "undici": "^7.24.5",
59
+ "zod": "^4.3.6"
60
+ }
61
+ }