wespy-ts 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +146 -0
- package/dist/cli/main.d.ts +7 -0
- package/dist/cli/main.d.ts.map +1 -0
- package/dist/cli/main.js +312 -0
- package/dist/cli/main.js.map +1 -0
- package/dist/converter/html-to-markdown.d.ts +9 -0
- package/dist/converter/html-to-markdown.d.ts.map +1 -0
- package/dist/converter/html-to-markdown.js +171 -0
- package/dist/converter/html-to-markdown.js.map +1 -0
- package/dist/converter/sanitize-html.d.ts +12 -0
- package/dist/converter/sanitize-html.d.ts.map +1 -0
- package/dist/converter/sanitize-html.js +22 -0
- package/dist/converter/sanitize-html.js.map +1 -0
- package/dist/core/errors.d.ts +17 -0
- package/dist/core/errors.d.ts.map +1 -0
- package/dist/core/errors.js +36 -0
- package/dist/core/errors.js.map +1 -0
- package/dist/core/result.d.ts +26 -0
- package/dist/core/result.d.ts.map +1 -0
- package/dist/core/result.js +26 -0
- package/dist/core/result.js.map +1 -0
- package/dist/core/types.d.ts +156 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +29 -0
- package/dist/core/types.js.map +1 -0
- package/dist/fetcher/http-client.d.ts +31 -0
- package/dist/fetcher/http-client.d.ts.map +1 -0
- package/dist/fetcher/http-client.js +124 -0
- package/dist/fetcher/http-client.js.map +1 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +14 -0
- package/dist/index.js.map +1 -0
- package/dist/platforms/detector.d.ts +15 -0
- package/dist/platforms/detector.d.ts.map +1 -0
- package/dist/platforms/detector.js +30 -0
- package/dist/platforms/detector.js.map +1 -0
- package/dist/platforms/generic/generic-article.extractor.d.ts +25 -0
- package/dist/platforms/generic/generic-article.extractor.d.ts.map +1 -0
- package/dist/platforms/generic/generic-article.extractor.js +171 -0
- package/dist/platforms/generic/generic-article.extractor.js.map +1 -0
- package/dist/platforms/juejin/juejin-article.extractor.d.ts +20 -0
- package/dist/platforms/juejin/juejin-article.extractor.d.ts.map +1 -0
- package/dist/platforms/juejin/juejin-article.extractor.js +167 -0
- package/dist/platforms/juejin/juejin-article.extractor.js.map +1 -0
- package/dist/platforms/juejin/juejin.types.d.ts +13 -0
- package/dist/platforms/juejin/juejin.types.d.ts.map +1 -0
- package/dist/platforms/juejin/juejin.types.js +5 -0
- package/dist/platforms/juejin/juejin.types.js.map +1 -0
- package/dist/platforms/wechat/wechat-album.extractor.d.ts +25 -0
- package/dist/platforms/wechat/wechat-album.extractor.d.ts.map +1 -0
- package/dist/platforms/wechat/wechat-album.extractor.js +190 -0
- package/dist/platforms/wechat/wechat-album.extractor.js.map +1 -0
- package/dist/platforms/wechat/wechat-article.extractor.d.ts +20 -0
- package/dist/platforms/wechat/wechat-article.extractor.d.ts.map +1 -0
- package/dist/platforms/wechat/wechat-article.extractor.js +132 -0
- package/dist/platforms/wechat/wechat-article.extractor.js.map +1 -0
- package/dist/platforms/wechat/wechat.types.d.ts +17 -0
- package/dist/platforms/wechat/wechat.types.d.ts.map +1 -0
- package/dist/platforms/wechat/wechat.types.js +5 -0
- package/dist/platforms/wechat/wechat.types.js.map +1 -0
- package/dist/sdk/fetch-album-list.d.ts +10 -0
- package/dist/sdk/fetch-album-list.d.ts.map +1 -0
- package/dist/sdk/fetch-album-list.js +31 -0
- package/dist/sdk/fetch-album-list.js.map +1 -0
- package/dist/sdk/fetch-album.d.ts +24 -0
- package/dist/sdk/fetch-album.d.ts.map +1 -0
- package/dist/sdk/fetch-album.js +67 -0
- package/dist/sdk/fetch-album.js.map +1 -0
- package/dist/sdk/fetch-article.d.ts +24 -0
- package/dist/sdk/fetch-article.d.ts.map +1 -0
- package/dist/sdk/fetch-article.js +111 -0
- package/dist/sdk/fetch-article.js.map +1 -0
- package/dist/utils/fs.d.ts +16 -0
- package/dist/utils/fs.d.ts.map +1 -0
- package/dist/utils/fs.js +26 -0
- package/dist/utils/fs.js.map +1 -0
- package/dist/utils/text.d.ts +20 -0
- package/dist/utils/text.d.ts.map +1 -0
- package/dist/utils/text.js +96 -0
- package/dist/utils/text.js.map +1 -0
- package/dist/utils/url.d.ts +22 -0
- package/dist/utils/url.d.ts.map +1 -0
- package/dist/utils/url.js +63 -0
- package/dist/utils/url.js.map +1 -0
- package/package.json +64 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* fetchArticle SDK API
|
|
3
|
+
*/
|
|
4
|
+
import { FetchArticleInputSchema } from '../core/types.js';
|
|
5
|
+
import { HttpClient } from '../fetcher/http-client.js';
|
|
6
|
+
import { detectPlatform } from '../utils/url.js';
|
|
7
|
+
import { fetchWechatArticle } from '../platforms/wechat/wechat-article.extractor.js';
|
|
8
|
+
import { fetchJuejinArticle } from '../platforms/juejin/juejin-article.extractor.js';
|
|
9
|
+
import { fetchGenericArticle } from '../platforms/generic/generic-article.extractor.js';
|
|
10
|
+
import { unsupportedUrlError, invalidInputError } from '../core/errors.js';
|
|
11
|
+
import { fetchAlbum } from './fetch-album.js';
|
|
12
|
+
/**
 * Fetch a single article.
 *
 * Validates `input` with `FetchArticleInputSchema`, picks an extractor based on
 * `detectPlatform(url)` and wraps the outcome in a `{ ok, ... }` result object.
 * Album URLs (`'wechat-album'`) are delegated to `fetchAlbum` and produce an
 * album-shaped success result instead of a single-article one.
 *
 * Errors thrown while extracting are caught and reported as a failure result
 * with code `'UNKNOWN_ERROR'`.
 *
 * @example
 * ```ts
 * import { fetchArticle } from 'wespy'
 *
 * const result = await fetchArticle({
 *   url: 'https://mp.weixin.qq.com/s/xxxxx',
 *   format: ['markdown', 'json'],
 * })
 *
 * if (result.ok) {
 *   console.log(result.article.title)
 *   console.log(result.article.markdown)
 * }
 * ```
 *
 * @param input - Raw, unvalidated options.
 * @returns A success or failure result object; see the helpers below in this file.
 */
export async function fetchArticle(input) {
    // Input validation: collect every schema issue into one message.
    const validation = FetchArticleInputSchema.safeParse(input);
    if (!validation.success) {
        const reasons = validation.error.issues.map((issue) => issue.message);
        return failure(invalidInputError(reasons.join('; ')));
    }
    const { url, outputDir, format, timeoutMs, userAgent, headers, downloadImages } = validation.data;
    // Image downloading is rejected explicitly rather than silently ignored.
    if (downloadImages) {
        return failure(invalidInputError('downloadImages 尚未实现;Python 原版仅在 Markdown 中转换图片代理 URL'));
    }
    const httpClient = new HttpClient({ timeoutMs, userAgent, headers });
    const platform = detectPlatform(url);
    if (!platform) {
        return failure(unsupportedUrlError(url));
    }
    // Single-article platforms all share the same extractor signature.
    const extractors = new Map([
        ['wechat', fetchWechatArticle],
        ['juejin', fetchJuejinArticle],
        ['generic', fetchGenericArticle],
    ]);
    try {
        if (platform === 'wechat-album') {
            // NOTE(review): userAgent/headers are not forwarded here and maxArticles is
            // fixed at 10 — confirm against fetchAlbum's input contract whether that is intended.
            const album = await fetchAlbum({ url, outputDir, format, timeoutMs, maxArticles: 10 });
            if (!album.ok) {
                return failure(album.error);
            }
            return albumSuccess(album.articles, album.artifacts, album.warnings, album.summaryFile, album.failedCount);
        }
        const extract = extractors.get(platform);
        if (extract === undefined) {
            return failure(unsupportedUrlError(url));
        }
        const outcome = await extract(url, httpClient, outputDir, format);
        if (!outcome.ok) {
            return failure(outcome.error);
        }
        const payload = outcome.value;
        return success(payload.article, payload.artifacts);
    }
    catch (e) {
        // Anything thrown (rather than returned as a failure) is reported as non-retryable.
        const message = e instanceof Error ? e.message : String(e);
        return failure({
            code: 'UNKNOWN_ERROR',
            message,
            retryable: false,
            details: { url },
        });
    }
}
|
|
90
|
+
/**
 * Build the success result for an album fetch.
 *
 * @returns `{ ok: true, articles, artifacts, warnings, summaryFile, failedCount }`
 */
function albumSuccess(articles, artifacts, warnings, summaryFile, failedCount) {
    const albumFields = { articles, artifacts, warnings, summaryFile, failedCount };
    return { ok: true, ...albumFields };
}
|
|
100
|
+
/**
 * Build the success result for a single-article fetch.
 * The result's `warnings` are taken from `article.warnings`.
 */
function success(article, artifacts) {
    const warnings = article.warnings;
    return { ok: true, article, artifacts, warnings };
}
|
|
108
|
+
/**
 * Build a failure result carrying `error`.
 */
function failure(error) {
    const outcome = { ok: false };
    outcome.error = error;
    return outcome;
}
|
|
111
|
+
//# sourceMappingURL=fetch-article.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-article.js","sourceRoot":"","sources":["../../src/sdk/fetch-article.ts"],"names":[],"mappings":"AAAA;;GAEG;AAUH,OAAO,EAAE,uBAAuB,EAAE,MAAM,kBAAkB,CAAA;AAE1D,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAA;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAChD,OAAO,EAAE,kBAAkB,EAAE,MAAM,iDAAiD,CAAA;AACpF,OAAO,EAAE,kBAAkB,EAAE,MAAM,iDAAiD,CAAA;AACpF,OAAO,EAAE,mBAAmB,EAAE,MAAM,mDAAmD,CAAA;AACvF,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAA;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAE7C;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,KAAwB;IACzD,OAAO;IACP,MAAM,MAAM,GAAG,uBAAuB,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;IACvD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,OAAO,CAAC,iBAAiB,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACzF,CAAC;IAED,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC,IAAI,CAAA;IAE7F,IAAI,cAAc,EAAE,CAAC;QACnB,OAAO,OAAO,CAAC,iBAAiB,CAAC,sDAAsD,CAAC,CAAC,CAAA;IAC3F,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC,CAAA;IACpE,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,CAAA;IAEpC,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,OAAO,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAA;IAC1C,CAAC;IAED,IAAI,CAAC;QACH,IAAI,MAAyF,CAAA;QAE7F,QAAQ,QAAQ,EAAE,CAAC;YACjB,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,CAAA;gBACxE,IAAI,CAAC,GAAG,CAAC,EAAE;oBAAE,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;gBACtC,MAAM,GAAG,GAAG,CAAC,KAAK,CAAA;gBAClB,MAAK;YACP,CAAC;YACD,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,CAAA;gBACxE,IAAI,CAAC,GAAG,CAAC,EAAE;oBAAE,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;gBACtC,MAAM,GAAG,GAAG,CAAC,KAAK,CAAA;gBAClB,MAAK;YACP,CAAC;YACD,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,MAAM,GAAG,GAAG,MAAM,mBAAmB,CAAC,GAAG,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,CAAA;gBACzE,IAAI,CAAC,GAAG,CAAC,EAAE;oBAAE,OAAO,OAAO,CAAC,GAA
G,CAAC,KAAK,CAAC,CAAA;gBACtC,MAAM,GAAG,GAAG,CAAC,KAAK,CAAA;gBAClB,MAAK;YACP,CAAC;YACD,KAAK,cAAc,CAAC,CAAC,CAAC;gBACpB,MAAM,WAAW,GAAG,MAAM,UAAU,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC,CAAA;gBAC5F,IAAI,CAAC,WAAW,CAAC,EAAE;oBAAE,OAAO,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC,CAAA;gBACtD,OAAO,YAAY,CACjB,WAAW,CAAC,QAAQ,EACpB,WAAW,CAAC,SAAS,EACrB,WAAW,CAAC,QAAQ,EACpB,WAAW,CAAC,WAAW,EACvB,WAAW,CAAC,WAAW,CACxB,CAAA;YACH,CAAC;YACD;gBACE,OAAO,OAAO,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAA;QAC5C,CAAC;QAED,OAAO,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,SAAS,CAAC,CAAA;IAClD,CAAC;IAAC,OAAO,CAAU,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAA;QAC1D,OAAO,OAAO,CAAC;YACb,IAAI,EAAE,eAAe;YACrB,OAAO;YACP,SAAS,EAAE,KAAK;YAChB,OAAO,EAAE,EAAE,GAAG,EAAE;SACjB,CAAC,CAAA;IACJ,CAAC;AACH,CAAC;AAED,SAAS,YAAY,CACnB,QAAmD,EACnD,SAA2B,EAC3B,QAAkB,EAClB,WAAoB,EACpB,WAAoB;IAEpB,OAAO;QACL,EAAE,EAAE,IAAI;QACR,QAAQ;QACR,SAAS;QACT,QAAQ;QACR,WAAW;QACX,WAAW;KACZ,CAAA;AACH,CAAC;AAED,SAAS,OAAO,CACd,OAAgD,EAChD,SAA2B;IAE3B,OAAO;QACL,EAAE,EAAE,IAAI;QACR,OAAO;QACP,SAAS;QACT,QAAQ,EAAE,OAAO,CAAC,QAAQ;KAC3B,CAAA;AACH,CAAC;AAED,SAAS,OAAO,CAAC,KAAiB;IAChC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAA;AAC7B,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 文件系统工具函数
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* 确保目录存在,不存在则递归创建
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Ensures the directory at `dirPath` exists, creating it (and any missing
 * parent directories) when it does not.
 */
export declare function ensureDir(dirPath: string): Promise<void>;
|
|
8
|
+
/**
|
|
9
|
+
* 写入文件,自动创建父目录
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Writes `content` to `filePath` as UTF-8, creating the parent directory first.
 */
export declare function writeFileSafe(filePath: string, content: string): Promise<void>;
|
|
12
|
+
/**
|
|
13
|
+
* 写入 JSON 文件,自动创建父目录
|
|
14
|
+
*/
|
|
15
|
+
/**
 * Serialises `data` as JSON and writes it to `filePath`, creating the parent
 * directory first. `pretty` (default `true`) selects 2-space indentation.
 */
export declare function writeJsonSafe(filePath: string, data: unknown, pretty?: boolean): Promise<void>;
|
|
16
|
+
//# sourceMappingURL=fs.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fs.d.ts","sourceRoot":"","sources":["../../src/utils/fs.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH;;GAEG;AACH,wBAAsB,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAE9D;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGpF;AAED;;GAEG;AACH,wBAAsB,aAAa,CACjC,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,OAAO,EACb,MAAM,UAAO,GACZ,OAAO,CAAC,IAAI,CAAC,CAGf"}
|
package/dist/utils/fs.js
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 文件系统工具函数
|
|
3
|
+
*/
|
|
4
|
+
import { mkdir, writeFile } from 'node:fs/promises';
|
|
5
|
+
import { dirname } from 'node:path';
|
|
6
|
+
/**
 * Ensure that a directory exists, creating it recursively when missing.
 *
 * @param dirPath - Directory to create.
 */
export async function ensureDir(dirPath) {
    const mkdirOptions = { recursive: true };
    await mkdir(dirPath, mkdirOptions);
}
|
|
12
|
+
/**
 * Write a text file, creating its parent directory first.
 *
 * @param filePath - Destination path.
 * @param content - Text to write; encoded as UTF-8.
 */
export async function writeFileSafe(filePath, content) {
    const parentDir = dirname(filePath);
    await ensureDir(parentDir);
    await writeFile(filePath, content, { encoding: 'utf-8' });
}
|
|
19
|
+
/**
 * Write a JSON file, creating its parent directory first.
 *
 * @param filePath - Destination path.
 * @param data - Value handed to `JSON.stringify`.
 * @param pretty - When true (the default) the output is indented by 2 spaces;
 *   otherwise it is written without extra whitespace.
 */
export async function writeJsonSafe(filePath, data, pretty = true) {
    const indent = pretty ? 2 : undefined;
    const serialized = JSON.stringify(data, null, indent);
    await writeFileSafe(filePath, serialized);
}
|
|
26
|
+
//# sourceMappingURL=fs.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fs.js","sourceRoot":"","sources":["../../src/utils/fs.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAEnC;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,OAAe;IAC7C,MAAM,KAAK,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;AAC3C,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,QAAgB,EAAE,OAAe;IACnE,MAAM,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAA;IAClC,MAAM,SAAS,CAAC,QAAQ,EAAE,OAAO,EAAE,OAAO,CAAC,CAAA;AAC7C,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,QAAgB,EAChB,IAAa,EACb,MAAM,GAAG,IAAI;IAEb,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;IAClE,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAA;AACxC,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 文本工具函数
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* 清理文件名中的非法字符
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Turns a title into a file-name-safe string: the characters `< > : " / \ | ? *`
 * become `_`, whitespace runs are collapsed to one space, and the result is
 * limited to `maxLen` characters (50 when omitted).
 */
export declare function sanitizeFilename(title: string, maxLen?: number): string;
|
|
8
|
+
/**
|
|
9
|
+
* 从 CSS class 列表中检测代码语言
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Looks for a known `language-*` CSS class in `classes` and returns the
 * corresponding language name, or `null` when none is recognised.
 */
export declare function detectCodeLanguage(classes: string[]): string | null;
|
|
12
|
+
/**
|
|
13
|
+
* 清理代码块内容:去除前导空行,保留缩进(与 Python 版一致,保留末尾空行)
|
|
14
|
+
*/
|
|
15
|
+
/**
 * Removes leading blank lines from a code snippet. Indentation and any
 * trailing blank lines are left untouched.
 */
export declare function cleanCodeContent(code: string): string;
|
|
16
|
+
/**
|
|
17
|
+
* 格式化本地时间: YYYY-MM-DD HH:MM:SS(与 Python time.strftime 一致)
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Formats `date` (the current time when omitted) in the local time zone as
 * `YYYY-MM-DD HH:MM:SS`.
 */
export declare function formatLocalTime(date?: Date): string;
|
|
20
|
+
//# sourceMappingURL=text.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../src/utils/text.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,SAAK,GAAG,MAAM,CAMnE;AA6CD;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,GAAG,IAAI,CAgBnE;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CASrD;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,OAAa,GAAG,MAAM,CAGzD"}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 文本工具函数
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Turn an article title into a string that is safe to use as a file name.
 *
 * - Characters that are illegal in file names (`< > : " / \ | ? *`) become `_`.
 * - Runs of whitespace collapse to a single space; leading/trailing whitespace is removed.
 * - The result is truncated to at most `maxLen` UTF-16 code units.
 *
 * Truncation never leaves a dangling half of a surrogate pair (e.g. half an
 * emoji) and never leaves a trailing space, both of which make awkward or
 * invalid file names.
 *
 * @param title - Raw title text.
 * @param maxLen - Maximum length of the result (default 50).
 * @returns The sanitised, possibly shortened name. May be empty.
 */
export function sanitizeFilename(title, maxLen = 50) {
    const cleaned = title
        .replace(/[<>:"/\\|?*]/g, '_')
        .replace(/\s+/g, ' ')
        .trim();
    return cleaned
        .slice(0, maxLen)
        // A high surrogate at the very end has lost its low half to the cut.
        .replace(/[\uD800-\uDBFF]$/, '')
        // The cut may have landed right after an inner space.
        .trimEnd();
}
|
|
14
|
+
/**
 * Maps `language-*` CSS class names to canonical language names.
 */
const LANGUAGE_MAPPING = {
    'language-python': 'python',
    'language-javascript': 'javascript',
    'language-js': 'javascript',
    'language-typescript': 'typescript',
    'language-ts': 'typescript',
    'language-java': 'java',
    'language-cpp': 'cpp',
    'language-c++': 'cpp',
    'language-c': 'c',
    'language-csharp': 'csharp',
    'language-c#': 'csharp',
    'language-go': 'go',
    'language-rust': 'rust',
    'language-php': 'php',
    'language-ruby': 'ruby',
    'language-python3': 'python',
    'language-py': 'python',
    'language-html': 'html',
    'language-css': 'css',
    'language-scss': 'scss',
    'language-sass': 'sass',
    'language-json': 'json',
    'language-xml': 'xml',
    'language-yaml': 'yaml',
    'language-yml': 'yaml',
    'language-sql': 'sql',
    'language-bash': 'bash',
    'language-shell': 'bash',
    'language-sh': 'bash',
    'language-markdown': 'markdown',
    'language-md': 'markdown',
    'language-dockerfile': 'dockerfile',
    'language-docker': 'dockerfile',
    'language-git': 'git',
    'language-diff': 'diff',
    'language-text': 'text',
    'language-plain': 'text',
};
/**
 * Mapping keys ordered longest first, so that substring matching prefers the
 * most specific key (`language-csharp` before `language-c`).
 */
const LANGUAGE_KEYS_LONGEST_FIRST = Object.keys(LANGUAGE_MAPPING).sort((a, b) => b.length - a.length);
/**
 * Detect the code language from a list of CSS classes.
 *
 * Two passes are made over `classes`:
 * 1. exact match - the class is itself a key of the mapping table;
 * 2. partial match - the class contains a key as a substring, the longest
 *    matching key winning.
 *
 * @param classes - CSS class names taken from a code element.
 * @returns The language name, or `null` when nothing matches.
 */
export function detectCodeLanguage(classes) {
    // Exact match. Own-property check only: `in` would also accept inherited
    // names such as "constructor" and return a non-string.
    for (const cls of classes) {
        if (Object.hasOwn(LANGUAGE_MAPPING, cls)) {
            return LANGUAGE_MAPPING[cls];
        }
    }
    // Partial match, most specific key first.
    for (const cls of classes) {
        const key = LANGUAGE_KEYS_LONGEST_FIRST.find((candidate) => cls.includes(candidate));
        if (key !== undefined) {
            return LANGUAGE_MAPPING[key];
        }
    }
    return null;
}
|
|
76
|
+
/**
 * Clean up code block content: drop leading blank lines while keeping
 * indentation and any trailing blank lines intact.
 *
 * @param code - Raw code text.
 * @returns The text starting at its first non-blank line, or `''` when every line is blank.
 */
export function cleanCodeContent(code) {
    const rows = code.split('\n');
    const firstContentRow = rows.findIndex((row) => row.trim() !== '');
    if (firstContentRow === -1) {
        return '';
    }
    return rows.slice(firstContentRow).join('\n');
}
|
|
89
|
+
/**
 * Format a date in local time as `YYYY-MM-DD HH:MM:SS`.
 *
 * @param date - Date to format; defaults to the current time.
 * @returns The formatted timestamp; month, day, hours, minutes and seconds are zero-padded to two digits.
 */
export function formatLocalTime(date = new Date()) {
    const twoDigits = (value) => String(value).padStart(2, '0');
    const datePart = [
        date.getFullYear(),
        twoDigits(date.getMonth() + 1), // getMonth() is zero-based
        twoDigits(date.getDate()),
    ].join('-');
    const timePart = [date.getHours(), date.getMinutes(), date.getSeconds()]
        .map((value) => twoDigits(value))
        .join(':');
    return `${datePart} ${timePart}`;
}
|
|
96
|
+
//# sourceMappingURL=text.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"text.js","sourceRoot":"","sources":["../../src/utils/text.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAa,EAAE,MAAM,GAAG,EAAE;IACzD,OAAO,KAAK;SACT,OAAO,CAAC,eAAe,EAAE,GAAG,CAAC;SAC7B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE;SACN,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAA;AACrB,CAAC;AAED;;GAEG;AACH,MAAM,gBAAgB,GAA2B;IAC/C,iBAAiB,EAAE,QAAQ;IAC3B,qBAAqB,EAAE,YAAY;IACnC,aAAa,EAAE,YAAY;IAC3B,qBAAqB,EAAE,YAAY;IACnC,aAAa,EAAE,YAAY;IAC3B,eAAe,EAAE,MAAM;IACvB,cAAc,EAAE,KAAK;IACrB,cAAc,EAAE,KAAK;IACrB,YAAY,EAAE,GAAG;IACjB,iBAAiB,EAAE,QAAQ;IAC3B,aAAa,EAAE,QAAQ;IACvB,aAAa,EAAE,IAAI;IACnB,eAAe,EAAE,MAAM;IACvB,cAAc,EAAE,KAAK;IACrB,eAAe,EAAE,MAAM;IACvB,kBAAkB,EAAE,QAAQ;IAC5B,aAAa,EAAE,QAAQ;IACvB,eAAe,EAAE,MAAM;IACvB,cAAc,EAAE,KAAK;IACrB,eAAe,EAAE,MAAM;IACvB,eAAe,EAAE,MAAM;IACvB,eAAe,EAAE,MAAM;IACvB,cAAc,EAAE,KAAK;IACrB,eAAe,EAAE,MAAM;IACvB,cAAc,EAAE,MAAM;IACtB,cAAc,EAAE,KAAK;IACrB,eAAe,EAAE,MAAM;IACvB,gBAAgB,EAAE,MAAM;IACxB,aAAa,EAAE,MAAM;IACrB,mBAAmB,EAAE,UAAU;IAC/B,aAAa,EAAE,UAAU;IACzB,qBAAqB,EAAE,YAAY;IACnC,iBAAiB,EAAE,YAAY;IAC/B,cAAc,EAAE,KAAK;IACrB,eAAe,EAAE,MAAM;IACvB,eAAe,EAAE,MAAM;IACvB,gBAAgB,EAAE,MAAM;CACzB,CAAA;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAiB;IAClD,OAAO;IACP,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,IAAI,GAAG,IAAI,gBAAgB,EAAE,CAAC;YAC5B,OAAO,gBAAgB,CAAC,GAAG,CAAC,CAAA;QAC9B,CAAC;IACH,CAAC;IACD,OAAO;IACP,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,KAAK,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,gBAAgB,CAAC,EAAE,CAAC;YAC3D,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACtB,OAAO,IAAI,CAAA;YACb,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IAC9B,MAAM,OAAO,GAAa,EAAE,CAAA;IAC5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,IAAI,EAAE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACpB,CAAC;IACH,CAAC;IACD,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AAC3B,CAAC;
AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,IAAI,GAAG,IAAI,IAAI,EAAE;IAC/C,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACrD,OAAO,GAAG,IAAI,CAAC,WAAW,EAAE,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,CAAA;AAC/J,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL 工具函数
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* 从 URL 中提取域名(不含 www. 前缀)
|
|
6
|
+
* URL 无效时返回 null
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Returns the lower-cased hostname of `url` without a leading `www.`,
 * or `null` when `url` cannot be parsed.
 */
export declare function extractDomain(url: string): string | null;
|
|
9
|
+
/**
|
|
10
|
+
* 检测 URL 对应的平台
|
|
11
|
+
* URL 无效时返回 null(不静默路由到 generic)
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Classifies `url` as one of the supported platforms. Returns `null` when
 * `url` cannot be parsed (it is not silently treated as `'generic'`).
 */
export declare function detectPlatform(url: string): 'wechat' | 'wechat-album' | 'juejin' | 'generic' | null;
|
|
14
|
+
/**
|
|
15
|
+
* 移除 URL 末尾的 #rd 后缀(微信文章常见)
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Removes a trailing `#rd` from `url`; any other URL is returned unchanged.
 */
export declare function stripHashSuffix(url: string): string;
|
|
18
|
+
/**
|
|
19
|
+
* 构建代理图片 URL(使用 images.weserv.nl 绕过防盗链)
|
|
20
|
+
*/
|
|
21
|
+
/**
 * Wraps an image URL that starts with `http` in an `images.weserv.nl` proxy
 * URL. Empty values and values without that prefix are returned unchanged.
 */
export declare function buildProxyImageUrl(originalUrl: string): string;
|
|
22
|
+
//# sourceMappingURL=url.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url.d.ts","sourceRoot":"","sources":["../../src/utils/url.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;;GAGG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAQxD;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,QAAQ,GAAG,cAAc,GAAG,QAAQ,GAAG,SAAS,GAAG,IAAI,CAoBnG;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAEnD;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAiB9D"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL 工具函数
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Extract the domain from a URL, without a leading `www.`.
 *
 * @param url - URL to parse.
 * @returns The lower-cased hostname, or `null` when the URL is invalid.
 */
export function extractDomain(url) {
    let hostname;
    try {
        ({ hostname } = new URL(url));
    }
    catch {
        return null;
    }
    return hostname.toLowerCase().replace(/^www\./, '');
}
|
|
18
|
+
/**
 * Detect which platform a URL belongs to.
 *
 * The hostname must be the platform domain itself or one of its subdomains;
 * a hostname that merely contains the domain text (for example
 * `mp.weixin.qq.com.evil.example` or `notjuejin.cn`) is treated as `'generic'`.
 *
 * @param url - URL to classify.
 * @returns `'wechat-album'` for WeChat URLs whose path contains `/mp/appmsgalbum`,
 *   `'wechat'` for other WeChat URLs, `'juejin'` for Juejin URLs, `'generic'`
 *   for any other parseable URL, and `null` when the URL is invalid (it is not
 *   silently routed to `'generic'`).
 */
export function detectPlatform(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return null;
    }
    // Drop the trailing dot of a fully-qualified hostname ("juejin.cn.").
    const host = parsed.hostname.toLowerCase().replace(/\.$/, '');
    const isHostOf = (domain) => host === domain || host.endsWith(`.${domain}`);
    if (isHostOf('mp.weixin.qq.com')) {
        return parsed.pathname.includes('/mp/appmsgalbum') ? 'wechat-album' : 'wechat';
    }
    if (isHostOf('juejin.cn')) {
        return 'juejin';
    }
    return 'generic';
}
|
|
41
|
+
/**
 * Remove a trailing `#rd` suffix from a URL (common on WeChat article links).
 *
 * @param url - URL to clean.
 * @returns The URL without the trailing `#rd`, or the input unchanged when it does not end with it.
 */
export function stripHashSuffix(url) {
    const suffix = '#rd';
    if (!url.endsWith(suffix)) {
        return url;
    }
    return url.slice(0, url.length - suffix.length);
}
|
|
47
|
+
/**
 * Build a proxied image URL via images.weserv.nl (used to get around hotlink protection).
 *
 * @param originalUrl - Image URL as found in the article.
 * @returns The proxy URL with the original URL percent-encoded in the `url`
 *   query parameter. `&n=-1` is appended when the lower-cased URL contains
 *   `gif`. Empty values and values that do not start with `http` are
 *   returned unchanged.
 */
export function buildProxyImageUrl(originalUrl) {
    const hasHttpPrefix = Boolean(originalUrl) && originalUrl.startsWith('http');
    if (!hasHttpPrefix) {
        return originalUrl;
    }
    const proxied = `https://images.weserv.nl/?url=${encodeURIComponent(originalUrl)}`;
    // GIF images get an extra parameter.
    const lowered = originalUrl.toLowerCase();
    const looksLikeGif = lowered.includes('gif') || lowered.includes('wx_fmt=gif');
    return looksLikeGif ? `${proxied}&n=-1` : proxied;
}
|
|
63
|
+
//# sourceMappingURL=url.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url.js","sourceRoot":"","sources":["../../src/utils/url.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAA;QAC1C,OAAO,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IACvD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,GAAW;IACxC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAA;QAE1C,IAAI,IAAI,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;YACtC,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC;gBAChD,OAAO,cAAc,CAAA;YACvB,CAAC;YACD,OAAO,QAAQ,CAAA;QACjB,CAAC;QAED,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YAC/B,OAAO,QAAQ,CAAA;QACjB,CAAC;QAED,OAAO,SAAS,CAAA;IAClB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,OAAO,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,WAAmB;IACpD,IAAI,CAAC,WAAW,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QACpD,OAAO,WAAW,CAAA;IACpB,CAAC;IAED,MAAM,UAAU,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAA;IAClD,IAAI,QAAQ,GAAG,iCAAiC,UAAU,EAAE,CAAA;IAE5D,eAAe;IACf,IACE,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC;QACzC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAChD,CAAC;QACD,QAAQ,IAAI,OAAO,CAAA;IACrB,CAAC;IAED,OAAO,QAAQ,CAAA;AACjB,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "wespy-ts",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "WeSpy - 文章抓取与 Markdown 转换工具 (TypeScript 版)",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"files": [
|
|
15
|
+
"dist"
|
|
16
|
+
],
|
|
17
|
+
"bin": {
|
|
18
|
+
"wespy": "dist/cli/main.js"
|
|
19
|
+
},
|
|
20
|
+
"scripts": {
|
|
21
|
+
"build": "tsc",
|
|
22
|
+
"dev": "tsc --watch",
|
|
23
|
+
"test": "vitest run",
|
|
24
|
+
"test:watch": "vitest",
|
|
25
|
+
"lint": "tsc --noEmit",
|
|
26
|
+
"clean": "rm -rf dist",
|
|
27
|
+
"prepublishOnly": "npm run build"
|
|
28
|
+
},
|
|
29
|
+
"keywords": [
|
|
30
|
+
"wechat",
|
|
31
|
+
"article",
|
|
32
|
+
"scraper",
|
|
33
|
+
"markdown",
|
|
34
|
+
"juejin",
|
|
35
|
+
"fetch",
|
|
36
|
+
"crawl"
|
|
37
|
+
],
|
|
38
|
+
"license": "MIT",
|
|
39
|
+
"repository": {
|
|
40
|
+
"type": "git",
|
|
41
|
+
"url": "git+https://github.com/Cuimc/WeSpy-TS.git"
|
|
42
|
+
},
|
|
43
|
+
"bugs": {
|
|
44
|
+
"url": "https://github.com/Cuimc/WeSpy-TS/issues"
|
|
45
|
+
},
|
|
46
|
+
"homepage": "https://github.com/Cuimc/WeSpy-TS#readme",
|
|
47
|
+
"sideEffects": false,
|
|
48
|
+
"engines": {
|
|
49
|
+
"node": ">=18"
|
|
50
|
+
},
|
|
51
|
+
"dependencies": {
|
|
52
|
+
"cheerio": "^1.0.0",
|
|
53
|
+
"commander": "^12.0.0",
|
|
54
|
+
"turndown": "^7.1.0",
|
|
55
|
+
"undici": "^6.0.0",
|
|
56
|
+
"zod": "^3.22.0"
|
|
57
|
+
},
|
|
58
|
+
"devDependencies": {
|
|
59
|
+
"@types/node": "^20.0.0",
|
|
60
|
+
"@types/turndown": "^5.0.1",
|
|
61
|
+
"typescript": "^5.4.0",
|
|
62
|
+
"vitest": "^1.0.0"
|
|
63
|
+
}
|
|
64
|
+
}
|