wespy-ts 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/README.md +146 -0
  2. package/dist/cli/main.d.ts +7 -0
  3. package/dist/cli/main.d.ts.map +1 -0
  4. package/dist/cli/main.js +312 -0
  5. package/dist/cli/main.js.map +1 -0
  6. package/dist/converter/html-to-markdown.d.ts +9 -0
  7. package/dist/converter/html-to-markdown.d.ts.map +1 -0
  8. package/dist/converter/html-to-markdown.js +171 -0
  9. package/dist/converter/html-to-markdown.js.map +1 -0
  10. package/dist/converter/sanitize-html.d.ts +12 -0
  11. package/dist/converter/sanitize-html.d.ts.map +1 -0
  12. package/dist/converter/sanitize-html.js +22 -0
  13. package/dist/converter/sanitize-html.js.map +1 -0
  14. package/dist/core/errors.d.ts +17 -0
  15. package/dist/core/errors.d.ts.map +1 -0
  16. package/dist/core/errors.js +36 -0
  17. package/dist/core/errors.js.map +1 -0
  18. package/dist/core/result.d.ts +26 -0
  19. package/dist/core/result.d.ts.map +1 -0
  20. package/dist/core/result.js +26 -0
  21. package/dist/core/result.js.map +1 -0
  22. package/dist/core/types.d.ts +156 -0
  23. package/dist/core/types.d.ts.map +1 -0
  24. package/dist/core/types.js +29 -0
  25. package/dist/core/types.js.map +1 -0
  26. package/dist/fetcher/http-client.d.ts +31 -0
  27. package/dist/fetcher/http-client.d.ts.map +1 -0
  28. package/dist/fetcher/http-client.js +124 -0
  29. package/dist/fetcher/http-client.js.map +1 -0
  30. package/dist/index.d.ts +14 -0
  31. package/dist/index.d.ts.map +1 -0
  32. package/dist/index.js +14 -0
  33. package/dist/index.js.map +1 -0
  34. package/dist/platforms/detector.d.ts +15 -0
  35. package/dist/platforms/detector.d.ts.map +1 -0
  36. package/dist/platforms/detector.js +30 -0
  37. package/dist/platforms/detector.js.map +1 -0
  38. package/dist/platforms/generic/generic-article.extractor.d.ts +25 -0
  39. package/dist/platforms/generic/generic-article.extractor.d.ts.map +1 -0
  40. package/dist/platforms/generic/generic-article.extractor.js +171 -0
  41. package/dist/platforms/generic/generic-article.extractor.js.map +1 -0
  42. package/dist/platforms/juejin/juejin-article.extractor.d.ts +20 -0
  43. package/dist/platforms/juejin/juejin-article.extractor.d.ts.map +1 -0
  44. package/dist/platforms/juejin/juejin-article.extractor.js +167 -0
  45. package/dist/platforms/juejin/juejin-article.extractor.js.map +1 -0
  46. package/dist/platforms/juejin/juejin.types.d.ts +13 -0
  47. package/dist/platforms/juejin/juejin.types.d.ts.map +1 -0
  48. package/dist/platforms/juejin/juejin.types.js +5 -0
  49. package/dist/platforms/juejin/juejin.types.js.map +1 -0
  50. package/dist/platforms/wechat/wechat-album.extractor.d.ts +25 -0
  51. package/dist/platforms/wechat/wechat-album.extractor.d.ts.map +1 -0
  52. package/dist/platforms/wechat/wechat-album.extractor.js +190 -0
  53. package/dist/platforms/wechat/wechat-album.extractor.js.map +1 -0
  54. package/dist/platforms/wechat/wechat-article.extractor.d.ts +20 -0
  55. package/dist/platforms/wechat/wechat-article.extractor.d.ts.map +1 -0
  56. package/dist/platforms/wechat/wechat-article.extractor.js +132 -0
  57. package/dist/platforms/wechat/wechat-article.extractor.js.map +1 -0
  58. package/dist/platforms/wechat/wechat.types.d.ts +17 -0
  59. package/dist/platforms/wechat/wechat.types.d.ts.map +1 -0
  60. package/dist/platforms/wechat/wechat.types.js +5 -0
  61. package/dist/platforms/wechat/wechat.types.js.map +1 -0
  62. package/dist/sdk/fetch-album-list.d.ts +10 -0
  63. package/dist/sdk/fetch-album-list.d.ts.map +1 -0
  64. package/dist/sdk/fetch-album-list.js +31 -0
  65. package/dist/sdk/fetch-album-list.js.map +1 -0
  66. package/dist/sdk/fetch-album.d.ts +24 -0
  67. package/dist/sdk/fetch-album.d.ts.map +1 -0
  68. package/dist/sdk/fetch-album.js +67 -0
  69. package/dist/sdk/fetch-album.js.map +1 -0
  70. package/dist/sdk/fetch-article.d.ts +24 -0
  71. package/dist/sdk/fetch-article.d.ts.map +1 -0
  72. package/dist/sdk/fetch-article.js +111 -0
  73. package/dist/sdk/fetch-article.js.map +1 -0
  74. package/dist/utils/fs.d.ts +16 -0
  75. package/dist/utils/fs.d.ts.map +1 -0
  76. package/dist/utils/fs.js +26 -0
  77. package/dist/utils/fs.js.map +1 -0
  78. package/dist/utils/text.d.ts +20 -0
  79. package/dist/utils/text.d.ts.map +1 -0
  80. package/dist/utils/text.js +96 -0
  81. package/dist/utils/text.js.map +1 -0
  82. package/dist/utils/url.d.ts +22 -0
  83. package/dist/utils/url.d.ts.map +1 -0
  84. package/dist/utils/url.js +63 -0
  85. package/dist/utils/url.js.map +1 -0
  86. package/package.json +64 -0
@@ -0,0 +1,111 @@
1
+ /**
2
+ * fetchArticle SDK API
3
+ */
4
+ import { FetchArticleInputSchema } from '../core/types.js';
5
+ import { HttpClient } from '../fetcher/http-client.js';
6
+ import { detectPlatform } from '../utils/url.js';
7
+ import { fetchWechatArticle } from '../platforms/wechat/wechat-article.extractor.js';
8
+ import { fetchJuejinArticle } from '../platforms/juejin/juejin-article.extractor.js';
9
+ import { fetchGenericArticle } from '../platforms/generic/generic-article.extractor.js';
10
+ import { unsupportedUrlError, invalidInputError } from '../core/errors.js';
11
+ import { fetchAlbum } from './fetch-album.js';
12
+ /**
13
+ * Fetch a single article.
+ *
+ * The input is validated with `FetchArticleInputSchema.safeParse`. Every failure path
+ * returns `{ ok: false, error }` rather than throwing:
+ * - schema violations become an invalid-input error whose message joins all issue
+ *   messages with '; ';
+ * - a truthy `downloadImages` is rejected as invalid input (feature not implemented);
+ * - a URL for which `detectPlatform` returns a falsy value, or a platform that no
+ *   `case` handles, yields an unsupported-URL error;
+ * - an extractor result with `ok === false` has its `error` passed through unchanged;
+ * - any exception thrown inside the dispatch is converted into an `UNKNOWN_ERROR`
+ *   failure (`retryable: false`, `details: { url }`).
+ *
+ * For 'wechat', 'juejin' and 'generic' a successful result is
+ * `{ ok: true, article, artifacts, warnings }`, where `warnings` is `article.warnings`.
+ * For 'wechat-album' the call is delegated to `fetchAlbum` with `maxArticles: 10` and the
+ * result has the album shape
+ * `{ ok: true, articles, artifacts, warnings, summaryFile, failedCount }`.
+ *
+ * NOTE(review): the 'wechat-album' branch forwards `timeoutMs` but not `userAgent` or
+ * `headers` to `fetchAlbum` - confirm this is intentional.
+ *
+ * @param input - Raw options; the fields read here are `url`, `outputDir`, `format`,
+ *   `timeoutMs`, `userAgent`, `headers` and `downloadImages`.
+ * @returns A promise resolving to one of the result objects described above.
14
+ *
15
+ * @example
16
+ * ```ts
17
+ * import { fetchArticle } from 'wespy-ts'
18
+ *
19
+ * const result = await fetchArticle({
20
+ * url: 'https://mp.weixin.qq.com/s/xxxxx',
21
+ * format: ['markdown', 'json'],
22
+ * })
23
+ *
24
+ * if (result.ok) {
25
+ * console.log(result.article.title)
26
+ * console.log(result.article.markdown)
27
+ * }
28
+ * ```
29
+ */
30
+ export async function fetchArticle(input) {
31
+ // Validate the input
32
+ const parsed = FetchArticleInputSchema.safeParse(input);
33
+ if (!parsed.success) {
34
+ return failure(invalidInputError(parsed.error.issues.map((i) => i.message).join('; ')));
35
+ }
36
+ const { url, outputDir, format, timeoutMs, userAgent, headers, downloadImages } = parsed.data;
37
+ if (downloadImages) {
38
+ return failure(invalidInputError('downloadImages 尚未实现;Python 原版仅在 Markdown 中转换图片代理 URL'));
39
+ }
40
+ const httpClient = new HttpClient({ timeoutMs, userAgent, headers });
41
+ const platform = detectPlatform(url);
42
+ if (!platform) {
43
+ return failure(unsupportedUrlError(url));
44
+ }
45
+ try {
46
+ let result;
47
+ switch (platform) {
48
+ case 'wechat': {
49
+ const res = await fetchWechatArticle(url, httpClient, outputDir, format);
50
+ if (!res.ok)
51
+ return failure(res.error);
52
+ result = res.value;
53
+ break;
54
+ }
55
+ case 'juejin': {
56
+ const res = await fetchJuejinArticle(url, httpClient, outputDir, format);
57
+ if (!res.ok)
58
+ return failure(res.error);
59
+ result = res.value;
60
+ break;
61
+ }
62
+ case 'generic': {
63
+ const res = await fetchGenericArticle(url, httpClient, outputDir, format);
64
+ if (!res.ok)
65
+ return failure(res.error);
66
+ result = res.value;
67
+ break;
68
+ }
69
+ case 'wechat-album': {
70
+ const albumResult = await fetchAlbum({ url, outputDir, format, timeoutMs, maxArticles: 10 });
71
+ if (!albumResult.ok)
72
+ return failure(albumResult.error);
73
+ return albumSuccess(albumResult.articles, albumResult.artifacts, albumResult.warnings, albumResult.summaryFile, albumResult.failedCount);
74
+ }
75
+ default:
76
+ return failure(unsupportedUrlError(url));
77
+ }
78
+ return success(result.article, result.artifacts);
79
+ }
80
+ catch (e) {
81
+ const message = e instanceof Error ? e.message : String(e);
82
+ return failure({
83
+ code: 'UNKNOWN_ERROR',
84
+ message,
85
+ retryable: false,
86
+ details: { url },
87
+ });
88
+ }
89
+ }
90
// Builds the success result for an album fetch: `ok: true` plus the five values
// passed in, each stored under a key of the same name.
+ function albumSuccess(articles, artifacts, warnings, summaryFile, failedCount) {
91
+ return {
92
+ ok: true,
93
+ articles,
94
+ artifacts,
95
+ warnings,
96
+ summaryFile,
97
+ failedCount,
98
+ };
99
+ }
100
// Builds the success result for a single article. `warnings` is not a parameter:
// it is copied from `article.warnings`, so `article` must be a non-null object.
+ function success(article, artifacts) {
101
+ return {
102
+ ok: true,
103
+ article,
104
+ artifacts,
105
+ warnings: article.warnings,
106
+ };
107
+ }
108
// Wraps an error value in the failure result shape `{ ok: false, error }`.
+ function failure(error) {
109
+ return { ok: false, error };
110
+ }
111
+ //# sourceMappingURL=fetch-article.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-article.js","sourceRoot":"","sources":["../../src/sdk/fetch-article.ts"],"names":[],"mappings":"AAAA;;GAEG;AAUH,OAAO,EAAE,uBAAuB,EAAE,MAAM,kBAAkB,CAAA;AAE1D,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAA;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAChD,OAAO,EAAE,kBAAkB,EAAE,MAAM,iDAAiD,CAAA;AACpF,OAAO,EAAE,kBAAkB,EAAE,MAAM,iDAAiD,CAAA;AACpF,OAAO,EAAE,mBAAmB,EAAE,MAAM,mDAAmD,CAAA;AACvF,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAA;AAC1E,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAE7C;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,KAAwB;IACzD,OAAO;IACP,MAAM,MAAM,GAAG,uBAAuB,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;IACvD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,OAAO,CAAC,iBAAiB,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACzF,CAAC;IAED,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC,IAAI,CAAA;IAE7F,IAAI,cAAc,EAAE,CAAC;QACnB,OAAO,OAAO,CAAC,iBAAiB,CAAC,sDAAsD,CAAC,CAAC,CAAA;IAC3F,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC,CAAA;IACpE,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,CAAA;IAEpC,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,OAAO,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAA;IAC1C,CAAC;IAED,IAAI,CAAC;QACH,IAAI,MAAyF,CAAA;QAE7F,QAAQ,QAAQ,EAAE,CAAC;YACjB,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,CAAA;gBACxE,IAAI,CAAC,GAAG,CAAC,EAAE;oBAAE,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;gBACtC,MAAM,GAAG,GAAG,CAAC,KAAK,CAAA;gBAClB,MAAK;YACP,CAAC;YACD,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,CAAA;gBACxE,IAAI,CAAC,GAAG,CAAC,EAAE;oBAAE,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;gBACtC,MAAM,GAAG,GAAG,CAAC,KAAK,CAAA;gBAClB,MAAK;YACP,CAAC;YACD,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,MAAM,GAAG,GAAG,MAAM,mBAAmB,CAAC,GAAG,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,CAAA;gBACzE,IAAI,CAAC,GAAG,CAAC,EAAE;oBAAE,OAAO,OAAO,CAAC,G
AAG,CAAC,KAAK,CAAC,CAAA;gBACtC,MAAM,GAAG,GAAG,CAAC,KAAK,CAAA;gBAClB,MAAK;YACP,CAAC;YACD,KAAK,cAAc,CAAC,CAAC,CAAC;gBACpB,MAAM,WAAW,GAAG,MAAM,UAAU,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC,CAAA;gBAC5F,IAAI,CAAC,WAAW,CAAC,EAAE;oBAAE,OAAO,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC,CAAA;gBACtD,OAAO,YAAY,CACjB,WAAW,CAAC,QAAQ,EACpB,WAAW,CAAC,SAAS,EACrB,WAAW,CAAC,QAAQ,EACpB,WAAW,CAAC,WAAW,EACvB,WAAW,CAAC,WAAW,CACxB,CAAA;YACH,CAAC;YACD;gBACE,OAAO,OAAO,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAA;QAC5C,CAAC;QAED,OAAO,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,SAAS,CAAC,CAAA;IAClD,CAAC;IAAC,OAAO,CAAU,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAA;QAC1D,OAAO,OAAO,CAAC;YACb,IAAI,EAAE,eAAe;YACrB,OAAO;YACP,SAAS,EAAE,KAAK;YAChB,OAAO,EAAE,EAAE,GAAG,EAAE;SACjB,CAAC,CAAA;IACJ,CAAC;AACH,CAAC;AAED,SAAS,YAAY,CACnB,QAAmD,EACnD,SAA2B,EAC3B,QAAkB,EAClB,WAAoB,EACpB,WAAoB;IAEpB,OAAO;QACL,EAAE,EAAE,IAAI;QACR,QAAQ;QACR,SAAS;QACT,QAAQ;QACR,WAAW;QACX,WAAW;KACZ,CAAA;AACH,CAAC;AAED,SAAS,OAAO,CACd,OAAgD,EAChD,SAA2B;IAE3B,OAAO;QACL,EAAE,EAAE,IAAI;QACR,OAAO;QACP,SAAS;QACT,QAAQ,EAAE,OAAO,CAAC,QAAQ;KAC3B,CAAA;AACH,CAAC;AAED,SAAS,OAAO,CAAC,KAAiB;IAChC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAA;AAC7B,CAAC"}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * 文件系统工具函数
3
+ */
4
+ /**
5
+ * Ensure that a directory exists, creating it (and any missing parents) if needed.
6
+ */
7
+ export declare function ensureDir(dirPath: string): Promise<void>;
8
+ /**
9
+ * Write a text file, creating the parent directory first if it does not exist.
10
+ */
11
+ export declare function writeFileSafe(filePath: string, content: string): Promise<void>;
12
+ /**
13
+ * Serialize `data` as JSON and write it to a file, creating the parent directory
+ * first if it does not exist. `pretty` selects indented output.
14
+ */
15
+ export declare function writeJsonSafe(filePath: string, data: unknown, pretty?: boolean): Promise<void>;
16
+ //# sourceMappingURL=fs.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fs.d.ts","sourceRoot":"","sources":["../../src/utils/fs.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH;;GAEG;AACH,wBAAsB,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAE9D;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGpF;AAED;;GAEG;AACH,wBAAsB,aAAa,CACjC,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,OAAO,EACb,MAAM,UAAO,GACZ,OAAO,CAAC,IAAI,CAAC,CAGf"}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * 文件系统工具函数
3
+ */
4
+ import { mkdir, writeFile } from 'node:fs/promises';
5
+ import { dirname } from 'node:path';
6
+ /**
7
+ * Ensure that a directory exists, creating it recursively if it does not.
+ *
+ * Calls `mkdir` with `{ recursive: true }`; any error from `mkdir` rejects the
+ * returned promise.
+ *
+ * @param dirPath - Path of the directory to create.
8
+ */
9
+ export async function ensureDir(dirPath) {
10
+ await mkdir(dirPath, { recursive: true });
11
+ }
12
+ /**
13
+ * Write a file, creating its parent directory automatically.
+ *
+ * The parent directory (`dirname(filePath)`) is created via `ensureDir` before the
+ * content is written with 'utf-8' encoding.
+ *
+ * @param filePath - Destination file path.
+ * @param content - Text to write.
14
+ */
15
+ export async function writeFileSafe(filePath, content) {
16
+ await ensureDir(dirname(filePath));
17
+ await writeFile(filePath, content, 'utf-8');
18
+ }
19
+ /**
20
+ * Write a JSON file, creating its parent directory automatically.
+ *
+ * `data` is serialized with `JSON.stringify`, using a 2-space indent when `pretty`
+ * is truthy and no indentation otherwise, then handed to `writeFileSafe`.
+ *
+ * NOTE(review): `JSON.stringify` returns `undefined` (not a string) when `data` is
+ * `undefined` or a function; that value would then be passed on as the file
+ * content - confirm callers never pass such values.
+ *
+ * @param filePath - Destination file path.
+ * @param data - Value to serialize.
+ * @param pretty - Whether to indent the output (defaults to `true`).
21
+ */
22
+ export async function writeJsonSafe(filePath, data, pretty = true) {
23
+ const content = JSON.stringify(data, null, pretty ? 2 : undefined);
24
+ await writeFileSafe(filePath, content);
25
+ }
26
+ //# sourceMappingURL=fs.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fs.js","sourceRoot":"","sources":["../../src/utils/fs.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAEnC;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,OAAe;IAC7C,MAAM,KAAK,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;AAC3C,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,QAAgB,EAAE,OAAe;IACnE,MAAM,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAA;IAClC,MAAM,SAAS,CAAC,QAAQ,EAAE,OAAO,EAAE,OAAO,CAAC,CAAA;AAC7C,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,QAAgB,EAChB,IAAa,EACb,MAAM,GAAG,IAAI;IAEb,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;IAClE,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAA;AACxC,CAAC"}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * 文本工具函数
3
+ */
4
+ /**
5
+ * Replace characters that are illegal in file names and truncate to `maxLen`.
6
+ */
7
+ export declare function sanitizeFilename(title: string, maxLen?: number): string;
8
+ /**
9
+ * Detect the code language from a list of CSS class names; `null` when none matches.
10
+ */
11
+ export declare function detectCodeLanguage(classes: string[]): string | null;
12
+ /**
13
+ * Clean code-block content: drop leading blank lines while keeping indentation
+ * (trailing blank lines are kept, matching the Python version per the original comment).
14
+ */
15
+ export declare function cleanCodeContent(code: string): string;
16
+ /**
17
+ * Format a date in local time as YYYY-MM-DD HH:MM:SS
+ * (described by the original comment as matching Python's time.strftime).
18
+ */
19
+ export declare function formatLocalTime(date?: Date): string;
20
+ //# sourceMappingURL=text.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../src/utils/text.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,SAAK,GAAG,MAAM,CAMnE;AA6CD;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,GAAG,IAAI,CAgBnE;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CASrD;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,OAAa,GAAG,MAAM,CAGzD"}
@@ -0,0 +1,96 @@
1
+ /**
2
+ * 文本工具函数
3
+ */
4
/**
 * Turn an article title into a string that is safe to use as a file name.
 *
 * Steps, in order:
 * 1. every character in `< > : " / \ | ? *` is replaced by `_`;
 * 2. each run of whitespace is collapsed to a single space;
 * 3. leading and trailing whitespace is removed;
 * 4. the result is truncated to at most `maxLen` UTF-16 code units.
 *
 * If the truncation point falls between the two halves of a surrogate pair
 * (e.g. an emoji), the dangling high surrogate is dropped as well, so the
 * returned name never ends in half a character.
 *
 * @param {string} title - Raw title.
 * @param {number} [maxLen=50] - Maximum length of the result in UTF-16 code units.
 * @returns {string} The sanitized (possibly empty) file name.
 */
export function sanitizeFilename(title, maxLen = 50) {
    const cleaned = title
        .replace(/[<>:"/\\|?*]/g, '_')
        .replace(/\s+/g, ' ')
        .trim();
    const truncated = cleaned.slice(0, maxLen);
    // charCodeAt yields NaN for an empty string, which fails both comparisons below.
    const lastUnit = truncated.charCodeAt(truncated.length - 1);
    const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    // Only strip when something was actually cut off; an untruncated title is returned as-is.
    if (truncated.length < cleaned.length && endsOnHighSurrogate) {
        return truncated.slice(0, -1);
    }
    return truncated;
}
14
/**
 * Mapping from a `language-*` CSS class to the language name used for fenced code blocks.
 */
const LANGUAGE_MAPPING = {
    'language-python': 'python',
    'language-javascript': 'javascript',
    'language-js': 'javascript',
    'language-typescript': 'typescript',
    'language-ts': 'typescript',
    'language-java': 'java',
    'language-cpp': 'cpp',
    'language-c++': 'cpp',
    'language-c': 'c',
    'language-csharp': 'csharp',
    'language-c#': 'csharp',
    'language-go': 'go',
    'language-rust': 'rust',
    'language-php': 'php',
    'language-ruby': 'ruby',
    'language-python3': 'python',
    'language-py': 'python',
    'language-html': 'html',
    'language-css': 'css',
    'language-scss': 'scss',
    'language-sass': 'sass',
    'language-json': 'json',
    'language-xml': 'xml',
    'language-yaml': 'yaml',
    'language-yml': 'yaml',
    'language-sql': 'sql',
    'language-bash': 'bash',
    'language-shell': 'bash',
    'language-sh': 'bash',
    'language-markdown': 'markdown',
    'language-md': 'markdown',
    'language-dockerfile': 'dockerfile',
    'language-docker': 'dockerfile',
    'language-git': 'git',
    'language-diff': 'diff',
    'language-text': 'text',
    'language-plain': 'text',
};
/**
 * Keys of LANGUAGE_MAPPING ordered longest first, so that a substring search
 * prefers the most specific key ('language-css' over 'language-c').
 * Array.prototype.sort is stable, so equal-length keys keep their declaration order.
 */
const LANGUAGE_KEYS_BY_LENGTH = Object.keys(LANGUAGE_MAPPING).sort((a, b) => b.length - a.length);
/**
 * Detect the code language from a list of CSS class names.
 *
 * Two passes are made over `classes`:
 * 1. exact match - the first class that is an own key of LANGUAGE_MAPPING wins;
 * 2. partial match - the first class that contains any mapping key wins, using
 *    the longest key it contains.
 *
 * @param {string[]} classes - CSS class names taken from a code element.
 * @returns {string | null} The language name, or `null` when nothing matches.
 */
export function detectCodeLanguage(classes) {
    // Exact match. Object.hasOwn ignores inherited keys, so a class literally
    // named "constructor" or "toString" is not mistaken for a language.
    for (const cls of classes) {
        if (Object.hasOwn(LANGUAGE_MAPPING, cls)) {
            return LANGUAGE_MAPPING[cls];
        }
    }
    // Partial match, most specific key first.
    for (const cls of classes) {
        const key = LANGUAGE_KEYS_BY_LENGTH.find((candidate) => cls.includes(candidate));
        if (key !== undefined) {
            return LANGUAGE_MAPPING[key];
        }
    }
    return null;
}
76
+ /**
77
+ * Clean the content of a code block by dropping leading blank lines.
+ *
+ * The input is split on '\n'. Lines that are empty or whitespace-only are skipped
+ * until the first non-blank line has been kept; from then on every line is kept
+ * unchanged, so indentation and trailing blank lines are preserved (the original
+ * comment states this matches the Python version). If every line is blank the
+ * result is the empty string.
+ *
+ * @param code - Raw code text.
+ * @returns The kept lines joined with '\n'.
78
+ */
79
+ export function cleanCodeContent(code) {
80
+ const lines = code.split('\n');
81
+ const cleaned = [];
82
+ for (const line of lines) {
83
+ if (line.trim() || cleaned.length > 0) {
84
+ cleaned.push(line);
85
+ }
86
+ }
87
+ return cleaned.join('\n');
88
+ }
89
+ /**
90
+ * Format a date in local time as YYYY-MM-DD HH:MM:SS
+ * (described by the original comment as matching Python's time.strftime).
+ *
+ * Month, day, hour, minute and second are left-padded with '0' to two digits;
+ * the year is emitted as returned by `getFullYear()` without padding.
+ *
+ * @param date - Date to format; defaults to the current time.
+ * @returns The formatted timestamp string.
91
+ */
92
+ export function formatLocalTime(date = new Date()) {
93
+ const pad = (n) => String(n).padStart(2, '0');
94
+ return `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())} ${pad(date.getHours())}:${pad(date.getMinutes())}:${pad(date.getSeconds())}`;
95
+ }
96
+ //# sourceMappingURL=text.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text.js","sourceRoot":"","sources":["../../src/utils/text.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAa,EAAE,MAAM,GAAG,EAAE;IACzD,OAAO,KAAK;SACT,OAAO,CAAC,eAAe,EAAE,GAAG,CAAC;SAC7B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE;SACN,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAA;AACrB,CAAC;AAED;;GAEG;AACH,MAAM,gBAAgB,GAA2B;IAC/C,iBAAiB,EAAE,QAAQ;IAC3B,qBAAqB,EAAE,YAAY;IACnC,aAAa,EAAE,YAAY;IAC3B,qBAAqB,EAAE,YAAY;IACnC,aAAa,EAAE,YAAY;IAC3B,eAAe,EAAE,MAAM;IACvB,cAAc,EAAE,KAAK;IACrB,cAAc,EAAE,KAAK;IACrB,YAAY,EAAE,GAAG;IACjB,iBAAiB,EAAE,QAAQ;IAC3B,aAAa,EAAE,QAAQ;IACvB,aAAa,EAAE,IAAI;IACnB,eAAe,EAAE,MAAM;IACvB,cAAc,EAAE,KAAK;IACrB,eAAe,EAAE,MAAM;IACvB,kBAAkB,EAAE,QAAQ;IAC5B,aAAa,EAAE,QAAQ;IACvB,eAAe,EAAE,MAAM;IACvB,cAAc,EAAE,KAAK;IACrB,eAAe,EAAE,MAAM;IACvB,eAAe,EAAE,MAAM;IACvB,eAAe,EAAE,MAAM;IACvB,cAAc,EAAE,KAAK;IACrB,eAAe,EAAE,MAAM;IACvB,cAAc,EAAE,MAAM;IACtB,cAAc,EAAE,KAAK;IACrB,eAAe,EAAE,MAAM;IACvB,gBAAgB,EAAE,MAAM;IACxB,aAAa,EAAE,MAAM;IACrB,mBAAmB,EAAE,UAAU;IAC/B,aAAa,EAAE,UAAU;IACzB,qBAAqB,EAAE,YAAY;IACnC,iBAAiB,EAAE,YAAY;IAC/B,cAAc,EAAE,KAAK;IACrB,eAAe,EAAE,MAAM;IACvB,eAAe,EAAE,MAAM;IACvB,gBAAgB,EAAE,MAAM;CACzB,CAAA;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAiB;IAClD,OAAO;IACP,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,IAAI,GAAG,IAAI,gBAAgB,EAAE,CAAC;YAC5B,OAAO,gBAAgB,CAAC,GAAG,CAAC,CAAA;QAC9B,CAAC;IACH,CAAC;IACD,OAAO;IACP,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,KAAK,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,gBAAgB,CAAC,EAAE,CAAC;YAC3D,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACtB,OAAO,IAAI,CAAA;YACb,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IAC9B,MAAM,OAAO,GAAa,EAAE,CAAA;IAC5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,IAAI,EAAE,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACpB,CAAC;IACH,CAAC;IACD,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AAC3B,CAA
C;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,IAAI,GAAG,IAAI,IAAI,EAAE;IAC/C,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IACrD,OAAO,GAAG,IAAI,CAAC,WAAW,EAAE,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,CAAA;AAC/J,CAAC"}
@@ -0,0 +1,22 @@
1
+ /**
2
+ * URL 工具函数
3
+ */
4
+ /**
5
+ * Extract the host name from a URL, without a leading "www." prefix.
6
+ * Returns null when the URL is invalid.
7
+ */
8
+ export declare function extractDomain(url: string): string | null;
9
+ /**
10
+ * Detect which platform a URL belongs to.
11
+ * Returns null when the URL is invalid (it is not silently routed to 'generic').
12
+ */
13
+ export declare function detectPlatform(url: string): 'wechat' | 'wechat-album' | 'juejin' | 'generic' | null;
14
+ /**
15
+ * Remove a trailing "#rd" suffix from a URL (common on WeChat article links).
16
+ */
17
+ export declare function stripHashSuffix(url: string): string;
18
+ /**
19
+ * Build a proxied image URL (served through images.weserv.nl to get around hotlink protection).
20
+ */
21
+ export declare function buildProxyImageUrl(originalUrl: string): string;
22
+ //# sourceMappingURL=url.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"url.d.ts","sourceRoot":"","sources":["../../src/utils/url.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;;GAGG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAQxD;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,QAAQ,GAAG,cAAc,GAAG,QAAQ,GAAG,SAAS,GAAG,IAAI,CAoBnG;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAEnD;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAiB9D"}
@@ -0,0 +1,63 @@
1
+ /**
2
+ * URL 工具函数
3
+ */
4
+ /**
5
+ * Extract the host name from a URL, without a leading "www." prefix.
6
+ * Returns null when the URL is invalid, i.e. when `new URL(url)` throws.
+ *
+ * The host name is lower-cased before the prefix check. Only a single leading
+ * "www." is removed.
+ *
+ * @param url - Absolute URL string.
+ * @returns The host name, or null.
7
+ */
8
+ export function extractDomain(url) {
9
+ try {
10
+ const parsed = new URL(url);
11
+ const host = parsed.hostname.toLowerCase();
12
+ return host.startsWith('www.') ? host.slice(4) : host;
13
+ }
14
+ catch {
15
+ return null;
16
+ }
17
+ }
18
/**
 * Detect which platform a URL belongs to.
 *
 * - Host `mp.weixin.qq.com` (or a subdomain of it): 'wechat-album' when the path
 *   contains '/mp/appmsgalbum', otherwise 'wechat'.
 * - Host `juejin.cn` (or a subdomain such as `www.juejin.cn`): 'juejin'.
 * - Any other parseable URL: 'generic'.
 * - A string that `new URL()` cannot parse: `null` (it is not silently routed to 'generic').
 *
 * Hosts are compared as whole domain names, not as substrings, so look-alike
 * hosts such as `mp.weixin.qq.com.evil.example` or `notjuejin.cn` are treated
 * as 'generic' instead of being handed to a platform-specific extractor.
 *
 * @param {string} url - Absolute URL string.
 * @returns {'wechat' | 'wechat-album' | 'juejin' | 'generic' | null} The detected platform.
 */
export function detectPlatform(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return null;
    }
    // Drop the optional trailing dot of a fully-qualified name ("juejin.cn.").
    const host = parsed.hostname.toLowerCase().replace(/\.$/, '');
    // True for the domain itself and for genuine subdomains of it.
    const isHostOf = (domain) => host === domain || host.endsWith(`.${domain}`);
    if (isHostOf('mp.weixin.qq.com')) {
        return parsed.pathname.includes('/mp/appmsgalbum') ? 'wechat-album' : 'wechat';
    }
    if (isHostOf('juejin.cn')) {
        return 'juejin';
    }
    return 'generic';
}
41
+ /**
42
+ * Remove a trailing "#rd" suffix from a URL (common on WeChat article links).
+ *
+ * Only an exact "#rd" at the very end of the string is removed; any other
+ * fragment is left untouched.
+ *
+ * @param url - URL string.
+ * @returns The URL without the trailing "#rd".
43
+ */
44
+ export function stripHashSuffix(url) {
45
+ return url.replace(/#rd$/, '');
46
+ }
47
+ /**
48
+ * Build a proxied image URL (served through images.weserv.nl to get around hotlink protection).
+ *
+ * An empty value, or one that does not start with "http", is returned unchanged.
+ * Otherwise the original URL is percent-encoded with `encodeURIComponent` and
+ * placed in the `url` query parameter of `https://images.weserv.nl/`.
+ *
+ * When the lower-cased original URL contains the substring "gif" anywhere,
+ * `&n=-1` is appended (presumably the proxy's "all frames" option for animated
+ * images - confirm against the images.weserv.nl documentation).
+ *
+ * NOTE(review): the second condition (`includes('wx_fmt=gif')`) can never change
+ * the outcome, because any string containing "wx_fmt=gif" also contains "gif".
+ *
+ * @param originalUrl - Source image URL.
+ * @returns The proxied URL, or `originalUrl` itself when it is not proxied.
49
+ */
50
+ export function buildProxyImageUrl(originalUrl) {
51
+ if (!originalUrl || !originalUrl.startsWith('http')) {
52
+ return originalUrl;
53
+ }
54
+ const encodedUrl = encodeURIComponent(originalUrl);
55
+ let proxyUrl = `https://images.weserv.nl/?url=${encodedUrl}`;
56
+ // Add the extra parameter for GIF images
57
+ if (originalUrl.toLowerCase().includes('gif') ||
58
+ originalUrl.toLowerCase().includes('wx_fmt=gif')) {
59
+ proxyUrl += '&n=-1';
60
+ }
61
+ return proxyUrl;
62
+ }
63
+ //# sourceMappingURL=url.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"url.js","sourceRoot":"","sources":["../../src/utils/url.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAA;QAC1C,OAAO,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IACvD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,GAAW;IACxC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAA;QAE1C,IAAI,IAAI,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;YACtC,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC;gBAChD,OAAO,cAAc,CAAA;YACvB,CAAC;YACD,OAAO,QAAQ,CAAA;QACjB,CAAC;QAED,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YAC/B,OAAO,QAAQ,CAAA;QACjB,CAAC;QAED,OAAO,SAAS,CAAA;IAClB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,OAAO,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,WAAmB;IACpD,IAAI,CAAC,WAAW,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QACpD,OAAO,WAAW,CAAA;IACpB,CAAC;IAED,MAAM,UAAU,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAA;IAClD,IAAI,QAAQ,GAAG,iCAAiC,UAAU,EAAE,CAAA;IAE5D,eAAe;IACf,IACE,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC;QACzC,WAAW,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAChD,CAAC;QACD,QAAQ,IAAI,OAAO,CAAA;IACrB,CAAC;IAED,OAAO,QAAQ,CAAA;AACjB,CAAC"}
package/package.json ADDED
@@ -0,0 +1,64 @@
1
+ {
2
+ "name": "wespy-ts",
3
+ "version": "0.2.0",
4
+ "description": "WeSpy - 文章抓取与 Markdown 转换工具 (TypeScript 版)",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js"
12
+ }
13
+ },
14
+ "files": [
15
+ "dist"
16
+ ],
17
+ "bin": {
18
+ "wespy": "dist/cli/main.js"
19
+ },
20
+ "scripts": {
21
+ "build": "tsc",
22
+ "dev": "tsc --watch",
23
+ "test": "vitest run",
24
+ "test:watch": "vitest",
25
+ "lint": "tsc --noEmit",
26
+ "clean": "rm -rf dist",
27
+ "prepublishOnly": "npm run build"
28
+ },
29
+ "keywords": [
30
+ "wechat",
31
+ "article",
32
+ "scraper",
33
+ "markdown",
34
+ "juejin",
35
+ "fetch",
36
+ "crawl"
37
+ ],
38
+ "license": "MIT",
39
+ "repository": {
40
+ "type": "git",
41
+ "url": "git+https://github.com/Cuimc/WeSpy-TS.git"
42
+ },
43
+ "bugs": {
44
+ "url": "https://github.com/Cuimc/WeSpy-TS/issues"
45
+ },
46
+ "homepage": "https://github.com/Cuimc/WeSpy-TS#readme",
47
+ "sideEffects": false,
48
+ "engines": {
49
+ "node": ">=18"
50
+ },
51
+ "dependencies": {
52
+ "cheerio": "^1.0.0",
53
+ "commander": "^12.0.0",
54
+ "turndown": "^7.1.0",
55
+ "undici": "^6.0.0",
56
+ "zod": "^3.22.0"
57
+ },
58
+ "devDependencies": {
59
+ "@types/node": "^20.0.0",
60
+ "@types/turndown": "^5.0.1",
61
+ "typescript": "^5.4.0",
62
+ "vitest": "^1.0.0"
63
+ }
64
+ }