wespy-ts 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/README.md +146 -0
  2. package/dist/cli/main.d.ts +7 -0
  3. package/dist/cli/main.d.ts.map +1 -0
  4. package/dist/cli/main.js +312 -0
  5. package/dist/cli/main.js.map +1 -0
  6. package/dist/converter/html-to-markdown.d.ts +9 -0
  7. package/dist/converter/html-to-markdown.d.ts.map +1 -0
  8. package/dist/converter/html-to-markdown.js +171 -0
  9. package/dist/converter/html-to-markdown.js.map +1 -0
  10. package/dist/converter/sanitize-html.d.ts +12 -0
  11. package/dist/converter/sanitize-html.d.ts.map +1 -0
  12. package/dist/converter/sanitize-html.js +22 -0
  13. package/dist/converter/sanitize-html.js.map +1 -0
  14. package/dist/core/errors.d.ts +17 -0
  15. package/dist/core/errors.d.ts.map +1 -0
  16. package/dist/core/errors.js +36 -0
  17. package/dist/core/errors.js.map +1 -0
  18. package/dist/core/result.d.ts +26 -0
  19. package/dist/core/result.d.ts.map +1 -0
  20. package/dist/core/result.js +26 -0
  21. package/dist/core/result.js.map +1 -0
  22. package/dist/core/types.d.ts +156 -0
  23. package/dist/core/types.d.ts.map +1 -0
  24. package/dist/core/types.js +29 -0
  25. package/dist/core/types.js.map +1 -0
  26. package/dist/fetcher/http-client.d.ts +31 -0
  27. package/dist/fetcher/http-client.d.ts.map +1 -0
  28. package/dist/fetcher/http-client.js +124 -0
  29. package/dist/fetcher/http-client.js.map +1 -0
  30. package/dist/index.d.ts +14 -0
  31. package/dist/index.d.ts.map +1 -0
  32. package/dist/index.js +14 -0
  33. package/dist/index.js.map +1 -0
  34. package/dist/platforms/detector.d.ts +15 -0
  35. package/dist/platforms/detector.d.ts.map +1 -0
  36. package/dist/platforms/detector.js +30 -0
  37. package/dist/platforms/detector.js.map +1 -0
  38. package/dist/platforms/generic/generic-article.extractor.d.ts +25 -0
  39. package/dist/platforms/generic/generic-article.extractor.d.ts.map +1 -0
  40. package/dist/platforms/generic/generic-article.extractor.js +171 -0
  41. package/dist/platforms/generic/generic-article.extractor.js.map +1 -0
  42. package/dist/platforms/juejin/juejin-article.extractor.d.ts +20 -0
  43. package/dist/platforms/juejin/juejin-article.extractor.d.ts.map +1 -0
  44. package/dist/platforms/juejin/juejin-article.extractor.js +167 -0
  45. package/dist/platforms/juejin/juejin-article.extractor.js.map +1 -0
  46. package/dist/platforms/juejin/juejin.types.d.ts +13 -0
  47. package/dist/platforms/juejin/juejin.types.d.ts.map +1 -0
  48. package/dist/platforms/juejin/juejin.types.js +5 -0
  49. package/dist/platforms/juejin/juejin.types.js.map +1 -0
  50. package/dist/platforms/wechat/wechat-album.extractor.d.ts +25 -0
  51. package/dist/platforms/wechat/wechat-album.extractor.d.ts.map +1 -0
  52. package/dist/platforms/wechat/wechat-album.extractor.js +190 -0
  53. package/dist/platforms/wechat/wechat-album.extractor.js.map +1 -0
  54. package/dist/platforms/wechat/wechat-article.extractor.d.ts +20 -0
  55. package/dist/platforms/wechat/wechat-article.extractor.d.ts.map +1 -0
  56. package/dist/platforms/wechat/wechat-article.extractor.js +132 -0
  57. package/dist/platforms/wechat/wechat-article.extractor.js.map +1 -0
  58. package/dist/platforms/wechat/wechat.types.d.ts +17 -0
  59. package/dist/platforms/wechat/wechat.types.d.ts.map +1 -0
  60. package/dist/platforms/wechat/wechat.types.js +5 -0
  61. package/dist/platforms/wechat/wechat.types.js.map +1 -0
  62. package/dist/sdk/fetch-album-list.d.ts +10 -0
  63. package/dist/sdk/fetch-album-list.d.ts.map +1 -0
  64. package/dist/sdk/fetch-album-list.js +31 -0
  65. package/dist/sdk/fetch-album-list.js.map +1 -0
  66. package/dist/sdk/fetch-album.d.ts +24 -0
  67. package/dist/sdk/fetch-album.d.ts.map +1 -0
  68. package/dist/sdk/fetch-album.js +67 -0
  69. package/dist/sdk/fetch-album.js.map +1 -0
  70. package/dist/sdk/fetch-article.d.ts +24 -0
  71. package/dist/sdk/fetch-article.d.ts.map +1 -0
  72. package/dist/sdk/fetch-article.js +111 -0
  73. package/dist/sdk/fetch-article.js.map +1 -0
  74. package/dist/utils/fs.d.ts +16 -0
  75. package/dist/utils/fs.d.ts.map +1 -0
  76. package/dist/utils/fs.js +26 -0
  77. package/dist/utils/fs.js.map +1 -0
  78. package/dist/utils/text.d.ts +20 -0
  79. package/dist/utils/text.d.ts.map +1 -0
  80. package/dist/utils/text.js +96 -0
  81. package/dist/utils/text.js.map +1 -0
  82. package/dist/utils/url.d.ts +22 -0
  83. package/dist/utils/url.d.ts.map +1 -0
  84. package/dist/utils/url.js +63 -0
  85. package/dist/utils/url.js.map +1 -0
  86. package/package.json +64 -0
@@ -0,0 +1,190 @@
1
+ /**
2
+ * WeChat 专辑提取器
3
+ */
4
+ import { ok, err } from '../../core/result.js';
5
+ import { fileSystemError, networkError } from '../../core/errors.js';
6
+ import { stripHashSuffix } from '../../utils/url.js';
7
+ import { fetchWechatArticle } from './wechat-article.extractor.js';
8
+ import { writeJsonSafe } from '../../utils/fs.js';
9
+ import { formatLocalTime } from '../../utils/text.js';
10
+ import { join } from 'node:path';
11
+ const WECHAT_MOBILE_UA = 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.5';
/**
 * Extracts the album identifiers from a WeChat album URL.
 *
 * Reads the `__biz`, `action` and `album_id` query parameters.
 *
 * @param albumUrl Absolute album URL.
 * @returns `{ biz, action, albumId, originalUrl }`, or `null` when the URL
 *   cannot be parsed or any of the three parameters is missing or empty.
 */
export function parseAlbumInfo(albumUrl) {
    try {
        const { searchParams } = new URL(albumUrl);
        const biz = searchParams.get('__biz') ?? '';
        const action = searchParams.get('action') ?? '';
        const albumId = searchParams.get('album_id') ?? '';
        // All three identifiers are mandatory.
        const complete = biz !== '' && action !== '' && albumId !== '';
        return complete ? { biz, action, albumId, originalUrl: albumUrl } : null;
    }
    catch {
        // `new URL` throws on input that is not a valid absolute URL.
        return null;
    }
}
/**
 * Fetches the article list of a WeChat album, following pagination.
 *
 * Pages of 10 entries are requested from the `appmsgalbum` endpoint until one
 * of the following happens: a page carries no entries, `continue_flag` is not
 * `'1'`, the paging cursor cannot be advanced, or `maxArticles` entries have
 * been collected.
 *
 * A page that fails (transport error, unusable JSON, non-zero `base_resp.ret`)
 * after at least one entry has been collected ends the loop and the entries
 * gathered so far are returned as a success.
 *
 * @param albumUrl Album URL; must carry `__biz`, `action` and `album_id`.
 * @param httpClient Client whose `get(url, headers)` resolves to a Result
 *   exposing `value.body` on success and `error` on failure.
 * @param maxArticles Optional cap on the number of entries; a falsy value
 *   means no cap.
 * @returns `ok(entries)` on success, otherwise `err(...)`.
 */
export async function fetchAlbumArticleList(albumUrl, httpClient, maxArticles) {
    const albumInfo = parseAlbumInfo(albumUrl);
    if (!albumInfo) {
        return err(networkError('无法解析专辑 URL', { url: albumUrl }));
    }
    const articles = [];
    let beginMsgid = 0;
    let beginItemidx = 0;
    const count = 10;
    while (true) {
        const apiUrl = new URL('https://mp.weixin.qq.com/mp/appmsgalbum');
        apiUrl.searchParams.set('action', 'getalbum');
        apiUrl.searchParams.set('__biz', albumInfo.biz);
        apiUrl.searchParams.set('album_id', albumInfo.albumId);
        apiUrl.searchParams.set('count', String(count));
        apiUrl.searchParams.set('begin_msgid', String(beginMsgid));
        apiUrl.searchParams.set('begin_itemidx', String(beginItemidx));
        apiUrl.searchParams.set('f', 'json');
        const res = await httpClient.get(apiUrl.toString(), {
            'User-Agent': WECHAT_MOBILE_UA,
            Referer: 'https://mp.weixin.qq.com/',
        });
        if (!res.ok) {
            // A failed page is tolerated once some entries have been collected.
            if (articles.length > 0)
                break;
            return err(res.error);
        }
        let data;
        try {
            data = JSON.parse(res.value.body);
        }
        catch {
            data = undefined;
        }
        // JSON.parse also accepts `null`, numbers and strings; indexing those
        // below would throw, so they are treated like an unparsable body.
        if (data === null || typeof data !== 'object') {
            if (articles.length > 0)
                break;
            return err(networkError('专辑 API 响应不是有效 JSON', { url: albumUrl }));
        }
        const baseResp = data['base_resp'];
        if (baseResp && baseResp['ret'] !== 0) {
            if (articles.length > 0)
                break;
            return err(networkError(`专辑 API 返回错误: ${JSON.stringify(baseResp)}`, { url: albumUrl }));
        }
        const albumResp = data['getalbum_resp'];
        const rawList = albumResp?.['article_list'];
        // NOTE(review): the endpoint appears to send a bare object instead of
        // an array when a page holds a single entry — confirm against live
        // responses. Wrapping it keeps that entry instead of dropping it.
        const articleList = Array.isArray(rawList)
            ? rawList
            : rawList != null && typeof rawList === 'object'
                ? [rawList]
                : [];
        if (!articleList.length)
            break;
        for (const item of articleList) {
            // Ignore malformed items instead of throwing on property access.
            if (item == null || typeof item !== 'object')
                continue;
            const title = String(item['title'] ?? '').trim();
            const url = stripHashSuffix(String(item['url'] ?? '')).trim();
            // Skip entries whose key fields are empty.
            if (!title || !url)
                continue;
            const entry = {
                title,
                url,
                msgid: String(item['msgid'] ?? ''),
                createTime: String(item['create_time'] ?? ''),
                coverImg: String(item['cover_img_1_1'] ?? ''),
                itemidx: String(item['itemidx'] ?? ''),
                key: String(item['key'] ?? ''),
            };
            articles.push(entry);
            if (maxArticles && articles.length >= maxArticles) {
                return ok(articles);
            }
        }
        // Check whether more pages are available.
        if (!albumResp)
            break;
        const continueFlag = String(albumResp['continue_flag'] ?? '0');
        if (continueFlag !== '1')
            break;
        // Advance the paging cursor; stop when it is missing, not numeric, or
        // unchanged, because repeating the same request would loop forever.
        const last = articleList[articleList.length - 1];
        const lastMsgid = last?.['msgid'];
        const lastItemidx = last?.['itemidx'];
        if (lastMsgid == null || lastItemidx == null)
            break;
        const nextMsgid = Number(lastMsgid);
        const nextItemidx = Number(lastItemidx);
        if (Number.isNaN(nextMsgid) || Number.isNaN(nextItemidx))
            break;
        if (nextMsgid === beginMsgid && nextItemidx === beginItemidx)
            break;
        beginMsgid = nextMsgid;
        beginItemidx = nextItemidx;
        // Pause between page requests.
        await delay(500);
    }
    return ok(articles);
}
/**
 * Downloads every article of a WeChat album and writes a summary JSON file.
 *
 * The article list is resolved first; each entry is then fetched in order
 * with a one-second pause between downloads. Articles are stored under
 * `<outputDir>/album_<unix-seconds>/`, the summary is written to
 * `<outputDir>/album_<unix-seconds>_summary.json`.
 *
 * @param albumUrl Album URL.
 * @param httpClient HTTP client passed through to the list and article fetchers.
 * @param outputDir Base output directory.
 * @param formats Output formats forwarded to the article fetcher.
 * @param maxArticles Optional cap forwarded to the list fetcher.
 * @returns `ok({ articles, artifacts, summaryFile, failedCount })`; for an
 *   empty album `ok({ articles: [], artifacts: [], failedCount: 0 })`;
 *   `err(...)` when the list cannot be fetched or the summary cannot be written.
 */
export async function fetchAlbumArticles(albumUrl, httpClient, outputDir, formats, maxArticles) {
    const listing = await fetchAlbumArticleList(albumUrl, httpClient, maxArticles);
    if (!listing.ok) {
        return err(listing.error);
    }
    const entries = listing.value;
    if (!entries.length) {
        // An empty album is a successful (empty) result, not an error.
        return ok({ articles: [], artifacts: [], failedCount: 0 });
    }
    const albumName = `album_${Math.floor(Date.now() / 1000)}`;
    const albumDir = join(outputDir, albumName);
    const downloaded = [];
    const collectedArtifacts = [];
    const failures = [];
    const total = entries.length;
    let position = 0;
    for (const entry of entries) {
        position += 1;
        console.log(`\n[${position}/${total}] 正在下载: ${entry.title}`);
        const outcome = await fetchWechatArticle(entry.url, httpClient, albumDir, formats);
        if (!outcome.ok) {
            const reason = outcome.error.message;
            failures.push({ title: entry.title, url: entry.url, msgid: entry.msgid, error: reason });
            console.log(`❌ 下载失败: ${reason}`);
        }
        else {
            // Merge the album-level metadata into the article.
            const { article } = outcome.value;
            Object.assign(article.metadata, {
                album_title: entry.title,
                album_url: albumUrl,
                msgid: entry.msgid,
                create_time: entry.createTime,
                cover_img: entry.coverImg,
            });
            downloaded.push(article);
            collectedArtifacts.push(...outcome.value.artifacts);
            console.log(`✅ 下载成功`);
        }
        // Pause between downloads, but not after the last one.
        if (position < total) {
            await delay(1000);
        }
    }
    // Persist the summary next to the album directory.
    const summaryFile = join(outputDir, `${albumName}_summary.json`);
    try {
        await writeJsonSafe(summaryFile, {
            album_url: albumUrl,
            download_time: formatLocalTime(),
            statistics: {
                total_count: total,
                successful_count: downloaded.length,
                failed_count: failures.length,
            },
            successful_articles: downloaded.map((item) => {
                const { title, author, url, metadata } = item;
                return {
                    title,
                    author,
                    url,
                    msgid: String(metadata['msgid'] ?? ''),
                    create_time: String(metadata['create_time'] ?? ''),
                };
            }),
            failed_articles: failures,
        });
    }
    catch (e) {
        const message = e instanceof Error ? e.message : String(e);
        return err(fileSystemError(message, { outputDir, albumUrl }));
    }
    return ok({
        articles: downloaded,
        artifacts: collectedArtifacts,
        summaryFile,
        failedCount: failures.length,
    });
}
/**
 * Returns a promise that resolves (with no value) once a timer of `ms`
 * milliseconds has fired.
 */
function delay(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
190
+ //# sourceMappingURL=wechat-album.extractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"wechat-album.extractor.js","sourceRoot":"","sources":["../../../src/platforms/wechat/wechat-album.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,EAAE,EAAE,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAA;AAC9C,OAAO,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAA;AAEpE,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAA;AACpD,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAA;AAClE,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAA;AACjD,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAEhC,MAAM,gBAAgB,GACpB,oIAAoI,CAAA;AAEtI;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,QAAgB;IAC7C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAA;QAChC,MAAM,MAAM,GAAG,MAAM,CAAC,YAAY,CAAA;QAElC,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;QACrC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAA;QACzC,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,CAAA;QAE5C,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAA;QAE5C,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAA;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,QAAgB,EAChB,UAAsB,EACtB,WAAoB;IAEpB,MAAM,SAAS,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAA;IAC1C,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO,GAAG,CAAC,YAAY,CAAC,YAAY,EAAE,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAA;IAC3D,CAAC;IAED,MAAM,QAAQ,GAAwB,EAAE,CAAA;IACxC,IAAI,UAAU,GAAG,CAAC,CAAA;IAClB,IAAI,YAAY,GAAG,CAAC,CAAA;IACpB,MAAM,KAAK,GAAG,EAAE,CAAA;IAEhB,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,yCAAyC,CAAC,CAAA;QACjE,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAA;QAC7C,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,EAAE,SAAS,CAAC,GAAG,CAAC,CAAA;QAC/C,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,UAAU,EAAE,SAAS,CAAC,OAAO,CAAC,CAAA;QACtD,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAA;QAC/C,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAA;QAC1D,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,eAAe,EAAE,MAAM,CAAC,YAAY,CAAC,CAAC,CAAA;QAC9D,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,GA
AG,EAAE,MAAM,CAAC,CAAA;QAEpC,MAAM,GAAG,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE;YAClD,YAAY,EAAE,gBAAgB;YAC9B,OAAO,EAAE,2BAA2B;SACrC,CAAC,CAAA;QACF,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,mBAAmB;YACnB,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;gBAAE,MAAK;YAC9B,OAAO,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QACvB,CAAC;QAED,IAAI,IAA6B,CAAA;QACjC,IAAI,CAAC;YACH,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAA4B,CAAA;QAC9D,CAAC;QAAC,MAAM,CAAC;YACP,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;gBAAE,MAAK;YAC9B,OAAO,GAAG,CAAC,YAAY,CAAC,oBAAoB,EAAE,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAA;QACnE,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAwC,CAAA;QACzE,IAAI,QAAQ,IAAI,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;YACtC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;gBAAE,MAAK;YAC9B,OAAO,GAAG,CAAC,YAAY,CAAC,gBAAgB,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAA;QACzF,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAwC,CAAA;QAC9E,MAAM,WAAW,GAAG,CAAC,SAAS,EAAE,CAAC,cAAc,CAAC,IAAI,EAAE,CAAmC,CAAA;QAEzF,IAAI,CAAC,WAAW,CAAC,MAAM;YAAE,MAAK;QAE9B,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;YAChD,MAAM,GAAG,GAAG,eAAe,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;YAE7D,eAAe;YACf,IAAI,CAAC,KAAK,IAAI,CAAC,GAAG;gBAAE,SAAQ;YAE5B,MAAM,KAAK,GAAsB;gBAC/B,KAAK;gBACL,GAAG;gBACH,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;gBAClC,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;gBAC7C,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;gBAC7C,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;gBACtC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;aAC/B,CAAA;YACD,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YAEpB,IAAI,WAAW,IAAI,QAAQ,CAAC,MAAM,IAAI,WAAW,EAAE,CAAC;gBAClD,OAAO,EAAE,CAAC,QAAQ,CAAC,CAAA;YACrB,CAAC;QACH,CAAC;QAED,WAAW;QACX,IAAI,CAAC,SAAS;YAAE,MAAK;QACrB,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,CAAC,eAAe,CAAC,IAAI,GAAG,CAAC,CAAA;QAC9D,IAAI,YAAY,KAAK,GAAG;YAAE,MAAK;QAE/B,wBAAwB;QACxB,MA
AM,IAAI,GAAG,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAE,CAAA;QACjD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,CAAA;QAC/B,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,CAAA;QACnC,IAAI,SAAS,IAAI,IAAI,IAAI,WAAW,IAAI,IAAI;YAAE,MAAK;QACnD,UAAU,GAAG,MAAM,CAAC,SAAS,CAAC,CAAA;QAC9B,YAAY,GAAG,MAAM,CAAC,WAAW,CAAC,CAAA;QAElC,KAAK;QACL,MAAM,KAAK,CAAC,GAAG,CAAC,CAAA;IAClB,CAAC;IAED,OAAO,EAAE,CAAC,QAAQ,CAAC,CAAA;AACrB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAgB,EAChB,UAAsB,EACtB,SAAiB,EACjB,OAAuB,EACvB,WAAoB;IAEpB,MAAM,UAAU,GAAG,MAAM,qBAAqB,CAAC,QAAQ,EAAE,UAAU,EAAE,WAAW,CAAC,CAAA;IACjF,IAAI,CAAC,UAAU,CAAC,EAAE;QAAE,OAAO,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,CAAA;IAEhD,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAA;IAChC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;QACpB,yBAAyB;QACzB,OAAO,EAAE,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC,CAAA;IAC5D,CAAC;IAED,MAAM,SAAS,GAAG,SAAS,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,EAAE,CAAA;IAC1D,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,SAAS,CAAC,CAAA;IAE3C,MAAM,QAAQ,GAAmB,EAAE,CAAA;IACnC,MAAM,YAAY,GAAqB,EAAE,CAAA;IACzC,MAAM,MAAM,GAAwE,EAAE,CAAA;IAEtF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,KAAK,GAAG,OAAO,CAAC,CAAC,CAAE,CAAA;QACzB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,WAAW,KAAK,CAAC,KAAK,EAAE,CAAC,CAAA;QAElE,MAAM,aAAa,GAAG,MAAM,kBAAkB,CAAC,KAAK,CAAC,GAAG,EAAE,UAAU,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAA;QAExF,IAAI,aAAa,CAAC,EAAE,EAAE,CAAC;YACrB,2BAA2B;YAC3B,MAAM,OAAO,GAAG,aAAa,CAAC,KAAK,CAAC,OAAO,CAAA;YAC3C,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,GAAG,KAAK,CAAC,KAAK,CAAA;YAC7C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,GAAG,QAAQ,CAAA;YACxC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,KAAK,CAAA;YACvC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,GAAG,KAAK,CAAC,UAAU,CAAA;YAClD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,GAAG,KAAK,CAAC,QAAQ,CAAA;YAC9C,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACtB,YAAY,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;YACnD,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAA;QACvB,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAA
C,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,aAAa,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAA;YAC3G,OAAO,CAAC,GAAG,CAAC,WAAW,aAAa,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAA;QACvD,CAAC;QAED,KAAK;QACL,IAAI,CAAC,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,MAAM,KAAK,CAAC,IAAI,CAAC,CAAA;IAC/C,CAAC;IAED,uBAAuB;IACvB,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,eAAe,CAAC,CAAA;IAChE,IAAI,CAAC;QACH,MAAM,aAAa,CAAC,WAAW,EAAE;YAC/B,SAAS,EAAE,QAAQ;YACnB,aAAa,EAAE,eAAe,EAAE;YAChC,UAAU,EAAE;gBACV,WAAW,EAAE,OAAO,CAAC,MAAM;gBAC3B,gBAAgB,EAAE,QAAQ,CAAC,MAAM;gBACjC,YAAY,EAAE,MAAM,CAAC,MAAM;aAC5B;YACD,mBAAmB,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACxC,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;gBACxC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;aACrD,CAAC,CAAC;YACH,eAAe,EAAE,MAAM;SACxB,CAAC,CAAA;IACJ,CAAC;IAAC,OAAO,CAAU,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAA;QAC1D,OAAO,GAAG,CAAC,eAAe,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAA;IAC/D,CAAC;IAED,OAAO,EAAE,CAAC,EAAE,QAAQ,EAAE,SAAS,EAAE,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,CAAA;AAC3F,CAAC;AAED,SAAS,KAAK,CAAC,EAAU;IACvB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAA;AAC1D,CAAC"}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * WeChat 文章提取器
3
+ */
4
+ import { HttpClient } from '../../fetcher/http-client.js';
5
+ import type { ArticleDraft, OutputFormat, OutputArtifact } from '../../core/types.js';
6
+ import type { Result } from '../../core/result.js';
7
+ import type { WechatArticleInfo } from './wechat.types.js';
8
+ /**
9
+ * Extracts WeChat article information from an HTML document.
10
+ * A field that is not found is returned as an empty string; no placeholder data is injected.
11
+ */
12
+ export declare function extractWechatInfo(html: string): WechatArticleInfo;
13
+ /**
14
+ * Fetches a single WeChat article.
15
+ */
16
+ export declare function fetchWechatArticle(url: string, httpClient: HttpClient, outputDir: string, formats: OutputFormat[]): Promise<Result<{
17
+ article: ArticleDraft;
18
+ artifacts: OutputArtifact[];
19
+ }>>;
20
+ //# sourceMappingURL=wechat-article.extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"wechat-article.extractor.d.ts","sourceRoot":"","sources":["../../../src/platforms/wechat/wechat-article.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,8BAA8B,CAAA;AAEzD,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAA;AACrF,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAA;AAKlD,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAA;AAO1D;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,CAoCjE;AAED;;GAEG;AACH,wBAAsB,kBAAkB,CACtC,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,UAAU,EACtB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,YAAY,EAAE,GACtB,OAAO,CAAC,MAAM,CAAC;IAAE,OAAO,EAAE,YAAY,CAAC;IAAC,SAAS,EAAE,cAAc,EAAE,CAAA;CAAE,CAAC,CAAC,CAyCzE"}
@@ -0,0 +1,132 @@
1
+ /**
2
+ * WeChat 文章提取器
3
+ */
4
+ import * as cheerio from 'cheerio';
5
+ import { htmlToMarkdown } from '../../converter/html-to-markdown.js';
6
+ import { ok, err } from '../../core/result.js';
7
+ import { fileSystemError, parseEmptyError } from '../../core/errors.js';
8
+ import { sanitizeFilename, formatLocalTime } from '../../utils/text.js';
9
+ import { writeFileSafe, writeJsonSafe, ensureDir } from '../../utils/fs.js';
10
+ import { join } from 'node:path';
11
+ const WECHAT_HEADERS = {
12
+ Referer: 'https://mp.weixin.qq.com/',
13
+ };
/**
 * Extracts title, author, publish time and body from a WeChat article page.
 *
 * Fields that cannot be located come back as empty strings; no placeholder
 * data is injected.
 *
 * @param html Full HTML of the article page.
 * @returns `{ title, author, publishTime, contentHtml, contentText }`.
 */
export function extractWechatInfo(html) {
    const $ = cheerio.load(html);
    // First element of the first selector that matches anything; when none
    // match, the (empty) selection produced by the last selector.
    const pick = (selectors) => {
        let chosen = $(selectors[0]).first();
        for (let i = 1; i < selectors.length && !chosen.length; i++) {
            chosen = $(selectors[i]).first();
        }
        return chosen;
    };
    // Title
    const title = pick(['h1.rich_media_title', 'h1']).text().trim();
    // Author
    const author = pick(['#js_name', 'a.profile_nickname', 'span.profile_nickname'])
        .text()
        .trim()
        .replace(/[\n\r\t]/g, '');
    // Publish time: DOM first, then the value embedded in the page script.
    let publishTime = pick(['#publish_time', 'span.publish_time']).text().trim();
    if (!publishTime) {
        const scripted = html.match(/create_time:\s*JsDecode\('([^']+)'\)/);
        if (scripted) {
            publishTime = scripted[1];
        }
    }
    // Body
    const body = $('#js_content');
    let contentHtml = '';
    let contentText = '';
    if (body.length) {
        contentHtml = body.html() ?? '';
        contentText = body.text().trim();
    }
    return { title, author, publishTime, contentHtml, contentText };
}
/**
 * Fetches a single WeChat article and writes it out in the requested formats.
 *
 * @param url Article URL.
 * @param httpClient Client whose `get(url, headers)` resolves to a Result
 *   exposing `value.body` on success and `error` on failure.
 * @param outputDir Directory the artifacts are written to.
 * @param formats Requested output formats.
 * @returns `ok({ article, artifacts, warnings })`, or `err(...)` when the
 *   request fails, no title can be extracted, or writing the artifacts throws.
 */
export async function fetchWechatArticle(url, httpClient, outputDir, formats) {
    const response = await httpClient.get(url, WECHAT_HEADERS);
    if (!response.ok) {
        return err(response.error);
    }
    const rawHtml = response.value.body;
    const { title, author, publishTime, contentHtml, contentText } = extractWechatInfo(rawHtml);
    // An empty title means the page could not be parsed.
    if (!title) {
        return err(parseEmptyError(url));
    }
    // Missing fields are reported as warnings; no placeholder data is injected.
    const warnings = [];
    if (!author) {
        warnings.push('未找到作者信息');
    }
    if (!publishTime) {
        warnings.push('未找到发布时间');
    }
    const article = {
        platform: 'wechat',
        url,
        title,
        author: author || undefined,
        publishTime: publishTime || undefined,
        rawHtml,
        contentHtml,
        contentText,
        markdown: htmlToMarkdown(contentHtml),
        metadata: {},
        fetchedAt: formatLocalTime(),
        warnings,
    };
    let artifacts;
    try {
        artifacts = await saveArtifacts(article, outputDir, formats);
    }
    catch (e) {
        const message = e instanceof Error ? e.message : String(e);
        return err(fileSystemError(message, { outputDir, url }));
    }
    return ok({ article, artifacts, warnings });
}
93
+ async function saveArtifacts(article, outputDir, formats) {
94
+ const artifacts = [];
95
+ const safeTitle = sanitizeFilename(article.title);
96
+ const timestamp = Math.floor(Date.now() / 1000);
97
+ await ensureDir(outputDir);
98
+ // HTML 文件保存完整页面(与 Python 版一致)
99
+ const htmlFileName = formats.includes('html') && article.rawHtml
100
+ ? `${safeTitle}_${timestamp}.html`
101
+ : null;
102
+ if (htmlFileName && article.rawHtml) {
103
+ const filePath = join(outputDir, htmlFileName);
104
+ await writeFileSafe(filePath, article.rawHtml);
105
+ artifacts.push({ type: 'html', path: filePath });
106
+ }
107
+ if (formats.includes('json')) {
108
+ const filePath = join(outputDir, `${safeTitle}_${timestamp}_info.json`);
109
+ await writeJsonSafe(filePath, {
110
+ title: article.title,
111
+ author: article.author ?? null,
112
+ publish_time: article.publishTime ?? null,
113
+ url: article.url,
114
+ html_file: htmlFileName,
115
+ fetch_time: article.fetchedAt,
116
+ });
117
+ artifacts.push({ type: 'json', path: filePath });
118
+ }
119
+ if (formats.includes('markdown') && article.markdown) {
120
+ const filePath = join(outputDir, `${safeTitle}_${timestamp}.md`);
121
+ const lines = [`# ${article.title}`, ''];
122
+ if (article.author)
123
+ lines.push(`**作者**: ${article.author}`);
124
+ if (article.publishTime)
125
+ lines.push(`**发布时间**: ${article.publishTime}`);
126
+ lines.push(`**原文链接**: ${article.url}`, '', '---', '', article.markdown);
127
+ await writeFileSafe(filePath, lines.join('\n'));
128
+ artifacts.push({ type: 'markdown', path: filePath });
129
+ }
130
+ return artifacts;
131
+ }
132
+ //# sourceMappingURL=wechat-article.extractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"wechat-article.extractor.js","sourceRoot":"","sources":["../../../src/platforms/wechat/wechat-article.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAA;AAElC,OAAO,EAAE,cAAc,EAAE,MAAM,qCAAqC,CAAA;AAGpE,OAAO,EAAE,EAAE,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAA;AAC9C,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AACvE,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACvE,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAA;AAE3E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAEhC,MAAM,cAAc,GAA2B;IAC7C,OAAO,EAAE,2BAA2B;CACrC,CAAA;AAED;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAE5B,KAAK;IACL,MAAM,OAAO,GAAG,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM;QACrD,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE;QAClC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAA;IACnB,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;IAEnC,KAAK;IACL,MAAM,QAAQ,GACZ,CAAC,CAAC,UAAU,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,KAAK,EAAE;QACpD,CAAC,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,KAAK,EAAE;YAC1E,CAAC,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,KAAK,EAAE,CAAA;IACtC,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAA;IAE9D,OAAO;IACP,IAAI,WAAW,GAAG,EAAE,CAAA;IACpB,MAAM,MAAM,GAAG,CAAC,CAAC,eAAe,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM;QAC9C,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,KAAK,EAAE;QAC5B,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,KAAK,EAAE,CAAA;IAClC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QAClB,WAAW,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;IACpC,CAAC;IACD,oBAAoB;IACpB,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAA;QAChE,IAAI,KAAK;YAAE,WAAW,GAAG,KAAK,CAAC,CAAC,CAAE,CAAA;IACpC,CAAC;IAED,KAAK;IACL,MAAM,SAAS,GAAG,CAAC,CAAC,aAAa,CAAC,CAAA;IAClC,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IACpE,MAAM,WAAW,GAAG,SAAS,
CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;IAEnE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,WAAW,EAAE,WAAW,EAAE,CAAA;AACjE,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,GAAW,EACX,UAAsB,EACtB,SAAiB,EACjB,OAAuB;IAEvB,MAAM,GAAG,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,cAAc,CAAC,CAAA;IACrD,IAAI,CAAC,GAAG,CAAC,EAAE;QAAE,OAAO,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;IAElC,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAA;IAC3B,MAAM,IAAI,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAA;IAEpC,cAAc;IACd,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QAChB,OAAO,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAA;IAClC,CAAC;IAED,0BAA0B;IAC1B,MAAM,QAAQ,GAAa,EAAE,CAAA;IAC7B,IAAI,CAAC,IAAI,CAAC,MAAM;QAAE,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAC1C,IAAI,CAAC,IAAI,CAAC,WAAW;QAAE,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAE/C,MAAM,OAAO,GAAiB;QAC5B,QAAQ,EAAE,QAAQ;QAClB,GAAG;QACH,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,SAAS;QAChC,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,SAAS;QAC1C,OAAO,EAAE,IAAI;QACb,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,QAAQ,EAAE,cAAc,CAAC,IAAI,CAAC,WAAW,CAAC;QAC1C,QAAQ,EAAE,EAAE;QACZ,SAAS,EAAE,eAAe,EAAE;QAC5B,QAAQ;KACT,CAAA;IAED,IAAI,SAA2B,CAAA;IAC/B,IAAI,CAAC;QACH,SAAS,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,CAAA;IAC9D,CAAC;IAAC,OAAO,CAAU,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAA;QAC1D,OAAO,GAAG,CAAC,eAAe,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,CAAA;IAC1D,CAAC;IAED,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;AAC7C,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,OAAqB,EACrB,SAAiB,EACjB,OAAuB;IAEvB,MAAM,SAAS,GAAqB,EAAE,CAAA;IACtC,MAAM,SAAS,GAAG,gBAAgB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,CAAA;IAC/C,MAAM,SAAS,CAAC,SAAS,CAAC,CAAA;IAE1B,8BAA8B;IAC9B,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,OAAO;QAC9D,CAAC,CAAC,GAAG,SAAS,IAAI,SAAS,OAAO;QAClC,CAAC,CAAC,IAAI,CAAA;IAER,IAAI,YAA
Y,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,YAAY,CAAC,CAAA;QAC9C,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,CAAC,CAAA;QAC9C,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAClD,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,IAAI,SAAS,YAAY,CAAC,CAAA;QACvE,MAAM,aAAa,CAAC,QAAQ,EAAE;YAC5B,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,IAAI;YAC9B,YAAY,EAAE,OAAO,CAAC,WAAW,IAAI,IAAI;YACzC,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,SAAS,EAAE,YAAY;YACvB,UAAU,EAAE,OAAO,CAAC,SAAS;SAC9B,CAAC,CAAA;QACF,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAClD,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,IAAI,SAAS,KAAK,CAAC,CAAA;QAChE,MAAM,KAAK,GAAG,CAAC,KAAK,OAAO,CAAC,KAAK,EAAE,EAAE,EAAE,CAAC,CAAA;QACxC,IAAI,OAAO,CAAC,MAAM;YAAE,KAAK,CAAC,IAAI,CAAC,WAAW,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;QAC3D,IAAI,OAAO,CAAC,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,aAAa,OAAO,CAAC,WAAW,EAAE,CAAC,CAAA;QACvE,KAAK,CAAC,IAAI,CAAC,aAAa,OAAO,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAA;QACvE,MAAM,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;QAC/C,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IACtD,CAAC;IAED,OAAO,SAAS,CAAA;AAClB,CAAC"}
@@ -0,0 +1,17 @@
1
+ /**
2
+ * WeChat 平台相关类型
3
+ */
4
+ export interface WechatArticleInfo { // Return shape of extractWechatInfo; an empty string means the field was not found.
5
+ title: string; // Article title text.
6
+ author: string; // Author / account name text.
7
+ publishTime: string; // Publish time as text taken from the page.
8
+ contentHtml: string; // HTML of the article body.
9
+ contentText: string; // Plain text of the article body.
10
+ }
11
+ export interface WechatAlbumInfo { // Identifiers parsed from an album URL's query string.
12
+ biz: string; // Value of the `__biz` query parameter.
13
+ action: string; // Value of the `action` query parameter.
14
+ albumId: string; // Value of the `album_id` query parameter.
15
+ originalUrl: string; // The album URL exactly as it was supplied.
16
+ }
17
+ //# sourceMappingURL=wechat.types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"wechat.types.d.ts","sourceRoot":"","sources":["../../../src/platforms/wechat/wechat.types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,EAAE,MAAM,CAAA;IACf,WAAW,EAAE,MAAM,CAAA;CACpB"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * WeChat 平台相关类型
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=wechat.types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"wechat.types.js","sourceRoot":"","sources":["../../../src/platforms/wechat/wechat.types.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * fetchAlbumList SDK API — fetches only the album's article list; does not download article content
3
+ * Counterpart of album_fetcher.fetch_album_articles() in the Python version
4
+ */
5
+ import type { FetchAlbumListInput, FetchAlbumListResult } from '../core/types.js';
6
+ /**
7
+ * Fetch the article list of a WeChat album (API only; article content is not downloaded).
8
+ */
9
+ export declare function fetchAlbumList(input: FetchAlbumListInput): Promise<FetchAlbumListResult>;
10
+ //# sourceMappingURL=fetch-album-list.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-album-list.d.ts","sourceRoot":"","sources":["../../src/sdk/fetch-album-list.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EACV,mBAAmB,EACnB,oBAAoB,EAErB,MAAM,kBAAkB,CAAA;AAQzB;;GAEG;AACH,wBAAsB,cAAc,CAAC,KAAK,EAAE,mBAAmB,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAkB9F"}
@@ -0,0 +1,31 @@
1
+ /**
2
+ * fetchAlbumList SDK API — fetches only the album's article list; does not download article content
3
+ * Counterpart of album_fetcher.fetch_album_articles() in the Python version
4
+ */
5
+ import { FetchAlbumListInputSchema } from '../core/types.js';
6
+ import { HttpClient } from '../fetcher/http-client.js';
7
+ import { fetchAlbumArticleList } from '../platforms/wechat/wechat-album.extractor.js';
8
+ import { isWechatAlbumUrl } from '../platforms/detector.js';
9
+ import { unsupportedUrlError, invalidInputError } from '../core/errors.js';
10
+ /**
11
+ * Fetch the article list of a WeChat album (API only; article content is not downloaded). Resolves to `{ ok: true, articles, warnings }` on success or `{ ok: false, error }` on failure.
12
+ */
13
+ export async function fetchAlbumList(input) {
14
+ const parsed = FetchAlbumListInputSchema.safeParse(input); // validate the input; the outcome is checked via parsed.success
15
+ if (!parsed.success) {
16
+ return failure(invalidInputError(parsed.error.issues.map((i) => i.message).join('; '))); // all issue messages joined into one string
17
+ }
18
+ const { url, maxArticles, timeoutMs } = parsed.data;
19
+ if (!isWechatAlbumUrl(url)) {
20
+ return failure(unsupportedUrlError('fetchAlbumList 仅支持微信专辑 URL'));
21
+ }
22
+ const httpClient = new HttpClient({ timeoutMs });
23
+ const res = await fetchAlbumArticleList(url, httpClient, maxArticles); // NOTE(review): not wrapped in try/catch, unlike the extractor call in fetchAlbum — confirm fetchAlbumArticleList never throws
24
+ if (!res.ok)
25
+ return failure(res.error);
26
+ return { ok: true, articles: res.value, warnings: [] }; // warnings is always an empty array on this path
27
+ }
28
+ function failure(error) { // wrap an error object in the { ok: false, error } result shape
29
+ return { ok: false, error };
30
+ }
31
+ //# sourceMappingURL=fetch-album-list.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-album-list.js","sourceRoot":"","sources":["../../src/sdk/fetch-album-list.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH,OAAO,EAAE,yBAAyB,EAAE,MAAM,kBAAkB,CAAA;AAE5D,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAA;AACtD,OAAO,EAAE,qBAAqB,EAAE,MAAM,+CAA+C,CAAA;AACrF,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAA;AAC3D,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAA;AAE1E;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,KAA0B;IAC7D,MAAM,MAAM,GAAG,yBAAyB,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;IACzD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,OAAO,CAAC,iBAAiB,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACzF,CAAC;IAED,MAAM,EAAE,GAAG,EAAE,WAAW,EAAE,SAAS,EAAE,GAAG,MAAM,CAAC,IAAI,CAAA;IAEnD,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,EAAE,CAAC;QAC3B,OAAO,OAAO,CAAC,mBAAmB,CAAC,4BAA4B,CAAC,CAAC,CAAA;IACnE,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC,CAAA;IAEhD,MAAM,GAAG,GAAG,MAAM,qBAAqB,CAAC,GAAG,EAAE,UAAU,EAAE,WAAW,CAAC,CAAA;IACrE,IAAI,CAAC,GAAG,CAAC,EAAE;QAAE,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;IAEtC,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAA;AACxD,CAAC;AAED,SAAS,OAAO,CAAC,KAAiB;IAChC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAA;AAC7B,CAAC"}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * fetchAlbum SDK API
3
+ */
4
+ import type { FetchAlbumInput, FetchAlbumResult } from '../core/types.js';
5
+ /**
5
+ * Fetch the article list of a WeChat album and download the articles in batch.
6
+ * NOTE(review): this package is published as `wespy-ts`; confirm the `'wespy'` import specifier used in the example below.
7
+ * @example
8
+ * ```ts
9
+ * import { fetchAlbum } from 'wespy'
10
+ *
11
+ * const result = await fetchAlbum({
12
+ * url: 'https://mp.weixin.qq.com/mp/appmsgalbum?...',
13
+ * maxArticles: 5,
14
+ * format: ['markdown'],
15
+ * })
16
+ *
17
+ * if (result.ok) {
18
+ * console.log(`Fetched ${result.articles.length} articles`)
19
+ * }
20
+ * ```
21
+ */
22
+ export declare function fetchAlbum(input: FetchAlbumInput): Promise<FetchAlbumResult>;
24
+ //# sourceMappingURL=fetch-album.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-album.d.ts","sourceRoot":"","sources":["../../src/sdk/fetch-album.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EACV,eAAe,EACf,gBAAgB,EAEjB,MAAM,kBAAkB,CAAA;AAQzB;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,eAAe,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAwClF"}
@@ -0,0 +1,67 @@
1
+ /**
2
+ * fetchAlbum SDK API
3
+ */
4
+ import { FetchAlbumInputSchema } from '../core/types.js';
5
+ import { HttpClient } from '../fetcher/http-client.js';
6
+ import { fetchAlbumArticles } from '../platforms/wechat/wechat-album.extractor.js';
7
+ import { isWechatAlbumUrl } from '../platforms/detector.js';
8
+ import { unsupportedUrlError, invalidInputError } from '../core/errors.js';
9
+ /**
10
+ * Fetch the article list of a WeChat album and download the articles in batch.
11
+ * NOTE(review): this package is published as `wespy-ts`; confirm the `'wespy'` import specifier used in the example below.
12
+ * @example
13
+ * ```ts
14
+ * import { fetchAlbum } from 'wespy'
15
+ *
16
+ * const result = await fetchAlbum({
17
+ * url: 'https://mp.weixin.qq.com/mp/appmsgalbum?...',
18
+ * maxArticles: 5,
19
+ * format: ['markdown'],
20
+ * })
21
+ *
22
+ * if (result.ok) {
23
+ * console.log(`Fetched ${result.articles.length} articles`)
24
+ * }
25
+ * ```
26
+ */
27
+ export async function fetchAlbum(input) {
28
+ // Input validation
29
+ const parsed = FetchAlbumInputSchema.safeParse(input);
30
+ if (!parsed.success) {
31
+ return failure(invalidInputError(parsed.error.issues.map((i) => i.message).join('; '))); // all issue messages joined into one string
32
+ }
33
+ const { url, outputDir, format, maxArticles, timeoutMs, downloadImages } = parsed.data;
34
+ if (downloadImages) { // a truthy downloadImages is rejected as invalid input rather than ignored
35
+ return failure(invalidInputError('downloadImages 尚未实现;Python 原版仅在 Markdown 中转换图片代理 URL'));
36
+ }
37
+ if (!isWechatAlbumUrl(url)) {
38
+ return failure(unsupportedUrlError('fetchAlbum 仅支持微信专辑 URL'));
39
+ }
40
+ const httpClient = new HttpClient({ timeoutMs });
41
+ try {
42
+ const res = await fetchAlbumArticles(url, httpClient, outputDir, format, maxArticles);
43
+ if (!res.ok)
44
+ return failure(res.error);
45
+ return {
46
+ ok: true,
47
+ articles: res.value.articles,
48
+ artifacts: res.value.artifacts,
49
+ warnings: [], // warnings is always an empty array on this path
50
+ summaryFile: res.value.summaryFile,
51
+ failedCount: res.value.failedCount,
52
+ };
53
+ }
54
+ catch (e) { // anything thrown by the extractor call is converted into a failure result instead of propagating
55
+ const message = e instanceof Error ? e.message : String(e);
56
+ return failure({
57
+ code: 'UNKNOWN_ERROR',
58
+ message,
59
+ retryable: false,
60
+ details: { url },
61
+ });
62
+ }
63
+ }
64
+ function failure(error) { // wrap an error object in the { ok: false, error } result shape
65
+ return { ok: false, error };
66
+ }
67
+ //# sourceMappingURL=fetch-album.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-album.js","sourceRoot":"","sources":["../../src/sdk/fetch-album.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,OAAO,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAA;AAExD,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAA;AACtD,OAAO,EAAE,kBAAkB,EAAE,MAAM,+CAA+C,CAAA;AAClF,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAA;AAC3D,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAA;AAE1E;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,KAAsB;IACrD,OAAO;IACP,MAAM,MAAM,GAAG,qBAAqB,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;IACrD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,OAAO,CAAC,iBAAiB,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACzF,CAAC;IAED,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC,IAAI,CAAA;IAEtF,IAAI,cAAc,EAAE,CAAC;QACnB,OAAO,OAAO,CAAC,iBAAiB,CAAC,sDAAsD,CAAC,CAAC,CAAA;IAC3F,CAAC;IAED,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,EAAE,CAAC;QAC3B,OAAO,OAAO,CAAC,mBAAmB,CAAC,wBAAwB,CAAC,CAAC,CAAA;IAC/D,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC,CAAA;IAEhD,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,CAAC,CAAA;QACrF,IAAI,CAAC,GAAG,CAAC,EAAE;YAAE,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QAEtC,OAAO;YACL,EAAE,EAAE,IAAI;YACR,QAAQ,EAAE,GAAG,CAAC,KAAK,CAAC,QAAQ;YAC5B,SAAS,EAAE,GAAG,CAAC,KAAK,CAAC,SAAS;YAC9B,QAAQ,EAAE,EAAE;YACZ,WAAW,EAAE,GAAG,CAAC,KAAK,CAAC,WAAW;YAClC,WAAW,EAAE,GAAG,CAAC,KAAK,CAAC,WAAW;SACnC,CAAA;IACH,CAAC;IAAC,OAAO,CAAU,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAA;QAC1D,OAAO,OAAO,CAAC;YACb,IAAI,EAAE,eAAe;YACrB,OAAO;YACP,SAAS,EAAE,KAAK;YAChB,OAAO,EAAE,EAAE,GAAG,EAAE;SACjB,CAAC,CAAA;IACJ,CAAC;AACH,CAAC;AAED,SAAS,OAAO,CAAC,KAAiB;IAChC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAA;AAC7B,CAAC"}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * fetchArticle SDK API
3
+ */
4
+ import type { FetchArticleInput, FetchArticleResult } from '../core/types.js';
5
+ /**
6
+ * Fetch a single article.
7
+ * NOTE(review): this package is published as `wespy-ts`; confirm the `'wespy'` import specifier used in the example below.
8
+ * @example
9
+ * ```ts
10
+ * import { fetchArticle } from 'wespy'
11
+ *
12
+ * const result = await fetchArticle({
13
+ * url: 'https://mp.weixin.qq.com/s/xxxxx',
14
+ * format: ['markdown', 'json'],
15
+ * })
16
+ *
17
+ * if (result.ok) {
18
+ * console.log(result.article.title)
19
+ * console.log(result.article.markdown)
20
+ * }
21
+ * ```
22
+ */
23
+ export declare function fetchArticle(input: FetchArticleInput): Promise<FetchArticleResult>;
24
+ //# sourceMappingURL=fetch-article.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fetch-article.d.ts","sourceRoot":"","sources":["../../src/sdk/fetch-article.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EACV,iBAAiB,EACjB,kBAAkB,EAKnB,MAAM,kBAAkB,CAAA;AAWzB;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAsB,YAAY,CAAC,KAAK,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAmExF"}