wespy-ts 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/README.md +146 -0
  2. package/dist/cli/main.d.ts +7 -0
  3. package/dist/cli/main.d.ts.map +1 -0
  4. package/dist/cli/main.js +312 -0
  5. package/dist/cli/main.js.map +1 -0
  6. package/dist/converter/html-to-markdown.d.ts +9 -0
  7. package/dist/converter/html-to-markdown.d.ts.map +1 -0
  8. package/dist/converter/html-to-markdown.js +171 -0
  9. package/dist/converter/html-to-markdown.js.map +1 -0
  10. package/dist/converter/sanitize-html.d.ts +12 -0
  11. package/dist/converter/sanitize-html.d.ts.map +1 -0
  12. package/dist/converter/sanitize-html.js +22 -0
  13. package/dist/converter/sanitize-html.js.map +1 -0
  14. package/dist/core/errors.d.ts +17 -0
  15. package/dist/core/errors.d.ts.map +1 -0
  16. package/dist/core/errors.js +36 -0
  17. package/dist/core/errors.js.map +1 -0
  18. package/dist/core/result.d.ts +26 -0
  19. package/dist/core/result.d.ts.map +1 -0
  20. package/dist/core/result.js +26 -0
  21. package/dist/core/result.js.map +1 -0
  22. package/dist/core/types.d.ts +156 -0
  23. package/dist/core/types.d.ts.map +1 -0
  24. package/dist/core/types.js +29 -0
  25. package/dist/core/types.js.map +1 -0
  26. package/dist/fetcher/http-client.d.ts +31 -0
  27. package/dist/fetcher/http-client.d.ts.map +1 -0
  28. package/dist/fetcher/http-client.js +124 -0
  29. package/dist/fetcher/http-client.js.map +1 -0
  30. package/dist/index.d.ts +14 -0
  31. package/dist/index.d.ts.map +1 -0
  32. package/dist/index.js +14 -0
  33. package/dist/index.js.map +1 -0
  34. package/dist/platforms/detector.d.ts +15 -0
  35. package/dist/platforms/detector.d.ts.map +1 -0
  36. package/dist/platforms/detector.js +30 -0
  37. package/dist/platforms/detector.js.map +1 -0
  38. package/dist/platforms/generic/generic-article.extractor.d.ts +25 -0
  39. package/dist/platforms/generic/generic-article.extractor.d.ts.map +1 -0
  40. package/dist/platforms/generic/generic-article.extractor.js +171 -0
  41. package/dist/platforms/generic/generic-article.extractor.js.map +1 -0
  42. package/dist/platforms/juejin/juejin-article.extractor.d.ts +20 -0
  43. package/dist/platforms/juejin/juejin-article.extractor.d.ts.map +1 -0
  44. package/dist/platforms/juejin/juejin-article.extractor.js +167 -0
  45. package/dist/platforms/juejin/juejin-article.extractor.js.map +1 -0
  46. package/dist/platforms/juejin/juejin.types.d.ts +13 -0
  47. package/dist/platforms/juejin/juejin.types.d.ts.map +1 -0
  48. package/dist/platforms/juejin/juejin.types.js +5 -0
  49. package/dist/platforms/juejin/juejin.types.js.map +1 -0
  50. package/dist/platforms/wechat/wechat-album.extractor.d.ts +25 -0
  51. package/dist/platforms/wechat/wechat-album.extractor.d.ts.map +1 -0
  52. package/dist/platforms/wechat/wechat-album.extractor.js +190 -0
  53. package/dist/platforms/wechat/wechat-album.extractor.js.map +1 -0
  54. package/dist/platforms/wechat/wechat-article.extractor.d.ts +20 -0
  55. package/dist/platforms/wechat/wechat-article.extractor.d.ts.map +1 -0
  56. package/dist/platforms/wechat/wechat-article.extractor.js +132 -0
  57. package/dist/platforms/wechat/wechat-article.extractor.js.map +1 -0
  58. package/dist/platforms/wechat/wechat.types.d.ts +17 -0
  59. package/dist/platforms/wechat/wechat.types.d.ts.map +1 -0
  60. package/dist/platforms/wechat/wechat.types.js +5 -0
  61. package/dist/platforms/wechat/wechat.types.js.map +1 -0
  62. package/dist/sdk/fetch-album-list.d.ts +10 -0
  63. package/dist/sdk/fetch-album-list.d.ts.map +1 -0
  64. package/dist/sdk/fetch-album-list.js +31 -0
  65. package/dist/sdk/fetch-album-list.js.map +1 -0
  66. package/dist/sdk/fetch-album.d.ts +24 -0
  67. package/dist/sdk/fetch-album.d.ts.map +1 -0
  68. package/dist/sdk/fetch-album.js +67 -0
  69. package/dist/sdk/fetch-album.js.map +1 -0
  70. package/dist/sdk/fetch-article.d.ts +24 -0
  71. package/dist/sdk/fetch-article.d.ts.map +1 -0
  72. package/dist/sdk/fetch-article.js +111 -0
  73. package/dist/sdk/fetch-article.js.map +1 -0
  74. package/dist/utils/fs.d.ts +16 -0
  75. package/dist/utils/fs.d.ts.map +1 -0
  76. package/dist/utils/fs.js +26 -0
  77. package/dist/utils/fs.js.map +1 -0
  78. package/dist/utils/text.d.ts +20 -0
  79. package/dist/utils/text.d.ts.map +1 -0
  80. package/dist/utils/text.js +96 -0
  81. package/dist/utils/text.js.map +1 -0
  82. package/dist/utils/url.d.ts +22 -0
  83. package/dist/utils/url.d.ts.map +1 -0
  84. package/dist/utils/url.js +63 -0
  85. package/dist/utils/url.js.map +1 -0
  86. package/package.json +64 -0
@@ -0,0 +1,30 @@
1
+ /**
2
+ * URL 平台检测器
3
+ */
4
+ import { detectPlatform } from '../utils/url.js';
5
+ export { detectPlatform };
6
/**
 * Reports whether `url` is detected as a WeChat URL, i.e. `detectPlatform`
 * classifies it as either 'wechat' or 'wechat-album'.
 */
export function isWechatUrl(url) {
  const wechatPlatforms = ['wechat', 'wechat-album'];
  return wechatPlatforms.includes(detectPlatform(url));
}
10
/** Reports whether `detectPlatform` classifies `url` as 'wechat-album'. */
export function isWechatAlbumUrl(url) {
  const platform = detectPlatform(url);
  return platform === 'wechat-album';
}
13
/** Reports whether `detectPlatform` classifies `url` as 'juejin'. */
export function isJuejinUrl(url) {
  const platform = detectPlatform(url);
  return platform === 'juejin';
}
16
/**
 * Reports whether `url` is considered valid, which here means that
 * `detectPlatform` produces a non-null result for it.
 *
 * NOTE(review): the original comment describes this as "can be parsed by the
 * URL parser" — confirm that detectPlatform returns null exactly in that case.
 */
export function isValidUrl(url) {
  const platform = detectPlatform(url);
  return !(platform === null);
}
22
/**
 * Maps a platform identifier to its human-readable (Chinese) display label.
 *
 * @param platform One of 'wechat', 'wechat-album', 'juejin' or 'generic'.
 * @returns The label for the platform. Any other value (for example the
 *   `null` that `detectPlatform` can yield for an unrecognised URL) maps to a
 *   generic "unknown platform" label, so the function always returns a string
 *   instead of silently falling off the end of the switch with `undefined`.
 */
export function getPlatformLabel(platform) {
  switch (platform) {
    case 'wechat':
      return '微信公众号';
    case 'wechat-album':
      return '微信专辑';
    case 'juejin':
      return '掘金';
    case 'generic':
      return '通用网页';
    default:
      // Reached only by values outside the declared platform union.
      return '未知平台';
  }
}
30
+ //# sourceMappingURL=detector.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"detector.js","sourceRoot":"","sources":["../../src/platforms/detector.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAGhD,OAAO,EAAE,cAAc,EAAE,CAAA;AAEzB,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,MAAM,CAAC,GAAG,cAAc,CAAC,GAAG,CAAC,CAAA;IAC7B,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,cAAc,CAAA;AAC/C,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,GAAW;IAC1C,OAAO,cAAc,CAAC,GAAG,CAAC,KAAK,cAAc,CAAA;AAC/C,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,OAAO,cAAc,CAAC,GAAG,CAAC,KAAK,QAAQ,CAAA;AACzC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,GAAW;IACpC,OAAO,cAAc,CAAC,GAAG,CAAC,KAAK,IAAI,CAAA;AACrC,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,QAA2B;IAC1D,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,QAAQ,CAAC,CAAC,OAAO,OAAO,CAAA;QAC7B,KAAK,cAAc,CAAC,CAAC,OAAO,MAAM,CAAA;QAClC,KAAK,QAAQ,CAAC,CAAC,OAAO,IAAI,CAAA;QAC1B,KAAK,SAAS,CAAC,CAAC,OAAO,MAAM,CAAA;IAC/B,CAAC;AACH,CAAC"}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * 通用网页文章提取器
3
+ */
4
+ import { HttpClient } from '../../fetcher/http-client.js';
5
+ import type { ArticleDraft, OutputFormat, OutputArtifact } from '../../core/types.js';
6
+ import type { Result } from '../../core/result.js';
7
+ /**
8
+ * Extracts generic web-page article information from HTML.
9
+ * An empty string means the corresponding field was not found; no placeholder data is injected.
10
+ */
11
+ export declare function extractGenericInfo(html: string): {
12
+ title: string;
13
+ author: string;
14
+ publishTime: string;
15
+ contentHtml: string;
16
+ contentText: string;
17
+ };
18
+ /**
19
+ * Fetches a generic web-page article; resolves to a Result carrying the article draft and its output artifacts.
20
+ */
21
+ export declare function fetchGenericArticle(url: string, httpClient: HttpClient, outputDir: string, formats: OutputFormat[]): Promise<Result<{
22
+ article: ArticleDraft;
23
+ artifacts: OutputArtifact[];
24
+ }>>;
25
+ //# sourceMappingURL=generic-article.extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generic-article.extractor.d.ts","sourceRoot":"","sources":["../../../src/platforms/generic/generic-article.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,8BAA8B,CAAA;AAEzD,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAA;AACrF,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAA;AAOlD;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG;IAChD,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;CACpB,CAiFA;AAED;;GAEG;AACH,wBAAsB,mBAAmB,CACvC,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,UAAU,EACtB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,YAAY,EAAE,GACtB,OAAO,CAAC,MAAM,CAAC;IAAE,OAAO,EAAE,YAAY,CAAC;IAAC,SAAS,EAAE,cAAc,EAAE,CAAA;CAAE,CAAC,CAAC,CAqCzE"}
@@ -0,0 +1,171 @@
1
+ /**
2
+ * 通用网页文章提取器
3
+ */
4
+ import * as cheerio from 'cheerio';
5
+ import { htmlToMarkdown } from '../../converter/html-to-markdown.js';
6
+ import { ok, err } from '../../core/result.js';
7
+ import { fileSystemError } from '../../core/errors.js';
8
+ import { sanitizeFilename, formatLocalTime } from '../../utils/text.js';
9
+ import { writeFileSafe, writeJsonSafe, ensureDir } from '../../utils/fs.js';
10
+ import { join } from 'node:path';
11
/**
 * Returns the first non-empty, whitespace-trimmed value offered by a list of
 * cheerio selections, or '' when none of them yields one.
 *
 * For each selection that matched at least one element, the `content`
 * attribute is preferred (that is where `<meta>` tags carry their value) and
 * the element's text is used when the attribute is missing or blank.
 */
function firstNonEmptyValue(candidates) {
  for (const el of candidates) {
    if (!el.length) {
      continue;
    }
    const value = (el.attr('content') ?? '').trim() || el.text().trim();
    if (value) {
      return value;
    }
  }
  return '';
}

/**
 * Extracts generic article information from a web page's HTML.
 *
 * Every field is a string; an empty string means the field was not found —
 * no placeholder data is injected.
 *
 * @param html Raw HTML of the page.
 * @returns `title`, `author`, `publishTime`, plus the inner HTML
 *   (`contentHtml`) and trimmed text (`contentText`) of the content container.
 */
export function extractGenericInfo(html) {
  const $ = cheerio.load(html);

  // Title: <title>, then the first <h1>/<h2>, then the Open Graph title.
  const title = firstNonEmptyValue([
    $('title').first(),
    $('h1').first(),
    $('h2').first(),
    $('meta[property="og:title"]').first(),
  ]);

  // Author: the class match is deliberately case-insensitive (the original
  // comment notes this mirrors the Python implementation's re.I flag).
  const authorByClass = $('span, div')
    .filter((_, el) => /author/i.test($(el).attr('class') ?? ''))
    .first();
  const author = firstNonEmptyValue([
    $('meta[name="author"]').first(),
    authorByClass,
    $('#js_name').first(),
  ]);

  // Publish time.
  const publishTime = firstNonEmptyValue([
    $('time').first(),
    $('span[class*="time"]').first(),
    $('span[class*="date"]').first(),
    $('meta[property="article:published_time"]').first(),
  ]);

  // Content container: the first selector (in priority order) that matches
  // anything wins; <body> is the fallback when none of them match.
  const contentSelectors = [
    'article',
    '.article-content',
    '.content',
    '.post-content',
    '.entry-content',
    '#content',
    '.main-content',
    'main',
  ];
  let contentEl = $('body').first();
  for (const selector of contentSelectors) {
    const el = $(selector).first();
    if (el.length) {
      contentEl = el;
      break;
    }
  }

  const contentHtml = contentEl.html() ?? '';
  const contentText = contentEl.text().trim();
  return { title, author, publishTime, contentHtml, contentText };
}
92
/**
 * Fetches a generic web-page article.
 *
 * Downloads `url` through `httpClient`, extracts the article fields with
 * `extractGenericInfo`, builds the article draft and writes the requested
 * output formats into `outputDir`.
 *
 * A missing title, author or publish time is not an error for generic pages;
 * each is recorded as a warning instead. A failed HTTP request is returned as
 * that request's error, and an exception while saving is returned as a
 * file-system error.
 */
export async function fetchGenericArticle(url, httpClient, outputDir, formats) {
  const response = await httpClient.get(url);
  if (!response.ok) {
    return err(response.error);
  }

  const rawHtml = response.value.body;
  const extracted = extractGenericInfo(rawHtml);

  // Pair every optional field with the warning emitted when it is missing.
  const missingFieldChecks = [
    [extracted.title, '未找到标题'],
    [extracted.author, '未找到作者信息'],
    [extracted.publishTime, '未找到发布时间'],
  ];
  const warnings = [];
  for (const [value, message] of missingFieldChecks) {
    if (!value) {
      warnings.push(message);
    }
  }

  const article = {
    platform: 'generic',
    url,
    title: extracted.title || '(无标题)',
    author: extracted.author || undefined,
    publishTime: extracted.publishTime || undefined,
    rawHtml,
    contentHtml: extracted.contentHtml,
    contentText: extracted.contentText,
    markdown: htmlToMarkdown(extracted.contentHtml),
    metadata: {},
    fetchedAt: formatLocalTime(),
    warnings,
  };

  let artifacts;
  try {
    artifacts = await saveArtifacts(article, outputDir, formats);
  }
  catch (e) {
    let message;
    if (e instanceof Error) {
      message = e.message;
    }
    else {
      message = String(e);
    }
    return err(fileSystemError(message, { outputDir, url }));
  }
  return ok({ article, artifacts, warnings });
}
133
/**
 * Writes the requested output formats for `article` into `outputDir` and
 * returns one `{ type, path }` entry per file written.
 *
 * All files share the base name `<sanitized title>_<unix seconds>`. HTML is
 * written only when raw HTML is present, and Markdown only when the article
 * has Markdown content. The JSON info file records the HTML file name, or
 * null when no HTML file was written.
 */
async function saveArtifacts(article, outputDir, formats) {
  const written = [];
  const titlePart = sanitizeFilename(article.title);
  const secondsPart = Math.floor(Date.now() / 1000);
  const baseName = `${titlePart}_${secondsPart}`;
  await ensureDir(outputDir);

  const wantsHtml = formats.includes('html') && Boolean(article.rawHtml);
  const htmlFileName = wantsHtml ? `${baseName}.html` : null;
  if (htmlFileName !== null) {
    const htmlPath = join(outputDir, htmlFileName);
    await writeFileSafe(htmlPath, article.rawHtml);
    written.push({ type: 'html', path: htmlPath });
  }

  if (formats.includes('json')) {
    const jsonPath = join(outputDir, `${baseName}_info.json`);
    const info = {
      title: article.title,
      author: article.author ?? null,
      publish_time: article.publishTime ?? null,
      url: article.url,
      html_file: htmlFileName,
      fetch_time: article.fetchedAt,
    };
    await writeJsonSafe(jsonPath, info);
    written.push({ type: 'json', path: jsonPath });
  }

  if (formats.includes('markdown') && article.markdown) {
    const markdownPath = join(outputDir, `${baseName}.md`);
    const documentLines = [
      `# ${article.title}`,
      '',
      ...(article.author ? [`**作者**: ${article.author}`] : []),
      ...(article.publishTime ? [`**发布时间**: ${article.publishTime}`] : []),
      `**原文链接**: ${article.url}`,
      '',
      '---',
      '',
      article.markdown,
    ];
    await writeFileSafe(markdownPath, documentLines.join('\n'));
    written.push({ type: 'markdown', path: markdownPath });
  }

  return written;
}
171
+ //# sourceMappingURL=generic-article.extractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generic-article.extractor.js","sourceRoot":"","sources":["../../../src/platforms/generic/generic-article.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAA;AAElC,OAAO,EAAE,cAAc,EAAE,MAAM,qCAAqC,CAAA;AAGpE,OAAO,EAAE,EAAE,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAA;AAC9C,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AACtD,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACvE,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAA;AAC3E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAEhC;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAO7C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAE5B,cAAc;IACd,IAAI,KAAK,GAAG,EAAE,CAAA;IACd,MAAM,YAAY,GAAG;QACnB,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE;QAClB,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE;QACf,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE;QACf,CAAC,CAAC,2BAA2B,CAAC,CAAC,KAAK,EAAE;KACvC,CAAA;IACD,KAAK,MAAM,EAAE,IAAI,YAAY,EAAE,CAAC;QAC9B,IAAI,EAAE,CAAC,MAAM,EAAE,CAAC;YACd,KAAK,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;YAC9C,IAAI,KAAK;gBAAE,MAAK;QAClB,CAAC;IACH,CAAC;IAED,gDAAgD;IAChD,IAAI,MAAM,GAAG,EAAE,CAAA;IACf,MAAM,aAAa,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACpD,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;QACrC,OAAO,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IAC5B,CAAC,CAAC,CAAC,KAAK,EAAE,CAAA;IACV,MAAM,aAAa,GAAG;QACpB,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE;QAChC,aAAa;QACb,CAAC,CAAC,UAAU,CAAC,CAAC,KAAK,EAAE;KACtB,CAAA;IACD,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;QAC/B,IAAI,EAAE,CAAC,MAAM,EAAE,CAAC;YACd,MAAM,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;YAC/C,IAAI,MAAM;gBAAE,MAAK;QACnB,CAAC;IACH,CAAC;IAED,OAAO;IACP,IAAI,WAAW,GAAG,EAAE,CAAA;IACpB,MAAM,WAAW,GAAG;QAClB,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE;QACjB,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE;QAChC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE;QAChC,CAAC,CAAC,yCAAyC,CAAC,CAAC,KAAK,EAAE;KACrD,CAAA;IACD,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;QAC7B,IAAI,EAAE,CAAC,MAAM,E
AAE,CAAC;YACd,WAAW,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;YACpD,IAAI,WAAW;gBAAE,MAAK;QACxB,CAAC;IACH,CAAC;IAED,OAAO;IACP,MAAM,gBAAgB,GAAG;QACvB,SAAS;QACT,kBAAkB;QAClB,UAAU;QACV,eAAe;QACf,gBAAgB;QAChB,UAAU;QACV,eAAe;QACf,MAAM;KACP,CAAA;IAED,8DAA8D;IAC9D,IAAI,SAAS,GAAgC,IAAI,CAAA;IACjD,KAAK,MAAM,QAAQ,IAAI,gBAAgB,EAAE,CAAC;QACxC,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAA;QAC9B,IAAI,EAAE,CAAC,MAAM,EAAE,CAAC;YACd,SAAS,GAAG,EAAE,CAAA;YACd,MAAK;QACP,CAAC;IACH,CAAC;IAED,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,SAAS,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,CAAA;IAC/B,CAAC;IAED,MAAM,WAAW,GAAG,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAC3C,MAAM,WAAW,GAAG,SAAS,EAAE,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,CAAA;IAElD,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,WAAW,EAAE,WAAW,EAAE,CAAA;AACjE,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,GAAW,EACX,UAAsB,EACtB,SAAiB,EACjB,OAAuB;IAEvB,MAAM,GAAG,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IACrC,IAAI,CAAC,GAAG,CAAC,EAAE;QAAE,OAAO,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;IAElC,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAA;IAC3B,MAAM,IAAI,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;IAErC,2BAA2B;IAC3B,MAAM,QAAQ,GAAa,EAAE,CAAA;IAC7B,IAAI,CAAC,IAAI,CAAC,KAAK;QAAE,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IACvC,IAAI,CAAC,IAAI,CAAC,MAAM;QAAE,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAC1C,IAAI,CAAC,IAAI,CAAC,WAAW;QAAE,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAE/C,MAAM,OAAO,GAAiB;QAC5B,QAAQ,EAAE,SAAS;QACnB,GAAG;QACH,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,OAAO;QAC5B,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,SAAS;QAChC,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,SAAS;QAC1C,OAAO,EAAE,IAAI;QACb,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,QAAQ,EAAE,cAAc,CAAC,IAAI,CAAC,WAAW,CAAC;QAC1C,QAAQ,EAAE,EAAE;QACZ,SAAS,EAAE,eAAe,EAAE;QAC5B,QAAQ;KACT,CAAA;IAED,IAAI,SAA2B,CAAA;IAC/B,IAAI,CAAC;QACH,SAAS,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,CAAA;IAC9D,CAAC;IAAC,OAAO,CAAU,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC
,CAAC,CAAC,CAAA;QAC1D,OAAO,GAAG,CAAC,eAAe,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,CAAA;IAC1D,CAAC;IAED,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;AAC7C,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,OAAqB,EACrB,SAAiB,EACjB,OAAuB;IAEvB,MAAM,SAAS,GAAqB,EAAE,CAAA;IACtC,MAAM,SAAS,GAAG,gBAAgB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,CAAA;IAC/C,MAAM,SAAS,CAAC,SAAS,CAAC,CAAA;IAE1B,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,OAAO;QAC9D,CAAC,CAAC,GAAG,SAAS,IAAI,SAAS,OAAO;QAClC,CAAC,CAAC,IAAI,CAAA;IAER,IAAI,YAAY,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,YAAY,CAAC,CAAA;QAC9C,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,CAAC,CAAA;QAC9C,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAClD,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,IAAI,SAAS,YAAY,CAAC,CAAA;QACvE,MAAM,aAAa,CAAC,QAAQ,EAAE;YAC5B,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,IAAI;YAC9B,YAAY,EAAE,OAAO,CAAC,WAAW,IAAI,IAAI;YACzC,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,SAAS,EAAE,YAAY;YACvB,UAAU,EAAE,OAAO,CAAC,SAAS;SAC9B,CAAC,CAAA;QACF,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAClD,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,IAAI,SAAS,KAAK,CAAC,CAAA;QAChE,MAAM,KAAK,GAAG,CAAC,KAAK,OAAO,CAAC,KAAK,EAAE,EAAE,EAAE,CAAC,CAAA;QACxC,IAAI,OAAO,CAAC,MAAM;YAAE,KAAK,CAAC,IAAI,CAAC,WAAW,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;QAC3D,IAAI,OAAO,CAAC,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,aAAa,OAAO,CAAC,WAAW,EAAE,CAAC,CAAA;QACvE,KAAK,CAAC,IAAI,CAAC,aAAa,OAAO,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAA;QACvE,MAAM,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;QAC/C,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IACtD,CAAC;IAED,OAAO,SAAS,CAAA;AAClB,CAAC"}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Juejin 文章提取器
3
+ */
4
+ import { HttpClient } from '../../fetcher/http-client.js';
5
+ import type { ArticleDraft, OutputFormat, OutputArtifact } from '../../core/types.js';
6
+ import type { Result } from '../../core/result.js';
7
+ import type { JuejinArticleInfo } from './juejin.types.js';
8
+ /**
9
+ * Extracts Juejin article information from HTML.
10
+ * An empty string means the corresponding field was not found; no placeholder data is injected.
11
+ */
12
+ export declare function extractJuejinInfo(html: string): JuejinArticleInfo;
13
+ /**
14
+ * Fetches a single Juejin article; resolves to a Result carrying the article draft and its output artifacts.
15
+ */
16
+ export declare function fetchJuejinArticle(url: string, httpClient: HttpClient, outputDir: string, formats: OutputFormat[]): Promise<Result<{
17
+ article: ArticleDraft;
18
+ artifacts: OutputArtifact[];
19
+ }>>;
20
+ //# sourceMappingURL=juejin-article.extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"juejin-article.extractor.d.ts","sourceRoot":"","sources":["../../../src/platforms/juejin/juejin-article.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,8BAA8B,CAAA;AAGzD,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAA;AACrF,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAA;AAKlD,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAA;AAO1D;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,CAoEjE;AAED;;GAEG;AACH,wBAAsB,kBAAkB,CACtC,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,UAAU,EACtB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,YAAY,EAAE,GACtB,OAAO,CAAC,MAAM,CAAC;IAAE,OAAO,EAAE,YAAY,CAAC;IAAC,SAAS,EAAE,cAAc,EAAE,CAAA;CAAE,CAAC,CAAC,CA6CzE"}
@@ -0,0 +1,167 @@
1
+ /**
2
+ * Juejin 文章提取器
3
+ */
4
+ import * as cheerio from 'cheerio';
5
+ import { htmlToMarkdown } from '../../converter/html-to-markdown.js';
6
+ import { sanitizeHtml } from '../../converter/sanitize-html.js';
7
+ import { ok, err } from '../../core/result.js';
8
+ import { fileSystemError, parseEmptyError } from '../../core/errors.js';
9
+ import { sanitizeFilename, formatLocalTime } from '../../utils/text.js';
10
+ import { writeFileSafe, writeJsonSafe, ensureDir } from '../../utils/fs.js';
11
+ import { join } from 'node:path';
12
/** Extra request headers passed to the HTTP client when fetching Juejin pages. */
const JUEJIN_HEADERS = { Referer: 'https://juejin.cn/' };
15
/**
 * Extracts Juejin article information from a page's HTML.
 *
 * Every string field is trimmed text; an empty string (or an empty `tags`
 * array) means the field was not found — no placeholder data is injected.
 *
 * @param html Raw HTML of the Juejin article page.
 * @returns title, author, publishTime, contentHtml, contentText, tags and
 *   viewCount.
 */
export function extractJuejinInfo(html) {
  const $ = cheerio.load(html);

  // Returns the first element of the first selector (in priority order) that
  // matches anything; when none match, the empty selection of the last one.
  const pickFirst = (...selectors) => {
    let selection;
    for (const selector of selectors) {
      selection = $(selector).first();
      if (selection.length) {
        break;
      }
    }
    return selection;
  };

  // Sanitizes an HTML fragment and returns its serialized HTML and trimmed text.
  const cleanFragment = (fragmentHtml) => {
    const cleaned$ = cheerio.load(sanitizeHtml(fragmentHtml ?? ''));
    const serialized = cleaned$.html() ?? '';
    return { html: serialized, text: cleaned$.text().trim() };
  };

  // Title
  const title = pickFirst('h1.article-title', 'h1.article-title-text', 'h1').text().trim();

  // Author
  const author = $('span.name').first().text().trim();

  // Publish time
  const publishTime = pickFirst('span.time', 'time', 'span.date').text().trim();

  // Content container
  const contentEl = pickFirst(
    '#article-root',
    'div.article-content',
    'div.markdown-body',
    'article',
    '#article-content',
  );

  let contentHtml = '';
  let contentText = '';
  if (contentEl.length) {
    ({ html: contentHtml, text: contentText } = cleanFragment(contentEl.html()));

    // Fallback: when the cleaned content has no text, re-clean #article-root
    // from a fresh parse of the page.
    // NOTE(review): #article-root is already the first-priority selector above,
    // so this branch looks like it can only repeat the same cleaning — confirm
    // whether a different fallback source was intended.
    if (!contentText) {
      const articleRoot = cheerio.load(html)('#article-root').first();
      if (articleRoot.length) {
        ({ html: contentHtml, text: contentText } = cleanFragment(articleRoot.html()));
      }
    }
  }

  // Tags: a.tag takes priority; span.tag is consulted only when no a.tag
  // exists (same precedence as the Python implementation, per the original
  // comment).
  const anchorTags = $('a.tag');
  const tagEls = anchorTags.length > 0 ? anchorTags : $('span.tag');
  const tags = tagEls
    .toArray()
    .map((el) => $(el).text().trim())
    .filter((text) => text !== '');

  // View count
  const viewCount = pickFirst('span.view-count', 'span.read-count').text().trim();

  return { title, author, publishTime, contentHtml, contentText, tags, viewCount };
}
75
/**
 * Fetches a single Juejin article.
 *
 * Downloads `url` through `httpClient` (sending `JUEJIN_HEADERS`), extracts
 * the article fields with `extractJuejinInfo`, builds the article draft and
 * writes the requested output formats into `outputDir`.
 *
 * An empty title is treated as a parse failure. A missing author or publish
 * time is only recorded as a warning. A failed HTTP request is returned as
 * that request's error, and an exception while saving is returned as a
 * file-system error.
 */
export async function fetchJuejinArticle(url, httpClient, outputDir, formats) {
  const response = await httpClient.get(url, JUEJIN_HEADERS);
  if (!response.ok) {
    return err(response.error);
  }

  const rawHtml = response.value.body;
  const info = extractJuejinInfo(rawHtml);

  // No title means the page could not be parsed as an article.
  if (!info.title) {
    return err(parseEmptyError(url));
  }

  // Missing optional fields become warnings.
  const warnings = [];
  const optionalFieldChecks = [
    [info.author, '未找到作者信息'],
    [info.publishTime, '未找到发布时间'],
  ];
  for (const [value, message] of optionalFieldChecks) {
    if (!value) {
      warnings.push(message);
    }
  }

  // Only fields that were actually found are put into the metadata.
  const metadata = {
    ...(info.tags.length ? { tags: info.tags } : {}),
    ...(info.viewCount ? { viewCount: info.viewCount } : {}),
  };

  const article = {
    platform: 'juejin',
    url,
    title: info.title,
    author: info.author || undefined,
    publishTime: info.publishTime || undefined,
    rawHtml,
    contentHtml: info.contentHtml,
    contentText: info.contentText,
    markdown: htmlToMarkdown(info.contentHtml),
    metadata,
    fetchedAt: formatLocalTime(),
    warnings,
  };

  let artifacts;
  try {
    artifacts = await saveArtifacts(article, info, outputDir, formats);
  }
  catch (e) {
    let message;
    if (e instanceof Error) {
      message = e.message;
    }
    else {
      message = String(e);
    }
    return err(fileSystemError(message, { outputDir, url }));
  }
  return ok({ article, artifacts, warnings });
}
123
/**
 * Writes the requested output formats for a Juejin `article` into `outputDir`
 * and returns one `{ type, path }` entry per file written.
 *
 * All files share the base name `<sanitized title>_<unix seconds>`. HTML is
 * written only when raw HTML is present, and Markdown only when the article
 * has Markdown content. `info` supplies the tags and view count that go into
 * the JSON info file and the Markdown header.
 */
async function saveArtifacts(article, info, outputDir, formats) {
  const written = [];
  const titlePart = sanitizeFilename(article.title);
  const secondsPart = Math.floor(Date.now() / 1000);
  const baseName = `${titlePart}_${secondsPart}`;
  await ensureDir(outputDir);

  const wantsHtml = formats.includes('html') && Boolean(article.rawHtml);
  const htmlFileName = wantsHtml ? `${baseName}.html` : null;
  if (htmlFileName !== null) {
    const htmlPath = join(outputDir, htmlFileName);
    await writeFileSafe(htmlPath, article.rawHtml);
    written.push({ type: 'html', path: htmlPath });
  }

  if (formats.includes('json')) {
    const jsonPath = join(outputDir, `${baseName}_info.json`);
    const summary = {
      title: article.title,
      author: article.author ?? null,
      publish_time: article.publishTime ?? null,
      tags: info.tags,
      view_count: info.viewCount || null,
      url: article.url,
      html_file: htmlFileName,
      fetch_time: article.fetchedAt,
    };
    await writeJsonSafe(jsonPath, summary);
    written.push({ type: 'json', path: jsonPath });
  }

  if (formats.includes('markdown') && article.markdown) {
    const markdownPath = join(outputDir, `${baseName}.md`);
    const documentLines = [
      `# ${article.title}`,
      '',
      ...(article.author ? [`**作者**: ${article.author}`] : []),
      ...(article.publishTime ? [`**发布时间**: ${article.publishTime}`] : []),
      ...(info.viewCount ? [`**阅读量**: ${info.viewCount}`] : []),
      ...(info.tags.length ? [`**标签**: ${info.tags.join(', ')}`] : []),
      `**原文链接**: ${article.url}`,
      '',
      '---',
      '',
      article.markdown,
    ];
    await writeFileSafe(markdownPath, documentLines.join('\n'));
    written.push({ type: 'markdown', path: markdownPath });
  }

  return written;
}
167
+ //# sourceMappingURL=juejin-article.extractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"juejin-article.extractor.js","sourceRoot":"","sources":["../../../src/platforms/juejin/juejin-article.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAA;AAElC,OAAO,EAAE,cAAc,EAAE,MAAM,qCAAqC,CAAA;AACpE,OAAO,EAAE,YAAY,EAAE,MAAM,kCAAkC,CAAA;AAG/D,OAAO,EAAE,EAAE,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAA;AAC9C,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AACvE,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACvE,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAA;AAE3E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAEhC,MAAM,cAAc,GAA2B;IAC7C,OAAO,EAAE,oBAAoB;CAC9B,CAAA;AAED;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAE5B,KAAK;IACL,MAAM,OAAO,GACX,CAAC,CAAC,kBAAkB,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,KAAK,EAAE;QACpE,CAAC,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,KAAK,EAAE;YAChF,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAA;IACnB,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;IAEnC,KAAK;IACL,MAAM,QAAQ,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,CAAA;IACvC,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;IAErC,OAAO;IACP,MAAM,MAAM,GACV,CAAC,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE;QACtD,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE;YAC9C,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,CAAA;IAC1B,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;IAExC,OAAO;IACP,MAAM,SAAS,GACb,CAAC,CAAC,eAAe,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,KAAK,EAAE;QAC9D,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE;YAC5E,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,KAAK,EAAE;gBACxE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAA
C,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE;oBACpD,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,KAAK,EAAE,CAAA;IAEjC,IAAI,WAAW,GAAG,EAAE,CAAA;IACpB,IAAI,WAAW,GAAG,EAAE,CAAA;IAEpB,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC;QACrB,OAAO;QACP,MAAM,WAAW,GAAG,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAA;QACxD,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAC1C,WAAW,GAAG,QAAQ,CAAC,IAAI,EAAE,IAAI,EAAE,CAAA;QACnC,WAAW,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;QAEpC,6CAA6C;QAC7C,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAChC,MAAM,WAAW,GAAG,KAAK,CAAC,eAAe,CAAC,CAAC,KAAK,EAAE,CAAA;YAClD,IAAI,WAAW,CAAC,MAAM,EAAE,CAAC;gBACvB,MAAM,SAAS,GAAG,YAAY,CAAC,WAAW,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAA;gBACxD,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;gBAC1C,WAAW,GAAG,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,CAAA;gBACrC,WAAW,GAAG,UAAU,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;YACxC,CAAC;QACH,CAAC;IACH,CAAC;IAED,0CAA0C;IAC1C,MAAM,IAAI,GAAa,EAAE,CAAA;IACzB,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAA;IACxB,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAA;IACvD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACpB,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;QAC/B,IAAI,GAAG;YAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACzB,CAAC,CAAC,CAAA;IAEF,MAAM;IACN,MAAM,MAAM,GACV,CAAC,CAAC,iBAAiB,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,KAAK,EAAE;QAClE,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,KAAK,EAAE,CAAA;IAChC,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;IAEtC,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,WAAW,EAAE,WAAW,EAAE,IAAI,EAAE,SAAS,EAAE,CAAA;AAClF,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,GAAW,EACX,UAAsB,EACtB,SAAiB,EACjB,OAAuB;IAEvB,MAAM,GAAG,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,cAAc,CAAC,CAAA;IACrD,IAAI,CAAC,GAAG,CAAC,EAAE;QAAE,OAAO,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;IAElC,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAA;IAC3B,MAAM,IAAI,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAA;IAEpC,cAAc;I
ACd,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QAChB,OAAO,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAA;IAClC,CAAC;IAED,mBAAmB;IACnB,MAAM,QAAQ,GAAa,EAAE,CAAA;IAC7B,IAAI,CAAC,IAAI,CAAC,MAAM;QAAE,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAC1C,IAAI,CAAC,IAAI,CAAC,WAAW;QAAE,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAE/C,MAAM,QAAQ,GAA4B,EAAE,CAAA;IAC5C,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM;QAAE,QAAQ,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,CAAA;IAC/C,IAAI,IAAI,CAAC,SAAS;QAAE,QAAQ,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAA;IAEvD,MAAM,OAAO,GAAiB;QAC5B,QAAQ,EAAE,QAAQ;QAClB,GAAG;QACH,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,SAAS;QAChC,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,SAAS;QAC1C,OAAO,EAAE,IAAI;QACb,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,QAAQ,EAAE,cAAc,CAAC,IAAI,CAAC,WAAW,CAAC;QAC1C,QAAQ;QACR,SAAS,EAAE,eAAe,EAAE;QAC5B,QAAQ;KACT,CAAA;IAED,IAAI,SAA2B,CAAA;IAC/B,IAAI,CAAC;QACH,SAAS,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,CAAC,CAAA;IACpE,CAAC;IAAC,OAAO,CAAU,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAA;QAC1D,OAAO,GAAG,CAAC,eAAe,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,CAAA;IAC1D,CAAC;IAED,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;AAC7C,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,OAAqB,EACrB,IAAuB,EACvB,SAAiB,EACjB,OAAuB;IAEvB,MAAM,SAAS,GAAqB,EAAE,CAAA;IACtC,MAAM,SAAS,GAAG,gBAAgB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,CAAA;IAC/C,MAAM,SAAS,CAAC,SAAS,CAAC,CAAA;IAE1B,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,OAAO;QAC9D,CAAC,CAAC,GAAG,SAAS,IAAI,SAAS,OAAO;QAClC,CAAC,CAAC,IAAI,CAAA;IAER,IAAI,YAAY,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,YAAY,CAAC,CAAA;QAC9C,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,CAAC,CAAA;QAC9C,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAClD,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG
,SAAS,IAAI,SAAS,YAAY,CAAC,CAAA;QACvE,MAAM,aAAa,CAAC,QAAQ,EAAE;YAC5B,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,IAAI;YAC9B,YAAY,EAAE,OAAO,CAAC,WAAW,IAAI,IAAI;YACzC,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,UAAU,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI;YAClC,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,SAAS,EAAE,YAAY;YACvB,UAAU,EAAE,OAAO,CAAC,SAAS;SAC9B,CAAC,CAAA;QACF,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAClD,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,IAAI,SAAS,KAAK,CAAC,CAAA;QAChE,MAAM,KAAK,GAAG,CAAC,KAAK,OAAO,CAAC,KAAK,EAAE,EAAE,EAAE,CAAC,CAAA;QACxC,IAAI,OAAO,CAAC,MAAM;YAAE,KAAK,CAAC,IAAI,CAAC,WAAW,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;QAC3D,IAAI,OAAO,CAAC,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,aAAa,OAAO,CAAC,WAAW,EAAE,CAAC,CAAA;QACvE,IAAI,IAAI,CAAC,SAAS;YAAE,KAAK,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,SAAS,EAAE,CAAC,CAAA;QAC5D,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM;YAAE,KAAK,CAAC,IAAI,CAAC,WAAW,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;QACnE,KAAK,CAAC,IAAI,CAAC,aAAa,OAAO,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAA;QACvE,MAAM,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;QAC/C,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IACtD,CAAC;IAED,OAAO,SAAS,CAAA;AAClB,CAAC"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Juejin 平台相关类型
3
+ */
4
+ export interface JuejinArticleInfo {
5
+ title: string;
6
+ author: string;
7
+ publishTime: string;
8
+ contentHtml: string;
9
+ contentText: string;
10
+ tags: string[];
11
+ viewCount: string;
12
+ }
13
+ //# sourceMappingURL=juejin.types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"juejin.types.d.ts","sourceRoot":"","sources":["../../../src/platforms/juejin/juejin.types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,IAAI,EAAE,MAAM,EAAE,CAAA;IACd,SAAS,EAAE,MAAM,CAAA;CAClB"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Juejin 平台相关类型
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=juejin.types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"juejin.types.js","sourceRoot":"","sources":["../../../src/platforms/juejin/juejin.types.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * WeChat 专辑提取器
3
+ */
4
+ import { HttpClient } from '../../fetcher/http-client.js';
5
+ import type { AlbumArticleEntry, ArticleDraft, OutputFormat, OutputArtifact } from '../../core/types.js';
6
+ import type { Result } from '../../core/result.js';
7
+ import type { WechatAlbumInfo } from './wechat.types.js';
8
+ /**
9
+ * 解析专辑 URL 参数
10
+ */
11
+ export declare function parseAlbumInfo(albumUrl: string): WechatAlbumInfo | null;
12
+ /**
13
+ * 获取专辑中的文章列表(分页)
14
+ */
15
+ export declare function fetchAlbumArticleList(albumUrl: string, httpClient: HttpClient, maxArticles?: number): Promise<Result<AlbumArticleEntry[]>>;
16
+ /**
17
+ * 批量下载专辑文章
18
+ */
19
+ export declare function fetchAlbumArticles(albumUrl: string, httpClient: HttpClient, outputDir: string, formats: OutputFormat[], maxArticles?: number): Promise<Result<{
20
+ articles: ArticleDraft[];
21
+ artifacts: OutputArtifact[];
22
+ summaryFile?: string;
23
+ failedCount?: number;
24
+ }>>;
25
+ //# sourceMappingURL=wechat-album.extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"wechat-album.extractor.d.ts","sourceRoot":"","sources":["../../../src/platforms/wechat/wechat-album.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,8BAA8B,CAAA;AACzD,OAAO,KAAK,EAAE,iBAAiB,EAAE,YAAY,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAA;AACxG,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAA;AAGlD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAA;AAUxD;;GAEG;AACH,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,eAAe,GAAG,IAAI,CAevE;AAED;;GAEG;AACH,wBAAsB,qBAAqB,CACzC,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,UAAU,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB,OAAO,CAAC,MAAM,CAAC,iBAAiB,EAAE,CAAC,CAAC,CA2FtC;AAED;;GAEG;AACH,wBAAsB,kBAAkB,CACtC,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,UAAU,EACtB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,YAAY,EAAE,EACvB,WAAW,CAAC,EAAE,MAAM,GACnB,OAAO,CAAC,MAAM,CAAC;IAAE,QAAQ,EAAE,YAAY,EAAE,CAAC;IAAC,SAAS,EAAE,cAAc,EAAE,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CAqExH"}