wespy-ts 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +146 -0
- package/dist/cli/main.d.ts +7 -0
- package/dist/cli/main.d.ts.map +1 -0
- package/dist/cli/main.js +312 -0
- package/dist/cli/main.js.map +1 -0
- package/dist/converter/html-to-markdown.d.ts +9 -0
- package/dist/converter/html-to-markdown.d.ts.map +1 -0
- package/dist/converter/html-to-markdown.js +171 -0
- package/dist/converter/html-to-markdown.js.map +1 -0
- package/dist/converter/sanitize-html.d.ts +12 -0
- package/dist/converter/sanitize-html.d.ts.map +1 -0
- package/dist/converter/sanitize-html.js +22 -0
- package/dist/converter/sanitize-html.js.map +1 -0
- package/dist/core/errors.d.ts +17 -0
- package/dist/core/errors.d.ts.map +1 -0
- package/dist/core/errors.js +36 -0
- package/dist/core/errors.js.map +1 -0
- package/dist/core/result.d.ts +26 -0
- package/dist/core/result.d.ts.map +1 -0
- package/dist/core/result.js +26 -0
- package/dist/core/result.js.map +1 -0
- package/dist/core/types.d.ts +156 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +29 -0
- package/dist/core/types.js.map +1 -0
- package/dist/fetcher/http-client.d.ts +31 -0
- package/dist/fetcher/http-client.d.ts.map +1 -0
- package/dist/fetcher/http-client.js +124 -0
- package/dist/fetcher/http-client.js.map +1 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +14 -0
- package/dist/index.js.map +1 -0
- package/dist/platforms/detector.d.ts +15 -0
- package/dist/platforms/detector.d.ts.map +1 -0
- package/dist/platforms/detector.js +30 -0
- package/dist/platforms/detector.js.map +1 -0
- package/dist/platforms/generic/generic-article.extractor.d.ts +25 -0
- package/dist/platforms/generic/generic-article.extractor.d.ts.map +1 -0
- package/dist/platforms/generic/generic-article.extractor.js +171 -0
- package/dist/platforms/generic/generic-article.extractor.js.map +1 -0
- package/dist/platforms/juejin/juejin-article.extractor.d.ts +20 -0
- package/dist/platforms/juejin/juejin-article.extractor.d.ts.map +1 -0
- package/dist/platforms/juejin/juejin-article.extractor.js +167 -0
- package/dist/platforms/juejin/juejin-article.extractor.js.map +1 -0
- package/dist/platforms/juejin/juejin.types.d.ts +13 -0
- package/dist/platforms/juejin/juejin.types.d.ts.map +1 -0
- package/dist/platforms/juejin/juejin.types.js +5 -0
- package/dist/platforms/juejin/juejin.types.js.map +1 -0
- package/dist/platforms/wechat/wechat-album.extractor.d.ts +25 -0
- package/dist/platforms/wechat/wechat-album.extractor.d.ts.map +1 -0
- package/dist/platforms/wechat/wechat-album.extractor.js +190 -0
- package/dist/platforms/wechat/wechat-album.extractor.js.map +1 -0
- package/dist/platforms/wechat/wechat-article.extractor.d.ts +20 -0
- package/dist/platforms/wechat/wechat-article.extractor.d.ts.map +1 -0
- package/dist/platforms/wechat/wechat-article.extractor.js +132 -0
- package/dist/platforms/wechat/wechat-article.extractor.js.map +1 -0
- package/dist/platforms/wechat/wechat.types.d.ts +17 -0
- package/dist/platforms/wechat/wechat.types.d.ts.map +1 -0
- package/dist/platforms/wechat/wechat.types.js +5 -0
- package/dist/platforms/wechat/wechat.types.js.map +1 -0
- package/dist/sdk/fetch-album-list.d.ts +10 -0
- package/dist/sdk/fetch-album-list.d.ts.map +1 -0
- package/dist/sdk/fetch-album-list.js +31 -0
- package/dist/sdk/fetch-album-list.js.map +1 -0
- package/dist/sdk/fetch-album.d.ts +24 -0
- package/dist/sdk/fetch-album.d.ts.map +1 -0
- package/dist/sdk/fetch-album.js +67 -0
- package/dist/sdk/fetch-album.js.map +1 -0
- package/dist/sdk/fetch-article.d.ts +24 -0
- package/dist/sdk/fetch-article.d.ts.map +1 -0
- package/dist/sdk/fetch-article.js +111 -0
- package/dist/sdk/fetch-article.js.map +1 -0
- package/dist/utils/fs.d.ts +16 -0
- package/dist/utils/fs.d.ts.map +1 -0
- package/dist/utils/fs.js +26 -0
- package/dist/utils/fs.js.map +1 -0
- package/dist/utils/text.d.ts +20 -0
- package/dist/utils/text.d.ts.map +1 -0
- package/dist/utils/text.js +96 -0
- package/dist/utils/text.js.map +1 -0
- package/dist/utils/url.d.ts +22 -0
- package/dist/utils/url.d.ts.map +1 -0
- package/dist/utils/url.js +63 -0
- package/dist/utils/url.js.map +1 -0
- package/package.json +64 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL 平台检测器
|
|
3
|
+
*/
|
|
4
|
+
import { detectPlatform } from '../utils/url.js';
|
|
5
|
+
export { detectPlatform };
|
|
6
|
+
/**
 * Reports whether a URL is detected as WeChat content, counting both the
 * `'wechat'` and the `'wechat-album'` platform.
 *
 * @param url - URL handed to `detectPlatform`.
 * @returns `true` when the detected platform is one of the two WeChat values.
 */
export function isWechatUrl(url) {
    const wechatPlatforms = ['wechat', 'wechat-album'];
    return wechatPlatforms.includes(detectPlatform(url));
}
|
|
10
|
+
/**
 * Reports whether a URL is detected as a WeChat album.
 *
 * @param url - URL handed to `detectPlatform`.
 * @returns `true` only when the detected platform is `'wechat-album'`.
 */
export function isWechatAlbumUrl(url) {
    const platform = detectPlatform(url);
    return platform === 'wechat-album';
}
|
|
13
|
+
/**
 * Reports whether a URL is detected as a Juejin page.
 *
 * @param url - URL handed to `detectPlatform`.
 * @returns `true` only when the detected platform is `'juejin'`.
 */
export function isJuejinUrl(url) {
    const platform = detectPlatform(url);
    return platform === 'juejin';
}
|
|
16
|
+
/**
 * Reports whether a URL is valid, i.e. whether `detectPlatform` yields
 * anything other than `null` for it.
 *
 * NOTE(review): presumably `detectPlatform` returns `null` exactly when the
 * URL parser rejects the input; confirm against `utils/url`.
 *
 * @param url - URL handed to `detectPlatform`.
 * @returns `false` when `detectPlatform` returns `null`, otherwise `true`.
 */
export function isValidUrl(url) {
    const platform = detectPlatform(url);
    return !(platform === null);
}
|
|
22
|
+
/**
 * Maps a platform identifier to its display label.
 *
 * @param platform - One of `'wechat'`, `'wechat-album'`, `'juejin'` or `'generic'`.
 * @returns The label for the platform, or `undefined` for any other value.
 */
export function getPlatformLabel(platform) {
    if (platform === 'wechat')
        return '微信公众号';
    if (platform === 'wechat-album')
        return '微信专辑';
    if (platform === 'juejin')
        return '掘金';
    if (platform === 'generic')
        return '通用网页';
    // Unrecognised identifiers have no label.
    return undefined;
}
|
|
30
|
+
//# sourceMappingURL=detector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detector.js","sourceRoot":"","sources":["../../src/platforms/detector.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAA;AAGhD,OAAO,EAAE,cAAc,EAAE,CAAA;AAEzB,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,MAAM,CAAC,GAAG,cAAc,CAAC,GAAG,CAAC,CAAA;IAC7B,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,cAAc,CAAA;AAC/C,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,GAAW;IAC1C,OAAO,cAAc,CAAC,GAAG,CAAC,KAAK,cAAc,CAAA;AAC/C,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,OAAO,cAAc,CAAC,GAAG,CAAC,KAAK,QAAQ,CAAA;AACzC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,GAAW;IACpC,OAAO,cAAc,CAAC,GAAG,CAAC,KAAK,IAAI,CAAA;AACrC,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,QAA2B;IAC1D,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,QAAQ,CAAC,CAAC,OAAO,OAAO,CAAA;QAC7B,KAAK,cAAc,CAAC,CAAC,OAAO,MAAM,CAAA;QAClC,KAAK,QAAQ,CAAC,CAAC,OAAO,IAAI,CAAA;QAC1B,KAAK,SAAS,CAAC,CAAC,OAAO,MAAM,CAAA;IAC/B,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 通用网页文章提取器
|
|
3
|
+
*/
|
|
4
|
+
import { HttpClient } from '../../fetcher/http-client.js';
|
|
5
|
+
import type { ArticleDraft, OutputFormat, OutputArtifact } from '../../core/types.js';
|
|
6
|
+
import type { Result } from '../../core/result.js';
|
|
7
|
+
/**
 * Extracts article information from the HTML of a generic web page.
 * An empty string means the field was not found; no placeholder data is injected.
 */
export declare function extractGenericInfo(
    html: string,
): Record<'title' | 'author' | 'publishTime' | 'contentHtml' | 'contentText', string>;
|
|
18
|
+
/**
 * Fetches a generic web-page article.
 *
 * @returns A promise of a `Result` carrying the article draft and the output artifacts.
 */
export declare function fetchGenericArticle(
    url: string,
    httpClient: HttpClient,
    outputDir: string,
    formats: Array<OutputFormat>,
): Promise<Result<{ article: ArticleDraft; artifacts: Array<OutputArtifact> }>>;
|
|
25
|
+
//# sourceMappingURL=generic-article.extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generic-article.extractor.d.ts","sourceRoot":"","sources":["../../../src/platforms/generic/generic-article.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,8BAA8B,CAAA;AAEzD,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAA;AACrF,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAA;AAOlD;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG;IAChD,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;CACpB,CAiFA;AAED;;GAEG;AACH,wBAAsB,mBAAmB,CACvC,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,UAAU,EACtB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,YAAY,EAAE,GACtB,OAAO,CAAC,MAAM,CAAC;IAAE,OAAO,EAAE,YAAY,CAAC;IAAC,SAAS,EAAE,cAAc,EAAE,CAAA;CAAE,CAAC,CAAC,CAqCzE"}
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 通用网页文章提取器
|
|
3
|
+
*/
|
|
4
|
+
import * as cheerio from 'cheerio';
|
|
5
|
+
import { htmlToMarkdown } from '../../converter/html-to-markdown.js';
|
|
6
|
+
import { ok, err } from '../../core/result.js';
|
|
7
|
+
import { fileSystemError } from '../../core/errors.js';
|
|
8
|
+
import { sanitizeFilename, formatLocalTime } from '../../utils/text.js';
|
|
9
|
+
import { writeFileSafe, writeJsonSafe, ensureDir } from '../../utils/fs.js';
|
|
10
|
+
import { join } from 'node:path';
|
|
11
|
+
/**
 * Returns the first non-empty value offered by a list of candidate selections.
 *
 * A candidate that matched nothing (`length` of 0) is skipped. For a match,
 * the `content` attribute is used when present, otherwise the element text.
 * The value is trimmed in both cases, so a whitespace-only value counts as
 * missing and the search moves on to the next candidate.
 *
 * @param candidates - Selections in priority order.
 * @returns The first non-empty trimmed value, or `''` when there is none.
 */
function firstNonEmptyValue(candidates) {
    for (const candidate of candidates) {
        if (!candidate.length)
            continue;
        const value = (candidate.attr('content') ?? candidate.text()).trim();
        if (value)
            return value;
    }
    return '';
}
/**
 * Extracts article information from the HTML of a generic web page.
 * An empty string means the field was not found; no placeholder data is injected.
 *
 * @param html - Markup of the page, parsed with `cheerio.load`.
 * @returns `title`, `author`, `publishTime`, `contentHtml` and `contentText`, all strings.
 */
export function extractGenericInfo(html) {
    const $ = cheerio.load(html);
    // Title: several sources are tried in this order.
    const title = firstNonEmptyValue([
        $('title').first(),
        $('h1').first(),
        $('h2').first(),
        $('meta[property="og:title"]').first(),
    ]);
    // Author: the class test is case-insensitive (kept consistent with
    // Python's re.I, per the original note), which a plain class*="author"
    // attribute selector would not be.
    const authorByClass = $('span, div')
        .filter((_, el) => /author/i.test($(el).attr('class') ?? ''))
        .first();
    const author = firstNonEmptyValue([
        $('meta[name="author"]').first(),
        authorByClass,
        $('#js_name').first(),
    ]);
    // Publish time.
    const publishTime = firstNonEmptyValue([
        $('time').first(),
        $('span[class*="time"]').first(),
        $('span[class*="date"]').first(),
        $('meta[property="article:published_time"]').first(),
    ]);
    // Content container: the first selector with a match wins; <body> is the
    // fallback when none of them matches.
    const contentSelectors = [
        'article',
        '.article-content',
        '.content',
        '.post-content',
        '.entry-content',
        '#content',
        '.main-content',
        'main',
    ];
    let contentEl = $('body').first();
    for (const selector of contentSelectors) {
        const match = $(selector).first();
        if (match.length) {
            contentEl = match;
            break;
        }
    }
    // .html() is null for an empty selection, hence the '' default.
    const contentHtml = contentEl.html() ?? '';
    const contentText = contentEl.text().trim();
    return { title, author, publishTime, contentHtml, contentText };
}
|
|
92
|
+
/**
 * Fetches a generic web page, extracts its article fields and writes the
 * requested output files.
 *
 * A missing title, author or publish time does not fail the fetch: each is
 * recorded as a warning, and a missing title is replaced by a placeholder.
 *
 * @param url - Page address passed to `httpClient.get`.
 * @param httpClient - Client whose `get` resolves to a result object.
 * @param outputDir - Directory passed on to `saveArtifacts`.
 * @param formats - Output formats passed on to `saveArtifacts`.
 * @returns The request error as an `err` result, a file-system `err` result
 * when saving throws, otherwise an `ok` result with `article`, `artifacts`
 * and `warnings`.
 */
export async function fetchGenericArticle(url, httpClient, outputDir, formats) {
    const response = await httpClient.get(url);
    if (!response.ok) {
        return err(response.error);
    }
    const pageHtml = response.value.body;
    const extracted = extractGenericInfo(pageHtml);
    // A title is not mandatory for generic pages; its absence is only a warning.
    const warnings = [
        [extracted.title, '未找到标题'],
        [extracted.author, '未找到作者信息'],
        [extracted.publishTime, '未找到发布时间'],
    ]
        .filter(([value]) => !value)
        .map(([, message]) => message);
    const article = {
        platform: 'generic',
        url,
        title: extracted.title ? extracted.title : '(无标题)',
        author: extracted.author ? extracted.author : undefined,
        publishTime: extracted.publishTime ? extracted.publishTime : undefined,
        rawHtml: pageHtml,
        contentHtml: extracted.contentHtml,
        contentText: extracted.contentText,
        markdown: htmlToMarkdown(extracted.contentHtml),
        metadata: {},
        fetchedAt: formatLocalTime(),
        warnings,
    };
    let artifacts;
    try {
        artifacts = await saveArtifacts(article, outputDir, formats);
    }
    catch (failure) {
        // Anything thrown while saving is reported as a file-system error result.
        const message = failure instanceof Error ? failure.message : String(failure);
        return err(fileSystemError(message, { outputDir, url }));
    }
    return ok({ article, artifacts, warnings });
}
|
|
133
|
+
/**
 * Writes the requested output files for an article and lists what was written.
 *
 * File names are built from the sanitised title and the current Unix time in
 * seconds. The HTML file is written only when `'html'` is requested and
 * `article.rawHtml` is non-empty; the Markdown file only when `'markdown'` is
 * requested and `article.markdown` is non-empty.
 *
 * @param article - Article whose fields are written out.
 * @param outputDir - Target directory, created through `ensureDir`.
 * @param formats - Requested formats (`'html'`, `'json'`, `'markdown'`).
 * @returns One `{ type, path }` entry per file written, in html, json, markdown order.
 */
async function saveArtifacts(article, outputDir, formats) {
    const written = [];
    const baseName = `${sanitizeFilename(article.title)}_${Math.floor(Date.now() / 1000)}`;
    const wants = (format) => formats.includes(format);
    await ensureDir(outputDir);
    // Stays null when no HTML file is written; the JSON summary records it either way.
    let htmlFileName = null;
    if (wants('html') && article.rawHtml) {
        htmlFileName = `${baseName}.html`;
        const htmlPath = join(outputDir, htmlFileName);
        await writeFileSafe(htmlPath, article.rawHtml);
        written.push({ type: 'html', path: htmlPath });
    }
    if (wants('json')) {
        const jsonPath = join(outputDir, `${baseName}_info.json`);
        const summary = {
            title: article.title,
            author: article.author ?? null,
            publish_time: article.publishTime ?? null,
            url: article.url,
            html_file: htmlFileName,
            fetch_time: article.fetchedAt,
        };
        await writeJsonSafe(jsonPath, summary);
        written.push({ type: 'json', path: jsonPath });
    }
    if (wants('markdown') && article.markdown) {
        const markdownPath = join(outputDir, `${baseName}.md`);
        // Header lines for author and publish time appear only when known.
        const documentLines = [
            `# ${article.title}`,
            '',
            ...(article.author ? [`**作者**: ${article.author}`] : []),
            ...(article.publishTime ? [`**发布时间**: ${article.publishTime}`] : []),
            `**原文链接**: ${article.url}`,
            '',
            '---',
            '',
            article.markdown,
        ];
        await writeFileSafe(markdownPath, documentLines.join('\n'));
        written.push({ type: 'markdown', path: markdownPath });
    }
    return written;
}
|
|
171
|
+
//# sourceMappingURL=generic-article.extractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generic-article.extractor.js","sourceRoot":"","sources":["../../../src/platforms/generic/generic-article.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAA;AAElC,OAAO,EAAE,cAAc,EAAE,MAAM,qCAAqC,CAAA;AAGpE,OAAO,EAAE,EAAE,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAA;AAC9C,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AACtD,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACvE,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAA;AAC3E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAEhC;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAO7C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAE5B,cAAc;IACd,IAAI,KAAK,GAAG,EAAE,CAAA;IACd,MAAM,YAAY,GAAG;QACnB,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE;QAClB,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE;QACf,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE;QACf,CAAC,CAAC,2BAA2B,CAAC,CAAC,KAAK,EAAE;KACvC,CAAA;IACD,KAAK,MAAM,EAAE,IAAI,YAAY,EAAE,CAAC;QAC9B,IAAI,EAAE,CAAC,MAAM,EAAE,CAAC;YACd,KAAK,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;YAC9C,IAAI,KAAK;gBAAE,MAAK;QAClB,CAAC;IACH,CAAC;IAED,gDAAgD;IAChD,IAAI,MAAM,GAAG,EAAE,CAAA;IACf,MAAM,aAAa,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACpD,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;QACrC,OAAO,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IAC5B,CAAC,CAAC,CAAC,KAAK,EAAE,CAAA;IACV,MAAM,aAAa,GAAG;QACpB,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE;QAChC,aAAa;QACb,CAAC,CAAC,UAAU,CAAC,CAAC,KAAK,EAAE;KACtB,CAAA;IACD,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;QAC/B,IAAI,EAAE,CAAC,MAAM,EAAE,CAAC;YACd,MAAM,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;YAC/C,IAAI,MAAM;gBAAE,MAAK;QACnB,CAAC;IACH,CAAC;IAED,OAAO;IACP,IAAI,WAAW,GAAG,EAAE,CAAA;IACpB,MAAM,WAAW,GAAG;QAClB,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE;QACjB,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE;QAChC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE;QAChC,CAAC,CAAC,yCAAyC,CAAC,CAAC,KAAK,EAAE;KACrD,CAAA;IACD,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;QAC7B,IAAI,EAAE,CAAC,MAAM,EAA
E,CAAC;YACd,WAAW,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;YACpD,IAAI,WAAW;gBAAE,MAAK;QACxB,CAAC;IACH,CAAC;IAED,OAAO;IACP,MAAM,gBAAgB,GAAG;QACvB,SAAS;QACT,kBAAkB;QAClB,UAAU;QACV,eAAe;QACf,gBAAgB;QAChB,UAAU;QACV,eAAe;QACf,MAAM;KACP,CAAA;IAED,8DAA8D;IAC9D,IAAI,SAAS,GAAgC,IAAI,CAAA;IACjD,KAAK,MAAM,QAAQ,IAAI,gBAAgB,EAAE,CAAC;QACxC,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAA;QAC9B,IAAI,EAAE,CAAC,MAAM,EAAE,CAAC;YACd,SAAS,GAAG,EAAE,CAAA;YACd,MAAK;QACP,CAAC;IACH,CAAC;IAED,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,SAAS,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,CAAA;IAC/B,CAAC;IAED,MAAM,WAAW,GAAG,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAC3C,MAAM,WAAW,GAAG,SAAS,EAAE,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,CAAA;IAElD,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,WAAW,EAAE,WAAW,EAAE,CAAA;AACjE,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,GAAW,EACX,UAAsB,EACtB,SAAiB,EACjB,OAAuB;IAEvB,MAAM,GAAG,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IACrC,IAAI,CAAC,GAAG,CAAC,EAAE;QAAE,OAAO,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;IAElC,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAA;IAC3B,MAAM,IAAI,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;IAErC,2BAA2B;IAC3B,MAAM,QAAQ,GAAa,EAAE,CAAA;IAC7B,IAAI,CAAC,IAAI,CAAC,KAAK;QAAE,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IACvC,IAAI,CAAC,IAAI,CAAC,MAAM;QAAE,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAC1C,IAAI,CAAC,IAAI,CAAC,WAAW;QAAE,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAE/C,MAAM,OAAO,GAAiB;QAC5B,QAAQ,EAAE,SAAS;QACnB,GAAG;QACH,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,OAAO;QAC5B,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,SAAS;QAChC,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,SAAS;QAC1C,OAAO,EAAE,IAAI;QACb,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,QAAQ,EAAE,cAAc,CAAC,IAAI,CAAC,WAAW,CAAC;QAC1C,QAAQ,EAAE,EAAE;QACZ,SAAS,EAAE,eAAe,EAAE;QAC5B,QAAQ;KACT,CAAA;IAED,IAAI,SAA2B,CAAA;IAC/B,IAAI,CAAC;QACH,SAAS,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,CAAA;IAC9D,CAAC;IAAC,OAAO,CAAU,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,C
AAC,CAAC,CAAA;QAC1D,OAAO,GAAG,CAAC,eAAe,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,CAAA;IAC1D,CAAC;IAED,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;AAC7C,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,OAAqB,EACrB,SAAiB,EACjB,OAAuB;IAEvB,MAAM,SAAS,GAAqB,EAAE,CAAA;IACtC,MAAM,SAAS,GAAG,gBAAgB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,CAAA;IAC/C,MAAM,SAAS,CAAC,SAAS,CAAC,CAAA;IAE1B,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,OAAO;QAC9D,CAAC,CAAC,GAAG,SAAS,IAAI,SAAS,OAAO;QAClC,CAAC,CAAC,IAAI,CAAA;IAER,IAAI,YAAY,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,YAAY,CAAC,CAAA;QAC9C,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,CAAC,CAAA;QAC9C,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAClD,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,IAAI,SAAS,YAAY,CAAC,CAAA;QACvE,MAAM,aAAa,CAAC,QAAQ,EAAE;YAC5B,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,IAAI;YAC9B,YAAY,EAAE,OAAO,CAAC,WAAW,IAAI,IAAI;YACzC,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,SAAS,EAAE,YAAY;YACvB,UAAU,EAAE,OAAO,CAAC,SAAS;SAC9B,CAAC,CAAA;QACF,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAClD,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,IAAI,SAAS,KAAK,CAAC,CAAA;QAChE,MAAM,KAAK,GAAG,CAAC,KAAK,OAAO,CAAC,KAAK,EAAE,EAAE,EAAE,CAAC,CAAA;QACxC,IAAI,OAAO,CAAC,MAAM;YAAE,KAAK,CAAC,IAAI,CAAC,WAAW,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;QAC3D,IAAI,OAAO,CAAC,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,aAAa,OAAO,CAAC,WAAW,EAAE,CAAC,CAAA;QACvE,KAAK,CAAC,IAAI,CAAC,aAAa,OAAO,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAA;QACvE,MAAM,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;QAC/C,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IACtD,CAAC;IAED,OAAO,SAAS,CAAA;AAClB,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Juejin 文章提取器
|
|
3
|
+
*/
|
|
4
|
+
import { HttpClient } from '../../fetcher/http-client.js';
|
|
5
|
+
import type { ArticleDraft, OutputFormat, OutputArtifact } from '../../core/types.js';
|
|
6
|
+
import type { Result } from '../../core/result.js';
|
|
7
|
+
import type { JuejinArticleInfo } from './juejin.types.js';
|
|
8
|
+
/**
|
|
9
|
+
* 从 HTML 中提取掘金文章信息
|
|
10
|
+
* 返回空字符串表示未找到对应字段,不注入假数据
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Extracts Juejin article information from an HTML string.
 * An empty string means the field was not found; no placeholder data is injected.
 */
export declare function extractJuejinInfo(html: string): JuejinArticleInfo;
|
|
13
|
+
/**
 * Fetches a single Juejin article.
 *
 * @returns A promise of a `Result` carrying the article draft and the output artifacts.
 */
export declare function fetchJuejinArticle(
    url: string,
    httpClient: HttpClient,
    outputDir: string,
    formats: Array<OutputFormat>,
): Promise<Result<{ article: ArticleDraft; artifacts: Array<OutputArtifact> }>>;
|
|
20
|
+
//# sourceMappingURL=juejin-article.extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"juejin-article.extractor.d.ts","sourceRoot":"","sources":["../../../src/platforms/juejin/juejin-article.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,8BAA8B,CAAA;AAGzD,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAA;AACrF,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAA;AAKlD,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAA;AAO1D;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,CAoEjE;AAED;;GAEG;AACH,wBAAsB,kBAAkB,CACtC,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,UAAU,EACtB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,YAAY,EAAE,GACtB,OAAO,CAAC,MAAM,CAAC;IAAE,OAAO,EAAE,YAAY,CAAC;IAAC,SAAS,EAAE,cAAc,EAAE,CAAA;CAAE,CAAC,CAAC,CA6CzE"}
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Juejin 文章提取器
|
|
3
|
+
*/
|
|
4
|
+
import * as cheerio from 'cheerio';
|
|
5
|
+
import { htmlToMarkdown } from '../../converter/html-to-markdown.js';
|
|
6
|
+
import { sanitizeHtml } from '../../converter/sanitize-html.js';
|
|
7
|
+
import { ok, err } from '../../core/result.js';
|
|
8
|
+
import { fileSystemError, parseEmptyError } from '../../core/errors.js';
|
|
9
|
+
import { sanitizeFilename, formatLocalTime } from '../../utils/text.js';
|
|
10
|
+
import { writeFileSafe, writeJsonSafe, ensureDir } from '../../utils/fs.js';
|
|
11
|
+
import { join } from 'node:path';
|
|
12
|
+
/** Extra headers passed to `httpClient.get` for Juejin requests: a Referer set to the Juejin home page. */
const JUEJIN_HEADERS = { Referer: 'https://juejin.cn/' };
|
|
15
|
+
/**
 * Returns the first selection that matched something, trying the selectors in
 * order and querying each one only once.
 *
 * @param $ - Query function (a loaded cheerio document) called with each selector.
 * @param selectors - Selectors in priority order.
 * @returns The first non-empty `.first()` selection; when nothing matches,
 * the (empty) selection of the last selector.
 */
function selectFirstExisting($, selectors) {
    let selection;
    for (const selector of selectors) {
        selection = $(selector).first();
        if (selection.length)
            break;
    }
    return selection;
}
/**
 * Extracts Juejin article information from an HTML string.
 * An empty string means the field was not found; no placeholder data is injected.
 *
 * @param html - Markup of the article page, parsed with `cheerio.load`.
 * @returns `title`, `author`, `publishTime`, `contentHtml`, `contentText`
 * and `viewCount` as strings, plus `tags` as an array of strings.
 */
export function extractJuejinInfo(html) {
    const $ = cheerio.load(html);
    // Title
    const title = selectFirstExisting($, [
        'h1.article-title',
        'h1.article-title-text',
        'h1',
    ]).text().trim();
    // Author
    const author = $('span.name').first().text().trim();
    // Publish time
    const publishTime = selectFirstExisting($, ['span.time', 'time', 'span.date']).text().trim();
    // Content container
    const contentEl = selectFirstExisting($, [
        '#article-root',
        'div.article-content',
        'div.markdown-body',
        'article',
        '#article-content',
    ]);
    let contentHtml = '';
    let contentText = '';
    if (contentEl.length) {
        // Sanitise the container markup, then read HTML and text from the cleaned copy.
        // No retry against #article-root is made when the cleaned text is empty:
        // that element is already the highest-priority candidate above, so a retry
        // would only sanitise the same markup again.
        const cleaned$ = cheerio.load(sanitizeHtml(contentEl.html() ?? ''));
        contentHtml = cleaned$.html() ?? '';
        contentText = cleaned$.text().trim();
    }
    // Tags: a.tag takes priority; span.tag is read only when there is no a.tag
    // (kept consistent with the Python semantics, per the original note).
    const anchorTags = $('a.tag');
    const tagEls = anchorTags.length > 0 ? anchorTags : $('span.tag');
    const tags = tagEls
        .toArray()
        .map((el) => $(el).text().trim())
        .filter((tag) => tag);
    // View count
    const viewCount = selectFirstExisting($, ['span.view-count', 'span.read-count']).text().trim();
    return { title, author, publishTime, contentHtml, contentText, tags, viewCount };
}
|
|
75
|
+
/**
 * Fetches a single Juejin article, extracts its fields and writes the
 * requested output files.
 *
 * The request is sent with `JUEJIN_HEADERS`. An empty title is treated as a
 * parse failure; a missing author or publish time is only recorded as a warning.
 *
 * @param url - Article address passed to `httpClient.get`.
 * @param httpClient - Client whose `get` resolves to a result object.
 * @param outputDir - Directory passed on to `saveArtifacts`.
 * @param formats - Output formats passed on to `saveArtifacts`.
 * @returns The request error as an `err` result, a parse-empty `err` result
 * when no title is found, a file-system `err` result when saving throws,
 * otherwise an `ok` result with `article`, `artifacts` and `warnings`.
 */
export async function fetchJuejinArticle(url, httpClient, outputDir, formats) {
    const response = await httpClient.get(url, JUEJIN_HEADERS);
    if (!response.ok) {
        return err(response.error);
    }
    const pageHtml = response.value.body;
    const extracted = extractJuejinInfo(pageHtml);
    // No title means the page could not be parsed.
    if (!extracted.title) {
        return err(parseEmptyError(url));
    }
    // Missing optional fields are reported as warnings.
    const warnings = [];
    for (const [value, message] of [
        [extracted.author, '未找到作者信息'],
        [extracted.publishTime, '未找到发布时间'],
    ]) {
        if (!value) {
            warnings.push(message);
        }
    }
    // Metadata carries only the entries that were actually found.
    const metadata = {
        ...(extracted.tags.length ? { tags: extracted.tags } : {}),
        ...(extracted.viewCount ? { viewCount: extracted.viewCount } : {}),
    };
    const article = {
        platform: 'juejin',
        url,
        title: extracted.title,
        author: extracted.author ? extracted.author : undefined,
        publishTime: extracted.publishTime ? extracted.publishTime : undefined,
        rawHtml: pageHtml,
        contentHtml: extracted.contentHtml,
        contentText: extracted.contentText,
        markdown: htmlToMarkdown(extracted.contentHtml),
        metadata,
        fetchedAt: formatLocalTime(),
        warnings,
    };
    let artifacts;
    try {
        artifacts = await saveArtifacts(article, extracted, outputDir, formats);
    }
    catch (failure) {
        // Anything thrown while saving is reported as a file-system error result.
        const message = failure instanceof Error ? failure.message : String(failure);
        return err(fileSystemError(message, { outputDir, url }));
    }
    return ok({ article, artifacts, warnings });
}
|
|
123
|
+
/**
 * Writes the requested output files for a Juejin article and lists what was written.
 *
 * File names are built from the sanitised title and the current Unix time in
 * seconds. The HTML file is written only when `'html'` is requested and
 * `article.rawHtml` is non-empty; the Markdown file only when `'markdown'` is
 * requested and `article.markdown` is non-empty.
 *
 * @param article - Article whose fields are written out.
 * @param info - Extracted Juejin fields; `tags` and `viewCount` are read from it.
 * @param outputDir - Target directory, created through `ensureDir`.
 * @param formats - Requested formats (`'html'`, `'json'`, `'markdown'`).
 * @returns One `{ type, path }` entry per file written, in html, json, markdown order.
 */
async function saveArtifacts(article, info, outputDir, formats) {
    const written = [];
    const baseName = `${sanitizeFilename(article.title)}_${Math.floor(Date.now() / 1000)}`;
    const wants = (format) => formats.includes(format);
    await ensureDir(outputDir);
    // Stays null when no HTML file is written; the JSON summary records it either way.
    let htmlFileName = null;
    if (wants('html') && article.rawHtml) {
        htmlFileName = `${baseName}.html`;
        const htmlPath = join(outputDir, htmlFileName);
        await writeFileSafe(htmlPath, article.rawHtml);
        written.push({ type: 'html', path: htmlPath });
    }
    if (wants('json')) {
        const jsonPath = join(outputDir, `${baseName}_info.json`);
        const summary = {
            title: article.title,
            author: article.author ?? null,
            publish_time: article.publishTime ?? null,
            tags: info.tags,
            view_count: info.viewCount || null,
            url: article.url,
            html_file: htmlFileName,
            fetch_time: article.fetchedAt,
        };
        await writeJsonSafe(jsonPath, summary);
        written.push({ type: 'json', path: jsonPath });
    }
    if (wants('markdown') && article.markdown) {
        const markdownPath = join(outputDir, `${baseName}.md`);
        // Optional header lines appear only for the fields that are known.
        const documentLines = [
            `# ${article.title}`,
            '',
            ...(article.author ? [`**作者**: ${article.author}`] : []),
            ...(article.publishTime ? [`**发布时间**: ${article.publishTime}`] : []),
            ...(info.viewCount ? [`**阅读量**: ${info.viewCount}`] : []),
            ...(info.tags.length ? [`**标签**: ${info.tags.join(', ')}`] : []),
            `**原文链接**: ${article.url}`,
            '',
            '---',
            '',
            article.markdown,
        ];
        await writeFileSafe(markdownPath, documentLines.join('\n'));
        written.push({ type: 'markdown', path: markdownPath });
    }
    return written;
}
|
|
167
|
+
//# sourceMappingURL=juejin-article.extractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"juejin-article.extractor.js","sourceRoot":"","sources":["../../../src/platforms/juejin/juejin-article.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,OAAO,MAAM,SAAS,CAAA;AAElC,OAAO,EAAE,cAAc,EAAE,MAAM,qCAAqC,CAAA;AACpE,OAAO,EAAE,YAAY,EAAE,MAAM,kCAAkC,CAAA;AAG/D,OAAO,EAAE,EAAE,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAA;AAC9C,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AACvE,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACvE,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAA;AAE3E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAEhC,MAAM,cAAc,GAA2B;IAC7C,OAAO,EAAE,oBAAoB;CAC9B,CAAA;AAED;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAE5B,KAAK;IACL,MAAM,OAAO,GACX,CAAC,CAAC,kBAAkB,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,KAAK,EAAE;QACpE,CAAC,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,KAAK,EAAE;YAChF,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAA;IACnB,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;IAEnC,KAAK;IACL,MAAM,QAAQ,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,CAAA;IACvC,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;IAErC,OAAO;IACP,MAAM,MAAM,GACV,CAAC,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE;QACtD,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE;YAC9C,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,CAAA;IAC1B,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;IAExC,OAAO;IACP,MAAM,SAAS,GACb,CAAC,CAAC,eAAe,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,KAAK,EAAE;QAC9D,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE;YAC5E,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,KAAK,EAAE;gBACxE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,
CAAC,SAAS,CAAC,CAAC,KAAK,EAAE;oBACpD,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,KAAK,EAAE,CAAA;IAEjC,IAAI,WAAW,GAAG,EAAE,CAAA;IACpB,IAAI,WAAW,GAAG,EAAE,CAAA;IAEpB,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC;QACrB,OAAO;QACP,MAAM,WAAW,GAAG,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAA;QACxD,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAC1C,WAAW,GAAG,QAAQ,CAAC,IAAI,EAAE,IAAI,EAAE,CAAA;QACnC,WAAW,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;QAEpC,6CAA6C;QAC7C,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAChC,MAAM,WAAW,GAAG,KAAK,CAAC,eAAe,CAAC,CAAC,KAAK,EAAE,CAAA;YAClD,IAAI,WAAW,CAAC,MAAM,EAAE,CAAC;gBACvB,MAAM,SAAS,GAAG,YAAY,CAAC,WAAW,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAA;gBACxD,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;gBAC1C,WAAW,GAAG,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,CAAA;gBACrC,WAAW,GAAG,UAAU,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;YACxC,CAAC;QACH,CAAC;IACH,CAAC;IAED,0CAA0C;IAC1C,MAAM,IAAI,GAAa,EAAE,CAAA;IACzB,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAA;IACxB,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAA;IACvD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QACpB,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;QAC/B,IAAI,GAAG;YAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACzB,CAAC,CAAC,CAAA;IAEF,MAAM;IACN,MAAM,MAAM,GACV,CAAC,CAAC,iBAAiB,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,KAAK,EAAE;QAClE,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,KAAK,EAAE,CAAA;IAChC,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAA;IAEtC,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,WAAW,EAAE,WAAW,EAAE,IAAI,EAAE,SAAS,EAAE,CAAA;AAClF,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,GAAW,EACX,UAAsB,EACtB,SAAiB,EACjB,OAAuB;IAEvB,MAAM,GAAG,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,cAAc,CAAC,CAAA;IACrD,IAAI,CAAC,GAAG,CAAC,EAAE;QAAE,OAAO,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;IAElC,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAA;IAC3B,MAAM,IAAI,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAA;IAEpC,cAAc;IAC
d,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QAChB,OAAO,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAA;IAClC,CAAC;IAED,mBAAmB;IACnB,MAAM,QAAQ,GAAa,EAAE,CAAA;IAC7B,IAAI,CAAC,IAAI,CAAC,MAAM;QAAE,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAC1C,IAAI,CAAC,IAAI,CAAC,WAAW;QAAE,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;IAE/C,MAAM,QAAQ,GAA4B,EAAE,CAAA;IAC5C,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM;QAAE,QAAQ,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,CAAA;IAC/C,IAAI,IAAI,CAAC,SAAS;QAAE,QAAQ,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAA;IAEvD,MAAM,OAAO,GAAiB;QAC5B,QAAQ,EAAE,QAAQ;QAClB,GAAG;QACH,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,SAAS;QAChC,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,SAAS;QAC1C,OAAO,EAAE,IAAI;QACb,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,QAAQ,EAAE,cAAc,CAAC,IAAI,CAAC,WAAW,CAAC;QAC1C,QAAQ;QACR,SAAS,EAAE,eAAe,EAAE;QAC5B,QAAQ;KACT,CAAA;IAED,IAAI,SAA2B,CAAA;IAC/B,IAAI,CAAC;QACH,SAAS,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,CAAC,CAAA;IACpE,CAAC;IAAC,OAAO,CAAU,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAA;QAC1D,OAAO,GAAG,CAAC,eAAe,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,CAAA;IAC1D,CAAC;IAED,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;AAC7C,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,OAAqB,EACrB,IAAuB,EACvB,SAAiB,EACjB,OAAuB;IAEvB,MAAM,SAAS,GAAqB,EAAE,CAAA;IACtC,MAAM,SAAS,GAAG,gBAAgB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,CAAA;IAC/C,MAAM,SAAS,CAAC,SAAS,CAAC,CAAA;IAE1B,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,OAAO;QAC9D,CAAC,CAAC,GAAG,SAAS,IAAI,SAAS,OAAO;QAClC,CAAC,CAAC,IAAI,CAAA;IAER,IAAI,YAAY,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,YAAY,CAAC,CAAA;QAC9C,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,CAAC,CAAA;QAC9C,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAClD,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,S
AAS,IAAI,SAAS,YAAY,CAAC,CAAA;QACvE,MAAM,aAAa,CAAC,QAAQ,EAAE;YAC5B,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,IAAI;YAC9B,YAAY,EAAE,OAAO,CAAC,WAAW,IAAI,IAAI;YACzC,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,UAAU,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI;YAClC,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,SAAS,EAAE,YAAY;YACvB,UAAU,EAAE,OAAO,CAAC,SAAS;SAC9B,CAAC,CAAA;QACF,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAClD,CAAC;IAED,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,SAAS,IAAI,SAAS,KAAK,CAAC,CAAA;QAChE,MAAM,KAAK,GAAG,CAAC,KAAK,OAAO,CAAC,KAAK,EAAE,EAAE,EAAE,CAAC,CAAA;QACxC,IAAI,OAAO,CAAC,MAAM;YAAE,KAAK,CAAC,IAAI,CAAC,WAAW,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;QAC3D,IAAI,OAAO,CAAC,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,aAAa,OAAO,CAAC,WAAW,EAAE,CAAC,CAAA;QACvE,IAAI,IAAI,CAAC,SAAS;YAAE,KAAK,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,SAAS,EAAE,CAAC,CAAA;QAC5D,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM;YAAE,KAAK,CAAC,IAAI,CAAC,WAAW,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;QACnE,KAAK,CAAC,IAAI,CAAC,aAAa,OAAO,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAA;QACvE,MAAM,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;QAC/C,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IACtD,CAAC;IAED,OAAO,SAAS,CAAA;AAClB,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types related to the Juejin platform.
|
|
3
|
+
*/
|
|
4
|
+
export interface JuejinArticleInfo {
|
|
5
|
+
title: string;
|
|
6
|
+
author: string;
|
|
7
|
+
publishTime: string;
|
|
8
|
+
contentHtml: string;
|
|
9
|
+
contentText: string;
|
|
10
|
+
tags: string[];
|
|
11
|
+
viewCount: string;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=juejin.types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"juejin.types.d.ts","sourceRoot":"","sources":["../../../src/platforms/juejin/juejin.types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,IAAI,EAAE,MAAM,EAAE,CAAA;IACd,SAAS,EAAE,MAAM,CAAA;CAClB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"juejin.types.js","sourceRoot":"","sources":["../../../src/platforms/juejin/juejin.types.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WeChat album extractor.
|
|
3
|
+
*/
|
|
4
|
+
import { HttpClient } from '../../fetcher/http-client.js';
|
|
5
|
+
import type { AlbumArticleEntry, ArticleDraft, OutputFormat, OutputArtifact } from '../../core/types.js';
|
|
6
|
+
import type { Result } from '../../core/result.js';
|
|
7
|
+
import type { WechatAlbumInfo } from './wechat.types.js';
|
|
8
|
+
/**
|
|
9
|
+
* 解析专辑 URL 参数
|
|
10
|
+
*/
|
|
11
|
+
export declare function parseAlbumInfo(albumUrl: string): WechatAlbumInfo | null;
|
|
12
|
+
/**
|
|
13
|
+
* 获取专辑中的文章列表(分页)
|
|
14
|
+
*/
|
|
15
|
+
export declare function fetchAlbumArticleList(albumUrl: string, httpClient: HttpClient, maxArticles?: number): Promise<Result<AlbumArticleEntry[]>>;
|
|
16
|
+
/**
|
|
17
|
+
* 批量下载专辑文章
|
|
18
|
+
*/
|
|
19
|
+
export declare function fetchAlbumArticles(albumUrl: string, httpClient: HttpClient, outputDir: string, formats: OutputFormat[], maxArticles?: number): Promise<Result<{
|
|
20
|
+
articles: ArticleDraft[];
|
|
21
|
+
artifacts: OutputArtifact[];
|
|
22
|
+
summaryFile?: string;
|
|
23
|
+
failedCount?: number;
|
|
24
|
+
}>>;
|
|
25
|
+
//# sourceMappingURL=wechat-album.extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"wechat-album.extractor.d.ts","sourceRoot":"","sources":["../../../src/platforms/wechat/wechat-album.extractor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,8BAA8B,CAAA;AACzD,OAAO,KAAK,EAAE,iBAAiB,EAAE,YAAY,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAA;AACxG,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAA;AAGlD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAA;AAUxD;;GAEG;AACH,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,eAAe,GAAG,IAAI,CAevE;AAED;;GAEG;AACH,wBAAsB,qBAAqB,CACzC,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,UAAU,EACtB,WAAW,CAAC,EAAE,MAAM,GACnB,OAAO,CAAC,MAAM,CAAC,iBAAiB,EAAE,CAAC,CAAC,CA2FtC;AAED;;GAEG;AACH,wBAAsB,kBAAkB,CACtC,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,UAAU,EACtB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,YAAY,EAAE,EACvB,WAAW,CAAC,EAAE,MAAM,GACnB,OAAO,CAAC,MAAM,CAAC;IAAE,QAAQ,EAAE,YAAY,EAAE,CAAC;IAAC,SAAS,EAAE,cAAc,EAAE,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CAqExH"}
|