czon 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/extractMetadataFromMarkdown.js +71 -82
- package/dist/build/pipeline.js +1 -0
- package/dist/process/processTranslations.js +5 -2
- package/dist/process/template.js +18 -2
- package/dist/ssg/ContentPage.js +1 -1
- package/dist/ssg/IndexPage.js +2 -1
- package/dist/ssg/resourceMap.js +10 -0
- package/package.json +1 -1
|
package/dist/ai/extractMetadataFromMarkdown.js
CHANGED
|
@@ -1,13 +1,59 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.extractMetadataFromMarkdown = extractMetadataFromMarkdown;
|
|
4
|
+
const metadata_1 = require("../metadata");
|
|
4
5
|
const openai_1 = require("../services/openai");
|
|
5
6
|
/**
|
|
6
|
-
*
|
|
7
|
-
*
|
|
7
|
+
* AI Metadata 提取模块
|
|
8
|
+
*
|
|
9
|
+
* 优化策略说明:
|
|
10
|
+
* 1. 从 MetaData 全局状态读取已有 slug,不作为参数传递
|
|
11
|
+
* 2. 如果已有 slug,条件化 prompt - 完全不提及 slug 相关指令
|
|
12
|
+
* 3. trade-off: 优先提升 AI 质量(减少无关任务干扰),可能降低 context 缓存命中率
|
|
8
13
|
*/
|
|
9
14
|
async function extractMetadataFromMarkdown(filePath, content) {
|
|
10
|
-
const
|
|
15
|
+
const existingSlug = metadata_1.MetaData.files.find(f => f.path === filePath)?.metadata?.slug;
|
|
16
|
+
const hasExistingSlug = !!existingSlug;
|
|
17
|
+
const fields = [
|
|
18
|
+
'title: 文档的标题(简洁明了,不超过 30 个字)',
|
|
19
|
+
'tags: 关键词列表(3-8 个关键词,使用中文或英文)',
|
|
20
|
+
'description: 文档的简短描述,微摘要(用一句话概括本文核心价值,不超过 100 字符),用于 SEO meta description,社交卡片短描述',
|
|
21
|
+
'summary: 文档中型摘要(用一段话总结文章,包含关键论点和结论,控制在 300 字以内),用于 邮件推送内容,newsletter 介绍',
|
|
22
|
+
'inferred_date: 文档中隐含的创建日期(如果有的话,格式:YYYY-MM-DD,没有就留空字符串)',
|
|
23
|
+
'inferred_lang: 文档使用的语言代码(例如:zh-Hans 表示简体中文,en-US 表示美式英语)',
|
|
24
|
+
'key_points: 文章的关键要点列表(5-10 个要点,简洁明了)',
|
|
25
|
+
'audience: 目标读者描述(简短描述,50 字以内)',
|
|
26
|
+
'short_summary: 文档的超短摘要(用 2-3 句话概括文章主要内容,突出核心观点),用于文章列表页摘要,RSS feed 描述',
|
|
27
|
+
...(hasExistingSlug
|
|
28
|
+
? []
|
|
29
|
+
: ['slug: URL 友好别名(使用小写字母、数字和连字符,仅包含英文和数字)']),
|
|
30
|
+
];
|
|
31
|
+
const jsonFields = [
|
|
32
|
+
'{',
|
|
33
|
+
' "title": "文档标题",',
|
|
34
|
+
' "description": "用一句话概括本文核心价值,不超过 100 字符",',
|
|
35
|
+
' "summary": "中型摘要,用一段话总结文章,包含关键论点和结论",',
|
|
36
|
+
' "short_summary": "超短摘要,用 2-3 句话概括文章主要内容,突出核心观点",',
|
|
37
|
+
' "tags": ["关键词1", "关键词2", "关键词3"],',
|
|
38
|
+
' "inferred_date": "2023-01-01",',
|
|
39
|
+
' "inferred_lang": "zh-Hans",',
|
|
40
|
+
' "key_points": ["要点1", "要点2", "要点3"],',
|
|
41
|
+
...(hasExistingSlug ? [] : [' "slug": "URL 友好别名",']),
|
|
42
|
+
' "audience": "目标读者描述"',
|
|
43
|
+
'}',
|
|
44
|
+
];
|
|
45
|
+
const prompt = `请分析以下文档内容,提取以下信息并返回 JSON 格式:
|
|
46
|
+
|
|
47
|
+
文档内容:
|
|
48
|
+
"""
|
|
49
|
+
${content}
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
请提取:
|
|
53
|
+
${fields.join('\n')}
|
|
54
|
+
|
|
55
|
+
请严格按照以下 JSON 格式返回,不要包含任何其他文本:
|
|
56
|
+
${jsonFields.join('\n')}`;
|
|
11
57
|
const messages = [
|
|
12
58
|
{
|
|
13
59
|
role: 'system',
|
|
@@ -22,85 +68,28 @@ async function extractMetadataFromMarkdown(filePath, content) {
|
|
|
22
68
|
response_format: { type: 'json_object' },
|
|
23
69
|
task_id: `extract-metadata:${filePath}`,
|
|
24
70
|
});
|
|
25
|
-
const metadata =
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
71
|
+
const metadata = JSON.parse(response.choices[0].message.content);
|
|
72
|
+
const result = {
|
|
73
|
+
title: metadata.title?.trim() || '',
|
|
74
|
+
description: metadata.description?.trim() || '',
|
|
75
|
+
short_summary: metadata.short_summary?.trim() || '',
|
|
76
|
+
audience: metadata.audience?.trim() || '',
|
|
77
|
+
key_points: Array.isArray(metadata.key_points)
|
|
78
|
+
? metadata.key_points.map((point) => point.trim()).filter(Boolean)
|
|
79
|
+
: [],
|
|
80
|
+
summary: metadata.summary?.trim() || '',
|
|
81
|
+
slug: metadata.slug?.trim() || existingSlug || '',
|
|
82
|
+
tags: Array.isArray(metadata.tags)
|
|
83
|
+
? metadata.tags.map((tag) => tag.trim()).filter(Boolean)
|
|
84
|
+
: [],
|
|
85
|
+
inferred_date: metadata.inferred_date?.trim() || undefined,
|
|
86
|
+
inferred_lang: metadata.inferred_lang?.trim() || undefined,
|
|
87
|
+
tokens_used: {
|
|
88
|
+
prompt: response.usage.prompt_tokens,
|
|
89
|
+
completion: response.usage.completion_tokens,
|
|
90
|
+
total: response.usage.total_tokens,
|
|
91
|
+
},
|
|
31
92
|
};
|
|
32
|
-
return
|
|
33
|
-
}
|
|
34
|
-
/**
|
|
35
|
-
* 构建提取 metadata 的 prompt
|
|
36
|
-
*/
|
|
37
|
-
function buildMetadataPrompt(content) {
|
|
38
|
-
// 限制内容长度以避免 token 超限
|
|
39
|
-
const maxContentLength = Infinity; // 可根据需要调整长度限制
|
|
40
|
-
const truncatedContent = content.length > maxContentLength
|
|
41
|
-
? content.substring(0, maxContentLength) + '... [内容已截断]'
|
|
42
|
-
: content;
|
|
43
|
-
return `请分析以下文档内容,提取以下信息并返回 JSON 格式:
|
|
44
|
-
|
|
45
|
-
文档内容:
|
|
46
|
-
"""
|
|
47
|
-
${truncatedContent}
|
|
48
|
-
"""
|
|
49
|
-
|
|
50
|
-
请提取:
|
|
51
|
-
1. title: 文档的标题(简洁明了,不超过 30 个字)
|
|
52
|
-
2. slug: URL 友好别名(使用小写字母、数字和连字符,仅包含英文和数字)
|
|
53
|
-
3. tags: 关键词列表(3-8 个关键词,使用中文或英文)
|
|
54
|
-
4. description: 文档的简短描述,微摘要(用一句话概括本文核心价值,不超过 100 字符),用于 SEO meta description,社交卡片短描述
|
|
55
|
-
5. summary: 文档中型摘要(用一段话总结文章,包含关键论点和结论,控制在 300 字以内),用于 邮件推送内容,newsletter 介绍
|
|
56
|
-
6. inferred_date: 文档中隐含的创建日期(如果有的话,格式:YYYY-MM-DD,没有就留空字符串)
|
|
57
|
-
7. inferred_lang: 文档使用的语言代码(例如:zh-Hans 表示简体中文,en-US 表示美式英语)
|
|
58
|
-
8. key_points: 文章的关键要点列表(5-10 个要点,简洁明了)
|
|
59
|
-
9. audience: 目标读者描述(简短描述,50 字以内)
|
|
60
|
-
10. short_summary: 文档的超短摘要(用 2-3 句话概括文章主要内容,突出核心观点),用于文章列表页摘要,RSS feed 描述
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
请严格按照以下 JSON 格式返回,不要包含任何其他文本:
|
|
64
|
-
{
|
|
65
|
-
"title": "文档标题",
|
|
66
|
-
"description": "用一句话概括本文核心价值,不超过 100 字符",
|
|
67
|
-
"summary": "中型摘要,用一段话总结文章,包含关键论点和结论",
|
|
68
|
-
"short_summary": "超短摘要,用 2-3 句话概括文章主要内容,突出核心观点",
|
|
69
|
-
"slug": "URL 友好别名",
|
|
70
|
-
"tags": ["关键词1", "关键词2", "关键词3"],
|
|
71
|
-
"inferred_date": "2023-01-01",
|
|
72
|
-
"inferred_lang": "zh-Hans",
|
|
73
|
-
"key_points": ["要点1", "要点2", "要点3"],
|
|
74
|
-
"audience": "目标读者描述"
|
|
75
|
-
}`;
|
|
76
|
-
}
|
|
77
|
-
/**
|
|
78
|
-
* 解析 AI 返回的 metadata
|
|
79
|
-
*/
|
|
80
|
-
function parseMetadataResponse(responseContent) {
|
|
81
|
-
try {
|
|
82
|
-
const metadata = JSON.parse(responseContent);
|
|
83
|
-
// 验证和清理数据
|
|
84
|
-
return {
|
|
85
|
-
title: metadata.title?.trim() || '未命名文档',
|
|
86
|
-
description: metadata.description?.trim() || '',
|
|
87
|
-
short_summary: metadata.short_summary?.trim() || '',
|
|
88
|
-
audience: metadata.audience?.trim() || '',
|
|
89
|
-
key_points: Array.isArray(metadata.key_points)
|
|
90
|
-
? metadata.key_points.map((point) => point.trim()).filter(Boolean)
|
|
91
|
-
: [],
|
|
92
|
-
summary: metadata.summary?.trim() || '',
|
|
93
|
-
slug: metadata.slug?.trim() || '',
|
|
94
|
-
tags: Array.isArray(metadata.tags)
|
|
95
|
-
? metadata.tags.map((tag) => tag.trim()).filter(Boolean)
|
|
96
|
-
: [],
|
|
97
|
-
inferred_date: metadata.inferred_date?.trim() || undefined,
|
|
98
|
-
inferred_lang: metadata.inferred_lang?.trim() || 'zh-Hans',
|
|
99
|
-
};
|
|
100
|
-
}
|
|
101
|
-
catch (error) {
|
|
102
|
-
console.error('❌ Failed to parse AI response:', error, 'Response:', responseContent);
|
|
103
|
-
throw error;
|
|
104
|
-
}
|
|
93
|
+
return result;
|
|
105
94
|
}
|
|
106
95
|
//# sourceMappingURL=extractMetadataFromMarkdown.js.map
|
package/dist/build/pipeline.js
CHANGED
|
@@ -102,6 +102,7 @@ async function buildPipeline(options) {
|
|
|
102
102
|
await (0, processTranslations_1.processTranslations)();
|
|
103
103
|
// 渲染模板
|
|
104
104
|
await (0, template_1.spiderStaticSiteGenerator)();
|
|
105
|
+
await (0, template_1.downloadCDNResources)();
|
|
105
106
|
// 生成 robots.txt
|
|
106
107
|
await (0, robots_1.generateRobotsTxt)();
|
|
107
108
|
// 生成 sitemap.xml
|
package/dist/process/processTranslations.js
CHANGED
|
@@ -36,6 +36,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
36
36
|
exports.processTranslations = processTranslations;
|
|
37
37
|
const promises_1 = require("fs/promises");
|
|
38
38
|
const path_1 = __importStar(require("path"));
|
|
39
|
+
const translateMarkdown_1 = require("../ai/translateMarkdown");
|
|
39
40
|
const languages_1 = require("../languages");
|
|
40
41
|
const metadata_1 = require("../metadata");
|
|
41
42
|
const paths_1 = require("../paths");
|
|
@@ -94,9 +95,11 @@ async function processTranslations() {
|
|
|
94
95
|
console.info(`ℹ️ Content unchanged for ${file.path}, skipping translation.`);
|
|
95
96
|
return;
|
|
96
97
|
}
|
|
97
|
-
const
|
|
98
|
+
const translatedResponse = await (0, translateMarkdown_1.translateMarkdown)(sourcePath, content, lang);
|
|
99
|
+
const translatedContent = translatedResponse.choices?.[0].message.content?.trim() || '';
|
|
98
100
|
const translationMeta = ((_a = (file.translations ?? (file.translations = {})))[lang] ?? (_a[lang] = {}));
|
|
99
|
-
translationMeta.content_length = translatedContent.length;
|
|
101
|
+
translationMeta.content_length = translatedContent.length; // 记录翻译后内容长度
|
|
102
|
+
translationMeta.token_used = translatedResponse.usage; // 记录 token 使用情况
|
|
100
103
|
await (0, writeFile_1.writeFile)(targetPath, translatedContent);
|
|
101
104
|
// 存储已增强内容的哈希值
|
|
102
105
|
file.nativeMarkdownHash = hash;
|
package/dist/process/template.js
CHANGED
|
@@ -33,13 +33,14 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.spiderStaticSiteGenerator = void 0;
|
|
36
|
+
exports.downloadCDNResources = exports.spiderStaticSiteGenerator = void 0;
|
|
37
37
|
const fs = __importStar(require("fs/promises"));
|
|
38
38
|
const path = __importStar(require("path"));
|
|
39
|
+
const sitemap_1 = require("../build/sitemap");
|
|
39
40
|
const metadata_1 = require("../metadata");
|
|
40
41
|
const paths_1 = require("../paths");
|
|
41
|
-
const sitemap_1 = require("../build/sitemap");
|
|
42
42
|
const ssg_1 = require("../ssg");
|
|
43
|
+
const resourceMap_1 = require("../ssg/resourceMap");
|
|
43
44
|
const convertMarkdownToHtml_1 = require("../utils/convertMarkdownToHtml");
|
|
44
45
|
const frontmatter_1 = require("../utils/frontmatter");
|
|
45
46
|
const writeFile_1 = require("../utils/writeFile");
|
|
@@ -151,4 +152,19 @@ const spiderStaticSiteGenerator = async () => {
|
|
|
151
152
|
}
|
|
152
153
|
};
|
|
153
154
|
exports.spiderStaticSiteGenerator = spiderStaticSiteGenerator;
|
|
155
|
+
const downloadCDNResources = async () => {
|
|
156
|
+
for (const resource of resourceMap_1.EXTERNAL_RESOURCES) {
|
|
157
|
+
const targetFilePath = path.join(paths_1.CZON_DIST_DIR, 'assets', resource.name);
|
|
158
|
+
console.info(`⬇️ Downloading resource: ${resource.url} -> ${targetFilePath}`);
|
|
159
|
+
const response = await fetch(resource.url);
|
|
160
|
+
if (!response.ok) {
|
|
161
|
+
console.error(`❌ Failed to download resource: ${resource.url}, status: ${response.status}`);
|
|
162
|
+
throw new Error(`Failed to download resource: ${resource.url}`);
|
|
163
|
+
}
|
|
164
|
+
const buffer = await response.arrayBuffer();
|
|
165
|
+
await (0, writeFile_1.writeFile)(targetFilePath, Buffer.from(buffer));
|
|
166
|
+
console.info(`✅ Resource downloaded: ${targetFilePath}`);
|
|
167
|
+
}
|
|
168
|
+
};
|
|
169
|
+
exports.downloadCDNResources = downloadCDNResources;
|
|
154
170
|
//# sourceMappingURL=template.js.map
|
package/dist/ssg/ContentPage.js
CHANGED
|
@@ -30,7 +30,7 @@ const ContentPage = props => {
|
|
|
30
30
|
react_1.default.createElement("meta", { name: "viewport", content: "width=device-width, initial-scale=1.0" }),
|
|
31
31
|
react_1.default.createElement("title", null, title),
|
|
32
32
|
react_1.default.createElement("meta", { name: "description", content: `tags: ${tags.join(', ')}` }),
|
|
33
|
-
react_1.default.createElement("script", { src:
|
|
33
|
+
react_1.default.createElement("script", { src: (0, node_path_1.relative)(props.ctx.path, '/assets/tailwindcss.js'), defer: true }),
|
|
34
34
|
react_1.default.createElement("style", null, style_1.style),
|
|
35
35
|
react_1.default.createElement("script", { dangerouslySetInnerHTML: {
|
|
36
36
|
__html: `
|
package/dist/ssg/IndexPage.js
CHANGED
|
@@ -4,6 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.IndexPage = void 0;
|
|
7
|
+
const node_path_1 = require("node:path");
|
|
7
8
|
const react_1 = __importDefault(require("react"));
|
|
8
9
|
const sortBy_1 = require("../utils/sortBy");
|
|
9
10
|
const ContentMeta_1 = require("./components/ContentMeta");
|
|
@@ -59,7 +60,7 @@ const IndexPage = props => {
|
|
|
59
60
|
react_1.default.createElement("footer", null,
|
|
60
61
|
react_1.default.createElement(LanguageSwitcher_1.LanguageSwitcher, { ctx: props.ctx, lang: props.lang }),
|
|
61
62
|
react_1.default.createElement(CZONFooter_1.CZONFooter, null))), footer: null }),
|
|
62
|
-
react_1.default.createElement("script", { src:
|
|
63
|
+
react_1.default.createElement("script", { src: (0, node_path_1.relative)(props.ctx.path, '/assets/tailwindcss.js'), defer: true }))));
|
|
63
64
|
// TODO: 渲染多语言首页列表
|
|
64
65
|
// return (
|
|
65
66
|
// <div>
|
package/dist/ssg/resourceMap.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.EXTERNAL_RESOURCES = void 0;
|
|
4
|
+
exports.EXTERNAL_RESOURCES = [
|
|
5
|
+
{
|
|
6
|
+
name: 'tailwindcss.js',
|
|
7
|
+
url: 'https://cdn.jsdelivr.net/npm/@tailwindcss/browser@4',
|
|
8
|
+
},
|
|
9
|
+
];
|
|
10
|
+
//# sourceMappingURL=resourceMap.js.map
|