hm-doc-tool 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +230 -0
- package/dist/api-client.d.ts +44 -0
- package/dist/api-client.js +137 -0
- package/dist/common/config.d.ts +11 -0
- package/dist/common/config.js +70 -0
- package/dist/common/path-generator.d.ts +39 -0
- package/dist/common/path-generator.js +192 -0
- package/dist/common/tree-formatter.d.ts +19 -0
- package/dist/common/tree-formatter.js +129 -0
- package/dist/common/tree-processor.d.ts +35 -0
- package/dist/common/tree-processor.js +145 -0
- package/dist/common/types.d.ts +38 -0
- package/dist/common/types.js +5 -0
- package/dist/download/downloader.d.ts +46 -0
- package/dist/download/downloader.js +251 -0
- package/dist/download/index.d.ts +16 -0
- package/dist/download/index.js +168 -0
- package/dist/download/link-localizer.d.ts +13 -0
- package/dist/download/link-localizer.js +116 -0
- package/dist/download/markdown-converter.d.ts +1 -0
- package/dist/download/markdown-converter.js +96 -0
- package/dist/download/summary-generator.d.ts +46 -0
- package/dist/download/summary-generator.js +188 -0
- package/dist/download/turndown-rules.d.ts +2 -0
- package/dist/download/turndown-rules.js +394 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +65 -0
- package/dist/tree/index.d.ts +11 -0
- package/dist/tree/index.js +91 -0
- package/dist/wiki/index.d.ts +7 -0
- package/dist/wiki/index.js +22 -0
- package/dist/wiki/wiki-generator.d.ts +3 -0
- package/dist/wiki/wiki-generator.js +357 -0
- package/dist/wiki/wiki-types.d.ts +61 -0
- package/dist/wiki/wiki-types.js +3 -0
- package/dist/wiki/wiki-utils.d.ts +33 -0
- package/dist/wiki/wiki-utils.js +180 -0
- package/docs_catalog.json +28 -0
- package/package.json +29 -0
- package/wiki_config.json +198 -0
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.downloadSingle = downloadSingle;
|
|
37
|
+
exports.downloadSubtree = downloadSubtree;
|
|
38
|
+
/**
|
|
39
|
+
* 文档下载调度模块
|
|
40
|
+
*
|
|
41
|
+
* 职责:
|
|
42
|
+
* 1. 获取目录树 → 平铺 → 过滤 → 计算路径 → 并发下载 → 写入文件
|
|
43
|
+
* 2. 支持 --dry-run(只打印树形结构)
|
|
44
|
+
* 3. 单个文档失败不中断,最后汇总输出
|
|
45
|
+
*/
|
|
46
|
+
const fs = __importStar(require("fs/promises"));
|
|
47
|
+
const path = __importStar(require("path"));
|
|
48
|
+
const api_client_js_1 = require("../api-client.js");
|
|
49
|
+
const config_js_1 = require("../common/config.js");
|
|
50
|
+
const markdown_converter_js_1 = require("./markdown-converter.js");
|
|
51
|
+
const tree_processor_js_1 = require("../common/tree-processor.js");
|
|
52
|
+
const path_generator_js_1 = require("../common/path-generator.js");
|
|
53
|
+
const tree_formatter_js_1 = require("../common/tree-formatter.js");
|
|
54
|
+
/**
 * Download a single document and write it directly into `outputDir` as a
 * Markdown file (no per-catalog sub-directories are created).
 *
 * @param objectId - objectId of the document to fetch
 * @param catalogName - catalog name; not read here, the catalog reported by
 *   the fetched document is used for the source URL and category instead
 * @param nodeName - node name, used as the front-matter title and as the
 *   basis of the output file name
 * @param outputDir - directory that receives the file (created if missing)
 */
async function downloadSingle(objectId, catalogName, nodeName, outputDir) {
    const fetched = await (0, api_client_js_1.fetchDocument)(objectId);
    const body = (0, markdown_converter_js_1.htmlToMarkdown)(fetched.html);
    const sourceUrl = `https://developer.huawei.com/consumer/cn/doc/${fetched.catalogName}/${fetched.objectId}`;
    // Assemble the front-matter one line at a time; every line ends with "\n".
    let frontMatter = "---\n";
    frontMatter += `title: ${nodeName}\n`;
    frontMatter += `source: ${sourceUrl}\n`;
    frontMatter += `category: ${(0, config_js_1.findCatalogName)(fetched.catalogName) || fetched.catalogName}\n`;
    // The "sv-SE" locale renders dates as YYYY-MM-DD.
    frontMatter += `downloaded_at: ${new Date().toLocaleDateString("sv-SE")}\n`;
    frontMatter += "---\n";
    // Guarantee exactly one separating newline between header and body.
    const separator = body.startsWith("\n") ? "" : "\n";
    const markdown = frontMatter + separator + body;
    // Make sure the output directory exists before writing.
    await fs.mkdir(outputDir, { recursive: true });
    const target = path.join(outputDir, (0, path_generator_js_1.safeFileName)(nodeName) + ".md");
    await fs.writeFile(target, markdown, "utf-8");
    console.log(`下载完成: ${nodeName}.md`);
}
|
|
82
|
+
/**
 * Run `fn` over every task with at most `concurrency` invocations in flight.
 *
 * The returned promise resolves once every task has finished and rejects with
 * the first error thrown or rejected by `fn` (tasks already in flight are not
 * cancelled).
 *
 * @param tasks - list of tasks
 * @param concurrency - maximum number of simultaneous invocations; values that
 *   are NaN, zero or negative fall back to 1 so the run can never stall
 * @param fn - worker, called as `fn(task, index)`
 */
async function runConcurrent(tasks, concurrency, fn) {
    const total = tasks.length;
    if (total === 0) {
        return;
    }
    // A non-numeric or non-positive limit used to mean "start nothing", which
    // left the promise pending forever. Clamp it to a usable value instead.
    // Math.ceil keeps the old behaviour for fractional limits (2.5 -> 3 slots).
    const requested = Number(concurrency);
    const limit = requested >= 1 ? Math.min(Math.ceil(requested), total) : 1;
    let cursor = 0;
    // Each worker keeps pulling the next unclaimed index until none are left.
    const worker = async () => {
        while (cursor < total) {
            const idx = cursor++;
            await fn(tasks[idx], idx);
        }
    };
    await Promise.all(Array.from({ length: limit }, () => worker()));
}
|
|
114
|
+
/**
 * Download the document behind one task and write it to the task's path.
 *
 * Flow: fetchDocument -> htmlToMarkdown -> prepend metadata header -> writeFile.
 * Tasks without a `relateDocument` are ignored.
 *
 * @param task - download task (reads relateDocument, nodeName, outputDir, fileName)
 * @param total - total number of tasks, used only in the progress log
 * @param current - sequence number of this task, used only in the progress log
 */
async function downloadOne(task, total, current) {
    const { relateDocument } = task;
    if (!relateDocument) {
        return;
    }
    const fetched = await (0, api_client_js_1.fetchDocument)(relateDocument);
    const body = (0, markdown_converter_js_1.htmlToMarkdown)(fetched.html);
    const sourceUrl = `https://developer.huawei.com/consumer/cn/doc/${fetched.catalogName}/${fetched.objectId}`;
    // Metadata front-matter, assembled line by line; every line ends with "\n".
    let frontMatter = "---\n";
    frontMatter += `title: ${task.nodeName}\n`;
    frontMatter += `source: ${sourceUrl}\n`;
    frontMatter += `category: ${(0, config_js_1.findCatalogName)(fetched.catalogName) || fetched.catalogName}\n`;
    // The "sv-SE" locale renders dates as YYYY-MM-DD.
    frontMatter += `downloaded_at: ${new Date().toLocaleDateString("sv-SE")}\n`;
    frontMatter += "---\n";
    // Guarantee exactly one separating newline between header and body.
    const separator = body.startsWith("\n") ? "" : "\n";
    const markdown = frontMatter + separator + body;
    // The target directory is expected to exist already; it is not created here.
    const target = path.join(task.outputDir, task.fileName);
    await fs.writeFile(target, markdown, "utf-8");
    console.log(`[${current}/${total}] 下载: ${task.nodeName}.md`);
}
|
|
144
|
+
/**
 * Download a subtree of a documentation catalog.
 *
 * Steps:
 *  1. Fetch the catalog tree as a flat node list.
 *  2. Resolve the subtree root (explicit rootId, else lookup by nodeId / nodeName).
 *  3. Collect the subtree nodes (the whole catalog when there is no root).
 *  4. Drop nodes in the C++ skip set when `noCpp` is set.
 *  5. Keep only leaves when `leafOnly` is set.
 *  6. Compute output paths (full node list for path calculation, filtered
 *     list to decide which tasks are produced).
 *  7. With `dryRun`, print the tree and stop.
 *  8. Create every directory up front, before any concurrent write.
 *  9. Download concurrently; a failing document is recorded, not fatal.
 * 10. Print a summary (success count and failure list).
 *
 * @param catalogName - catalog name
 * @param rootId - subtree root node id; falsy means "resolve from nodeId/nodeName, else whole catalog"
 * @param nodeName - locate the root by name (used when rootId is falsy)
 * @param nodeId - locate the root by id (used when rootId is falsy)
 * @param options - { outputDir, concurrency, leafOnly, noCpp, dryRun, topLevel }
 * @returns `undefined` for a dry run, otherwise `{ tasks, nodes, outputDir }`
 *   where `outputDir` is the caller-supplied directory (without `topLevel`)
 * @throws Error when nodeId/nodeName is given but no matching node exists
 */
async function downloadSubtree(catalogName, rootId, nodeName, nodeId, options) {
    const { outputDir, concurrency, leafOnly, noCpp, dryRun, topLevel } = options;
    // Effective output directory: {outputDir}/{topLevel}/ when a top level is given.
    const actualOutputDir = topLevel ? path.join(outputDir, topLevel) : outputDir;

    // 1. Fetch the catalog tree.
    console.log(`正在获取目录树: ${catalogName} ...`);
    const nodes = await (0, tree_processor_js_1.fetchCatalogTree)(catalogName);
    console.log(`目录树节点数: ${nodes.length}`);

    // 2. Resolve the subtree root.
    let targetRootId = rootId;
    if (!targetRootId && (nodeName || nodeId)) {
        const located = (0, tree_processor_js_1.findNode)(nodes, nodeId, nodeName);
        if (!located) {
            throw new Error(`找不到节点: ${nodeId || nodeName}`);
        }
        targetRootId = located.nodeId;
    }

    // 3. Collect the subtree (a copy of the full list when there is no root).
    let subtreeNodes = targetRootId
        ? (0, tree_processor_js_1.collectSubtree)(nodes, targetRootId)
        : [...nodes];

    // 4. C++ filter.
    if (noCpp) {
        const skipped = (0, tree_processor_js_1.buildCppSkipSet)(nodes, catalogName, noCpp);
        subtreeNodes = subtreeNodes.filter((node) => !skipped.has(node.nodeId));
    }

    // 5. Leaf-only filter.
    if (leafOnly) {
        subtreeNodes = subtreeNodes.filter((node) => node.isLeaf || node.children.length === 0);
    }

    // 6. Output paths; only tasks with a real document reference are downloaded.
    const tasks = (0, path_generator_js_1.buildOutputPaths)(nodes, targetRootId, catalogName, actualOutputDir, subtreeNodes);
    const downloadTasks = tasks.filter((task) => task.relateDocument && task.relateDocument !== "_");

    // 7. Dry run: print the tree and stop.
    if (dryRun) {
        const treeOutput = (0, tree_formatter_js_1.buildTreeOutput)(nodes, targetRootId, catalogName, new Set(), leafOnly, undefined, noCpp);
        console.log(treeOutput);
        console.log(`\n输出目录: ${actualOutputDir}`);
        console.log(`共 ${downloadTasks.length} 篇文档待下载`);
        return undefined;
    }

    // 8. Create all directories sequentially before any concurrent write.
    const dirs = (0, path_generator_js_1.collectDirectories)(nodes, targetRootId, catalogName, actualOutputDir, subtreeNodes);
    for (const dir of dirs) {
        await fs.mkdir(dir, { recursive: true });
    }

    // 9. Concurrent download; errors are collected per task.
    console.log(`开始下载 ${downloadTasks.length} 篇文档 (并发数: ${concurrency}) ...`);
    const failed = [];
    let started = 0;
    const attempt = async (task) => {
        try {
            // The sequence number is assigned when the task starts.
            started += 1;
            await downloadOne(task, downloadTasks.length, started);
        }
        catch (err) {
            const error = err instanceof Error ? err.message : String(err);
            failed.push({ nodeId: task.nodeId, nodeName: task.nodeName, error });
        }
    };
    await runConcurrent(downloadTasks, concurrency, attempt);

    // 10. Summary.
    const successCount = downloadTasks.length - failed.length;
    console.log(`\n完成: 成功 ${successCount} 篇, 失败 ${failed.length} 篇`);
    if (failed.length > 0) {
        console.log("\n失败列表:");
        failed.forEach((entry) => {
            console.log(` - ${entry.nodeName} (${entry.nodeId}): ${entry.error}`);
        });
    }

    // Returned for summary generation and link localization. `outputDir` is
    // the directory the caller passed in; summary files are placed there.
    return { tasks: downloadTasks, nodes, outputDir };
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/** Options of the download command, as consumed by handleDownload. */
|
|
2
|
+
export interface DownloadOptions {
|
|
3
|
+
/** Catalog ids to download, processed one after another; mutually exclusive with `url`. */
catalog?: string[];
|
|
4
|
+
/** Catalog mode only: name of the node whose subtree should be downloaded. */
nodeName?: string;
|
|
5
|
+
/** Catalog mode only: id of the node whose subtree should be downloaded. */
nodeId?: string;
|
|
6
|
+
/** Document URL for single-file mode; the objectId is extracted from it. Mutually exclusive with `catalog`. */
url?: string;
|
|
7
|
+
/** Output directory. */
output: string;
|
|
8
|
+
/** Forwarded to the downloader as `leafOnly`. */
leafOnly: boolean;
|
|
9
|
+
/** handleDownload treats `false` as "--no-cpp was passed" and enables the C++ filter. */
cpp: boolean;
|
|
10
|
+
/** Concurrency limit as a string; handleDownload parses it as a base-10 integer. */
concurrency: string;
|
|
11
|
+
/** Forwarded to the downloader as `dryRun`; no summary or localization runs for a dry run. */
dryRun: boolean;
|
|
12
|
+
/** When true, a summary is generated for each downloaded catalog. */
summary: boolean;
|
|
13
|
+
/** When true, links in the downloaded files are localized after the download. */
localize: boolean;
|
|
14
|
+
}
|
|
15
|
+
/** Download 命令处理函数 */
|
|
16
|
+
export declare function handleDownload(opts: DownloadOptions): Promise<void>;
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.handleDownload = handleDownload;
|
|
4
|
+
/**
|
|
5
|
+
* Download 命令入口
|
|
6
|
+
*
|
|
7
|
+
* 职责:
|
|
8
|
+
* 1. 处理 CLI 选项
|
|
9
|
+
* 2. 调用 downloader 执行下载
|
|
10
|
+
*/
|
|
11
|
+
const tree_processor_js_1 = require("../common/tree-processor.js");
|
|
12
|
+
const config_js_1 = require("../common/config.js");
|
|
13
|
+
const downloader_js_1 = require("./downloader.js");
|
|
14
|
+
const api_client_js_1 = require("../api-client.js");
|
|
15
|
+
const config_js_2 = require("../common/config.js");
|
|
16
|
+
const summary_generator_js_1 = require("./summary-generator.js");
|
|
17
|
+
const link_localizer_js_1 = require("./link-localizer.js");
|
|
18
|
+
/**
 * Parse the concurrency option as a base-10 integer.
 *
 * @param raw - option value as received from the CLI
 * @param strict - when true, reject anything that is not an integer >= 1
 * @returns the parsed value (may be NaN when `strict` is false)
 * @throws Error when `strict` is true and the value is unusable; a NaN or
 *   non-positive limit would otherwise be handed to the downloader as-is
 */
function parseConcurrencyOption(raw, strict) {
    const value = Number.parseInt(raw, 10);
    if (strict && !(value >= 1)) {
        throw new Error(`无效的并发数: ${raw}`);
    }
    return value;
}
/**
 * Post-processing shared by catalog mode and batch mode: summary generation
 * first (it reads the original links), then optional link localization
 * across all downloaded catalogs.
 *
 * @param results - list of `{ catalogId, result }` from downloadSubtree
 * @param opts - download options (reads `summary` and `localize`)
 * @param subtreeName - node name forwarded to the summary generator, or undefined
 */
async function finalizeDownloads(results, opts, subtreeName) {
    if (results.length === 0) {
        return;
    }
    if (opts.summary) {
        for (const { catalogId, result } of results) {
            await (0, summary_generator_js_1.generateSummaryFromTasks)(catalogId, result.outputDir, result.tasks, result.nodes, subtreeName);
        }
    }
    // Localization must be requested explicitly; the path map spans every
    // catalog so cross-catalog links can be rewritten too.
    if (opts.localize) {
        console.log("\n正在执行链接本地化 ...");
        const globalPathMap = new Map();
        for (const { catalogId, result } of results) {
            (0, link_localizer_js_1.buildCatalogPathMap)(catalogId, result.tasks, globalPathMap);
        }
        for (const { result } of results) {
            await (0, link_localizer_js_1.localizeAllLinks)(result.tasks, globalPathMap);
        }
    }
}
/**
 * URL mode: download the single document a URL points at, without creating
 * catalog directories. The node name is looked up in the catalog tree.
 *
 * @param opts - download options (reads `url` and `output`)
 */
async function downloadFromUrl(opts) {
    const objectId = (0, api_client_js_1.objectIdFromUrl)(opts.url);
    if (!objectId) {
        console.error("错误: 无法从 URL 提取 objectId");
        process.exit(1);
    }
    let catalogName = (0, api_client_js_1.catalogNameFromUrl)(opts.url) ?? undefined;
    let targetNodeName;
    if (!catalogName) {
        // The URL carries no catalog name: search every configured catalog.
        console.log("正在查找文档所属分类...");
        for (const catName of (0, config_js_2.getAllCatalogIds)()) {
            try {
                const flatNodes = await (0, tree_processor_js_1.fetchCatalogTree)(catName);
                const found = flatNodes.find((n) => n.relateDocument === objectId);
                if (found) {
                    catalogName = catName;
                    targetNodeName = found.nodeName;
                    break;
                }
            }
            catch {
                // Deliberate best effort: a catalog that cannot be queried is skipped.
            }
        }
        if (!catalogName) {
            console.error("错误: 找不到文档所属分类");
            process.exit(1);
        }
    }
    else {
        // The URL names the catalog: look the node name up in that tree.
        const flatNodes = await (0, tree_processor_js_1.fetchCatalogTree)(catalogName);
        const found = flatNodes.find((n) => n.relateDocument === objectId);
        if (found) {
            targetNodeName = found.nodeName;
        }
        else {
            console.error(`错误: 在 ${catalogName} 中找不到文档 ${objectId}`);
            process.exit(1);
        }
    }
    await (0, downloader_js_1.downloadSingle)(objectId, catalogName, targetNodeName, opts.output);
}
/**
 * Catalog mode: download each catalog given on the command line (optionally
 * restricted to one subtree), then run post-processing.
 *
 * @param opts - download options
 */
async function downloadSelectedCatalogs(opts) {
    const targetNodeName = opts.nodeName;
    const targetNodeId = opts.nodeId;
    // `opts.cpp === false` means the user passed --no-cpp.
    const noCpp = opts.cpp === false;
    // A dry run never schedules downloads, so the limit is only validated for real runs.
    const concurrency = parseConcurrencyOption(opts.concurrency, !opts.dryRun);
    const results = [];
    for (const cat of opts.catalog) {
        const topLevel = (0, config_js_1.findTopLevelForCatalog)(cat);
        const result = await (0, downloader_js_1.downloadSubtree)(cat, targetNodeId, targetNodeName, targetNodeId, {
            outputDir: opts.output,
            concurrency,
            leafOnly: opts.leafOnly,
            noCpp,
            dryRun: opts.dryRun,
            topLevel,
        });
        if (!opts.dryRun && result) {
            results.push({ catalogId: cat, result });
        }
    }
    await finalizeDownloads(results, opts, targetNodeName);
}
/**
 * Batch mode: download every catalog listed in docs_catalog.json, grouped by
 * top level, then run post-processing.
 *
 * @param opts - download options
 */
async function downloadConfiguredCatalogs(opts) {
    const catalogsWithTopLevel = (0, config_js_1.getAllCatalogsWithTopLevel)();
    if (catalogsWithTopLevel.length === 0) {
        console.error("错误: docs_catalog.json 中没有配置任何分类");
        process.exit(1);
    }
    // `opts.cpp === false` means the user passed --no-cpp.
    const noCpp = opts.cpp === false;
    // A dry run never schedules downloads, so the limit is only validated for real runs.
    const concurrency = parseConcurrencyOption(opts.concurrency, !opts.dryRun);
    const results = [];
    let currentTopLevel = "";
    for (const { topLevel, catalog } of catalogsWithTopLevel) {
        // Print a banner whenever the top-level group changes.
        if (topLevel !== currentTopLevel) {
            currentTopLevel = topLevel;
            console.log(`\n=== ${topLevel} ===`);
        }
        console.log(`\n开始处理: ${catalog.name} (${catalog.id})`);
        const result = await (0, downloader_js_1.downloadSubtree)(catalog.id, undefined, undefined, undefined, {
            outputDir: opts.output,
            concurrency,
            leafOnly: opts.leafOnly,
            noCpp,
            dryRun: opts.dryRun,
            topLevel,
        });
        if (!opts.dryRun && result) {
            results.push({ catalogId: catalog.id, result });
        }
    }
    await finalizeDownloads(results, opts, undefined);
}
/**
 * Entry point of the download command.
 *
 * Dispatches to URL mode (`opts.url`), catalog mode (`opts.catalog`) or batch
 * mode (neither). `url` and `catalog` are mutually exclusive. Any error is
 * printed and the process exits with status 1.
 *
 * @param opts - download options
 */
async function handleDownload(opts) {
    try {
        if (opts.url && opts.catalog) {
            console.error("错误: --url 和 --catalog 互斥,请只使用其中一个");
            process.exit(1);
        }
        if (opts.url) {
            await downloadFromUrl(opts);
        }
        else if (opts.catalog) {
            await downloadSelectedCatalogs(opts);
        }
        else {
            await downloadConfiguredCatalogs(opts);
        }
    }
    catch (err) {
        console.error("错误:", err instanceof Error ? err.message : String(err));
        process.exit(1);
    }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { OutputTask } from "../common/types.js";
|
|
2
|
+
/**
|
|
3
|
+
* 构建 "catalogName/relateDocument" → 本地文件路径的映射(带 catalogName)
|
|
4
|
+
*/
|
|
5
|
+
export declare function buildCatalogPathMap(catalogName: string, tasks: OutputTask[], map?: Map<string, string>): Map<string, string>;
|
|
6
|
+
/**
|
|
7
|
+
* 本地化单个 Markdown 文件中的链接
|
|
8
|
+
*/
|
|
9
|
+
export declare function localizeLinks(content: string, currentFilePath: string, pathMap: Map<string, string>): string;
|
|
10
|
+
/**
|
|
11
|
+
* 对已下载的 Markdown 文件执行链接本地化
|
|
12
|
+
*/
|
|
13
|
+
export declare function localizeAllLinks(tasks: OutputTask[], pathMap: Map<string, string>): Promise<void>;
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.buildCatalogPathMap = buildCatalogPathMap;
|
|
37
|
+
exports.localizeLinks = localizeLinks;
|
|
38
|
+
exports.localizeAllLinks = localizeAllLinks;
|
|
39
|
+
/**
|
|
40
|
+
* 链接本地化模块
|
|
41
|
+
*
|
|
42
|
+
* 将下载的 Markdown 中的华为文档绝对链接替换为本地相对路径
|
|
43
|
+
* 只替换 markdown 链接语法 [text](url) 中的 URL,保留 front-matter source 等元数据
|
|
44
|
+
* 支持跨 catalog 链接替换,支持 #锚点 保留
|
|
45
|
+
*/
|
|
46
|
+
const fs = __importStar(require("fs/promises"));
|
|
47
|
+
const path = __importStar(require("path"));
|
|
48
|
+
/**
|
|
49
|
+
* 匹配 markdown 链接中的华为文档 URL
|
|
50
|
+
* 只匹配 [text](url) 格式,不匹配 front-matter 中的裸 URL
|
|
51
|
+
*
|
|
52
|
+
* 捕获组:
|
|
53
|
+
* \1 = 链接文本
|
|
54
|
+
* \2 = catalogName(如 harmonyos-references)
|
|
55
|
+
* \3 = relateDocument(如 hiai-foundation-c)
|
|
56
|
+
* \4 = 可选的 #锚点(如 #获取应用文件路径)
|
|
57
|
+
*/
|
|
58
|
+
const MD_LINK_PATTERN = /\[([^\]]+)\]\(https:\/\/developer\.huawei\.com\/consumer\/cn\/doc\/([^/]+)\/([^)#]+)(#[^\)]+)?\)/g;
|
|
59
|
+
/**
 * Register every task of one catalog in a lookup table keyed by
 * "catalogName/relateDocument", with the task's local file path as value.
 *
 * @param catalogName - catalog the tasks belong to
 * @param tasks - output tasks; entries with a falsy `relateDocument` are skipped
 * @param map - table to fill; a fresh Map is created when omitted
 * @returns the same `map` instance that was filled
 */
function buildCatalogPathMap(catalogName, tasks, map = new Map()) {
    for (const { relateDocument, outputDir, fileName } of tasks) {
        if (relateDocument) {
            map.set(`${catalogName}/${relateDocument}`, path.join(outputDir, fileName));
        }
    }
    return map;
}
|
|
71
|
+
/**
 * Rewrite the Huawei documentation links of one Markdown document so they
 * point at local files.
 *
 * Only links matched by MD_LINK_PATTERN are considered. A link whose
 * "catalog/docId" key is absent from `pathMap` is left untouched. Rewritten
 * links use a forward-slash path relative to the current file's directory,
 * always starting with "." and keeping any "#anchor" suffix.
 *
 * @param content - Markdown text
 * @param currentFilePath - path of the file that contains `content`
 * @param pathMap - "catalog/docId" -> local file path
 * @returns the text with resolvable links replaced
 */
function localizeLinks(content, currentFilePath, pathMap) {
    const baseDir = path.dirname(currentFilePath);
    const rewrite = (whole, text, catalog, docId, anchor) => {
        const localTarget = pathMap.get(`${catalog}/${docId}`);
        if (!localTarget) {
            // No local counterpart: keep the original link.
            return whole;
        }
        // Normalise Windows separators to "/".
        const relative = path.relative(baseDir, localTarget).replace(/\\/g, "/");
        // Make sure the path starts with "./" unless it already starts with ".".
        const href = relative.startsWith(".") ? relative : "./" + relative;
        return `[${text}](${href}${anchor || ""})`;
    };
    return content.replace(MD_LINK_PATTERN, rewrite);
}
|
|
90
|
+
/**
 * Localize the links of every downloaded Markdown file in place.
 *
 * A file is rewritten only when localization changed its content. Files that
 * cannot be read or written are skipped silently. When at least one file was
 * changed, a one-line summary is printed.
 *
 * @param tasks - output tasks (reads `outputDir` and `fileName`)
 * @param pathMap - "catalog/docId" -> local file path
 */
async function localizeAllLinks(tasks, pathMap) {
    const countLinks = (text) => (text.match(MD_LINK_PATTERN) || []).length;
    const stats = { files: 0, links: 0 };
    for (const task of tasks) {
        const target = path.join(task.outputDir, task.fileName);
        try {
            const before = await fs.readFile(target, "utf-8");
            const after = localizeLinks(before, target, pathMap);
            if (after === before) {
                continue;
            }
            await fs.writeFile(target, after, "utf-8");
            stats.files += 1;
            // Replaced links = remote links before minus remote links still present.
            stats.links += countLinks(before) - countLinks(after);
        }
        catch {
            // Missing or unreadable file: skip it.
        }
    }
    if (stats.files > 0) {
        console.log(`链接本地化: ${stats.files} 个文件, ${stats.links} 个链接已替换`);
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function htmlToMarkdown(html: string): string;
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.htmlToMarkdown = htmlToMarkdown;
|
|
7
|
+
const turndown_1 = __importDefault(require("turndown"));
|
|
8
|
+
const turndown_rules_js_1 = require("./turndown-rules.js");
|
|
9
|
+
let instance = null;
|
|
10
|
+
/**
 * Return the shared Turndown converter, creating and configuring it on first
 * use (ATX headings, fenced code blocks, "-" bullets, plus the custom rules).
 */
function getTurndown() {
    if (instance) {
        return instance;
    }
    const settings = {
        headingStyle: "atx",
        codeBlockStyle: "fenced",
        bulletListMarker: "-",
    };
    instance = new turndown_1.default(settings);
    (0, turndown_rules_js_1.addCustomRules)(instance);
    return instance;
}
|
|
21
|
+
/**
 * Convert an HTML string to Markdown with the shared Turndown converter and
 * run the result through cleanupMarkdown.
 *
 * @param html - HTML source
 * @returns cleaned-up Markdown
 */
function htmlToMarkdown(html) {
    const converter = getTurndown();
    return cleanupMarkdown(converter.turndown(html));
}
|
|
25
|
+
/**
 * Post-process Markdown produced from the documentation HTML.
 *
 * In order: remap heading levels, undo escaping inside fenced code blocks,
 * drop stand-alone UI label lines, cut the page footer, cut trailing
 * navigation text, collapse runs of blank lines, and trim.
 *
 * @param md - raw Markdown
 * @returns cleaned Markdown
 */
function cleanupMarkdown(md) {
    let out = md;
    // Heading levels:
    //   "#### title"        -> "## title"
    //   "#### [hN]title"    -> "### title" (marker removed)
    // The converter escapes brackets as \[ \], so the escaped form is handled
    // first, then the unescaped form, then every remaining "####".
    out = out.replace(/^####\s+\\\[h\d+\\\]\s*(.+)$/gm, "### $1");
    out = out.replace(/^####\s+\[h\d+\]\s*(.+)$/gm, "### $1");
    out = out.replace(/^####\s+(.+)$/gm, "## $1");
    // Fix escaped characters inside fenced code blocks.
    out = out.replace(/```(\w*)\n?([\s\S]*?)```/g, (_match, lang, code) => {
        let fixed = code;
        // Park literal double backslashes so the unescape loop below cannot
        // mistake their second half for an escape prefix.
        fixed = fixed.replace(/\\\\/g, "\x00BS\x00");
        // Decode HTML entities left in code text. A function replacement is
        // used for "$" so it can never be read as a replacement pattern, and
        // "&amp;" goes last so "&amp;lt;" becomes "&lt;" rather than "<".
        // NOTE(review): entity spellings are inferred from the evident intent
        // of this step — confirm against the TypeScript source.
        fixed = fixed.replace(/&#36;/g, () => "$");
        fixed = fixed.replace(/&lt;/g, "<");
        fixed = fixed.replace(/&gt;/g, ">");
        fixed = fixed.replace(/&amp;/g, "&");
        // Unescape markdown special chars
        for (const ch of ["`", "*", "_", "{", "}", "[", "]", "(", ")", "#", "+", "-", ".", "!", "|", "~", ">", "<"]) {
            fixed = fixed.replaceAll("\\" + ch, ch);
        }
        // Restore the parked backslashes as a single backslash each.
        fixed = fixed.replace(/\x00BS\x00/g, "\\");
        return "```" + lang + "\n" + fixed + "```";
    });
    // Remove lines that consist solely of a UI label.
    for (const pattern of [
        /^展开$/gm, /^收起$/gm, /^复制$/gm, /^深色$/gm,
        /^自动换行$/gm, /^深色代码主题$/gm, /^代码主题$/gm, /^收起代码$/gm,
    ]) {
        out = out.replace(pattern, "");
    }
    // Footer removal. Patterns are tried in order and only the first one that
    // matches anywhere is acted on.
    const footerPatterns = [
        /华为开发者联盟 版权所有/,
        /版权所有/,
        /Copyright.* Huawei/,
        /\[使用条款\]\(https:\/\/developer\.huawei\.com\/consumer\/cn\/devservice\/use\)/,
    ];
    for (const pattern of footerPatterns) {
        const match = out.match(pattern);
        if (match && match.index !== undefined) {
            const matchIndex = match.index;
            const contentBeforeMatch = out.substring(0, matchIndex);
            const lines = contentBeforeMatch.split("\n");
            // Truncate only when more than 10 lines precede the match.
            if (lines.length > 10) {
                // Look at up to 5 lines starting at the match; keep those up to
                // and including the first blank line, or nothing if none is blank.
                const linesAfterMatch = out.substring(matchIndex).split("\n");
                let trimFrom = 0;
                for (let i = 0; i < Math.min(5, linesAfterMatch.length); i++) {
                    if (i > 0 && linesAfterMatch[i - 1].trim() === "") {
                        trimFrom = i;
                        break;
                    }
                }
                out = out.substring(0, matchIndex) + linesAfterMatch.slice(0, trimFrom).join("\n");
            }
            break;
        }
    }
    // Remove trailing navigation / ad text (everything from the marker to the end).
    out = out.replace(/\n\n在 指南 中进行搜索.*$/s, "");
    out = out.replace(/\n\n智能客服.*$/s, "");
    out = out.replace(/\n\n合作咨询.*$/s, "");
    out = out.replace(/\n\n下载APP.*$/s, "");
    // Collapse multiple blank lines
    out = out.replace(/\n{3,}/g, "\n\n");
    return out.trim();
}
|