lark-docx2md 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -16
- package/dist/cli.js +42 -493
- package/dist/cli.js.map +1 -1
- package/dist/converter-ByfbJV0N.js +2776 -0
- package/dist/converter-ByfbJV0N.js.map +1 -0
- package/dist/converter-RbBjQFTa.d.ts +37 -0
- package/dist/converter-RbBjQFTa.d.ts.map +1 -0
- package/dist/converter.js +2 -0
- package/package.json +2 -2
- /package/dist/{cli-CIsEcoQJ.d.ts → cli-Cq4v6lUa.d.ts} +0 -0
package/README.md
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
> 命令所需权限见下 ‘飞书自创应用需要的权限’
|
|
12
12
|
|
|
13
13
|
```bash
|
|
14
|
-
npx -y lark-docx2md download https://xxx.feishu.cn/wiki/xxx --app-id cli_xxx --app-secret xxxx
|
|
14
|
+
npx -y lark-docx2md@latest download https://xxx.feishu.cn/wiki/xxx --app-id cli_xxx --app-secret xxxx
|
|
15
15
|
```
|
|
16
16
|
|
|
17
17
|
或先设置环境变量(命令行参数可省略):
|
|
@@ -19,23 +19,37 @@ npx -y lark-docx2md download https://xxx.feishu.cn/wiki/xxx --app-id cli_xxx --a
|
|
|
19
19
|
```bash
|
|
20
20
|
export LARK_DOCX2MD_APP_ID=<APP_ID>
|
|
21
21
|
export LARK_DOCX2MD_APP_SECRET=<APP_SECRET>
|
|
22
|
-
npx -y lark-docx2md download <url>
|
|
22
|
+
npx -y lark-docx2md@latest download <url>
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
-
| 参数
|
|
26
|
-
|
|
27
|
-
| `<url>`
|
|
28
|
-
| `--app-id
|
|
29
|
-
| `--app-secret
|
|
30
|
-
| `-o, --output <dir>`
|
|
31
|
-
| `--agent`
|
|
32
|
-
| `--image-mode <mode>`
|
|
25
|
+
| 参数 | 说明 | 环境变量 | 默认值 |
|
|
26
|
+
|--------------------------|-------------------------------------------|------------------------------|-----------------------|
|
|
27
|
+
| `<url>` | 飞书文档链接(`https://*.feishu.cn/wiki/*`) | — | — |
|
|
28
|
+
| `--app-id <id>` | 飞书应用 App ID | `LARK_DOCX2MD_APP_ID` | — |
|
|
29
|
+
| `--app-secret <secret>` | 飞书应用 App Secret | `LARK_DOCX2MD_APP_SECRET` | — |
|
|
30
|
+
| `-o, --output <dir>` | 输出目录 | `LARK_DOCX2MD_OUTPUT` | `./larkDocx2mdOutput` |
|
|
31
|
+
| `--agent` | Agent 模式:日志 ERROR,Markdown 输出到 stdout | `LARK_DOCX2MD_AGENT=true` | `false` |
|
|
32
|
+
| `--image-mode <mode>` | 图片处理模式:`local`(下载到本地)或 `online`(24h 临时链接) | `LARK_DOCX2MD_IMAGE_MODE` | `local` |
|
|
33
|
+
| `--wb-format <format>` | 画板输出格式:`base64`、`inline-svg`、`svg`、`yaml` | `LARK_DOCX2MD_WB_FORMAT` | `svg` |
|
|
34
|
+
| `--wb-bg <style>` | 画板 SVG 背景:`none`、`dot` 或颜色值如 `#fff` | `LARK_DOCX2MD_WB_BG` | `none` |
|
|
35
|
+
| `--wb-image-mode <mode>` | 画板图片模式:`online`、`base64` 或 `local` | `LARK_DOCX2MD_WB_IMAGE_MODE` | `local` |
|
|
36
|
+
|
|
37
|
+
> **参数联动规则**
|
|
38
|
+
>
|
|
39
|
+
> - `--agent` 开启时:`--image-mode` 强制为 `online`,`--wb-image-mode` 强制为 `online`,`--wb-format` 默认为 `yaml`(仅允许
|
|
40
|
+
`inline-svg` / `yaml`)。
|
|
41
|
+
> - `--wb-format yaml` 时:`--wb-image-mode` 强制为 `online`。
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
**`--agent` 开启时,飞书文档转换后的内容会直接通过标准流输出**
|
|
33
45
|
|
|
34
46
|
## 功能
|
|
35
47
|
|
|
36
48
|
- 支持飞书 Wiki 文档下载
|
|
37
49
|
- 转换 20+ 种块类型
|
|
38
50
|
- 输出标准 Markdown 文件
|
|
51
|
+
- 支持飞书画板,输出格式:`base64`(data URI 内嵌)、`inline-svg`(SVG 标签内嵌)、`svg`(独立文件)、`yaml`(AI
|
|
52
|
+
友好结构化数据)。详见 [画板支持说明](./WHITEBOARD.md)
|
|
39
53
|
|
|
40
54
|
### 支持的内容块类型
|
|
41
55
|
|
|
@@ -71,7 +85,7 @@ npx -y lark-docx2md download <url>
|
|
|
71
85
|
| @用户 | 用户 ID |
|
|
72
86
|
| @文档 | `[标题](url)` |
|
|
73
87
|
|
|
74
|
-
>
|
|
88
|
+
> 未支持的块类型(如文件附件、视频、内嵌表格等)会被静默忽略。
|
|
75
89
|
|
|
76
90
|
## 开发
|
|
77
91
|
|
|
@@ -94,14 +108,19 @@ pnpm build
|
|
|
94
108
|
{
|
|
95
109
|
"scopes": {
|
|
96
110
|
"tenant": [
|
|
111
|
+
"base:app:read",
|
|
112
|
+
"bitable:app",
|
|
113
|
+
"bitable:app:readonly",
|
|
114
|
+
"board:whiteboard:node:read",
|
|
115
|
+
"contact:user.employee_id:readonly",
|
|
97
116
|
"docs:document.media:download",
|
|
117
|
+
"docx:document",
|
|
98
118
|
"docx:document:readonly",
|
|
99
|
-
"wiki:node:read"
|
|
119
|
+
"wiki:node:read",
|
|
120
|
+
"wiki:wiki",
|
|
121
|
+
"wiki:wiki:readonly"
|
|
100
122
|
],
|
|
101
|
-
"user": [
|
|
102
|
-
"docx:document:readonly",
|
|
103
|
-
"wiki:node:read"
|
|
104
|
-
]
|
|
123
|
+
"user": []
|
|
105
124
|
}
|
|
106
125
|
}
|
|
107
126
|
```
|
package/dist/cli.js
CHANGED
|
@@ -1,506 +1,55 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import { r as setLogLevel, t as convert } from "./converter-ByfbJV0N.js";
|
|
2
3
|
import { Command } from "commander";
|
|
3
|
-
import * as fs from "node:fs";
|
|
4
|
-
import * as path from "node:path";
|
|
5
|
-
import * as lark from "@larksuiteoapi/node-sdk";
|
|
6
4
|
import { LoggerLevel } from "@larksuiteoapi/node-sdk";
|
|
7
|
-
//#region src/client.ts
|
|
8
|
-
function createClient(appId, appSecret, loggerLevel = LoggerLevel.warn) {
|
|
9
|
-
const client = new lark.Client({
|
|
10
|
-
appId,
|
|
11
|
-
appSecret,
|
|
12
|
-
loggerLevel
|
|
13
|
-
});
|
|
14
|
-
async function call(name, fn) {
|
|
15
|
-
let res;
|
|
16
|
-
try {
|
|
17
|
-
res = await fn();
|
|
18
|
-
} catch (e) {
|
|
19
|
-
const error = e.response?.data?.error;
|
|
20
|
-
const code = e.response?.data?.code;
|
|
21
|
-
const msg = e.response?.data?.msg;
|
|
22
|
-
if (error) throw new Error(`${name} failed: [${code}] ${msg}: \n${JSON.stringify(error, null, 2)}`);
|
|
23
|
-
throw e;
|
|
24
|
-
}
|
|
25
|
-
if (res.code !== 0) throw new Error(`${name} failed: [${res.code}] ${res.msg}`);
|
|
26
|
-
return res.data;
|
|
27
|
-
}
|
|
28
|
-
async function getWikiNodeInfo(token) {
|
|
29
|
-
return (await call("getWikiNodeInfo", () => client.wiki.v2.space.getNode({ params: { token } }))).node;
|
|
30
|
-
}
|
|
31
|
-
async function getDocxDocument(docToken) {
|
|
32
|
-
const doc = (await call("getDocxDocument", () => client.docx.v1.document.get({ path: { document_id: docToken } }))).document;
|
|
33
|
-
return {
|
|
34
|
-
documentId: doc.document_id,
|
|
35
|
-
title: doc.title
|
|
36
|
-
};
|
|
37
|
-
}
|
|
38
|
-
async function getDocxBlocks(docToken) {
|
|
39
|
-
const blocks = [];
|
|
40
|
-
let pageToken;
|
|
41
|
-
for (;;) {
|
|
42
|
-
const data = await call("getDocxBlocks", () => client.docx.v1.documentBlock.list({
|
|
43
|
-
path: { document_id: docToken },
|
|
44
|
-
params: {
|
|
45
|
-
page_size: 500,
|
|
46
|
-
document_revision_id: -1,
|
|
47
|
-
page_token: pageToken
|
|
48
|
-
}
|
|
49
|
-
}));
|
|
50
|
-
if (data.items) blocks.push(...data.items);
|
|
51
|
-
if (!data.has_more) break;
|
|
52
|
-
pageToken = data.page_token;
|
|
53
|
-
}
|
|
54
|
-
return blocks;
|
|
55
|
-
}
|
|
56
|
-
/**
|
|
57
|
-
*
|
|
58
|
-
* @param fileTokens 一次最多可传递 5 个素材的 token
|
|
59
|
-
* @return {Record<string, string>} Record<token, downloadLink>
|
|
60
|
-
*/
|
|
61
|
-
async function batchGetTmpDownloadUrl(fileTokens) {
|
|
62
|
-
const list = (await call("batchGetTmpDownloadUrl", () => client.drive.v1.media.batchGetTmpDownloadUrl({ params: { file_tokens: fileTokens } }))).tmp_download_urls ?? [];
|
|
63
|
-
const result = {};
|
|
64
|
-
for (const { file_token, tmp_download_url } of list) result[file_token] = tmp_download_url;
|
|
65
|
-
return result;
|
|
66
|
-
}
|
|
67
|
-
async function downloadImage(imgToken, outDir) {
|
|
68
|
-
try {
|
|
69
|
-
const resp = await client.drive.v1.media.download({ path: { file_token: imgToken } });
|
|
70
|
-
fs.mkdirSync(outDir, { recursive: true });
|
|
71
|
-
const ext = (resp.headers?.["content-type"])?.includes("png") ? ".png" : ".jpg";
|
|
72
|
-
const filename = path.join(outDir, `${imgToken}${ext}`);
|
|
73
|
-
await resp.writeFile(filename);
|
|
74
|
-
return filename;
|
|
75
|
-
} catch (error) {
|
|
76
|
-
if ([
|
|
77
|
-
400,
|
|
78
|
-
401,
|
|
79
|
-
403
|
|
80
|
-
].includes(error.status)) throw new Error(`下载图片[${imgToken}]异常, 检查是否有接口 https://open.feishu.cn/document/server-docs/docs/drive-v1/media/download 的权限。`);
|
|
81
|
-
throw error;
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
return {
|
|
85
|
-
getWikiNodeInfo,
|
|
86
|
-
getDocxDocument,
|
|
87
|
-
getDocxBlocks,
|
|
88
|
-
downloadImage,
|
|
89
|
-
batchGetTmpDownloadUrl
|
|
90
|
-
};
|
|
91
|
-
}
|
|
92
|
-
//#endregion
|
|
93
|
-
//#region src/parser.ts
|
|
94
|
-
const BlockType = {
|
|
95
|
-
Page: 1,
|
|
96
|
-
Text: 2,
|
|
97
|
-
Heading1: 3,
|
|
98
|
-
Heading2: 4,
|
|
99
|
-
Heading3: 5,
|
|
100
|
-
Heading4: 6,
|
|
101
|
-
Heading5: 7,
|
|
102
|
-
Heading6: 8,
|
|
103
|
-
Heading7: 9,
|
|
104
|
-
Heading8: 10,
|
|
105
|
-
Heading9: 11,
|
|
106
|
-
Bullet: 12,
|
|
107
|
-
Ordered: 13,
|
|
108
|
-
Code: 14,
|
|
109
|
-
Quote: 15,
|
|
110
|
-
Equation: 16,
|
|
111
|
-
Todo: 17,
|
|
112
|
-
Callout: 19,
|
|
113
|
-
Divider: 22,
|
|
114
|
-
Grid: 24,
|
|
115
|
-
GridColumn: 25,
|
|
116
|
-
Image: 27,
|
|
117
|
-
Table: 31,
|
|
118
|
-
TableCell: 32,
|
|
119
|
-
QuoteContainer: 34
|
|
120
|
-
};
|
|
121
|
-
const codeLangMap = {
|
|
122
|
-
1: "",
|
|
123
|
-
2: "abap",
|
|
124
|
-
3: "ada",
|
|
125
|
-
4: "apache",
|
|
126
|
-
5: "apex",
|
|
127
|
-
6: "assembly",
|
|
128
|
-
7: "bash",
|
|
129
|
-
8: "csharp",
|
|
130
|
-
9: "cpp",
|
|
131
|
-
10: "c",
|
|
132
|
-
11: "cobol",
|
|
133
|
-
12: "css",
|
|
134
|
-
13: "coffeescript",
|
|
135
|
-
14: "d",
|
|
136
|
-
15: "dart",
|
|
137
|
-
16: "delphi",
|
|
138
|
-
17: "django",
|
|
139
|
-
18: "dockerfile",
|
|
140
|
-
19: "erlang",
|
|
141
|
-
20: "fortran",
|
|
142
|
-
21: "foxpro",
|
|
143
|
-
22: "go",
|
|
144
|
-
23: "groovy",
|
|
145
|
-
24: "html",
|
|
146
|
-
25: "htmlbars",
|
|
147
|
-
26: "http",
|
|
148
|
-
27: "haskell",
|
|
149
|
-
28: "json",
|
|
150
|
-
29: "java",
|
|
151
|
-
30: "javascript",
|
|
152
|
-
31: "julia",
|
|
153
|
-
32: "kotlin",
|
|
154
|
-
33: "latex",
|
|
155
|
-
34: "lisp",
|
|
156
|
-
35: "logo",
|
|
157
|
-
36: "lua",
|
|
158
|
-
37: "matlab",
|
|
159
|
-
38: "makefile",
|
|
160
|
-
39: "markdown",
|
|
161
|
-
40: "nginx",
|
|
162
|
-
41: "objectivec",
|
|
163
|
-
42: "openedge-abl",
|
|
164
|
-
43: "php",
|
|
165
|
-
44: "perl",
|
|
166
|
-
45: "postscript",
|
|
167
|
-
46: "powershell",
|
|
168
|
-
47: "prolog",
|
|
169
|
-
48: "protobuf",
|
|
170
|
-
49: "python",
|
|
171
|
-
50: "r",
|
|
172
|
-
51: "rpg",
|
|
173
|
-
52: "ruby",
|
|
174
|
-
53: "rust",
|
|
175
|
-
54: "sas",
|
|
176
|
-
55: "scss",
|
|
177
|
-
56: "sql",
|
|
178
|
-
57: "scala",
|
|
179
|
-
58: "scheme",
|
|
180
|
-
59: "scratch",
|
|
181
|
-
60: "shell",
|
|
182
|
-
61: "swift",
|
|
183
|
-
62: "thrift",
|
|
184
|
-
63: "typescript",
|
|
185
|
-
64: "vbscript",
|
|
186
|
-
65: "vbnet",
|
|
187
|
-
66: "xml",
|
|
188
|
-
67: "yaml"
|
|
189
|
-
};
|
|
190
|
-
var Parser = class {
|
|
191
|
-
constructor() {
|
|
192
|
-
this.imgTokens = [];
|
|
193
|
-
this.blockMap = /* @__PURE__ */ new Map();
|
|
194
|
-
}
|
|
195
|
-
parseDocxContent(doc, blocks) {
|
|
196
|
-
for (const b of blocks) if (b.block_id) this.blockMap.set(b.block_id, b);
|
|
197
|
-
const entry = this.blockMap.get(doc.documentId);
|
|
198
|
-
if (!entry) return "";
|
|
199
|
-
return this.parseBlock(entry, 0);
|
|
200
|
-
}
|
|
201
|
-
parseBlock(b, indent) {
|
|
202
|
-
const prefix = " ".repeat(indent);
|
|
203
|
-
const bt = b.block_type;
|
|
204
|
-
if (bt === BlockType.Page) return this.parsePage(b);
|
|
205
|
-
if (bt === BlockType.Text) return prefix + this.parseText(b.text) + "\n";
|
|
206
|
-
if (bt >= BlockType.Heading1 && bt <= BlockType.Heading9) return prefix + this.parseHeading(b, bt - 2);
|
|
207
|
-
if (bt === BlockType.Bullet) return prefix + this.parseBullet(b, indent);
|
|
208
|
-
if (bt === BlockType.Ordered) return prefix + this.parseOrdered(b, indent);
|
|
209
|
-
if (bt === BlockType.Code) return prefix + this.parseCode(b);
|
|
210
|
-
if (bt === BlockType.Quote) return prefix + "> " + this.parseText(b.quote) + "\n";
|
|
211
|
-
if (bt === BlockType.Equation) return prefix + "$$\n" + this.parseText(b.equation) + "$$\n\n";
|
|
212
|
-
if (bt === BlockType.Todo) return prefix + this.parseTodo(b);
|
|
213
|
-
if (bt === BlockType.Callout) return this.parseCallout(b);
|
|
214
|
-
if (bt === BlockType.Divider) return prefix + "---\n\n";
|
|
215
|
-
if (bt === BlockType.Image) return prefix + this.parseImage(b) + "\n";
|
|
216
|
-
if (bt === BlockType.Table) return prefix + this.parseTable(b);
|
|
217
|
-
if (bt === BlockType.TableCell) return this.parseTableCell(b);
|
|
218
|
-
if (bt === BlockType.QuoteContainer) return this.parseQuoteContainer(b);
|
|
219
|
-
if (bt === BlockType.Grid) return this.parseGrid(b, indent);
|
|
220
|
-
return "";
|
|
221
|
-
}
|
|
222
|
-
parsePage(b) {
|
|
223
|
-
let s = "# " + this.parseText(b.page) + "\n";
|
|
224
|
-
for (const id of b.children ?? []) {
|
|
225
|
-
const child = this.blockMap.get(id);
|
|
226
|
-
if (child) s += this.parseBlock(child, 0) + "\n";
|
|
227
|
-
}
|
|
228
|
-
return s;
|
|
229
|
-
}
|
|
230
|
-
parseText(body) {
|
|
231
|
-
const inline = body.elements.length > 1;
|
|
232
|
-
return body.elements.map((e) => this.parseElement(e, inline)).join("") + "\n";
|
|
233
|
-
}
|
|
234
|
-
parseElement(e, inline) {
|
|
235
|
-
if (e.text_run) return this.parseTextRun(e.text_run);
|
|
236
|
-
if (e.mention_user) return e.mention_user.user_id;
|
|
237
|
-
if (e.mention_doc) {
|
|
238
|
-
const url = e.mention_doc.url ? decodeURIComponent(e.mention_doc.url) : "";
|
|
239
|
-
return `[${e.mention_doc.title ?? ""}](${url})`;
|
|
240
|
-
}
|
|
241
|
-
if (e.equation) {
|
|
242
|
-
const sym = inline ? "$" : "$$";
|
|
243
|
-
return sym + e.equation.content.replace(/\n$/, "") + sym;
|
|
244
|
-
}
|
|
245
|
-
return "";
|
|
246
|
-
}
|
|
247
|
-
parseTextRun(tr) {
|
|
248
|
-
const s = tr.text_element_style;
|
|
249
|
-
let pre = "", post = "";
|
|
250
|
-
if (s) {
|
|
251
|
-
if (s.bold) {
|
|
252
|
-
pre = "**";
|
|
253
|
-
post = "**";
|
|
254
|
-
} else if (s.italic) {
|
|
255
|
-
pre = "_";
|
|
256
|
-
post = "_";
|
|
257
|
-
} else if (s.strikethrough) {
|
|
258
|
-
pre = "~~";
|
|
259
|
-
post = "~~";
|
|
260
|
-
} else if (s.underline) {
|
|
261
|
-
pre = "<u>";
|
|
262
|
-
post = "</u>";
|
|
263
|
-
} else if (s.inline_code) {
|
|
264
|
-
pre = "`";
|
|
265
|
-
post = "`";
|
|
266
|
-
} else if (s.link) {
|
|
267
|
-
pre = "[";
|
|
268
|
-
post = `](${decodeURIComponent(s.link.url)})`;
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
return pre + tr.content + post;
|
|
272
|
-
}
|
|
273
|
-
parseHeading(b, level) {
|
|
274
|
-
const body = b[`heading${level}`];
|
|
275
|
-
let s = "#".repeat(level) + " " + (body ? this.parseText(body) : "\n");
|
|
276
|
-
for (const id of b.children ?? []) {
|
|
277
|
-
const child = this.blockMap.get(id);
|
|
278
|
-
if (child) s += this.parseBlock(child, 0);
|
|
279
|
-
}
|
|
280
|
-
return s;
|
|
281
|
-
}
|
|
282
|
-
parseBullet(b, indent) {
|
|
283
|
-
let s = "- " + this.parseText(b.bullet);
|
|
284
|
-
for (const id of b.children ?? []) {
|
|
285
|
-
const child = this.blockMap.get(id);
|
|
286
|
-
if (child) s += this.parseBlock(child, indent + 1);
|
|
287
|
-
}
|
|
288
|
-
return s;
|
|
289
|
-
}
|
|
290
|
-
parseOrdered(b, indent) {
|
|
291
|
-
const parent = this.blockMap.get(b.parent_id);
|
|
292
|
-
let order = 1;
|
|
293
|
-
if (parent?.children) {
|
|
294
|
-
const idx = parent.children.indexOf(b.block_id);
|
|
295
|
-
for (let i = idx - 1; i >= 0; i--) if (this.blockMap.get(parent.children[i])?.block_type === BlockType.Ordered) order++;
|
|
296
|
-
else break;
|
|
297
|
-
}
|
|
298
|
-
let s = `${order}. ` + this.parseText(b.ordered);
|
|
299
|
-
for (const id of b.children ?? []) {
|
|
300
|
-
const child = this.blockMap.get(id);
|
|
301
|
-
if (child) s += this.parseBlock(child, indent + 1);
|
|
302
|
-
}
|
|
303
|
-
return s;
|
|
304
|
-
}
|
|
305
|
-
parseCode(b) {
|
|
306
|
-
const lang = codeLangMap[b.code?.style?.language ?? 1] ?? "";
|
|
307
|
-
const text = this.parseText(b.code).trim();
|
|
308
|
-
return "```" + lang + "\n" + text + "\n```\n";
|
|
309
|
-
}
|
|
310
|
-
parseTodo(b) {
|
|
311
|
-
return `- [${b.todo?.style?.done ? "x" : " "}] ` + this.parseText(b.todo) + "\n";
|
|
312
|
-
}
|
|
313
|
-
parseCallout(b) {
|
|
314
|
-
let s = ">[!TIP] \n";
|
|
315
|
-
for (const id of b.children ?? []) {
|
|
316
|
-
const child = this.blockMap.get(id);
|
|
317
|
-
if (child) s += this.parseBlock(child, 0);
|
|
318
|
-
}
|
|
319
|
-
return s;
|
|
320
|
-
}
|
|
321
|
-
parseImage(b) {
|
|
322
|
-
const token = b.image?.token;
|
|
323
|
-
if (token) {
|
|
324
|
-
this.imgTokens.push(token);
|
|
325
|
-
return `\n`;
|
|
326
|
-
}
|
|
327
|
-
return "";
|
|
328
|
-
}
|
|
329
|
-
parseTableCell(b) {
|
|
330
|
-
let s = "";
|
|
331
|
-
for (const id of b.children ?? []) {
|
|
332
|
-
const child = this.blockMap.get(id);
|
|
333
|
-
if (child) s += this.parseBlock(child, 0).replace(/\n/g, "") + "<br/>";
|
|
334
|
-
}
|
|
335
|
-
return s;
|
|
336
|
-
}
|
|
337
|
-
parseTable(b) {
|
|
338
|
-
const t = b.table;
|
|
339
|
-
const cols = t.property.column_size;
|
|
340
|
-
const rows = [];
|
|
341
|
-
const mergeInfos = t.property.merge_info ?? [];
|
|
342
|
-
for (let i = 0; i < (t.cells?.length ?? 0); i++) {
|
|
343
|
-
const cellId = t.cells[i];
|
|
344
|
-
const cell = this.blockMap.get(cellId);
|
|
345
|
-
const content = cell ? this.parseBlock(cell, 0).replace(/\n/g, "") : "";
|
|
346
|
-
const row = Math.floor(i / cols);
|
|
347
|
-
const col = i % cols;
|
|
348
|
-
if (!rows[row]) rows[row] = [];
|
|
349
|
-
rows[row][col] = content;
|
|
350
|
-
}
|
|
351
|
-
const mergeMap = /* @__PURE__ */ new Map();
|
|
352
|
-
for (let i = 0; i < mergeInfos.length; i++) {
|
|
353
|
-
const m = mergeInfos[i];
|
|
354
|
-
const row = Math.floor(i / cols);
|
|
355
|
-
const col = i % cols;
|
|
356
|
-
mergeMap.set(`${row}-${col}`, {
|
|
357
|
-
rowSpan: m.row_span ?? 1,
|
|
358
|
-
colSpan: m.col_span ?? 1
|
|
359
|
-
});
|
|
360
|
-
}
|
|
361
|
-
const processed = /* @__PURE__ */ new Set();
|
|
362
|
-
let buf = "<table>\n";
|
|
363
|
-
for (let r = 0; r < rows.length; r++) {
|
|
364
|
-
buf += "<tr>\n";
|
|
365
|
-
for (let c = 0; c < (rows[r]?.length ?? 0); c++) {
|
|
366
|
-
const key = `${r}-${c}`;
|
|
367
|
-
if (processed.has(key)) continue;
|
|
368
|
-
const merge = mergeMap.get(key);
|
|
369
|
-
let attrs = "";
|
|
370
|
-
if (merge) {
|
|
371
|
-
if (merge.rowSpan > 1) attrs += ` rowspan="${merge.rowSpan}"`;
|
|
372
|
-
if (merge.colSpan > 1) attrs += ` colspan="${merge.colSpan}"`;
|
|
373
|
-
for (let mr = r; mr < r + merge.rowSpan; mr++) for (let mc = c; mc < c + merge.colSpan; mc++) processed.add(`${mr}-${mc}`);
|
|
374
|
-
}
|
|
375
|
-
buf += `<td${attrs}>${rows[r][c] ?? ""}</td>`;
|
|
376
|
-
}
|
|
377
|
-
buf += "</tr>\n";
|
|
378
|
-
}
|
|
379
|
-
buf += "</table>\n";
|
|
380
|
-
return buf;
|
|
381
|
-
}
|
|
382
|
-
parseQuoteContainer(b) {
|
|
383
|
-
let s = "";
|
|
384
|
-
for (const id of b.children ?? []) {
|
|
385
|
-
const child = this.blockMap.get(id);
|
|
386
|
-
if (child) s += "> " + this.parseBlock(child, 0);
|
|
387
|
-
}
|
|
388
|
-
return s;
|
|
389
|
-
}
|
|
390
|
-
parseGrid(b, indent) {
|
|
391
|
-
let s = "";
|
|
392
|
-
for (const colId of b.children ?? []) {
|
|
393
|
-
const col = this.blockMap.get(colId);
|
|
394
|
-
if (!col) continue;
|
|
395
|
-
for (const id of col.children ?? []) {
|
|
396
|
-
const child = this.blockMap.get(id);
|
|
397
|
-
if (child) s += this.parseBlock(child, indent);
|
|
398
|
-
}
|
|
399
|
-
}
|
|
400
|
-
return s;
|
|
401
|
-
}
|
|
402
|
-
};
|
|
403
|
-
//#endregion
|
|
404
|
-
//#region src/logger.ts
|
|
405
|
-
const COLORS = {
|
|
406
|
-
[LoggerLevel.fatal]: "\x1B[35m",
|
|
407
|
-
[LoggerLevel.error]: "\x1B[31m",
|
|
408
|
-
[LoggerLevel.warn]: "\x1B[33m",
|
|
409
|
-
[LoggerLevel.info]: "\x1B[36m",
|
|
410
|
-
[LoggerLevel.debug]: "\x1B[32m",
|
|
411
|
-
[LoggerLevel.trace]: "\x1B[90m"
|
|
412
|
-
};
|
|
413
|
-
const RESET = "\x1B[0m";
|
|
414
|
-
const LEVEL_NAMES = [
|
|
415
|
-
"FATAL",
|
|
416
|
-
"ERROR",
|
|
417
|
-
"WARN",
|
|
418
|
-
"INFO",
|
|
419
|
-
"DEBUG",
|
|
420
|
-
"TRACE"
|
|
421
|
-
];
|
|
422
|
-
let minLogLevel = LoggerLevel.trace;
|
|
423
|
-
function log(level, module, ...args) {
|
|
424
|
-
if (level > minLogLevel) return;
|
|
425
|
-
const time = (/* @__PURE__ */ new Date()).toISOString();
|
|
426
|
-
const color = COLORS[level] ?? "";
|
|
427
|
-
const name = LEVEL_NAMES[level] ?? "INFO";
|
|
428
|
-
process.stderr.write(`${time} ${color}[${name}]${RESET} [${module}] ${args.map(String).join(" ")}\n`);
|
|
429
|
-
}
|
|
430
|
-
function setLogLevel(level) {
|
|
431
|
-
minLogLevel = level;
|
|
432
|
-
}
|
|
433
|
-
function createLogger(module) {
|
|
434
|
-
return {
|
|
435
|
-
fatal: (...args) => log(LoggerLevel.fatal, module, ...args),
|
|
436
|
-
error: (...args) => log(LoggerLevel.error, module, ...args),
|
|
437
|
-
warn: (...args) => log(LoggerLevel.warn, module, ...args),
|
|
438
|
-
info: (...args) => log(LoggerLevel.info, module, ...args),
|
|
439
|
-
debug: (...args) => log(LoggerLevel.debug, module, ...args),
|
|
440
|
-
trace: (...args) => log(LoggerLevel.trace, module, ...args)
|
|
441
|
-
};
|
|
442
|
-
}
|
|
443
|
-
//#endregion
|
|
444
5
|
//#region src/cli.ts
|
|
445
|
-
const logger = createLogger("cli");
|
|
446
|
-
function parseWikiUrl(url) {
|
|
447
|
-
const m = url.match(/^https:\/\/[\w.-]+\/(docs|docx|wiki)\/([a-zA-Z0-9]+)/);
|
|
448
|
-
if (!m) throw new Error("Invalid feishu document URL");
|
|
449
|
-
return {
|
|
450
|
-
docType: m[1],
|
|
451
|
-
docToken: m[2]
|
|
452
|
-
};
|
|
453
|
-
}
|
|
454
6
|
const program = new Command();
|
|
455
7
|
program.name("larkDocx2md").description("Download Lark/Feishu documents to markdown");
|
|
456
|
-
program.command("download").description("Download a wiki document to markdown").option("--app-id <id>", "Feishu app ID (or read from LARK_DOCX2MD_APP_ID)").option("--app-secret <secret>", "Feishu app secret (or read from LARK_DOCX2MD_APP_SECRET)").option("-o, --output <dir>", "Output directory
|
|
8
|
+
program.command("download").description("Download a wiki document to markdown").option("--app-id <id>", "Feishu app ID (or read from LARK_DOCX2MD_APP_ID)").option("--app-secret <secret>", "Feishu app secret (or read from LARK_DOCX2MD_APP_SECRET)").option("-o, --output <dir>", "Output directory (or LARK_DOCX2MD_OUTPUT)").option("--agent", "Enable agent mode: ERROR log level, and AI prompt output (or LARK_DOCX2MD_AGENT=true)").option("--wb-format <format>", "Whiteboard output format: \"base64\", \"inline-svg\", \"svg\", or \"yaml\" (or LARK_DOCX2MD_WB_FORMAT)").option("--wb-bg <style>", "Whiteboard SVG background: \"none\", \"dot\", or a color like \"#fff\" (or LARK_DOCX2MD_WB_BG)").option("--wb-image-mode <mode>", "Whiteboard image mode: \"online\", \"base64\", or \"local\" (or LARK_DOCX2MD_WB_IMAGE_MODE)").option("--image-mode <mode>", "Image handling mode: \"local\" or \"online\" (or LARK_DOCX2MD_IMAGE_MODE)").argument("<url>", "Feishu wiki document URL: https://*.feishu.cn/wiki/*").action(async (url, opts) => {
|
|
9
|
+
opts.appId = opts.appId ?? process.env.LARK_DOCX2MD_APP_ID;
|
|
10
|
+
opts.appSecret = opts.appSecret ?? process.env.LARK_DOCX2MD_APP_SECRET;
|
|
11
|
+
opts.output = opts.output ?? process.env.LARK_DOCX2MD_OUTPUT ?? "./larkDocx2mdOutput";
|
|
12
|
+
opts.agent = opts.agent ?? process.env.LARK_DOCX2MD_AGENT === "true";
|
|
13
|
+
opts.imageMode = opts.imageMode ?? process.env.LARK_DOCX2MD_IMAGE_MODE ?? "local";
|
|
14
|
+
opts.wbFormat = opts.wbFormat ?? process.env.LARK_DOCX2MD_WB_FORMAT;
|
|
15
|
+
opts.wbBg = opts.wbBg ?? process.env.LARK_DOCX2MD_WB_BG ?? "none";
|
|
16
|
+
opts.wbImageMode = opts.wbImageMode ?? process.env.LARK_DOCX2MD_WB_IMAGE_MODE ?? "local";
|
|
17
|
+
if (!opts.wbFormat) opts.wbFormat = opts.agent ? "yaml" : "svg";
|
|
457
18
|
if (opts.agent) {
|
|
458
19
|
setLogLevel(LoggerLevel.error);
|
|
459
20
|
opts.imageMode = "online";
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
21
|
+
opts.wbImageMode = "online";
|
|
22
|
+
if (!["inline-svg", "yaml"].includes(opts.wbFormat)) program.error(`Agent mode only supports "inline-svg" or "yaml" for --wb-format`);
|
|
23
|
+
opts.wbImageMode = "online";
|
|
24
|
+
}
|
|
25
|
+
if (opts.imageMode && !["local", "online"].includes(opts.imageMode)) program.error(`Invalid --image-mode "${opts.imageMode}", must be "local" or "online"`);
|
|
26
|
+
if (![
|
|
27
|
+
"base64",
|
|
28
|
+
"inline-svg",
|
|
29
|
+
"svg",
|
|
30
|
+
"yaml"
|
|
31
|
+
].includes(opts.wbFormat)) program.error(`Invalid --wb-format "${opts.wbFormat}", must be "base64", "inline-svg", "svg", or "yaml"`);
|
|
32
|
+
if (opts.wbFormat === "yaml") opts.wbImageMode = "online";
|
|
33
|
+
if (![
|
|
34
|
+
"online",
|
|
35
|
+
"base64",
|
|
36
|
+
"local"
|
|
37
|
+
].includes(opts.wbImageMode)) program.error(`Invalid --wb-image-mode "${opts.wbImageMode}", must be "online", "base64", or "local"`);
|
|
38
|
+
const appId = opts.appId;
|
|
39
|
+
const appSecret = opts.appSecret;
|
|
465
40
|
if (!appId || !appSecret) program.error("Missing credentials: pass --app-id/--app-secret or set LARK_DOCX2MD_APP_ID/LARK_DOCX2MD_APP_SECRET");
|
|
466
|
-
const
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
if (opts.
|
|
478
|
-
const batch = parser.imgTokens.slice(i, i + 5);
|
|
479
|
-
const urlMap = await client.batchGetTmpDownloadUrl(batch);
|
|
480
|
-
for (const token of batch) {
|
|
481
|
-
const onlineUrl = urlMap[token];
|
|
482
|
-
if (onlineUrl) {
|
|
483
|
-
markdown = markdown.replace(`(${token})`, `(${onlineUrl})`);
|
|
484
|
-
logger.info("Replaced image with online URL:", token);
|
|
485
|
-
}
|
|
486
|
-
}
|
|
487
|
-
}
|
|
488
|
-
else {
|
|
489
|
-
const imgDir = path.join(opts.output, "static");
|
|
490
|
-
for (const imgToken of parser.imgTokens) {
|
|
491
|
-
let localPath = await client.downloadImage(imgToken, imgDir);
|
|
492
|
-
localPath = path.relative(opts.output, localPath);
|
|
493
|
-
markdown = markdown.replace(`(${imgToken})`, `(${localPath})`);
|
|
494
|
-
logger.info("Downloaded image:", localPath);
|
|
495
|
-
}
|
|
496
|
-
}
|
|
497
|
-
if (opts.agent) process.stdout.write(markdown);
|
|
498
|
-
else {
|
|
499
|
-
fs.mkdirSync(opts.output, { recursive: true });
|
|
500
|
-
const mdPath = path.join(opts.output, `${docToken}.md`);
|
|
501
|
-
fs.writeFileSync(mdPath, markdown);
|
|
502
|
-
logger.info("Downloaded markdown file to", mdPath);
|
|
503
|
-
}
|
|
41
|
+
const result = await convert({
|
|
42
|
+
appId,
|
|
43
|
+
appSecret,
|
|
44
|
+
url,
|
|
45
|
+
output: opts.output,
|
|
46
|
+
imageMode: opts.imageMode,
|
|
47
|
+
wbImageMode: opts.wbImageMode,
|
|
48
|
+
wbBg: opts.wbBg,
|
|
49
|
+
wbFormat: opts.wbFormat,
|
|
50
|
+
agent: opts.agent
|
|
51
|
+
});
|
|
52
|
+
if (opts.agent) process.stdout.write(result.markdown);
|
|
504
53
|
});
|
|
505
54
|
program.parse();
|
|
506
55
|
//#endregion
|