lark-docx2md 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,7 @@
11
11
  > 命令所需权限见下 ‘飞书自创应用需要的权限’
12
12
 
13
13
  ```bash
14
- npx -y lark-docx2md download https://xxx.feishu.cn/wiki/xxx --app-id cli_xxx --app-secret xxxx
14
+ npx -y lark-docx2md@latest download https://xxx.feishu.cn/wiki/xxx --app-id cli_xxx --app-secret xxxx
15
15
  ```
16
16
 
17
17
  或先设置环境变量(命令行参数可省略):
@@ -19,23 +19,37 @@ npx -y lark-docx2md download https://xxx.feishu.cn/wiki/xxx --app-id cli_xxx --a
19
19
  ```bash
20
20
  export LARK_DOCX2MD_APP_ID=<APP_ID>
21
21
  export LARK_DOCX2MD_APP_SECRET=<APP_SECRET>
22
- npx -y lark-docx2md download <url>
22
+ npx -y lark-docx2md@latest download <url>
23
23
  ```
24
24
 
25
- | 参数 | 说明 |
26
- |-----------------------|-----------------------------------------------------------|
27
- | `<url>` | 飞书文档链接 |
28
- | `--app-id` | 飞书应用 App ID(可选;默认读取 `LARK_DOCX2MD_APP_ID`) |
29
- | `--app-secret` | 飞书应用 App Secret(可选;默认读取 `LARK_DOCX2MD_APP_SECRET`) |
30
- | `-o, --output <dir>` | 输出目录(默认:`./larkDocx2mdOutput`) |
31
- | `--agent` | Agent 模式:日志等级为 ERROR,image-mod `online`,Markdown 输出到标准流 |
32
- | `--image-mode <mode>` | 图片处理模式:`local`(下载到本地,默认)或 `online`(使用 24h 临时在线链接) |
25
+ | 参数 | 说明 | 环境变量 | 默认值 |
26
+ |--------------------------|-------------------------------------------|------------------------------|-----------------------|
27
+ | `<url>` | 飞书文档链接(`https://*.feishu.cn/wiki/*`) | — | — |
28
+ | `--app-id <id>` | 飞书应用 App ID | `LARK_DOCX2MD_APP_ID` | — |
29
+ | `--app-secret <secret>` | 飞书应用 App Secret | `LARK_DOCX2MD_APP_SECRET` | — |
30
+ | `-o, --output <dir>` | 输出目录 | `LARK_DOCX2MD_OUTPUT` | `./larkDocx2mdOutput` |
31
+ | `--agent` | Agent 模式:日志 ERROR,Markdown 输出到 stdout | `LARK_DOCX2MD_AGENT=true` | `false` |
32
+ | `--image-mode <mode>` | 图片处理模式:`local`(下载到本地)或 `online`(24h 临时链接) | `LARK_DOCX2MD_IMAGE_MODE` | `local` |
33
+ | `--wb-format <format>` | 画板输出格式:`base64`、`inline-svg`、`svg`、`yaml` | `LARK_DOCX2MD_WB_FORMAT` | `svg` |
34
+ | `--wb-bg <style>` | 画板 SVG 背景:`none`、`dot` 或颜色值如 `#fff` | `LARK_DOCX2MD_WB_BG` | `none` |
35
+ | `--wb-image-mode <mode>` | 画板图片模式:`online`、`base64` 或 `local` | `LARK_DOCX2MD_WB_IMAGE_MODE` | `local` |
36
+
37
+ > **参数联动规则**
38
+ >
39
+ > - `--agent` 开启时:`--image-mode` 强制为 `online`,`--wb-image-mode` 强制为 `online`,`--wb-format` 默认为 `yaml`(仅允许
40
+ `inline-svg` / `yaml`)。
41
+ > - `--wb-format yaml` 时:`--wb-image-mode` 强制为 `online`。
42
+
43
+
44
+ **`--agent` 开启时,飞书文档转换后的 Markdown 内容会直接写入标准输出(stdout)**
33
45
 
34
46
  ## 功能
35
47
 
36
48
  - 支持飞书 Wiki 文档下载
37
49
  - 转换 20+ 种块类型
38
50
  - 输出标准 Markdown 文件
51
+ - 支持飞书画板,输出格式:`base64`(data URI 内嵌)、`inline-svg`(SVG 标签内嵌)、`svg`(独立文件)、`yaml`(AI
52
+ 友好结构化数据)。详见 [画板支持说明](./WHITEBOARD.md)
39
53
 
40
54
  ### 支持的内容块类型
41
55
 
@@ -71,7 +85,7 @@ npx -y lark-docx2md download <url>
71
85
  | @用户 | 用户 ID |
72
86
  | @文档 | `[标题](url)` |
73
87
 
74
- > 未支持的块类型(如文件附件、视频、内嵌表格、画板等)会被静默忽略。
88
+ > 未支持的块类型(如文件附件、视频、内嵌表格等)会被静默忽略。
75
89
 
76
90
  ## 开发
77
91
 
@@ -94,14 +108,19 @@ pnpm build
94
108
  {
95
109
  "scopes": {
96
110
  "tenant": [
111
+ "base:app:read",
112
+ "bitable:app",
113
+ "bitable:app:readonly",
114
+ "board:whiteboard:node:read",
115
+ "contact:user.employee_id:readonly",
97
116
  "docs:document.media:download",
117
+ "docx:document",
98
118
  "docx:document:readonly",
99
- "wiki:node:read"
119
+ "wiki:node:read",
120
+ "wiki:wiki",
121
+ "wiki:wiki:readonly"
100
122
  ],
101
- "user": [
102
- "docx:document:readonly",
103
- "wiki:node:read"
104
- ]
123
+ "user": []
105
124
  }
106
125
  }
107
126
  ```
package/dist/cli.js CHANGED
@@ -1,506 +1,55 @@
1
1
  #!/usr/bin/env node
2
+ import { r as setLogLevel, t as convert } from "./converter-ByfbJV0N.js";
2
3
  import { Command } from "commander";
3
- import * as fs from "node:fs";
4
- import * as path from "node:path";
5
- import * as lark from "@larksuiteoapi/node-sdk";
6
4
  import { LoggerLevel } from "@larksuiteoapi/node-sdk";
7
- //#region src/client.ts
8
- function createClient(appId, appSecret, loggerLevel = LoggerLevel.warn) {
9
- const client = new lark.Client({
10
- appId,
11
- appSecret,
12
- loggerLevel
13
- });
14
- async function call(name, fn) {
15
- let res;
16
- try {
17
- res = await fn();
18
- } catch (e) {
19
- const error = e.response?.data?.error;
20
- const code = e.response?.data?.code;
21
- const msg = e.response?.data?.msg;
22
- if (error) throw new Error(`${name} failed: [${code}] ${msg}: \n${JSON.stringify(error, null, 2)}`);
23
- throw e;
24
- }
25
- if (res.code !== 0) throw new Error(`${name} failed: [${res.code}] ${res.msg}`);
26
- return res.data;
27
- }
28
- async function getWikiNodeInfo(token) {
29
- return (await call("getWikiNodeInfo", () => client.wiki.v2.space.getNode({ params: { token } }))).node;
30
- }
31
- async function getDocxDocument(docToken) {
32
- const doc = (await call("getDocxDocument", () => client.docx.v1.document.get({ path: { document_id: docToken } }))).document;
33
- return {
34
- documentId: doc.document_id,
35
- title: doc.title
36
- };
37
- }
38
- async function getDocxBlocks(docToken) {
39
- const blocks = [];
40
- let pageToken;
41
- for (;;) {
42
- const data = await call("getDocxBlocks", () => client.docx.v1.documentBlock.list({
43
- path: { document_id: docToken },
44
- params: {
45
- page_size: 500,
46
- document_revision_id: -1,
47
- page_token: pageToken
48
- }
49
- }));
50
- if (data.items) blocks.push(...data.items);
51
- if (!data.has_more) break;
52
- pageToken = data.page_token;
53
- }
54
- return blocks;
55
- }
56
- /**
57
- *
58
- * @param fileTokens 一次最多可传递 5 个素材的 token
59
- * @return {Record<string, string>} Record<token, downloadLink>
60
- */
61
- async function batchGetTmpDownloadUrl(fileTokens) {
62
- const list = (await call("batchGetTmpDownloadUrl", () => client.drive.v1.media.batchGetTmpDownloadUrl({ params: { file_tokens: fileTokens } }))).tmp_download_urls ?? [];
63
- const result = {};
64
- for (const { file_token, tmp_download_url } of list) result[file_token] = tmp_download_url;
65
- return result;
66
- }
67
- async function downloadImage(imgToken, outDir) {
68
- try {
69
- const resp = await client.drive.v1.media.download({ path: { file_token: imgToken } });
70
- fs.mkdirSync(outDir, { recursive: true });
71
- const ext = (resp.headers?.["content-type"])?.includes("png") ? ".png" : ".jpg";
72
- const filename = path.join(outDir, `${imgToken}${ext}`);
73
- await resp.writeFile(filename);
74
- return filename;
75
- } catch (error) {
76
- if ([
77
- 400,
78
- 401,
79
- 403
80
- ].includes(error.status)) throw new Error(`下载图片[${imgToken}]异常, 检查是否有接口 https://open.feishu.cn/document/server-docs/docs/drive-v1/media/download 的权限。`);
81
- throw error;
82
- }
83
- }
84
- return {
85
- getWikiNodeInfo,
86
- getDocxDocument,
87
- getDocxBlocks,
88
- downloadImage,
89
- batchGetTmpDownloadUrl
90
- };
91
- }
92
- //#endregion
93
- //#region src/parser.ts
94
- const BlockType = {
95
- Page: 1,
96
- Text: 2,
97
- Heading1: 3,
98
- Heading2: 4,
99
- Heading3: 5,
100
- Heading4: 6,
101
- Heading5: 7,
102
- Heading6: 8,
103
- Heading7: 9,
104
- Heading8: 10,
105
- Heading9: 11,
106
- Bullet: 12,
107
- Ordered: 13,
108
- Code: 14,
109
- Quote: 15,
110
- Equation: 16,
111
- Todo: 17,
112
- Callout: 19,
113
- Divider: 22,
114
- Grid: 24,
115
- GridColumn: 25,
116
- Image: 27,
117
- Table: 31,
118
- TableCell: 32,
119
- QuoteContainer: 34
120
- };
121
- const codeLangMap = {
122
- 1: "",
123
- 2: "abap",
124
- 3: "ada",
125
- 4: "apache",
126
- 5: "apex",
127
- 6: "assembly",
128
- 7: "bash",
129
- 8: "csharp",
130
- 9: "cpp",
131
- 10: "c",
132
- 11: "cobol",
133
- 12: "css",
134
- 13: "coffeescript",
135
- 14: "d",
136
- 15: "dart",
137
- 16: "delphi",
138
- 17: "django",
139
- 18: "dockerfile",
140
- 19: "erlang",
141
- 20: "fortran",
142
- 21: "foxpro",
143
- 22: "go",
144
- 23: "groovy",
145
- 24: "html",
146
- 25: "htmlbars",
147
- 26: "http",
148
- 27: "haskell",
149
- 28: "json",
150
- 29: "java",
151
- 30: "javascript",
152
- 31: "julia",
153
- 32: "kotlin",
154
- 33: "latex",
155
- 34: "lisp",
156
- 35: "logo",
157
- 36: "lua",
158
- 37: "matlab",
159
- 38: "makefile",
160
- 39: "markdown",
161
- 40: "nginx",
162
- 41: "objectivec",
163
- 42: "openedge-abl",
164
- 43: "php",
165
- 44: "perl",
166
- 45: "postscript",
167
- 46: "powershell",
168
- 47: "prolog",
169
- 48: "protobuf",
170
- 49: "python",
171
- 50: "r",
172
- 51: "rpg",
173
- 52: "ruby",
174
- 53: "rust",
175
- 54: "sas",
176
- 55: "scss",
177
- 56: "sql",
178
- 57: "scala",
179
- 58: "scheme",
180
- 59: "scratch",
181
- 60: "shell",
182
- 61: "swift",
183
- 62: "thrift",
184
- 63: "typescript",
185
- 64: "vbscript",
186
- 65: "vbnet",
187
- 66: "xml",
188
- 67: "yaml"
189
- };
190
- var Parser = class {
191
- constructor() {
192
- this.imgTokens = [];
193
- this.blockMap = /* @__PURE__ */ new Map();
194
- }
195
- parseDocxContent(doc, blocks) {
196
- for (const b of blocks) if (b.block_id) this.blockMap.set(b.block_id, b);
197
- const entry = this.blockMap.get(doc.documentId);
198
- if (!entry) return "";
199
- return this.parseBlock(entry, 0);
200
- }
201
- parseBlock(b, indent) {
202
- const prefix = " ".repeat(indent);
203
- const bt = b.block_type;
204
- if (bt === BlockType.Page) return this.parsePage(b);
205
- if (bt === BlockType.Text) return prefix + this.parseText(b.text) + "\n";
206
- if (bt >= BlockType.Heading1 && bt <= BlockType.Heading9) return prefix + this.parseHeading(b, bt - 2);
207
- if (bt === BlockType.Bullet) return prefix + this.parseBullet(b, indent);
208
- if (bt === BlockType.Ordered) return prefix + this.parseOrdered(b, indent);
209
- if (bt === BlockType.Code) return prefix + this.parseCode(b);
210
- if (bt === BlockType.Quote) return prefix + "> " + this.parseText(b.quote) + "\n";
211
- if (bt === BlockType.Equation) return prefix + "$$\n" + this.parseText(b.equation) + "$$\n\n";
212
- if (bt === BlockType.Todo) return prefix + this.parseTodo(b);
213
- if (bt === BlockType.Callout) return this.parseCallout(b);
214
- if (bt === BlockType.Divider) return prefix + "---\n\n";
215
- if (bt === BlockType.Image) return prefix + this.parseImage(b) + "\n";
216
- if (bt === BlockType.Table) return prefix + this.parseTable(b);
217
- if (bt === BlockType.TableCell) return this.parseTableCell(b);
218
- if (bt === BlockType.QuoteContainer) return this.parseQuoteContainer(b);
219
- if (bt === BlockType.Grid) return this.parseGrid(b, indent);
220
- return "";
221
- }
222
- parsePage(b) {
223
- let s = "# " + this.parseText(b.page) + "\n";
224
- for (const id of b.children ?? []) {
225
- const child = this.blockMap.get(id);
226
- if (child) s += this.parseBlock(child, 0) + "\n";
227
- }
228
- return s;
229
- }
230
- parseText(body) {
231
- const inline = body.elements.length > 1;
232
- return body.elements.map((e) => this.parseElement(e, inline)).join("") + "\n";
233
- }
234
- parseElement(e, inline) {
235
- if (e.text_run) return this.parseTextRun(e.text_run);
236
- if (e.mention_user) return e.mention_user.user_id;
237
- if (e.mention_doc) {
238
- const url = e.mention_doc.url ? decodeURIComponent(e.mention_doc.url) : "";
239
- return `[${e.mention_doc.title ?? ""}](${url})`;
240
- }
241
- if (e.equation) {
242
- const sym = inline ? "$" : "$$";
243
- return sym + e.equation.content.replace(/\n$/, "") + sym;
244
- }
245
- return "";
246
- }
247
- parseTextRun(tr) {
248
- const s = tr.text_element_style;
249
- let pre = "", post = "";
250
- if (s) {
251
- if (s.bold) {
252
- pre = "**";
253
- post = "**";
254
- } else if (s.italic) {
255
- pre = "_";
256
- post = "_";
257
- } else if (s.strikethrough) {
258
- pre = "~~";
259
- post = "~~";
260
- } else if (s.underline) {
261
- pre = "<u>";
262
- post = "</u>";
263
- } else if (s.inline_code) {
264
- pre = "`";
265
- post = "`";
266
- } else if (s.link) {
267
- pre = "[";
268
- post = `](${decodeURIComponent(s.link.url)})`;
269
- }
270
- }
271
- return pre + tr.content + post;
272
- }
273
- parseHeading(b, level) {
274
- const body = b[`heading${level}`];
275
- let s = "#".repeat(level) + " " + (body ? this.parseText(body) : "\n");
276
- for (const id of b.children ?? []) {
277
- const child = this.blockMap.get(id);
278
- if (child) s += this.parseBlock(child, 0);
279
- }
280
- return s;
281
- }
282
- parseBullet(b, indent) {
283
- let s = "- " + this.parseText(b.bullet);
284
- for (const id of b.children ?? []) {
285
- const child = this.blockMap.get(id);
286
- if (child) s += this.parseBlock(child, indent + 1);
287
- }
288
- return s;
289
- }
290
- parseOrdered(b, indent) {
291
- const parent = this.blockMap.get(b.parent_id);
292
- let order = 1;
293
- if (parent?.children) {
294
- const idx = parent.children.indexOf(b.block_id);
295
- for (let i = idx - 1; i >= 0; i--) if (this.blockMap.get(parent.children[i])?.block_type === BlockType.Ordered) order++;
296
- else break;
297
- }
298
- let s = `${order}. ` + this.parseText(b.ordered);
299
- for (const id of b.children ?? []) {
300
- const child = this.blockMap.get(id);
301
- if (child) s += this.parseBlock(child, indent + 1);
302
- }
303
- return s;
304
- }
305
- parseCode(b) {
306
- const lang = codeLangMap[b.code?.style?.language ?? 1] ?? "";
307
- const text = this.parseText(b.code).trim();
308
- return "```" + lang + "\n" + text + "\n```\n";
309
- }
310
- parseTodo(b) {
311
- return `- [${b.todo?.style?.done ? "x" : " "}] ` + this.parseText(b.todo) + "\n";
312
- }
313
- parseCallout(b) {
314
- let s = ">[!TIP] \n";
315
- for (const id of b.children ?? []) {
316
- const child = this.blockMap.get(id);
317
- if (child) s += this.parseBlock(child, 0);
318
- }
319
- return s;
320
- }
321
- parseImage(b) {
322
- const token = b.image?.token;
323
- if (token) {
324
- this.imgTokens.push(token);
325
- return `![图片-${token}](${token})\n`;
326
- }
327
- return "";
328
- }
329
- parseTableCell(b) {
330
- let s = "";
331
- for (const id of b.children ?? []) {
332
- const child = this.blockMap.get(id);
333
- if (child) s += this.parseBlock(child, 0).replace(/\n/g, "") + "<br/>";
334
- }
335
- return s;
336
- }
337
- parseTable(b) {
338
- const t = b.table;
339
- const cols = t.property.column_size;
340
- const rows = [];
341
- const mergeInfos = t.property.merge_info ?? [];
342
- for (let i = 0; i < (t.cells?.length ?? 0); i++) {
343
- const cellId = t.cells[i];
344
- const cell = this.blockMap.get(cellId);
345
- const content = cell ? this.parseBlock(cell, 0).replace(/\n/g, "") : "";
346
- const row = Math.floor(i / cols);
347
- const col = i % cols;
348
- if (!rows[row]) rows[row] = [];
349
- rows[row][col] = content;
350
- }
351
- const mergeMap = /* @__PURE__ */ new Map();
352
- for (let i = 0; i < mergeInfos.length; i++) {
353
- const m = mergeInfos[i];
354
- const row = Math.floor(i / cols);
355
- const col = i % cols;
356
- mergeMap.set(`${row}-${col}`, {
357
- rowSpan: m.row_span ?? 1,
358
- colSpan: m.col_span ?? 1
359
- });
360
- }
361
- const processed = /* @__PURE__ */ new Set();
362
- let buf = "<table>\n";
363
- for (let r = 0; r < rows.length; r++) {
364
- buf += "<tr>\n";
365
- for (let c = 0; c < (rows[r]?.length ?? 0); c++) {
366
- const key = `${r}-${c}`;
367
- if (processed.has(key)) continue;
368
- const merge = mergeMap.get(key);
369
- let attrs = "";
370
- if (merge) {
371
- if (merge.rowSpan > 1) attrs += ` rowspan="${merge.rowSpan}"`;
372
- if (merge.colSpan > 1) attrs += ` colspan="${merge.colSpan}"`;
373
- for (let mr = r; mr < r + merge.rowSpan; mr++) for (let mc = c; mc < c + merge.colSpan; mc++) processed.add(`${mr}-${mc}`);
374
- }
375
- buf += `<td${attrs}>${rows[r][c] ?? ""}</td>`;
376
- }
377
- buf += "</tr>\n";
378
- }
379
- buf += "</table>\n";
380
- return buf;
381
- }
382
- parseQuoteContainer(b) {
383
- let s = "";
384
- for (const id of b.children ?? []) {
385
- const child = this.blockMap.get(id);
386
- if (child) s += "> " + this.parseBlock(child, 0);
387
- }
388
- return s;
389
- }
390
- parseGrid(b, indent) {
391
- let s = "";
392
- for (const colId of b.children ?? []) {
393
- const col = this.blockMap.get(colId);
394
- if (!col) continue;
395
- for (const id of col.children ?? []) {
396
- const child = this.blockMap.get(id);
397
- if (child) s += this.parseBlock(child, indent);
398
- }
399
- }
400
- return s;
401
- }
402
- };
403
- //#endregion
404
- //#region src/logger.ts
405
- const COLORS = {
406
- [LoggerLevel.fatal]: "\x1B[35m",
407
- [LoggerLevel.error]: "\x1B[31m",
408
- [LoggerLevel.warn]: "\x1B[33m",
409
- [LoggerLevel.info]: "\x1B[36m",
410
- [LoggerLevel.debug]: "\x1B[32m",
411
- [LoggerLevel.trace]: "\x1B[90m"
412
- };
413
- const RESET = "\x1B[0m";
414
- const LEVEL_NAMES = [
415
- "FATAL",
416
- "ERROR",
417
- "WARN",
418
- "INFO",
419
- "DEBUG",
420
- "TRACE"
421
- ];
422
- let minLogLevel = LoggerLevel.trace;
423
- function log(level, module, ...args) {
424
- if (level > minLogLevel) return;
425
- const time = (/* @__PURE__ */ new Date()).toISOString();
426
- const color = COLORS[level] ?? "";
427
- const name = LEVEL_NAMES[level] ?? "INFO";
428
- process.stderr.write(`${time} ${color}[${name}]${RESET} [${module}] ${args.map(String).join(" ")}\n`);
429
- }
430
- function setLogLevel(level) {
431
- minLogLevel = level;
432
- }
433
- function createLogger(module) {
434
- return {
435
- fatal: (...args) => log(LoggerLevel.fatal, module, ...args),
436
- error: (...args) => log(LoggerLevel.error, module, ...args),
437
- warn: (...args) => log(LoggerLevel.warn, module, ...args),
438
- info: (...args) => log(LoggerLevel.info, module, ...args),
439
- debug: (...args) => log(LoggerLevel.debug, module, ...args),
440
- trace: (...args) => log(LoggerLevel.trace, module, ...args)
441
- };
442
- }
443
- //#endregion
444
5
  //#region src/cli.ts
445
- const logger = createLogger("cli");
446
- function parseWikiUrl(url) {
447
- const m = url.match(/^https:\/\/[\w.-]+\/(docs|docx|wiki)\/([a-zA-Z0-9]+)/);
448
- if (!m) throw new Error("Invalid feishu document URL");
449
- return {
450
- docType: m[1],
451
- docToken: m[2]
452
- };
453
- }
454
6
  const program = new Command();
455
7
  program.name("larkDocx2md").description("Download Lark/Feishu documents to markdown");
456
- program.command("download").description("Download a wiki document to markdown").option("--app-id <id>", "Feishu app ID (or read from LARK_DOCX2MD_APP_ID)").option("--app-secret <secret>", "Feishu app secret (or read from LARK_DOCX2MD_APP_SECRET)").option("-o, --output <dir>", "Output directory", "./larkDocx2mdOutput").option("--agent", "Enable agent mode: ERROR log level, and AI prompt output").option("--image-mode <mode>", "Image handling mode: \"local\" (download) or \"online\" (temp URL)", "local").argument("<url>", "Feishu wiki document URL: https://*.feishu.cn/wiki/*").action(async (url, opts) => {
8
+ program.command("download").description("Download a wiki document to markdown").option("--app-id <id>", "Feishu app ID (or read from LARK_DOCX2MD_APP_ID)").option("--app-secret <secret>", "Feishu app secret (or read from LARK_DOCX2MD_APP_SECRET)").option("-o, --output <dir>", "Output directory (or LARK_DOCX2MD_OUTPUT)").option("--agent", "Enable agent mode: ERROR log level, and AI prompt output (or LARK_DOCX2MD_AGENT=true)").option("--wb-format <format>", "Whiteboard output format: \"base64\", \"inline-svg\", \"svg\", or \"yaml\" (or LARK_DOCX2MD_WB_FORMAT)").option("--wb-bg <style>", "Whiteboard SVG background: \"none\", \"dot\", or a color like \"#fff\" (or LARK_DOCX2MD_WB_BG)").option("--wb-image-mode <mode>", "Whiteboard image mode: \"online\", \"base64\", or \"local\" (or LARK_DOCX2MD_WB_IMAGE_MODE)").option("--image-mode <mode>", "Image handling mode: \"local\" or \"online\" (or LARK_DOCX2MD_IMAGE_MODE)").argument("<url>", "Feishu wiki document URL: https://*.feishu.cn/wiki/*").action(async (url, opts) => {
9
+ opts.appId = opts.appId ?? process.env.LARK_DOCX2MD_APP_ID;
10
+ opts.appSecret = opts.appSecret ?? process.env.LARK_DOCX2MD_APP_SECRET;
11
+ opts.output = opts.output ?? process.env.LARK_DOCX2MD_OUTPUT ?? "./larkDocx2mdOutput";
12
+ opts.agent = opts.agent ?? process.env.LARK_DOCX2MD_AGENT === "true";
13
+ opts.imageMode = opts.imageMode ?? process.env.LARK_DOCX2MD_IMAGE_MODE ?? "local";
14
+ opts.wbFormat = opts.wbFormat ?? process.env.LARK_DOCX2MD_WB_FORMAT;
15
+ opts.wbBg = opts.wbBg ?? process.env.LARK_DOCX2MD_WB_BG ?? "none";
16
+ opts.wbImageMode = opts.wbImageMode ?? process.env.LARK_DOCX2MD_WB_IMAGE_MODE ?? "local";
17
+ if (!opts.wbFormat) opts.wbFormat = opts.agent ? "yaml" : "svg";
457
18
  if (opts.agent) {
458
19
  setLogLevel(LoggerLevel.error);
459
20
  opts.imageMode = "online";
460
- } else if (opts.imageMode && !["local", "online"].includes(opts.imageMode)) program.error(`Invalid --image-mode "${opts.imageMode}", must be "local" or "online"`);
461
- const { docType, docToken: rawToken } = parseWikiUrl(url);
462
- logger.info("Captured document token:", rawToken);
463
- const appId = opts.appId ?? process.env.LARK_DOCX2MD_APP_ID;
464
- const appSecret = opts.appSecret ?? process.env.LARK_DOCX2MD_APP_SECRET;
21
+ opts.wbImageMode = "online";
22
+ if (!["inline-svg", "yaml"].includes(opts.wbFormat)) program.error(`Agent mode only supports "inline-svg" or "yaml" for --wb-format`);
23
+ opts.wbImageMode = "online";
24
+ }
25
+ if (opts.imageMode && !["local", "online"].includes(opts.imageMode)) program.error(`Invalid --image-mode "${opts.imageMode}", must be "local" or "online"`);
26
+ if (![
27
+ "base64",
28
+ "inline-svg",
29
+ "svg",
30
+ "yaml"
31
+ ].includes(opts.wbFormat)) program.error(`Invalid --wb-format "${opts.wbFormat}", must be "base64", "inline-svg", "svg", or "yaml"`);
32
+ if (opts.wbFormat === "yaml") opts.wbImageMode = "online";
33
+ if (![
34
+ "online",
35
+ "base64",
36
+ "local"
37
+ ].includes(opts.wbImageMode)) program.error(`Invalid --wb-image-mode "${opts.wbImageMode}", must be "online", "base64", or "local"`);
38
+ const appId = opts.appId;
39
+ const appSecret = opts.appSecret;
465
40
  if (!appId || !appSecret) program.error("Missing credentials: pass --app-id/--app-secret or set LARK_DOCX2MD_APP_ID/LARK_DOCX2MD_APP_SECRET");
466
- const client = createClient(appId, appSecret, opts.agent ? LoggerLevel.error : LoggerLevel.warn);
467
- let docToken = rawToken;
468
- if (docType === "wiki") {
469
- docToken = (await client.getWikiNodeInfo(docToken)).obj_token;
470
- logger.info("Resolved docx token:", docToken);
471
- }
472
- const doc = await client.getDocxDocument(docToken);
473
- const blocks = await client.getDocxBlocks(docToken);
474
- logger.info(`Fetched ${blocks.length} blocks`);
475
- const parser = new Parser();
476
- let markdown = parser.parseDocxContent(doc, blocks);
477
- if (opts.imageMode === "online") for (let i = 0; i < parser.imgTokens.length; i += 5) {
478
- const batch = parser.imgTokens.slice(i, i + 5);
479
- const urlMap = await client.batchGetTmpDownloadUrl(batch);
480
- for (const token of batch) {
481
- const onlineUrl = urlMap[token];
482
- if (onlineUrl) {
483
- markdown = markdown.replace(`(${token})`, `(${onlineUrl})`);
484
- logger.info("Replaced image with online URL:", token);
485
- }
486
- }
487
- }
488
- else {
489
- const imgDir = path.join(opts.output, "static");
490
- for (const imgToken of parser.imgTokens) {
491
- let localPath = await client.downloadImage(imgToken, imgDir);
492
- localPath = path.relative(opts.output, localPath);
493
- markdown = markdown.replace(`(${imgToken})`, `(${localPath})`);
494
- logger.info("Downloaded image:", localPath);
495
- }
496
- }
497
- if (opts.agent) process.stdout.write(markdown);
498
- else {
499
- fs.mkdirSync(opts.output, { recursive: true });
500
- const mdPath = path.join(opts.output, `${docToken}.md`);
501
- fs.writeFileSync(mdPath, markdown);
502
- logger.info("Downloaded markdown file to", mdPath);
503
- }
41
+ const result = await convert({
42
+ appId,
43
+ appSecret,
44
+ url,
45
+ output: opts.output,
46
+ imageMode: opts.imageMode,
47
+ wbImageMode: opts.wbImageMode,
48
+ wbBg: opts.wbBg,
49
+ wbFormat: opts.wbFormat,
50
+ agent: opts.agent
51
+ });
52
+ if (opts.agent) process.stdout.write(result.markdown);
504
53
  });
505
54
  program.parse();
506
55
  //#endregion