lark-docx2md 0.3.2 → 0.5.1-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -2
- package/dist/cli.js +4 -3
- package/dist/cli.js.map +1 -1
- package/dist/{converter-jW2Zu4Pb.js → converter-Bqb4bqxh.js} +134 -18
- package/dist/converter-Bqb4bqxh.js.map +1 -0
- package/dist/{converter-C_2JDVY1.d.ts → converter-nwp8DCnk.d.ts} +2 -1
- package/dist/converter-nwp8DCnk.d.ts.map +1 -0
- package/dist/converter.js +1 -1
- package/package.json +14 -7
- package/dist/converter-C_2JDVY1.d.ts.map +0 -1
- package/dist/converter-jW2Zu4Pb.js.map +0 -1
package/README.md
CHANGED
|
@@ -2,9 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://npmjs.org/package/lark-docx2md)
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
将飞书文档、电子表格转换为 Markdown 文件的命令行工具。
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
|
|
8
|
+
支持:
|
|
9
|
+
- 飞书文档:`https://*.feishu.cn/wiki/*`
|
|
10
|
+
- Markdown 类型中的标题、列表、基础文字等常见的文本样式
|
|
11
|
+
- 飞书文档中的内嵌画板、电子表格
|
|
12
|
+
- ...见下表‘支持的内容块类型’
|
|
13
|
+
- 飞书电子表格:`https://*.feishu.cn/sheets/*`(支持 `?sheet=<sheetId>` 指定子表)、`https://*.feishu.cn/wiki/*`
|
|
8
14
|
|
|
9
15
|
## 使用
|
|
10
16
|
|
|
@@ -38,6 +44,7 @@ npx -y lark-docx2md@latest download <url>
|
|
|
38
44
|
| `-o, --output <dir>` | 输出目录 | `LARK_DOCX2MD_OUTPUT` | `./larkDocx2mdOutput` |
|
|
39
45
|
| `--agent [mode]` | Agent 模式:日志级别设为 ERROR。不传值(或 `=true`)为在线模式,Markdown 输出到 stdout;传 `local` 则落盘后输出引导 AI 读取的提示词 | `LARK_DOCX2MD_AGENT=true\|local` | `false` |
|
|
40
46
|
| `--image-mode <mode>` | 图片处理模式:`local`(下载到本地)或 `online`(24h 临时链接) | `LARK_DOCX2MD_IMAGE_MODE` | `local` |
|
|
47
|
+
| `--filter-title <title>` | 按标题过滤:仅转换匹配标题及其下级内容(匹配到同级或更高级标题时截止) | — | — |
|
|
41
48
|
| `--wb-format <format>` | 画板输出格式:`base64`、`inline-svg`、`svg`、`yaml` | `LARK_DOCX2MD_WB_FORMAT` | `svg`(agent 下默认 `yaml`) |
|
|
42
49
|
| `--wb-bg <style>` | 画板 SVG 背景:`none`、`dot` 或颜色值如 `#fff` | `LARK_DOCX2MD_WB_BG` | `none` |
|
|
43
50
|
| `--wb-image-mode <mode>` | 画板图片模式:`online`、`base64` 或 `local` | `LARK_DOCX2MD_WB_IMAGE_MODE` | `local` |
|
|
@@ -47,6 +54,7 @@ npx -y lark-docx2md@latest download <url>
|
|
|
47
54
|
> - `--agent`(在线):强制 `--image-mode=online`、`--wb-image-mode=online`;`--wb-format` 默认 `yaml`,仅允许 `inline-svg` / `yaml`;转换完成后 Markdown 直接通过 stdout 输出。
|
|
48
55
|
> - `--agent local`:强制 `--image-mode=local`、`--wb-image-mode=local`(Markdown、图片、画板中的图片均落盘);`--wb-format` 默认 `yaml`,仅允许 `inline-svg` / `yaml`;stdout 输出引导 AI 读取文件的提示词(包含绝对路径)。
|
|
49
56
|
> - 非 agent 模式下 `--wb-format yaml` 时:`--wb-image-mode` 强制为 `online`。
|
|
57
|
+
> - `--filter-title`:按标题文本精确匹配(忽略前后空格),收集该标题及其所有子级块,遇到同级或更高级标题时停止。未匹配到则报错提示。
|
|
50
58
|
|
|
51
59
|
## 功能
|
|
52
60
|
|
package/dist/cli.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { r as setLogLevel, t as convert } from "./converter-
|
|
2
|
+
import { r as setLogLevel, t as convert } from "./converter-Bqb4bqxh.js";
|
|
3
3
|
import { Command } from "commander";
|
|
4
4
|
import { LoggerLevel } from "@larksuiteoapi/node-sdk";
|
|
5
5
|
//#region src/cli.ts
|
|
6
6
|
const program = new Command();
|
|
7
7
|
program.name("larkDocx2md").description("Download Lark/Feishu documents to markdown");
|
|
8
|
-
program.command("download").alias("dl").description("Download a wiki document to markdown").option("--app-id <id>", "Feishu app ID (or read from LARK_DOCX2MD_APP_ID)").option("--app-secret <secret>", "Feishu app secret (or read from LARK_DOCX2MD_APP_SECRET)").option("-o, --output <dir>", "Output directory (or LARK_DOCX2MD_OUTPUT)").option("--agent [mode]", "Enable agent mode: ERROR log level, and AI-oriented stdout. Pass \"local\" to save markdown/images/whiteboards to disk and print a read-file prompt (or LARK_DOCX2MD_AGENT=true|local)").option("--wb-format <format>", "Whiteboard output format: \"base64\", \"inline-svg\", \"svg\", or \"yaml\" (or LARK_DOCX2MD_WB_FORMAT)").option("--wb-bg <style>", "Whiteboard SVG background: \"none\", \"dot\", or a color like \"#fff\" (or LARK_DOCX2MD_WB_BG)").option("--wb-image-mode <mode>", "Whiteboard image mode: \"online\", \"base64\", or \"local\" (or LARK_DOCX2MD_WB_IMAGE_MODE)").option("--image-mode <mode>", "Image handling mode: \"local\" or \"online\" (or LARK_DOCX2MD_IMAGE_MODE)").argument("<url>", "Feishu wiki document URL: https://*.feishu.cn/wiki/*").action(async (url, opts) => {
|
|
8
|
+
program.command("download").alias("dl").description("Download a wiki document to markdown").option("--app-id <id>", "Feishu app ID (or read from LARK_DOCX2MD_APP_ID)").option("--app-secret <secret>", "Feishu app secret (or read from LARK_DOCX2MD_APP_SECRET)").option("-o, --output <dir>", "Output directory (or LARK_DOCX2MD_OUTPUT)").option("--agent [mode]", "Enable agent mode: ERROR log level, and AI-oriented stdout. Pass \"local\" to save markdown/images/whiteboards to disk and print a read-file prompt (or LARK_DOCX2MD_AGENT=true|local)").option("--wb-format <format>", "Whiteboard output format: \"base64\", \"inline-svg\", \"svg\", or \"yaml\" (or LARK_DOCX2MD_WB_FORMAT)").option("--wb-bg <style>", "Whiteboard SVG background: \"none\", \"dot\", or a color like \"#fff\" (or LARK_DOCX2MD_WB_BG)").option("--wb-image-mode <mode>", "Whiteboard image mode: \"online\", \"base64\", or \"local\" (or LARK_DOCX2MD_WB_IMAGE_MODE)").option("--image-mode <mode>", "Image handling mode: \"local\" or \"online\" (or LARK_DOCX2MD_IMAGE_MODE)").option("--filter-title <title>", "Only convert the section matching this heading title").argument("<url>", "Feishu wiki document URL: https://*.feishu.cn/wiki/*").action(async (url, opts) => {
|
|
9
9
|
opts.appId = opts.appId ?? process.env.LARK_DOCX2MD_APP_ID;
|
|
10
10
|
opts.appSecret = opts.appSecret ?? process.env.LARK_DOCX2MD_APP_SECRET;
|
|
11
11
|
opts.output = opts.output ?? process.env.LARK_DOCX2MD_OUTPUT ?? "./larkDocx2mdOutput";
|
|
@@ -57,7 +57,8 @@ program.command("download").alias("dl").description("Download a wiki document to
|
|
|
57
57
|
wbImageMode: opts.wbImageMode,
|
|
58
58
|
wbBg: opts.wbBg,
|
|
59
59
|
wbFormat: opts.wbFormat,
|
|
60
|
-
agent: agentLocal ? "local" : opts.agent === true
|
|
60
|
+
agent: agentLocal ? "local" : opts.agent === true,
|
|
61
|
+
filterTitle: opts.filterTitle?.trim()
|
|
61
62
|
});
|
|
62
63
|
if (agentLocal) process.stdout.write(`**The Feishu document has been downloaded to the following absolute path:**\n\n\`${result.filePath}\`\n\n**Read this file to access the full markdown content.**\n`);
|
|
63
64
|
else if (opts.agent === true) process.stdout.write(result.markdown);
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","names":[],"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { Command } from 'commander';\nimport { LoggerLevel } from '@larksuiteoapi/node-sdk';\nimport { convert } from './converter.js';\nimport { setLogLevel } from './logger.js';\nimport type { SvgBackground, WbFormat, WbImageMode } from './types.js';\n\nconst program = new Command();\nprogram.name('larkDocx2md').description('Download Lark/Feishu documents to markdown');\n\nprogram\n .command('download')\n .alias('dl')\n .description('Download a wiki document to markdown')\n .option('--app-id <id>', 'Feishu app ID (or read from LARK_DOCX2MD_APP_ID)')\n .option('--app-secret <secret>', 'Feishu app secret (or read from LARK_DOCX2MD_APP_SECRET)')\n .option('-o, --output <dir>', 'Output directory (or LARK_DOCX2MD_OUTPUT)')\n .option('--agent [mode]', 'Enable agent mode: ERROR log level, and AI-oriented stdout. Pass \"local\" to save markdown/images/whiteboards to disk and print a read-file prompt (or LARK_DOCX2MD_AGENT=true|local)')\n .option('--wb-format <format>', 'Whiteboard output format: \"base64\", \"inline-svg\", \"svg\", or \"yaml\" (or LARK_DOCX2MD_WB_FORMAT)')\n .option('--wb-bg <style>', 'Whiteboard SVG background: \"none\", \"dot\", or a color like \"#fff\" (or LARK_DOCX2MD_WB_BG)')\n .option('--wb-image-mode <mode>', 'Whiteboard image mode: \"online\", \"base64\", or \"local\" (or LARK_DOCX2MD_WB_IMAGE_MODE)')\n .option('--image-mode <mode>', 'Image handling mode: \"local\" or \"online\" (or LARK_DOCX2MD_IMAGE_MODE)')\n .argument('<url>', 'Feishu wiki document URL: https://*.feishu.cn/wiki/*')\n .action(async (url: string, opts: { appId?: string; appSecret?: string; output?: string; agent?: boolean | string; imageMode?: string; wbImageMode?: string; wbBg?: SvgBackground; wbFormat?: string }) => {\n // ─── 环境变量默认值(直接指定 > 环境变量 > 内置默认值)────────────────\n opts.appId = opts.appId ?? process.env.LARK_DOCX2MD_APP_ID;\n opts.appSecret = opts.appSecret ?? 
process.env.LARK_DOCX2MD_APP_SECRET;\n opts.output = opts.output ?? process.env.LARK_DOCX2MD_OUTPUT ?? './larkDocx2mdOutput';\n // 解析 --agent:可能为 undefined | true | 'local' | 其他字符串\n if (opts.agent === undefined) {\n const envAgent = process.env.LARK_DOCX2MD_AGENT;\n if (envAgent === 'true') opts.agent = true;\n else if (envAgent === 'local') opts.agent = 'local';\n else opts.agent = false;\n } else if (typeof opts.agent === 'string' && opts.agent !== 'local') {\n program.error(`Invalid --agent value \"${opts.agent}\", only \"local\" is supported (or omit the value)`);\n }\n const agentEnabled = opts.agent === true || opts.agent === 'local';\n const agentLocal = opts.agent === 'local';\n\n opts.imageMode = opts.imageMode ?? process.env.LARK_DOCX2MD_IMAGE_MODE ?? 'local';\n opts.wbFormat = opts.wbFormat ?? process.env.LARK_DOCX2MD_WB_FORMAT;\n opts.wbBg = opts.wbBg ?? process.env.LARK_DOCX2MD_WB_BG ?? 'none';\n opts.wbImageMode = opts.wbImageMode ?? process.env.LARK_DOCX2MD_WB_IMAGE_MODE ?? 'local';\n\n // 设置 wb-format 默认值:--agent local 默认 inline-svg(兼容本地画板图片),--agent(在线)默认 yaml,其余 svg\n if (!opts.wbFormat) {\n opts.wbFormat = agentEnabled ? 
'yaml' : 'svg';\n }\n\n if (agentEnabled) {\n setLogLevel(LoggerLevel.error);\n if (agentLocal) {\n // --agent local:图片/画板图片均落盘\n opts.imageMode = 'local';\n opts.wbImageMode = 'local';\n } else {\n // --agent(在线):一律在线,且画板仅支持内嵌形式\n opts.imageMode = 'online';\n opts.wbImageMode = 'online';\n }\n if (!['inline-svg', 'yaml'].includes(opts.wbFormat)) {\n program.error(`Agent mode only supports \"inline-svg\" or \"yaml\" for --wb-format`);\n }\n } else {\n // yaml 格式图片仅支持 online\n if (opts.wbFormat === 'yaml') {\n opts.wbImageMode = 'online';\n }\n }\n\n if (opts.imageMode && !['local', 'online'].includes(opts.imageMode)) {\n program.error(`Invalid --image-mode \"${opts.imageMode}\", must be \"local\" or \"online\"`);\n }\n if (!['base64', 'inline-svg', 'svg', 'yaml'].includes(opts.wbFormat)) {\n program.error(`Invalid --wb-format \"${opts.wbFormat}\", must be \"base64\", \"inline-svg\", \"svg\", or \"yaml\"`);\n }\n if (!['online', 'base64', 'local'].includes(opts.wbImageMode)) {\n program.error(`Invalid --wb-image-mode \"${opts.wbImageMode}\", must be \"online\", \"base64\", or \"local\"`);\n }\n\n const appId = opts.appId!;\n const appSecret = opts.appSecret!;\n if (!appId || !appSecret) {\n program.error('Missing credentials: pass --app-id/--app-secret or set LARK_DOCX2MD_APP_ID/LARK_DOCX2MD_APP_SECRET');\n }\n\n const result = await convert({\n appId,\n appSecret,\n url,\n output: opts.output,\n imageMode: opts.imageMode as 'local' | 'online',\n wbImageMode: opts.wbImageMode as WbImageMode,\n wbBg: opts.wbBg,\n wbFormat: opts.wbFormat as WbFormat,\n agent: agentLocal ? 
'local' : (opts.agent === true),\n });\n\n if (agentLocal) {\n // 本地模式:输出引导 AI 读取文件的提示词(绝对路径)\n process.stdout.write(\n `**The Feishu document has been downloaded to the following absolute path:**\\n\\n` +\n `\\`${result.filePath}\\`\\n\\n` +\n `**Read this file to access the full markdown content.**\\n`,\n );\n } else if (opts.agent === true) {\n process.stdout.write(result.markdown);\n }\n });\n\nprogram.parse();\n"],"mappings":";;;;;AAOA,MAAM,UAAU,IAAI,SAAS;AAC7B,QAAQ,KAAK,cAAc,CAAC,YAAY,6CAA6C;AAErF,QACG,QAAQ,WAAW,CACnB,MAAM,KAAK,CACX,YAAY,uCAAuC,CACnD,OAAO,iBAAiB,mDAAmD,CAC3E,OAAO,yBAAyB,2DAA2D,CAC3F,OAAO,sBAAsB,4CAA4C,CACzE,OAAO,kBAAkB,yLAAuL,CAChN,OAAO,wBAAwB,yGAAiG,CAChI,OAAO,mBAAmB,iGAA2F,CACrH,OAAO,0BAA0B,8FAAwF,CACzH,OAAO,uBAAuB,4EAAwE,CACtG,SAAS,SAAS,uDAAuD,CACzE,OAAO,OAAO,KAAa,
|
|
1
|
+
{"version":3,"file":"cli.js","names":[],"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { Command } from 'commander';\nimport { LoggerLevel } from '@larksuiteoapi/node-sdk';\nimport { convert } from './converter.js';\nimport { setLogLevel } from './logger.js';\nimport type { SvgBackground, WbFormat, WbImageMode } from './types.js';\n\nconst program = new Command();\nprogram.name('larkDocx2md').description('Download Lark/Feishu documents to markdown');\n\nprogram\n .command('download')\n .alias('dl')\n .description('Download a wiki document to markdown')\n .option('--app-id <id>', 'Feishu app ID (or read from LARK_DOCX2MD_APP_ID)')\n .option('--app-secret <secret>', 'Feishu app secret (or read from LARK_DOCX2MD_APP_SECRET)')\n .option('-o, --output <dir>', 'Output directory (or LARK_DOCX2MD_OUTPUT)')\n .option('--agent [mode]', 'Enable agent mode: ERROR log level, and AI-oriented stdout. Pass \"local\" to save markdown/images/whiteboards to disk and print a read-file prompt (or LARK_DOCX2MD_AGENT=true|local)')\n .option('--wb-format <format>', 'Whiteboard output format: \"base64\", \"inline-svg\", \"svg\", or \"yaml\" (or LARK_DOCX2MD_WB_FORMAT)')\n .option('--wb-bg <style>', 'Whiteboard SVG background: \"none\", \"dot\", or a color like \"#fff\" (or LARK_DOCX2MD_WB_BG)')\n .option('--wb-image-mode <mode>', 'Whiteboard image mode: \"online\", \"base64\", or \"local\" (or LARK_DOCX2MD_WB_IMAGE_MODE)')\n .option('--image-mode <mode>', 'Image handling mode: \"local\" or \"online\" (or LARK_DOCX2MD_IMAGE_MODE)')\n .option('--filter-title <title>', 'Only convert the section matching this heading title')\n .argument('<url>', 'Feishu wiki document URL: https://*.feishu.cn/wiki/*')\n .action(async (url: string, opts: { appId?: string; appSecret?: string; output?: string; agent?: boolean | string; imageMode?: string; wbImageMode?: string; wbBg?: SvgBackground; wbFormat?: string; filterTitle?: string }) => {\n // ─── 环境变量默认值(直接指定 > 环境变量 > 
内置默认值)────────────────\n opts.appId = opts.appId ?? process.env.LARK_DOCX2MD_APP_ID;\n opts.appSecret = opts.appSecret ?? process.env.LARK_DOCX2MD_APP_SECRET;\n opts.output = opts.output ?? process.env.LARK_DOCX2MD_OUTPUT ?? './larkDocx2mdOutput';\n // 解析 --agent:可能为 undefined | true | 'local' | 其他字符串\n if (opts.agent === undefined) {\n const envAgent = process.env.LARK_DOCX2MD_AGENT;\n if (envAgent === 'true') opts.agent = true;\n else if (envAgent === 'local') opts.agent = 'local';\n else opts.agent = false;\n } else if (typeof opts.agent === 'string' && opts.agent !== 'local') {\n program.error(`Invalid --agent value \"${opts.agent}\", only \"local\" is supported (or omit the value)`);\n }\n const agentEnabled = opts.agent === true || opts.agent === 'local';\n const agentLocal = opts.agent === 'local';\n\n opts.imageMode = opts.imageMode ?? process.env.LARK_DOCX2MD_IMAGE_MODE ?? 'local';\n opts.wbFormat = opts.wbFormat ?? process.env.LARK_DOCX2MD_WB_FORMAT;\n opts.wbBg = opts.wbBg ?? process.env.LARK_DOCX2MD_WB_BG ?? 'none';\n opts.wbImageMode = opts.wbImageMode ?? process.env.LARK_DOCX2MD_WB_IMAGE_MODE ?? 'local';\n\n // 设置 wb-format 默认值:--agent local 默认 inline-svg(兼容本地画板图片),--agent(在线)默认 yaml,其余 svg\n if (!opts.wbFormat) {\n opts.wbFormat = agentEnabled ? 
'yaml' : 'svg';\n }\n\n if (agentEnabled) {\n setLogLevel(LoggerLevel.error);\n if (agentLocal) {\n // --agent local:图片/画板图片均落盘\n opts.imageMode = 'local';\n opts.wbImageMode = 'local';\n } else {\n // --agent(在线):一律在线,且画板仅支持内嵌形式\n opts.imageMode = 'online';\n opts.wbImageMode = 'online';\n }\n if (!['inline-svg', 'yaml'].includes(opts.wbFormat)) {\n program.error(`Agent mode only supports \"inline-svg\" or \"yaml\" for --wb-format`);\n }\n } else {\n // yaml 格式图片仅支持 online\n if (opts.wbFormat === 'yaml') {\n opts.wbImageMode = 'online';\n }\n }\n\n if (opts.imageMode && !['local', 'online'].includes(opts.imageMode)) {\n program.error(`Invalid --image-mode \"${opts.imageMode}\", must be \"local\" or \"online\"`);\n }\n if (!['base64', 'inline-svg', 'svg', 'yaml'].includes(opts.wbFormat)) {\n program.error(`Invalid --wb-format \"${opts.wbFormat}\", must be \"base64\", \"inline-svg\", \"svg\", or \"yaml\"`);\n }\n if (!['online', 'base64', 'local'].includes(opts.wbImageMode)) {\n program.error(`Invalid --wb-image-mode \"${opts.wbImageMode}\", must be \"online\", \"base64\", or \"local\"`);\n }\n\n const appId = opts.appId!;\n const appSecret = opts.appSecret!;\n if (!appId || !appSecret) {\n program.error('Missing credentials: pass --app-id/--app-secret or set LARK_DOCX2MD_APP_ID/LARK_DOCX2MD_APP_SECRET');\n }\n\n const result = await convert({\n appId,\n appSecret,\n url,\n output: opts.output,\n imageMode: opts.imageMode as 'local' | 'online',\n wbImageMode: opts.wbImageMode as WbImageMode,\n wbBg: opts.wbBg,\n wbFormat: opts.wbFormat as WbFormat,\n agent: agentLocal ? 
'local' : (opts.agent === true),\n filterTitle: opts.filterTitle?.trim(),\n });\n\n if (agentLocal) {\n // 本地模式:输出引导 AI 读取文件的提示词(绝对路径)\n process.stdout.write(\n `**The Feishu document has been downloaded to the following absolute path:**\\n\\n` +\n `\\`${result.filePath}\\`\\n\\n` +\n `**Read this file to access the full markdown content.**\\n`,\n );\n } else if (opts.agent === true) {\n process.stdout.write(result.markdown);\n }\n });\n\nprogram.parse();\n"],"mappings":";;;;;AAOA,MAAM,UAAU,IAAI,SAAS;AAC7B,QAAQ,KAAK,cAAc,CAAC,YAAY,6CAA6C;AAErF,QACG,QAAQ,WAAW,CACnB,MAAM,KAAK,CACX,YAAY,uCAAuC,CACnD,OAAO,iBAAiB,mDAAmD,CAC3E,OAAO,yBAAyB,2DAA2D,CAC3F,OAAO,sBAAsB,4CAA4C,CACzE,OAAO,kBAAkB,yLAAuL,CAChN,OAAO,wBAAwB,yGAAiG,CAChI,OAAO,mBAAmB,iGAA2F,CACrH,OAAO,0BAA0B,8FAAwF,CACzH,OAAO,uBAAuB,4EAAwE,CACtG,OAAO,0BAA0B,uDAAuD,CACxF,SAAS,SAAS,uDAAuD,CACzE,OAAO,OAAO,KAAa,SAAqM;AAE/N,MAAK,QAAQ,KAAK,SAAS,QAAQ,IAAI;AACvC,MAAK,YAAY,KAAK,aAAa,QAAQ,IAAI;AAC/C,MAAK,SAAS,KAAK,UAAU,QAAQ,IAAI,uBAAuB;AAEhE,KAAI,KAAK,UAAU,KAAA,GAAW;EAC5B,MAAM,WAAW,QAAQ,IAAI;AAC7B,MAAI,aAAa,OAAQ,MAAK,QAAQ;WAC7B,aAAa,QAAS,MAAK,QAAQ;MACvC,MAAK,QAAQ;YACT,OAAO,KAAK,UAAU,YAAY,KAAK,UAAU,QAC1D,SAAQ,MAAM,0BAA0B,KAAK,MAAM,kDAAkD;CAEvG,MAAM,eAAe,KAAK,UAAU,QAAQ,KAAK,UAAU;CAC3D,MAAM,aAAa,KAAK,UAAU;AAElC,MAAK,YAAY,KAAK,aAAa,QAAQ,IAAI,2BAA2B;AAC1E,MAAK,WAAW,KAAK,YAAY,QAAQ,IAAI;AAC7C,MAAK,OAAO,KAAK,QAAQ,QAAQ,IAAI,sBAAsB;AAC3D,MAAK,cAAc,KAAK,eAAe,QAAQ,IAAI,8BAA8B;AAGjF,KAAI,CAAC,KAAK,SACR,MAAK,WAAW,eAAe,SAAS;AAG1C,KAAI,cAAc;AAChB,cAAY,YAAY,MAAM;AAC9B,MAAI,YAAY;AAEd,QAAK,YAAY;AACjB,QAAK,cAAc;SACd;AAEL,QAAK,YAAY;AACjB,QAAK,cAAc;;AAErB,MAAI,CAAC,CAAC,cAAc,OAAO,CAAC,SAAS,KAAK,SAAS,CACjD,SAAQ,MAAM,kEAAkE;YAI9E,KAAK,aAAa,OACpB,MAAK,cAAc;AAIvB,KAAI,KAAK,aAAa,CAAC,CAAC,SAAS,SAAS,CAAC,SAAS,KAAK,UAAU,CACjE,SAAQ,MAAM,yBAAyB,KAAK,UAAU,gCAAgC;AAExF,KAAI,CAAC;EAAC;EAAU;EAAc;EAAO;EAAO,CAAC,SAAS,KAAK,SAAS,CAClE,SAAQ,MAAM,wBAAwB,KAAK,SAAS,qDAAqD;AAE3G,KAAI,CAAC;EAAC;EAAU;EAAU;EAAQ,CAAC,SAAS,KAAK,YAAY,CAC3D,SAAQ,MAAM,4BAA4B,KAAK,YAAY,2CAA2C;CAGx
G,MAAM,QAAQ,KAAK;CACnB,MAAM,YAAY,KAAK;AACvB,KAAI,CAAC,SAAS,CAAC,UACb,SAAQ,MAAM,qGAAqG;CAGrH,MAAM,SAAS,MAAM,QAAQ;EAC3B;EACA;EACA;EACA,QAAQ,KAAK;EACb,WAAW,KAAK;EAChB,aAAa,KAAK;EAClB,MAAM,KAAK;EACX,UAAU,KAAK;EACf,OAAO,aAAa,UAAW,KAAK,UAAU;EAC9C,aAAa,KAAK,aAAa,MAAM;EACtC,CAAC;AAEF,KAAI,WAEF,SAAQ,OAAO,MACb,oFACK,OAAO,SAAS,iEAEtB;UACQ,KAAK,UAAU,KACxB,SAAQ,OAAO,MAAM,OAAO,SAAS;EAEvC;AAEJ,QAAQ,OAAO"}
|
|
@@ -3,6 +3,9 @@ import { LoggerLevel } from "@larksuiteoapi/node-sdk";
|
|
|
3
3
|
import * as fs from "node:fs";
|
|
4
4
|
import * as path from "node:path";
|
|
5
5
|
//#region src/client.ts
|
|
6
|
+
/** Returns a promise that resolves (with no value) after `delayMs` milliseconds. */
const sleep$1 = (delayMs) => new Promise((done) => {
	setTimeout(done, delayMs);
});
/** How many times `call` retries a request that was rejected with the rate-limit code. */
const RATE_LIMIT_MAX_RETRIES = 3;
/** Base retry delay in milliseconds; `call` multiplies it by the attempt number. */
const RATE_LIMIT_RETRY_DELAY = 500;
|
|
6
9
|
function createClient(appId, appSecret, loggerLevel = LoggerLevel.warn) {
|
|
7
10
|
const client = new lark.Client({
|
|
8
11
|
appId,
|
|
@@ -10,18 +13,25 @@ function createClient(appId, appSecret, loggerLevel = LoggerLevel.warn) {
|
|
|
10
13
|
loggerLevel
|
|
11
14
|
});
|
|
12
15
|
/**
 * Runs one API request and unwraps its `data` payload, retrying with a
 * linearly growing delay when the API reports the rate-limit code.
 *
 * @param {string} name - Label prefixed to every error message raised here.
 * @param {Function} fn - Performs the request; invoked once per attempt and
 *   expected to resolve to an object exposing `code`, `msg` and `data`.
 * @returns {Promise<*>} The `data` field of the first response whose `code` is 0.
 * @throws {Error} If the response carries a non-zero `code`, or if the request
 *   rejects. A rejection whose `response.data.error` is set is wrapped in a
 *   descriptive Error (original kept as `cause`); any other rejection is
 *   rethrown untouched.
 */
async function call(name, fn) {
	const RATE_LIMIT_CODE = 99991400;
	for (let attempt = 0; attempt <= RATE_LIMIT_MAX_RETRIES; attempt++) {
		const canRetry = attempt < RATE_LIMIT_MAX_RETRIES;
		let res;
		try {
			res = await fn();
		} catch (e) {
			// `e?.` guards against a rejection with a nullish value, which
			// would otherwise surface as an unrelated TypeError.
			const payload = e?.response?.data;
			const code = payload?.code;
			if (code === RATE_LIMIT_CODE && canRetry) {
				await sleep$1(RATE_LIMIT_RETRY_DELAY * (attempt + 1));
				continue;
			}
			const error = payload?.error;
			// Keep the original exception reachable through `cause` so its
			// stack and HTTP details are not lost when wrapping.
			if (error) throw new Error(`${name} failed: [${code}] ${payload.msg}: \n${JSON.stringify(error, null, 2)}`, { cause: e });
			throw e;
		}
		// NOTE(review): the rate-limit code is also retried when it arrives in
		// a resolved response body; confirm whether the SDK ever resolves
		// instead of rejecting for this code.
		if (res.code === RATE_LIMIT_CODE && canRetry) {
			await sleep$1(RATE_LIMIT_RETRY_DELAY * (attempt + 1));
			continue;
		}
		if (res.code !== 0) throw new Error(`${name} failed: [${res.code}] ${res.msg}`);
		return res.data;
	}
	// Defensive guard: the final attempt always returns or throws above.
	throw new Error(`${name} failed: 频率限制重试次数已用尽`);
}
|
|
26
36
|
async function getWikiNodeInfo(token) {
|
|
27
37
|
return (await call("getWikiNodeInfo", () => client.wiki.v2.space.getNode({ params: { token } }))).node;
|
|
@@ -33,10 +43,17 @@ function createClient(appId, appSecret, loggerLevel = LoggerLevel.warn) {
|
|
|
33
43
|
title: doc.title
|
|
34
44
|
};
|
|
35
45
|
}
|
|
36
|
-
|
|
37
|
-
|
|
46
|
+
/**
|
|
47
|
+
* 分页获取文档所有块。
|
|
48
|
+
* @param docToken
|
|
49
|
+
* @param pageHandler 可选回调,每页 blocks 传入,返回 false 则提前终止分页。
|
|
50
|
+
* 不传时收集所有块后一次性返回。
|
|
51
|
+
*/
|
|
52
|
+
async function getDocxBlocks(docToken, pageHandler) {
|
|
53
|
+
const allBlocks = [];
|
|
38
54
|
let pageToken;
|
|
39
|
-
for (;;) {
|
|
55
|
+
for (let i = 0;; i++) {
|
|
56
|
+
if (i > 0) await sleep$1(100);
|
|
40
57
|
const data = await call("getDocxBlocks", () => client.docx.v1.documentBlock.list({
|
|
41
58
|
path: { document_id: docToken },
|
|
42
59
|
params: {
|
|
@@ -45,11 +62,16 @@ function createClient(appId, appSecret, loggerLevel = LoggerLevel.warn) {
|
|
|
45
62
|
page_token: pageToken
|
|
46
63
|
}
|
|
47
64
|
}));
|
|
48
|
-
|
|
65
|
+
const items = data.items ?? [];
|
|
66
|
+
if (pageHandler) {
|
|
67
|
+
const shouldContinue = pageHandler(items);
|
|
68
|
+
allBlocks.push(...items);
|
|
69
|
+
if (!shouldContinue) break;
|
|
70
|
+
} else allBlocks.push(...items);
|
|
49
71
|
if (!data.has_more) break;
|
|
50
72
|
pageToken = data.page_token;
|
|
51
73
|
}
|
|
52
|
-
return
|
|
74
|
+
return allBlocks;
|
|
53
75
|
}
|
|
54
76
|
/**
|
|
55
77
|
*
|
|
@@ -2954,6 +2976,86 @@ function hasBlockChildren(node) {
|
|
|
2954
2976
|
return node.type === "page" || node.type === "bullet" || node.type === "ordered" || node.type === "callout" || node.type === "quote" || node.type === "grid";
|
|
2955
2977
|
}
|
|
2956
2978
|
//#endregion
|
|
2979
|
+
//#region src/title-filter.ts
|
|
2980
|
+
/**
 * Returns the heading level (1-9) of a block, or null when the block is not
 * a heading. block_type values 3..11 correspond to heading levels 1..9.
 */
function getHeadingLevel(block) {
	const { block_type: blockType } = block;
	const isHeading = blockType !== undefined && blockType >= 3 && blockType <= 11;
	return isHeading ? blockType - 2 : null;
}
|
|
2989
|
+
/**
 * Extracts the trimmed text of a heading block by concatenating the
 * `text_run.content` of each element in its `heading<level>` body.
 * Returns null when the block is not a heading or the body has no `elements`.
 */
function extractHeadingText(block) {
	const headingLevel = getHeadingLevel(block);
	if (headingLevel === null) return null;
	const elements = block[`heading${headingLevel}`]?.elements;
	if (!elements) return null;
	// Elements without a text_run contribute an empty string.
	const pieces = elements.map((element) => element.text_run?.content ?? "");
	return pieces.join("").trim();
}
|
|
2999
|
+
/**
 * Builds a heading-title filter. The returned `pageHandler` is fed pages of
 * blocks in order and returns false once the matched section has ended;
 * `getResult` reports what was collected. State lives in this closure, so
 * each filter instance is meant for a single pass over one document.
 *
 * Collection rules:
 * - blocks with block_type 1 are always kept;
 * - the first heading whose trimmed text equals the trimmed `options.title`
 *   starts the section;
 * - the section ends at the next heading of the same or a higher level
 *   (numerically lower or equal), which is not kept.
 */
function createTitleFilter(options) {
	const wantedTitle = options.title.trim();
	const keptBlocks = [];
	const headingsSeen = [];
	let phase = "scanning";
	let sectionLevel = 0;

	function pageHandler(blocks) {
		for (const block of blocks) {
			if (block.block_type === 1) {
				keptBlocks.push(block);
				continue;
			}
			if (phase === "done") return false;

			const level = getHeadingLevel(block);
			if (phase === "collecting") {
				const endsSection = level !== null && level <= sectionLevel;
				if (endsSection) {
					phase = "done";
					return false;
				}
				keptBlocks.push(block);
				continue;
			}

			// Still scanning: only headings matter until the target is found.
			if (level === null) continue;
			const text = extractHeadingText(block) ?? "";
			headingsSeen.push({
				level,
				text
			});
			if (text !== wantedTitle) continue;
			phase = "collecting";
			sectionLevel = level;
			keptBlocks.push(block);
		}
		return phase !== "done";
	}

	function getResult() {
		// Copies are returned so callers cannot mutate the filter's state.
		const matched = phase === "collecting" || phase === "done";
		return {
			blocks: [...keptBlocks],
			matched,
			availableHeadings: [...headingsSeen]
		};
	}

	return {
		pageHandler,
		getResult
	};
}
|
|
3058
|
+
//#endregion
|
|
2957
3059
|
//#region src/converter.ts
|
|
2958
3060
|
const logger = createLogger("converter");
|
|
2959
3061
|
function parseWikiUrl(url) {
|
|
@@ -2996,7 +3098,21 @@ async function convert(opts) {
|
|
|
2996
3098
|
};
|
|
2997
3099
|
} else {
|
|
2998
3100
|
const doc = await client.getDocxDocument(docToken);
|
|
2999
|
-
|
|
3101
|
+
let blocks;
|
|
3102
|
+
if (opts.filterTitle) {
|
|
3103
|
+
const filter = createTitleFilter({ title: opts.filterTitle });
|
|
3104
|
+
await client.getDocxBlocks(docToken, filter.pageHandler);
|
|
3105
|
+
const result = filter.getResult();
|
|
3106
|
+
if (!result.matched) {
|
|
3107
|
+
let msg = `No heading matched "${opts.filterTitle}". Please verify the heading text.`;
|
|
3108
|
+
if (result.availableHeadings.length > 0) {
|
|
3109
|
+
const list = result.availableHeadings.map((h) => `${"#".repeat(h.level)} ${h.text}`).join("\n");
|
|
3110
|
+
msg += `\n\nAvailable headings in the document:\n\n${list}`;
|
|
3111
|
+
}
|
|
3112
|
+
throw new Error(msg);
|
|
3113
|
+
}
|
|
3114
|
+
blocks = result.blocks;
|
|
3115
|
+
} else blocks = await client.getDocxBlocks(docToken);
|
|
3000
3116
|
logger.info(`Fetched ${blocks.length} blocks`);
|
|
3001
3117
|
const parser = new Parser();
|
|
3002
3118
|
registerBuiltinParsers(parser);
|
|
@@ -3022,4 +3138,4 @@ async function convert(opts) {
|
|
|
3022
3138
|
//#endregion
|
|
3023
3139
|
export { parseWikiUrl as n, setLogLevel as r, convert as t };
|
|
3024
3140
|
|
|
3025
|
-
//# sourceMappingURL=converter-
|
|
3141
|
+
//# sourceMappingURL=converter-Bqb4bqxh.js.map
|