lark-docx2md 0.2.1-beta.3 → 0.3.1-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -8
- package/dist/cli.js +1 -1
- package/dist/{converter-Bxdyw2k9.d.ts → converter-C_2JDVY1.d.ts} +2 -1
- package/dist/converter-C_2JDVY1.d.ts.map +1 -0
- package/dist/{converter-C5Nrkvfa.js → converter-jW2Zu4Pb.js} +29 -25
- package/dist/converter-jW2Zu4Pb.js.map +1 -0
- package/dist/converter.js +1 -1
- package/package.json +1 -1
- package/dist/converter-Bxdyw2k9.d.ts.map +0 -1
- package/dist/converter-C5Nrkvfa.js.map +0 -1
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
将飞书文档转换为 Markdown 文件的命令行工具。
|
|
6
6
|
|
|
7
|
-
> 支持的飞书文档链接格式:`https://*.feishu.cn/wiki/*`、`https://*.feishu.cn/sheets
|
|
7
|
+
> 支持的飞书文档链接格式:`https://*.feishu.cn/wiki/*`、`https://*.feishu.cn/sheets/*`(支持 `?sheet=<sheetId>` 指定子表)
|
|
8
8
|
|
|
9
9
|
## 使用
|
|
10
10
|
|
|
@@ -32,7 +32,7 @@ npx -y lark-docx2md@latest download <url>
|
|
|
32
32
|
|
|
33
33
|
| 参数 | 说明 | 环境变量 | 默认值 |
|
|
34
34
|
|--------------------------|-------------------------------------------|------------------------------|-----------------------|
|
|
35
|
-
| `<url>` | 飞书文档链接(`https://*.feishu.cn/wiki/*` 或 `/sheets
|
|
35
|
+
| `<url>` | 飞书文档链接(`https://*.feishu.cn/wiki/*` 或 `/sheets/*`,支持 `?sheet=<sheetId>` 指定子表) | — | — |
|
|
36
36
|
| `--app-id <id>` | 飞书应用 App ID | `LARK_DOCX2MD_APP_ID` | — |
|
|
37
37
|
| `--app-secret <secret>` | 飞书应用 App Secret | `LARK_DOCX2MD_APP_SECRET` | — |
|
|
38
38
|
| `-o, --output <dir>` | 输出目录 | `LARK_DOCX2MD_OUTPUT` | `./larkDocx2mdOutput` |
|
|
@@ -55,7 +55,7 @@ npx -y lark-docx2md@latest download <url>
|
|
|
55
55
|
- 输出标准 Markdown 文件
|
|
56
56
|
- 支持飞书画板,输出格式:`base64`(data URI 内嵌)、`inline-svg`(SVG 标签内嵌)、`svg`(独立文件)、`yaml`(AI
|
|
57
57
|
友好结构化数据)。详见 [画板支持说明](./WHITEBOARD.md)
|
|
58
|
-
- 支持飞书电子表格(独立 sheet URL 或 docx 内嵌 sheet 块),输出 GFM
|
|
58
|
+
- 支持飞书电子表格(独立 sheet URL 或 docx 内嵌 sheet 块),输出 GFM 表格,自动展开合并单元格;支持 `?sheet=<sheetId>` 仅处理指定子表。详见 [电子表格支持说明](./SHEET.md)
|
|
59
59
|
|
|
60
60
|
### 支持的内容块类型
|
|
61
61
|
|
|
@@ -141,8 +141,3 @@ ISC
|
|
|
141
141
|
## 🙏 致谢
|
|
142
142
|
|
|
143
143
|
本项目开发过程中获得了 [LINUX DO](https://linux.do/latest) 社区佬友的帮助,本产品会在社区发布,感谢社区的支持。
|
|
144
|
-
|
|
145
|
-
## TODO
|
|
146
|
-
|
|
147
|
-
- [ ] 富文本转换
|
|
148
|
-
- [x] 电子表格导出为 Markdown
|
package/dist/cli.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { r as setLogLevel, t as convert } from "./converter-C5Nrkvfa.js";
|
|
2
|
+
import { r as setLogLevel, t as convert } from "./converter-jW2Zu4Pb.js";
|
|
3
3
|
import { Command } from "commander";
|
|
4
4
|
import { LoggerLevel } from "@larksuiteoapi/node-sdk";
|
|
5
5
|
//#region src/cli.ts
|
|
@@ -28,10 +28,11 @@ interface ConvertResult {
|
|
|
28
28
|
declare function parseWikiUrl(url: string): {
|
|
29
29
|
docType: string;
|
|
30
30
|
docToken: string;
|
|
31
|
+
sheetId?: string;
|
|
31
32
|
};
|
|
32
33
|
declare function convert(opts: ConvertOptions): Promise<ConvertResult>;
|
|
33
34
|
//# sourceMappingURL=converter.d.ts.map
|
|
34
35
|
|
|
35
36
|
//#endregion
|
|
36
37
|
export { convert, parseWikiUrl };
|
|
37
|
-
//# sourceMappingURL=converter-Bxdyw2k9.d.ts.map
|
|
38
|
+
//# sourceMappingURL=converter-C_2JDVY1.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"converter-C_2JDVY1.d.ts","names":[],"sources":["../src/types.ts","../src/converter.ts"],"mappings":";;;;AAsCa,KAhBD,SAAA,GAgBC,OAAA,GAAA,QAAA;AACE,KAdH,WAAA,GAcG,QAAA,GAAA,QAAA,GAAA,OAAA;AACP,KAZI,QAAA,GAYJ,QAAA,GAAA,YAAA,GAAA,KAAA,GAAA,MAAA;AACI,KAVA,aAAA,GAUA,MAAA,GAAA,KAAA,GAAA,CAAA,MAAA,GAAA,CAAA,CAAA,CAAA;AAAQ,UARH,cAAA,CAQG;EASH,KAAA,EAAA,MAAA;;;;ECnCD,SAAA,EDuBH,SCvBe;EAUN,WAAO,EDcd,WCdc;EAAA,IAAA,EDerB,aCfqB;UAAQ,EDgBzB,QChByB;OAAyB,CAAA,EAAA,OAAA,GAAA,OAAA;;AAAD,UDyB5C,aAAA,CCzB4C;;;;;;;;iBAV7C,YAAA;;EDOJ,QAAA,EAAA,MAAS;EAGT,OAAA,CAAA,EAAA,MAAW;AAGvB,CAAA;AAGY,iBCNU,OAAA,CDMG,IAAA,ECNY,cDMZ,CAAA,ECN6B,ODM7B,CCNqC,aDMrC,CAAA;AAEzB"}
|
|
@@ -644,8 +644,9 @@ var MdSerializer = class {
|
|
|
644
644
|
register(serializer) {
|
|
645
645
|
this.registry.register(serializer.type, serializer);
|
|
646
646
|
}
|
|
647
|
-
serialize(root) {
|
|
647
|
+
serialize(root, options = {}) {
|
|
648
648
|
const ctx = {
|
|
649
|
+
sourceType: options.sourceType ?? "docx",
|
|
649
650
|
serialize: (node, indent = 0) => {
|
|
650
651
|
const serializer = this.registry.get(node.type);
|
|
651
652
|
if (serializer) return serializer.serialize(node, ctx);
|
|
@@ -831,11 +832,11 @@ const htmlSerializer = {
|
|
|
831
832
|
};
|
|
832
833
|
const sheetResolvedSerializer = {
|
|
833
834
|
type: "sheetResolved",
|
|
834
|
-
serialize(node) {
|
|
835
|
+
serialize(node, ctx) {
|
|
835
836
|
if (node.type !== "sheetResolved") return "";
|
|
836
837
|
let out = "";
|
|
837
838
|
for (const s of node.sheets) {
|
|
838
|
-
if (
|
|
839
|
+
if (ctx.sourceType === "sheet") out += `## ${node.title}-${s.title}\n\n`;
|
|
839
840
|
if (s.error) {
|
|
840
841
|
out += `> ${s.error}\n\n`;
|
|
841
842
|
continue;
|
|
@@ -2823,7 +2824,7 @@ var MdTransformer = class {
|
|
|
2823
2824
|
try {
|
|
2824
2825
|
const info = await this.client.getSpreadsheetInfo(spreadsheetToken);
|
|
2825
2826
|
const list = await this.client.listSheets(spreadsheetToken);
|
|
2826
|
-
const sheetsToProcess = this.sourceType === "sheet" ? list : list.filter((s) => s.sheet_id === sheetId);
|
|
2827
|
+
const sheetsToProcess = this.sourceType === "sheet" && !sheetId ? list : list.filter((s) => s.sheet_id === sheetId);
|
|
2827
2828
|
const resolved = [];
|
|
2828
2829
|
for (const s of sheetsToProcess) {
|
|
2829
2830
|
if (s.hidden) continue;
|
|
@@ -2868,16 +2869,14 @@ var MdTransformer = class {
|
|
|
2868
2869
|
map.set(raw, {
|
|
2869
2870
|
type: "sheetResolved",
|
|
2870
2871
|
title: info.title ?? "",
|
|
2871
|
-
sheets: resolved
|
|
2872
|
-
showTitle: this.sourceType === "sheet"
|
|
2872
|
+
sheets: resolved
|
|
2873
2873
|
});
|
|
2874
2874
|
} catch (e) {
|
|
2875
2875
|
logger$1.warn(`Failed to resolve sheet ${raw}:`, e.message);
|
|
2876
2876
|
map.set(raw, {
|
|
2877
2877
|
type: "sheetResolved",
|
|
2878
2878
|
title: "",
|
|
2879
|
-
sheets: []
|
|
2880
|
-
showTitle: false
|
|
2879
|
+
sheets: []
|
|
2881
2880
|
});
|
|
2882
2881
|
}
|
|
2883
2882
|
}
|
|
@@ -2960,14 +2959,16 @@ const logger = createLogger("converter");
|
|
|
2960
2959
|
function parseWikiUrl(url) {
|
|
2961
2960
|
const m = url.match(/^https:\/\/[\w.-]+\/(docs|docx|wiki|sheets)\/([a-zA-Z0-9]+)/);
|
|
2962
2961
|
if (!m) throw new Error("Invalid feishu document URL");
|
|
2962
|
+
const sheetId = new URL(url).searchParams.get("sheet") ?? void 0;
|
|
2963
2963
|
return {
|
|
2964
2964
|
docType: m[1],
|
|
2965
|
-
docToken: m[2]
|
|
2965
|
+
docToken: m[2],
|
|
2966
|
+
sheetId
|
|
2966
2967
|
};
|
|
2967
2968
|
}
|
|
2968
2969
|
async function convert(opts) {
|
|
2969
|
-
const { docType, docToken: rawToken } = parseWikiUrl(opts.url);
|
|
2970
|
-
logger.info("Captured document token:", rawToken);
|
|
2970
|
+
const { docType, docToken: rawToken, sheetId } = parseWikiUrl(opts.url);
|
|
2971
|
+
logger.info("Captured document token:", rawToken, sheetId ? `sheetId: ${sheetId}` : "");
|
|
2971
2972
|
const sdkLoggerLevel = opts.agent ? LoggerLevel.error : LoggerLevel.warn;
|
|
2972
2973
|
const client = createClient(opts.appId, opts.appSecret, sdkLoggerLevel);
|
|
2973
2974
|
let docToken = rawToken;
|
|
@@ -2979,18 +2980,21 @@ async function convert(opts) {
|
|
|
2979
2980
|
logger.info("Resolved wiki node:", objType, docToken);
|
|
2980
2981
|
} else if (docType === "sheets") objType = "sheet";
|
|
2981
2982
|
let ast;
|
|
2982
|
-
if (objType === "sheet")
|
|
2983
|
-
|
|
2984
|
-
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
|
|
2993
|
-
|
|
2983
|
+
if (objType === "sheet") {
|
|
2984
|
+
const info = await client.getSpreadsheetInfo(docToken);
|
|
2985
|
+
const sheetToken = sheetId ? `${docToken}_${sheetId}` : docToken;
|
|
2986
|
+
ast = {
|
|
2987
|
+
type: "page",
|
|
2988
|
+
title: [{
|
|
2989
|
+
type: "text",
|
|
2990
|
+
content: info.title ?? ""
|
|
2991
|
+
}],
|
|
2992
|
+
children: [{
|
|
2993
|
+
type: "sheet",
|
|
2994
|
+
token: sheetToken
|
|
2995
|
+
}]
|
|
2996
|
+
};
|
|
2997
|
+
} else {
|
|
2994
2998
|
const doc = await client.getDocxDocument(docToken);
|
|
2995
2999
|
const blocks = await client.getDocxBlocks(docToken);
|
|
2996
3000
|
logger.info(`Fetched ${blocks.length} blocks`);
|
|
@@ -3001,7 +3005,7 @@ async function convert(opts) {
|
|
|
3001
3005
|
await new MdTransformer(client, opts, objType === "sheet" ? "sheet" : "docx").transform(ast);
|
|
3002
3006
|
const serializer = new MdSerializer();
|
|
3003
3007
|
registerBuiltinSerializers(serializer);
|
|
3004
|
-
const markdown = serializer.serialize(ast);
|
|
3008
|
+
const markdown = serializer.serialize(ast, { sourceType: objType === "sheet" ? "sheet" : "docx" });
|
|
3005
3009
|
let filePath;
|
|
3006
3010
|
if (!opts.agent || opts.agent === "local") {
|
|
3007
3011
|
fs.mkdirSync(opts.output, { recursive: true });
|
|
@@ -3018,4 +3022,4 @@ async function convert(opts) {
|
|
|
3018
3022
|
//#endregion
|
|
3019
3023
|
export { parseWikiUrl as n, setLogLevel as r, convert as t };
|
|
3020
3024
|
|
|
3021
|
-
//# sourceMappingURL=converter-C5Nrkvfa.js.map
|
|
3025
|
+
//# sourceMappingURL=converter-jW2Zu4Pb.js.map
|