lark-docx2md 0.2.1-beta.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -141,8 +141,3 @@ ISC
141
141
  ## 🙏 致谢
142
142
 
143
143
  本项目开发过程中获得了 [LINUX DO](https://linux.do/latest) 社区佬友的帮助,本产品会在社区发布,感谢社区的支持。
144
-
145
- ## TODO
146
-
147
- - [ ] 富文本转换
148
- - [x] 电子表格导出为 Markdown
package/dist/cli.js CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { r as setLogLevel, t as convert } from "./converter-FwY1m1jm.js";
2
+ import { r as setLogLevel, t as convert } from "./converter-C5Nrkvfa.js";
3
3
  import { Command } from "commander";
4
4
  import { LoggerLevel } from "@larksuiteoapi/node-sdk";
5
5
  //#region src/cli.ts
@@ -835,7 +835,7 @@ const sheetResolvedSerializer = {
835
835
  if (node.type !== "sheetResolved") return "";
836
836
  let out = "";
837
837
  for (const s of node.sheets) {
838
- out += `## 工作表:${s.title}\n\n`;
838
+ if (node.showTitle) out += `## ${node.title}-${s.title}\n\n`;
839
839
  if (s.error) {
840
840
  out += `> ${s.error}\n\n`;
841
841
  continue;
@@ -2548,19 +2548,19 @@ function whiteboardNodesToYaml(nodes) {
2548
2548
  const nctx = prepareNodeContext(nodes);
2549
2549
  const yamlNodes = nctx.rootNodes.map((n) => convertNode(n, nctx)).filter((n) => n !== null);
2550
2550
  const imageTokens = [];
2551
- collectImageTokens$1(yamlNodes, imageTokens);
2551
+ collectImageTokens(yamlNodes, imageTokens);
2552
2552
  return {
2553
2553
  yaml: serializeYaml({ whiteboard: { nodes: yamlNodes } }),
2554
2554
  imageTokens
2555
2555
  };
2556
2556
  }
2557
2557
  /** 递归收集所有 image 节点的 token */
2558
- function collectImageTokens$1(nodes, out) {
2558
+ function collectImageTokens(nodes, out) {
2559
2559
  for (const node of nodes) {
2560
2560
  if (node.type === "image" && node.token) out.push(node.token);
2561
- if (Array.isArray(node.children)) collectImageTokens$1(node.children, out);
2561
+ if (Array.isArray(node.children)) collectImageTokens(node.children, out);
2562
2562
  if (Array.isArray(node.cells)) {
2563
- for (const cell of node.cells) if (Array.isArray(cell.children)) collectImageTokens$1(cell.children, out);
2563
+ for (const cell of node.cells) if (Array.isArray(cell.children)) collectImageTokens(cell.children, out);
2564
2564
  }
2565
2565
  }
2566
2566
  }
@@ -2666,16 +2666,16 @@ function createLogger(module) {
2666
2666
  //#endregion
2667
2667
  //#region src/md-ast/transformer.ts
2668
2668
  const logger$1 = createLogger("transformer");
2669
+ const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
2669
2670
  var MdTransformer = class {
2670
- constructor(client, opts) {
2671
+ constructor(client, opts, sourceType = "docx") {
2671
2672
  this.client = client;
2672
2673
  this.opts = opts;
2674
+ this.sourceType = sourceType;
2673
2675
  }
2674
2676
  async transform(ast) {
2675
- const imageTokens = collectImageTokens(ast);
2676
- const whiteboardTokens = collectWhiteboardTokens(ast);
2677
- const sheetTokens = collectSheetTokens(ast);
2678
- replaceInAst(ast, await this.resolveImages(imageTokens), await this.resolveWhiteboards(whiteboardTokens), await this.resolveSheets(sheetTokens));
2677
+ const { imageTokens, whiteboardTokens, sheetRefs } = collectTokens(ast);
2678
+ replaceInAst(ast, await this.resolveImages(imageTokens), await this.resolveWhiteboards(whiteboardTokens), await this.resolveSheets(sheetRefs));
2679
2679
  }
2680
2680
  async resolveImages(tokens) {
2681
2681
  const map = /* @__PURE__ */ new Map();
@@ -2814,17 +2814,18 @@ var MdTransformer = class {
2814
2814
  }
2815
2815
  return yamlContent;
2816
2816
  }
2817
- async resolveSheets(tokens) {
2817
+ async resolveSheets(refs) {
2818
2818
  const map = /* @__PURE__ */ new Map();
2819
- const uniqueTokens = [...new Set(tokens)];
2820
- if (uniqueTokens.length === 0) return map;
2821
- for (const token of uniqueTokens) {
2822
- if (!token) continue;
2819
+ const uniqueRefs = refs.filter((r, i, arr) => arr.findIndex((x) => x.raw === r.raw) === i);
2820
+ if (uniqueRefs.length === 0) return map;
2821
+ for (const { raw, spreadsheetToken, sheetId } of uniqueRefs) {
2822
+ if (!spreadsheetToken) continue;
2823
2823
  try {
2824
- const info = await this.client.getSpreadsheetInfo(token);
2825
- const list = await this.client.listSheets(token);
2824
+ const info = await this.client.getSpreadsheetInfo(spreadsheetToken);
2825
+ const list = await this.client.listSheets(spreadsheetToken);
2826
+ const sheetsToProcess = this.sourceType === "sheet" ? list : list.filter((s) => s.sheet_id === sheetId);
2826
2827
  const resolved = [];
2827
- for (const s of list) {
2828
+ for (const s of sheetsToProcess) {
2828
2829
  if (s.hidden) continue;
2829
2830
  if (s.resource_type && s.resource_type !== "sheet") {
2830
2831
  resolved.push({
@@ -2836,7 +2837,7 @@ var MdTransformer = class {
2836
2837
  continue;
2837
2838
  }
2838
2839
  try {
2839
- const meta = await this.client.getSheetMeta(token, s.sheet_id);
2840
+ const meta = await this.client.getSheetMeta(spreadsheetToken, s.sheet_id);
2840
2841
  const { row_count = 0, column_count = 0 } = meta.grid_properties ?? {};
2841
2842
  if (!row_count || !column_count) {
2842
2843
  resolved.push({
@@ -2847,7 +2848,7 @@ var MdTransformer = class {
2847
2848
  continue;
2848
2849
  }
2849
2850
  const endCol = columnIndexToLetter(column_count);
2850
- const trimmed = trimTrailingEmpty(expandMerges((await this.client.readSheetValues(token, `${s.sheet_id}!A1:${endCol}${row_count}`) ?? []).map((row) => row.map(cellToMd)), meta.merges ?? []));
2851
+ const trimmed = trimTrailingEmpty(expandMerges((await this.client.readSheetValues(spreadsheetToken, `${s.sheet_id}!A1:${endCol}${row_count}`) ?? []).map((row) => row.map(cellToMd)), meta.merges ?? []));
2851
2852
  resolved.push({
2852
2853
  title: s.title ?? "",
2853
2854
  kind: "grid",
@@ -2860,45 +2861,59 @@ var MdTransformer = class {
2860
2861
  rows: [],
2861
2862
  error: `读取失败:${e.message}`
2862
2863
  });
2864
+ } finally {
2865
+ await sleep(600);
2863
2866
  }
2864
2867
  }
2865
- map.set(token, {
2868
+ map.set(raw, {
2866
2869
  type: "sheetResolved",
2867
2870
  title: info.title ?? "",
2868
- sheets: resolved
2871
+ sheets: resolved,
2872
+ showTitle: this.sourceType === "sheet"
2869
2873
  });
2870
2874
  } catch (e) {
2871
- logger$1.warn(`Failed to render sheet ${token}:`, e.message);
2872
- map.set(token, {
2875
+ logger$1.warn(`Failed to resolve sheet ${raw}:`, e.message);
2876
+ map.set(raw, {
2873
2877
  type: "sheetResolved",
2874
2878
  title: "",
2875
- sheets: []
2879
+ sheets: [],
2880
+ showTitle: false
2876
2881
  });
2877
2882
  }
2878
2883
  }
2879
2884
  return map;
2880
2885
  }
2881
2886
  };
2882
- function collectImageTokens(node) {
2883
- const tokens = [];
2884
- traverseBlockAst(node, (n) => {
2885
- if (n.type === "image") tokens.push(n.src);
2886
- });
2887
- return tokens;
2888
- }
2889
- function collectWhiteboardTokens(node) {
2890
- const tokens = [];
2891
- traverseBlockAst(node, (n) => {
2892
- if (n.type === "whiteboard") tokens.push(n.token);
2893
- });
2894
- return tokens;
2895
- }
2896
- function collectSheetTokens(node) {
2897
- const tokens = [];
2887
+ function collectTokens(node) {
2888
+ const imageTokens = [];
2889
+ const whiteboardTokens = [];
2890
+ const sheetRefs = [];
2898
2891
  traverseBlockAst(node, (n) => {
2899
- if (n.type === "sheet") tokens.push(n.token);
2892
+ switch (n.type) {
2893
+ case "image":
2894
+ imageTokens.push(n.src);
2895
+ break;
2896
+ case "whiteboard":
2897
+ whiteboardTokens.push(n.token);
2898
+ break;
2899
+ case "sheet": {
2900
+ const lastIdx = n.token.lastIndexOf("_");
2901
+ const spreadsheetToken = lastIdx > 0 ? n.token.slice(0, lastIdx) : n.token;
2902
+ const sheetId = lastIdx > 0 ? n.token.slice(lastIdx + 1) : void 0;
2903
+ sheetRefs.push({
2904
+ raw: n.token,
2905
+ spreadsheetToken,
2906
+ sheetId
2907
+ });
2908
+ break;
2909
+ }
2910
+ }
2900
2911
  });
2901
- return tokens;
2912
+ return {
2913
+ imageTokens,
2914
+ whiteboardTokens,
2915
+ sheetRefs
2916
+ };
2902
2917
  }
2903
2918
  function replaceInAst(node, imageMap, whiteboardMap, sheetMap) {
2904
2919
  if (node.type === "image") {
@@ -2983,7 +2998,7 @@ async function convert(opts) {
2983
2998
  registerBuiltinParsers(parser);
2984
2999
  ast = parser.parse(doc, blocks);
2985
3000
  }
2986
- await new MdTransformer(client, opts).transform(ast);
3001
+ await new MdTransformer(client, opts, objType === "sheet" ? "sheet" : "docx").transform(ast);
2987
3002
  const serializer = new MdSerializer();
2988
3003
  registerBuiltinSerializers(serializer);
2989
3004
  const markdown = serializer.serialize(ast);
@@ -3003,4 +3018,4 @@ async function convert(opts) {
3003
3018
  //#endregion
3004
3019
  export { parseWikiUrl as n, setLogLevel as r, convert as t };
3005
3020
 
3006
- //# sourceMappingURL=converter-FwY1m1jm.js.map
3021
+ //# sourceMappingURL=converter-C5Nrkvfa.js.map