@adobe/helix-docx2md 1.0.21 → 1.1.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,24 @@
1
+ # [1.1.0](https://github.com/adobe/helix-docx2md/compare/v1.0.23...v1.1.0) (2022-09-09)
2
+
3
+
4
+ ### Features
5
+
6
+ * add support for gridtables ([aff5f95](https://github.com/adobe/helix-docx2md/commit/aff5f951e1172f7f9ca4e14c8935c6e4b0890dd9))
7
+
8
+ ## [1.0.23](https://github.com/adobe/helix-docx2md/compare/v1.0.22...v1.0.23) (2022-08-29)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * **deps:** update dependency @adobe/helix-mediahandler to v1.0.42 ([#108](https://github.com/adobe/helix-docx2md/issues/108)) ([ae7726c](https://github.com/adobe/helix-docx2md/commit/ae7726ccce4b26802c42b515288930ce2c328218))
14
+
15
+ ## [1.0.22](https://github.com/adobe/helix-docx2md/compare/v1.0.21...v1.0.22) (2022-07-28)
16
+
17
+
18
+ ### Bug Fixes
19
+
20
+ * **deps:** update dependency @adobe/helix-markdown-support to v3.1.8 ([0884578](https://github.com/adobe/helix-docx2md/commit/0884578e7fb5e190e4791972fd307242485a55d2))
21
+
1
22
  ## [1.0.21](https://github.com/adobe/helix-docx2md/compare/v1.0.20...v1.0.21) (2022-07-26)
2
23
 
3
24
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/helix-docx2md",
3
- "version": "1.0.21",
3
+ "version": "1.1.0",
4
4
  "description": "Helix library that converts word documents to markdown",
5
5
  "type": "module",
6
6
  "main": "./src/index.js",
@@ -13,7 +13,6 @@
13
13
  },
14
14
  "scripts": {
15
15
  "test": "c8 mocha",
16
- "test-ci": "c8 mocha && codecov",
17
16
  "lint": "eslint .",
18
17
  "semantic-release": "semantic-release"
19
18
  },
@@ -34,9 +33,9 @@
34
33
  },
35
34
  "homepage": "https://github.com/adobe/helix-docx2md#readme",
36
35
  "dependencies": {
37
- "@adobe/helix-markdown-support": "3.1.7",
36
+ "@adobe/helix-markdown-support": "5.0.7",
38
37
  "@adobe/helix-shared-process-queue": "1.1.5",
39
- "@adobe/mammoth": "1.4.15-bleeding.1",
38
+ "@adobe/mammoth": "1.5.1-bleeding.1",
40
39
  "dirname-filename-esm": "1.1.1",
41
40
  "mdast-util-to-markdown": "1.3.0",
42
41
  "mdast-util-to-string": "3.1.0",
@@ -45,26 +44,27 @@
45
44
  "remark-stringify": "10.0.2",
46
45
  "unified": "10.1.2",
47
46
  "unist-util-find": "1.0.2",
48
- "unist-util-visit": "4.1.0",
47
+ "unist-util-visit": "4.1.1",
49
48
  "yauzl": "2.10.0"
50
49
  },
51
50
  "devDependencies": {
52
51
  "@adobe/eslint-config-helix": "1.3.2",
53
- "@adobe/helix-mediahandler": "1.0.36",
52
+ "@adobe/helix-mediahandler": "1.2.0",
54
53
  "@semantic-release/changelog": "6.0.1",
55
54
  "@semantic-release/exec": "6.0.3",
56
55
  "@semantic-release/git": "10.0.1",
57
56
  "c8": "7.12.0",
58
- "dotenv": "16.0.1",
59
- "eslint": "8.20.0",
57
+ "dotenv": "16.0.2",
58
+ "eslint": "8.23.0",
59
+ "eslint-import-resolver-exports": "1.0.0-beta.2",
60
60
  "eslint-plugin-header": "3.1.1",
61
61
  "eslint-plugin-import": "2.26.0",
62
62
  "husky": "8.0.1",
63
- "junit-report-builder": "3.0.0",
63
+ "junit-report-builder": "3.0.1",
64
64
  "lint-staged": "13.0.3",
65
65
  "mocha": "10.0.0",
66
66
  "mocha-multi-reporters": "1.5.1",
67
- "semantic-release": "19.0.3",
67
+ "semantic-release": "19.0.5",
68
68
  "unist-util-inspect": "7.0.1"
69
69
  },
70
70
  "lint-staged": {
@@ -13,6 +13,7 @@
13
13
  /**
14
14
  * Converts the docx AST to markdown ast.
15
15
  * @param {object} tree the docx ast
16
+ * @param {boolean} [opts.gridtables = false] generate gridtables
16
17
  * @return {object} the markdown ast
17
18
  */
18
- export default function dast2mdast(tree: object): object;
19
+ export default function dast2mdast(tree: object, opts?: object): object;
@@ -30,9 +30,11 @@ import handlers from './handlers/index.js';
30
30
  *
31
31
  * Converts the docx AST to markdown ast.
32
32
  * @param {object} tree the docx ast
33
+ * @param {object} opts options
34
+ * @param {boolean} [opts.gridtables = false] generate gridtables
33
35
  * @return {object} the markdown ast
34
36
  */
35
- export default async function dast2mdast(tree) {
37
+ export default async function dast2mdast(tree, opts = {}) {
36
38
  const byId = {};
37
39
 
38
40
  const h = (type, props, children) => {
@@ -59,6 +61,7 @@ export default async function dast2mdast(tree) {
59
61
  h.frozenBaseUrl = null;
60
62
  h.handlers = handlers;
61
63
  h.numbering = {};
64
+ h.gridtables = opts.gridtables;
62
65
 
63
66
  /** @type {ListContainers} */
64
67
  h.listContainers = [[]];
@@ -9,9 +9,45 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import { TYPE_CELL } from '@adobe/helix-markdown-support/gridtable';
12
13
  import all from '../all.js';
13
14
 
15
+ const V_ALIGN = {
16
+ top: '',
17
+ center: 'middle',
18
+ bottom: 'bottom',
19
+ };
20
+
21
+ const H_ALIGN = {
22
+ left: '',
23
+ right: 'right',
24
+ center: 'center',
25
+ distribute: 'justify',
26
+ };
27
+
28
+ function gridTableCell(h, node) {
29
+ const props = {};
30
+ if (node.children.length > 0) {
31
+ props.align = H_ALIGN[node.children[0].alignment];
32
+ }
33
+ props.valign = V_ALIGN[node.verticalAlignment];
34
+
35
+ if (node.rowSpan > 1) {
36
+ props.rowSpan = node.rowSpan;
37
+ }
38
+ if (node.colSpan > 1) {
39
+ props.colSpan = node.colSpan;
40
+ }
41
+ h.listContainers.unshift([]);
42
+ const c = h(TYPE_CELL, props, all(h, node));
43
+ h.listContainers.shift();
44
+ return c;
45
+ }
46
+
14
47
  export default function cell(h, node) {
48
+ if (h.gridtables) {
49
+ return gridTableCell(h, node);
50
+ }
15
51
  // remember alignment for table
16
52
  const props = {};
17
53
  if (node.children.length > 0) {
@@ -9,8 +9,20 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import { TYPE_ROW } from '@adobe/helix-markdown-support/gridtable';
12
13
  import all from '../all.js';
13
14
 
15
+ function gridTableRow(h, node) {
16
+ const props = {};
17
+ if (node.isHeader) {
18
+ props.isHeader = true;
19
+ }
20
+ return h(TYPE_ROW, props, all(h, node));
21
+ }
22
+
14
23
  export default function row(h, node) {
24
+ if (h.gridtables) {
25
+ return gridTableRow(h, node);
26
+ }
15
27
  return h('tableRow', all(h, node));
16
28
  }
@@ -9,9 +9,32 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
+ import { TYPE_TABLE, TYPE_HEADER, TYPE_BODY } from '@adobe/helix-markdown-support/gridtable';
12
13
  import all from '../all.js';
13
14
 
15
+ function gridTable(h, node) {
16
+ let rows = all(h, node);
17
+ if (rows[0]?.isHeader) {
18
+ const header = [];
19
+ while (rows[0]?.isHeader) {
20
+ const row = rows.shift();
21
+ delete row.isHeader;
22
+ header.push(row);
23
+ }
24
+ rows = [
25
+ h(TYPE_HEADER, header),
26
+ h(TYPE_BODY, rows),
27
+ ];
28
+ }
29
+
30
+ return h(TYPE_TABLE, rows);
31
+ }
32
+
14
33
  export default function table(h, node) {
34
+ if (h.gridtables) {
35
+ return gridTable(h, node);
36
+ }
37
+
15
38
  const nodes = all(h, node);
16
39
 
17
40
  // get alignment of first row, if any
package/src/docx2md.d.ts CHANGED
@@ -66,6 +66,8 @@ export declare interface Docx2mdOptions {
66
66
 
67
67
  parserOptions?: object;
68
68
 
69
+ gridtables?: boolean;
70
+
69
71
  }
70
72
 
71
73
  /**
package/src/docx2md.js CHANGED
@@ -32,7 +32,7 @@ export default async function docx2md(doc, opts) {
32
32
  listener('beforeToMdast', dast);
33
33
  }
34
34
 
35
- const mdast = await dast2mdast(dast);
35
+ const mdast = await dast2mdast(dast, opts);
36
36
 
37
37
  if (listener) {
38
38
  listener('afterToMdast', mdast);
@@ -23,6 +23,8 @@ export declare interface MDast2mdOptions {
23
23
  mediaHandler?: MediaHandler,
24
24
 
25
25
  source?: string,
26
+
27
+ gridtables?: boolean,
26
28
  }
27
29
 
28
30
  /**
@@ -21,12 +21,15 @@ import {
21
21
  robustTables,
22
22
  suppressSpaceCode,
23
23
  sanitizeHeading,
24
- remarkMatter,
25
24
  breaksAsSpaces,
26
25
  sanitizeFormats,
27
26
  sanitizeText,
27
+ sanitizeTextAndFormats,
28
28
  sanitizeLinks,
29
+ imageReferences,
29
30
  } from '@adobe/helix-markdown-support';
31
+ import { remarkMatter } from '@adobe/helix-markdown-support/matter';
32
+ import { remarkGridTable } from '@adobe/helix-markdown-support/gridtable';
30
33
 
31
34
  import processImages from './mdast-process-images.js';
32
35
  import sanitizeAutoEmbeds from './mdast-sanitize-autoembeds.js';
@@ -45,18 +48,46 @@ export default async function mdast2md(mdast, opts = {}) {
45
48
  listener('beforePostProcess', mdast);
46
49
  }
47
50
 
51
+ const processor = unified()
52
+ .use(stringify, {
53
+ strong: '*',
54
+ emphasis: '_',
55
+ bullet: '-',
56
+ fence: '`',
57
+ fences: true,
58
+ incrementListMarker: true,
59
+ rule: '-',
60
+ ruleRepetition: 3,
61
+ ruleSpaces: false,
62
+ })
63
+ .use(gfm)
64
+ .use(remarkMatter)
65
+ .use(orderedListPlugin);
66
+
48
67
  // process.stdout.write(inspect(mdast));
49
68
  // process.stdout.write('\n');
50
69
 
51
- await sanitizeHeading(mdast);
52
- await sanitizeFormats(mdast); // collapse formats once
53
- await sanitizeLinks(mdast);
54
- await sanitizeFormats(mdast); // and again for sanitized links
55
- await sanitizeText(mdast);
56
- await suppressSpaceCode(mdast);
57
- await sanitizeAutoEmbeds(mdast);
58
- await processImages(log, mdast, opts.mediaHandler, opts.source);
59
- await robustTables(mdast);
70
+ if (opts.gridtables) {
71
+ await sanitizeHeading(mdast);
72
+ await sanitizeLinks(mdast);
73
+ await sanitizeTextAndFormats(mdast);
74
+ await suppressSpaceCode(mdast);
75
+ await sanitizeAutoEmbeds(mdast);
76
+ await processImages(log, mdast, opts.mediaHandler, opts.source);
77
+ await imageReferences(mdast);
78
+ processor.use(remarkGridTable);
79
+ } else {
80
+ await sanitizeHeading(mdast);
81
+ await sanitizeFormats(mdast); // collapse formats once
82
+ await sanitizeLinks(mdast);
83
+ await sanitizeFormats(mdast); // and again for sanitized links
84
+ await sanitizeText(mdast);
85
+ await suppressSpaceCode(mdast);
86
+ await sanitizeAutoEmbeds(mdast);
87
+ await processImages(log, mdast, opts.mediaHandler, opts.source);
88
+ await robustTables(mdast);
89
+ processor.use(breaksAsSpaces);
90
+ }
60
91
 
61
92
  // process.stdout.write(inspect(mdast));
62
93
  // process.stdout.write('\n');
@@ -67,26 +98,7 @@ export default async function mdast2md(mdast, opts = {}) {
67
98
  }
68
99
 
69
100
  // noinspection JSVoidFunctionReturnValueUsed
70
- const md = unified()
71
- .use(stringify, {
72
- strong: '*',
73
- emphasis: '_',
74
- bullet: '-',
75
- fence: '`',
76
- fences: true,
77
- incrementListMarker: true,
78
- rule: '-',
79
- ruleRepetition: 3,
80
- ruleSpaces: false,
81
- })
82
- .use(gfm, {
83
- // tableCellPadding: false,
84
- // tablePipeAlign: false,
85
- })
86
- .use(breaksAsSpaces)
87
- .use(remarkMatter)
88
- .use(orderedListPlugin)
89
- .stringify(mdast);
101
+ const md = processor.stringify(mdast);
90
102
 
91
103
  if (listener) {
92
104
  listener('afterToMarkdown', md);