@adobe/helix-md2docx 2.1.5 → 2.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## [2.1.6](https://github.com/adobe/helix-md2docx/compare/v2.1.5...v2.1.6) (2023-06-26)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * error with newlines and <u>, <sub>, <sup> ([52821fa](https://github.com/adobe/helix-md2docx/commit/52821fa1d1999eaa81a8114127452d118a23ad4f)), closes [#276](https://github.com/adobe/helix-md2docx/issues/276)
7
+
1
8
  ## [2.1.5](https://github.com/adobe/helix-md2docx/compare/v2.1.4...v2.1.5) (2023-05-18)
2
9
 
3
10
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/helix-md2docx",
3
- "version": "2.1.5",
3
+ "version": "2.1.6",
4
4
  "description": "Helix Service that converts markdown to word documents",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -39,19 +39,19 @@
39
39
  "mime": "3.0.0",
40
40
  "rehype-parse": "8.0.4",
41
41
  "remark-gfm": "3.0.1",
42
- "remark-parse": "10.0.1",
42
+ "remark-parse": "10.0.2",
43
43
  "unified": "10.1.2",
44
44
  "unist-util-visit": "4.1.2"
45
45
  },
46
46
  "devDependencies": {
47
47
  "@adobe/eslint-config-helix": "2.0.2",
48
- "@adobe/helix-mediahandler": "2.1.11",
48
+ "@adobe/helix-mediahandler": "2.2.2",
49
49
  "@semantic-release/changelog": "6.0.3",
50
50
  "@semantic-release/exec": "6.0.3",
51
51
  "@semantic-release/git": "10.0.1",
52
- "c8": "7.13.0",
53
- "dotenv": "16.0.3",
54
- "eslint": "8.40.0",
52
+ "c8": "8.0.0",
53
+ "dotenv": "16.3.1",
54
+ "eslint": "8.43.0",
55
55
  "eslint-import-resolver-exports": "1.0.0-beta.5",
56
56
  "eslint-plugin-header": "3.1.1",
57
57
  "eslint-plugin-import": "2.27.5",
@@ -61,7 +61,7 @@
61
61
  "mocha": "10.2.0",
62
62
  "mocha-multi-reporters": "1.5.1",
63
63
  "nock": "13.3.1",
64
- "semantic-release": "21.0.2",
64
+ "semantic-release": "21.0.5",
65
65
  "unist-util-inspect": "7.0.2",
66
66
  "yauzl": "2.10.0"
67
67
  },
@@ -40,8 +40,8 @@ export default {
40
40
  emphasis: characterStyle('italics'),
41
41
  strong: characterStyle('bold'),
42
42
  underline: characterStyle('underline'),
43
- subScript: characterStyle('subScript'),
44
- superScript: characterStyle('superScript'),
43
+ subscript: characterStyle('subScript'),
44
+ superscript: characterStyle('superScript'),
45
45
  heading,
46
46
  html,
47
47
  image,
@@ -12,7 +12,7 @@
12
12
  import { visit } from 'unist-util-visit';
13
13
  import { unified } from 'unified';
14
14
  import parse from 'rehype-parse';
15
- import { toMdast } from 'hast-util-to-mdast';
15
+ import { defaultHandlers, toMdast } from 'hast-util-to-mdast';
16
16
  // import inspect from 'unist-util-inspect';
17
17
  import tableHandler from './hast-table-handler.js';
18
18
  import tableCellHandler from './hast-table-cell-handler.js';
@@ -22,7 +22,32 @@ import tableCellHandler from './hast-table-cell-handler.js';
22
22
  * @param type
23
23
  */
24
24
  function formatHandler(type) {
25
- return (state, { children }) => ({ type, children });
25
+ return (state, node) => {
26
+ const result = { type, children: state.all(node) };
27
+ state.patch(node, result);
28
+ return result;
29
+ };
30
+ }
31
+
32
+ /**
33
+ * removes paragraphs from the child nodes recursively.
34
+ * @param node
35
+ */
36
+ function unwrapParagraphs(node) {
37
+ if (!node.children) {
38
+ return node;
39
+ }
40
+ for (let idx = 0; idx < node.children.length; idx += 1) {
41
+ const child = node.children[idx];
42
+ if (child.type === 'paragraph') {
43
+ node.children.splice(idx, 1, ...child.children);
44
+ idx += child.children.length - 1;
45
+ } else {
46
+ // eslint-disable-next-line no-param-reassign
47
+ node.children[idx] = unwrapParagraphs(child);
48
+ }
49
+ }
50
+ return node;
26
51
  }
27
52
 
28
53
  /**
@@ -32,10 +57,27 @@ function formatHandler(type) {
32
57
  function mdHandler(mdasts) {
33
58
  return (state, node) => {
34
59
  const { idx } = node.properties;
35
- return mdasts[idx];
60
+ return mdasts[+idx];
36
61
  };
37
62
  }
38
63
 
64
+ function isPhrasingParent(node) {
65
+ return [
66
+ 'paragraph',
67
+ 'underline',
68
+ 'subscript',
69
+ 'superscript',
70
+ 'heading',
71
+ 'emphasis',
72
+ 'strong',
73
+ 'link',
74
+ 'linkReference',
75
+ 'tableCell',
76
+ 'delete',
77
+ 'footnote',
78
+ ].includes(node.type);
79
+ }
80
+
39
81
  /**
40
82
  * Sanitizes html:
41
83
  * - collapses consecutive html content (simply concat all nodes until the last html sibling)
@@ -72,12 +114,16 @@ export default function sanitizeHtml(tree) {
72
114
  if (n.type === 'html' || n.type === 'text') {
73
115
  html += n.value;
74
116
  } else {
75
- html += `<markdown idx="${mdInserts.length}"></markdown>`;
117
+ html += `<markdown idx="${mdInserts.length}">foo</markdown>`;
118
+ mdInserts.push(n);
76
119
  }
77
- mdInserts.push(n);
78
120
  });
79
121
  }
80
122
 
123
+ if (isPhrasingParent(parent)) {
124
+ html = `<p>${html}</p>`;
125
+ }
126
+
81
127
  // try parse html
82
128
  const hast = unified()
83
129
  .use(parse, { fragment: true })
@@ -85,22 +131,46 @@ export default function sanitizeHtml(tree) {
85
131
 
86
132
  // convert to mdast with extra handlers
87
133
  const mdast = toMdast(hast, {
134
+ document: false,
88
135
  handlers: {
136
+ ...defaultHandlers,
89
137
  u: formatHandler('underline'),
90
- sub: formatHandler('subScript'),
91
- sup: formatHandler('superScript'),
138
+ sub: formatHandler('subscript'),
139
+ sup: formatHandler('superscript'),
92
140
  table: tableHandler,
93
141
  markdown: mdHandler(mdInserts),
94
142
  th: tableCellHandler,
95
143
  td: tableCellHandler,
96
144
  },
97
145
  });
146
+ // clear inserts
147
+ mdInserts.length = 0;
98
148
 
99
- // console.log('************************************');
100
- // // console.log('>>>>', html);
101
- // process.stdout.write(inspect(hast));
102
- // process.stdout.write('\n');
103
- // console.log('************************************');
149
+ // ensure that flow nodes are in phrasing context
150
+ if (!isPhrasingParent(parent)) {
151
+ let lastParagraph;
152
+ for (let idx = 0; idx < mdast.children.length; idx += 1) {
153
+ const child = mdast.children[idx];
154
+ if (child.type === 'underline' || child.type === 'subscript' || child.type === 'superscript') {
155
+ unwrapParagraphs(child);
156
+ if (!lastParagraph) {
157
+ lastParagraph = {
158
+ type: 'paragraph',
159
+ children: [child],
160
+ };
161
+ mdast.children.splice(idx, 1, lastParagraph);
162
+ } else {
163
+ lastParagraph.children.push(child);
164
+ mdast.children.splice(idx, 1);
165
+ idx -= 1;
166
+ }
167
+ } else {
168
+ lastParagraph = null;
169
+ }
170
+ }
171
+ } else {
172
+ unwrapParagraphs(mdast);
173
+ }
104
174
 
105
175
  // inject children of parsed tree
106
176
  siblings.splice(index, 1, ...mdast.children);