@adobe/helix-md2docx 2.1.29 → 2.1.31

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ ## [2.1.31](https://github.com/adobe/helix-md2docx/compare/v2.1.30...v2.1.31) (2023-09-30)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * **deps:** update dependency hast-util-to-mdast to v10.1.0 ([ec6ab9a](https://github.com/adobe/helix-md2docx/commit/ec6ab9a28325aa1ed79dcad2e173d09ab5a71b51))
7
+
8
+ ## [2.1.30](https://github.com/adobe/helix-md2docx/compare/v2.1.29...v2.1.30) (2023-09-30)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * Support in-document Heading Anchors ([b79b2ce](https://github.com/adobe/helix-md2docx/commit/b79b2cea8a091d4046715829f35f4fbb4ede86d2)), closes [#327](https://github.com/adobe/helix-md2docx/issues/327)
14
+
1
15
  ## [2.1.29](https://github.com/adobe/helix-md2docx/compare/v2.1.28...v2.1.29) (2023-09-29)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/helix-md2docx",
3
- "version": "2.1.29",
3
+ "version": "2.1.31",
4
4
  "description": "Helix Service that converts markdown to word documents",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -32,10 +32,12 @@
32
32
  "@adobe/helix-markdown-support": "7.0.0",
33
33
  "@adobe/helix-shared-process-queue": "3.0.0",
34
34
  "@adobe/remark-gridtables": "2.0.1",
35
- "docx": "8.2.2",
35
+ "docx": "8.2.3",
36
+ "github-slugger": "2.0.0",
36
37
  "hast-util-is-element": "3.0.0",
37
- "hast-util-to-mdast": "10.0.1",
38
+ "hast-util-to-mdast": "10.1.0",
38
39
  "image-size": "1.0.2",
40
+ "mdast-util-to-string": "4.0.0",
39
41
  "mime": "3.0.0",
40
42
  "rehype-parse": "9.0.0",
41
43
  "remark-gfm": "4.0.0",
@@ -45,7 +47,7 @@
45
47
  },
46
48
  "devDependencies": {
47
49
  "@adobe/eslint-config-helix": "2.0.3",
48
- "@adobe/helix-mediahandler": "2.2.16",
50
+ "@adobe/helix-mediahandler": "2.2.17",
49
51
  "@semantic-release/changelog": "6.0.3",
50
52
  "@semantic-release/exec": "6.0.3",
51
53
  "@semantic-release/git": "10.0.1",
@@ -61,7 +63,7 @@
61
63
  "mocha": "10.2.0",
62
64
  "mocha-multi-reporters": "1.5.1",
63
65
  "nock": "13.3.3",
64
- "semantic-release": "22.0.4",
66
+ "semantic-release": "22.0.5",
65
67
  "unist-util-inspect": "8.0.0",
66
68
  "yauzl": "2.10.0"
67
69
  },
@@ -0,0 +1,21 @@
1
+ /*
2
+ * Copyright 2023 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import { Bookmark } from 'docx';
13
+ import all from '../all.js';
14
+
15
+ export default async function bookmark(ctx, node) {
16
+ const children = await all(ctx, node);
17
+ return new Bookmark({
18
+ id: node.anchor,
19
+ children,
20
+ });
21
+ }
@@ -9,7 +9,8 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
- import { HeadingLevel, Paragraph } from 'docx';
12
+ import { Bookmark, HeadingLevel, Paragraph } from 'docx';
13
+
13
14
  import all from '../all.js';
14
15
 
15
16
  const DEPTHS = [
@@ -23,6 +24,13 @@ const DEPTHS = [
23
24
 
24
25
  export default async function heading(ctx, node, parent) {
25
26
  const children = await all(ctx, node);
27
+
28
+ if (node.anchor) {
29
+ children.unshift(new Bookmark({
30
+ id: node.anchor,
31
+ children: [],
32
+ }));
33
+ }
26
34
  return new Paragraph({
27
35
  heading: DEPTHS[node.depth - 1],
28
36
  children,
@@ -9,10 +9,13 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
-
12
+ import all from '../all.js';
13
+ import bookmark from './bookmark.js';
13
14
  import brk from './break.js';
14
15
  import characterStyle from './characterStyle.js';
15
16
  import code from './code.js';
17
+ import gridTable from './gridTable.js';
18
+ import gtRow from './gtRow.js';
16
19
  import heading from './heading.js';
17
20
  import html from './html.js';
18
21
  import image from './image.js';
@@ -24,24 +27,24 @@ import paragraph from './paragraph.js';
24
27
  import paragraphStyle from './paragraphStyle.js';
25
28
  import root from './root.js';
26
29
  import table from './table.js';
27
- import tableRow from './tableRow.js';
28
30
  import tableCell from './tableCell.js';
31
+ import tableRow from './tableRow.js';
29
32
  import text from './text.js';
30
33
  import thematicBreak from './thematicBreak.js';
31
- import gridTable from './gridTable.js';
32
- import gtRow from './gtRow.js';
33
- import all from '../all.js';
34
34
 
35
35
  export default {
36
36
  blockquote: paragraphStyle('Quote'),
37
+ bookmark,
37
38
  break: brk,
38
39
  code,
39
40
  delete: characterStyle('strike'),
40
41
  emphasis: characterStyle('italics'),
41
- strong: characterStyle('bold'),
42
- underline: characterStyle('underline'),
43
- subscript: characterStyle('subScript'),
44
- superscript: characterStyle('superScript'),
42
+ gridTable,
43
+ gtBody: all,
44
+ gtCell: tableCell,
45
+ gtFooter: all,
46
+ gtHeader: all,
47
+ gtRow,
45
48
  heading,
46
49
  html,
47
50
  image,
@@ -51,15 +54,13 @@ export default {
51
54
  listItem,
52
55
  paragraph,
53
56
  root,
57
+ strong: characterStyle('bold'),
58
+ subscript: characterStyle('subScript'),
59
+ superscript: characterStyle('superScript'),
54
60
  table,
55
- tableRow,
56
61
  tableCell,
62
+ tableRow,
57
63
  text,
58
64
  thematicBreak,
59
- gridTable,
60
- gtRow,
61
- gtHeader: all,
62
- gtFooter: all,
63
- gtBody: all,
64
- gtCell: tableCell,
65
+ underline: characterStyle('underline'),
65
66
  };
@@ -9,7 +9,9 @@
9
9
  * OF ANY KIND, either express or implied. See the License for the specific language
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
- import { ExternalHyperlink, ImageRun, TextRun } from 'docx';
12
+ import {
13
+ ExternalHyperlink, ImageRun, InternalHyperlink, TextRun,
14
+ } from 'docx';
13
15
  import all from '../all.js';
14
16
 
15
17
  /**
@@ -49,6 +51,12 @@ export default async function link(ctx, node) {
49
51
  }));
50
52
  }
51
53
  }
54
+ } else if (node.url.startsWith('#')) {
55
+ // Link to the headings
56
+ result.push(new InternalHyperlink({
57
+ children,
58
+ anchor: node.anchor,
59
+ }));
52
60
  } else {
53
61
  result.push(new ExternalHyperlink({
54
62
  children,
@@ -21,6 +21,7 @@ import sanitizeHtml from './mdast-sanitize-html.js';
21
21
  // import { openArrayBuffer } from '../zipfile.js';
22
22
  import { findXMLComponent } from './utils.js';
23
23
  import downloadImages from './mdast-download-images.js';
24
+ import { buildAnchors } from './mdast-docx-anchors.js';
24
25
 
25
26
  export default async function mdast2docx(mdast, opts = {}) {
26
27
  const {
@@ -54,7 +55,7 @@ export default async function mdast2docx(mdast, opts = {}) {
54
55
  // process.stdout.write('==================================================\n');
55
56
 
56
57
  await downloadImages(ctx, mdast);
57
-
58
+ await buildAnchors(mdast);
58
59
  const children = await all(ctx, mdast);
59
60
 
60
61
  if (!stylesXML) {
@@ -0,0 +1,102 @@
1
+ /*
2
+ * Copyright 2023 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import { visit } from 'unist-util-visit';
13
+ import { toString } from 'mdast-util-to-string';
14
+ import { slug } from 'github-slugger';
15
+
16
+ class GHSlugger {
17
+ constructor() {
18
+ this.occurrences = {};
19
+ }
20
+
21
+ /**
22
+ * Generate a unique slug.
23
+ * @param {string} value String of text to slugify
24
+ * @return {string} A unique slug string
25
+ */
26
+ slug(value) {
27
+ let id = slug(value)
28
+ // remove leading numbers
29
+ .replace(/^\d+-+/, '');
30
+
31
+ // resolve collisions
32
+ const original = id;
33
+ while (id in this.occurrences) {
34
+ this.occurrences[original] += 1;
35
+ id = `${original}-${this.occurrences[original]}`;
36
+ }
37
+ this.occurrences[id] = 0;
38
+ return id;
39
+ }
40
+ }
41
+
42
+ export function buildAnchors(tree) {
43
+ const tracking = {};
44
+ const slugger = new GHSlugger();
45
+
46
+ const track = (url) => {
47
+ let ref = tracking[url];
48
+ if (!ref) {
49
+ ref = { links: [], heading: null, bookmark: null };
50
+ tracking[url] = ref;
51
+ }
52
+ return ref;
53
+ };
54
+
55
+ visit(tree, (node) => {
56
+ if (node.type === 'link' && node.url.startsWith('#')) {
57
+ const ref = track(node.url);
58
+ ref.links.push(node);
59
+ // special case: link to top of page
60
+ if (node.url === '#') {
61
+ // eslint-disable-next-line no-param-reassign
62
+ node.anchor = '_top';
63
+ }
64
+ } else if (node.type === 'link' && node.anchor) {
65
+ // eslint-disable-next-line no-param-reassign
66
+ node.type = 'bookmark';
67
+ track(`#${node.anchor}`).bookmark = node;
68
+ } else if (node.type === 'heading') {
69
+ const anchor = `#${slugger.slug(toString(node))}`;
70
+ track(anchor).heading = node;
71
+ }
72
+ return visit.CONTINUE;
73
+ });
74
+
75
+ const anchors = {};
76
+ Object.keys(tracking).forEach((k) => {
77
+ const ref = tracking[k];
78
+ if (ref.heading) {
79
+ // ms-word heading bookmark algorithm
80
+ const words = toString(ref.heading).split(/\s+/).slice(0, 3);
81
+ let anchor = `_${words.join('_')}`.substring(0, 36);
82
+
83
+ // resolve collisions
84
+ const original = anchor;
85
+ while (anchor in anchors) {
86
+ anchors[original] += 1;
87
+ anchor = `${original}${anchors[original]}`;
88
+ }
89
+ anchors[anchor] = 0;
90
+
91
+ ref.heading.anchor = anchor;
92
+ for (const link of ref.links) {
93
+ link.anchor = anchor;
94
+ }
95
+ } else if (ref.bookmark) {
96
+ const { anchor } = ref.bookmark;
97
+ for (const link of ref.links) {
98
+ link.anchor = anchor;
99
+ }
100
+ }
101
+ });
102
+ }
@@ -29,6 +29,32 @@ function formatHandler(type) {
29
29
  };
30
30
  }
31
31
 
32
+ /**
33
+ * @param {State} state
34
+ * State.
35
+ * @param {Readonly<Element>} node
36
+ * hast element to transform.
37
+ * @returns {Link}
38
+ * mdast node.
39
+ */
40
+ export function linkHandler(state, node) {
41
+ const properties = node.properties || {};
42
+ // Allow potentially “invalid” nodes, they might be unknown.
43
+ // We also support straddling later.
44
+ const children = /** @type {Array<PhrasingContent>} */ (state.all(node));
45
+
46
+ /** @type {Link} */
47
+ const result = {
48
+ type: 'link',
49
+ url: state.resolve(String(properties.href || '') || null),
50
+ title: properties.title ? String(properties.title) : null,
51
+ anchor: properties.name ?? properties.id,
52
+ children,
53
+ };
54
+ state.patch(node, result);
55
+ return result;
56
+ }
57
+
32
58
  /**
33
59
  * removes paragraphs from the child nodes recursively.
34
60
  * @param node
@@ -134,6 +160,7 @@ export default function sanitizeHtml(tree) {
134
160
  document: false,
135
161
  handlers: {
136
162
  ...defaultHandlers,
163
+ a: linkHandler,
137
164
  u: formatHandler('underline'),
138
165
  sub: formatHandler('subscript'),
139
166
  sup: formatHandler('superscript'),