@adobe/helix-md2docx 2.1.28 → 2.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/package.json +4 -2
- package/src/mdast2docx/handlers/bookmark.js +21 -0
- package/src/mdast2docx/handlers/heading.js +9 -1
- package/src/mdast2docx/handlers/index.js +17 -16
- package/src/mdast2docx/handlers/link.js +9 -1
- package/src/mdast2docx/index.js +2 -1
- package/src/mdast2docx/mdast-docx-anchors.js +102 -0
- package/src/mdast2docx/mdast-sanitize-html.js +27 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
## [2.1.30](https://github.com/adobe/helix-md2docx/compare/v2.1.29...v2.1.30) (2023-09-30)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Bug Fixes
|
|
5
|
+
|
|
6
|
+
* Support in-document Heading Anchors ([b79b2ce](https://github.com/adobe/helix-md2docx/commit/b79b2cea8a091d4046715829f35f4fbb4ede86d2)), closes [#327](https://github.com/adobe/helix-md2docx/issues/327)
|
|
7
|
+
|
|
8
|
+
## [2.1.29](https://github.com/adobe/helix-md2docx/compare/v2.1.28...v2.1.29) (2023-09-29)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Bug Fixes
|
|
12
|
+
|
|
13
|
+
* **deps:** update dependency @adobe/helix-docx2md to v1.4.18 ([#333](https://github.com/adobe/helix-md2docx/issues/333)) ([5c67511](https://github.com/adobe/helix-md2docx/commit/5c675118eb922c7e7fab0287191280368c184558))
|
|
14
|
+
|
|
1
15
|
## [2.1.28](https://github.com/adobe/helix-md2docx/compare/v2.1.27...v2.1.28) (2023-09-29)
|
|
2
16
|
|
|
3
17
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adobe/helix-md2docx",
|
|
3
|
-
"version": "2.1.28",
|
|
3
|
+
"version": "2.1.30",
|
|
4
4
|
"description": "Helix Service that converts markdown to word documents",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -28,14 +28,16 @@
|
|
|
28
28
|
"homepage": "https://github.com/adobe/helix-md2docx#readme",
|
|
29
29
|
"dependencies": {
|
|
30
30
|
"@adobe/fetch": "4.1.0",
|
|
31
|
-
"@adobe/helix-docx2md": "1.4.
|
|
31
|
+
"@adobe/helix-docx2md": "1.4.18",
|
|
32
32
|
"@adobe/helix-markdown-support": "7.0.0",
|
|
33
33
|
"@adobe/helix-shared-process-queue": "3.0.0",
|
|
34
34
|
"@adobe/remark-gridtables": "2.0.1",
|
|
35
35
|
"docx": "8.2.2",
|
|
36
|
+
"github-slugger": "2.0.0",
|
|
36
37
|
"hast-util-is-element": "3.0.0",
|
|
37
38
|
"hast-util-to-mdast": "10.0.1",
|
|
38
39
|
"image-size": "1.0.2",
|
|
40
|
+
"mdast-util-to-string": "4.0.0",
|
|
39
41
|
"mime": "3.0.0",
|
|
40
42
|
"rehype-parse": "9.0.0",
|
|
41
43
|
"remark-gfm": "4.0.0",
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2023 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import { Bookmark } from 'docx';
|
|
13
|
+
import all from '../all.js';
|
|
14
|
+
|
|
15
|
+
/**
 * Handler for `bookmark` nodes: wraps the converted child content of the node in a
 * docx `Bookmark` that is identified by the node's `anchor`.
 *
 * @param {object} ctx conversion context, passed through to `all`
 * @param {object} node node to convert; its `anchor` property becomes the bookmark id
 * @returns {Promise<Bookmark>} the bookmark containing the converted children
 */
export default async function bookmark(ctx, node) {
  // convert the children first, then read the anchor (same order as before)
  const content = await all(ctx, node);
  const options = {
    id: node.anchor,
    children: content,
  };
  return new Bookmark(options);
}
|
|
@@ -9,7 +9,8 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
-
import { HeadingLevel, Paragraph } from 'docx';
|
|
12
|
+
import { Bookmark, HeadingLevel, Paragraph } from 'docx';
|
|
13
|
+
|
|
13
14
|
import all from '../all.js';
|
|
14
15
|
|
|
15
16
|
const DEPTHS = [
|
|
@@ -23,6 +24,13 @@ const DEPTHS = [
|
|
|
23
24
|
|
|
24
25
|
export default async function heading(ctx, node, parent) {
|
|
25
26
|
const children = await all(ctx, node);
|
|
27
|
+
|
|
28
|
+
if (node.anchor) {
|
|
29
|
+
children.unshift(new Bookmark({
|
|
30
|
+
id: node.anchor,
|
|
31
|
+
children: [],
|
|
32
|
+
}));
|
|
33
|
+
}
|
|
26
34
|
return new Paragraph({
|
|
27
35
|
heading: DEPTHS[node.depth - 1],
|
|
28
36
|
children,
|
|
@@ -9,10 +9,13 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
-
|
|
12
|
+
import all from '../all.js';
|
|
13
|
+
import bookmark from './bookmark.js';
|
|
13
14
|
import brk from './break.js';
|
|
14
15
|
import characterStyle from './characterStyle.js';
|
|
15
16
|
import code from './code.js';
|
|
17
|
+
import gridTable from './gridTable.js';
|
|
18
|
+
import gtRow from './gtRow.js';
|
|
16
19
|
import heading from './heading.js';
|
|
17
20
|
import html from './html.js';
|
|
18
21
|
import image from './image.js';
|
|
@@ -24,24 +27,24 @@ import paragraph from './paragraph.js';
|
|
|
24
27
|
import paragraphStyle from './paragraphStyle.js';
|
|
25
28
|
import root from './root.js';
|
|
26
29
|
import table from './table.js';
|
|
27
|
-
import tableRow from './tableRow.js';
|
|
28
30
|
import tableCell from './tableCell.js';
|
|
31
|
+
import tableRow from './tableRow.js';
|
|
29
32
|
import text from './text.js';
|
|
30
33
|
import thematicBreak from './thematicBreak.js';
|
|
31
|
-
import gridTable from './gridTable.js';
|
|
32
|
-
import gtRow from './gtRow.js';
|
|
33
|
-
import all from '../all.js';
|
|
34
34
|
|
|
35
35
|
export default {
|
|
36
36
|
blockquote: paragraphStyle('Quote'),
|
|
37
|
+
bookmark,
|
|
37
38
|
break: brk,
|
|
38
39
|
code,
|
|
39
40
|
delete: characterStyle('strike'),
|
|
40
41
|
emphasis: characterStyle('italics'),
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
42
|
+
gridTable,
|
|
43
|
+
gtBody: all,
|
|
44
|
+
gtCell: tableCell,
|
|
45
|
+
gtFooter: all,
|
|
46
|
+
gtHeader: all,
|
|
47
|
+
gtRow,
|
|
45
48
|
heading,
|
|
46
49
|
html,
|
|
47
50
|
image,
|
|
@@ -51,15 +54,13 @@ export default {
|
|
|
51
54
|
listItem,
|
|
52
55
|
paragraph,
|
|
53
56
|
root,
|
|
57
|
+
strong: characterStyle('bold'),
|
|
58
|
+
subscript: characterStyle('subScript'),
|
|
59
|
+
superscript: characterStyle('superScript'),
|
|
54
60
|
table,
|
|
55
|
-
tableRow,
|
|
56
61
|
tableCell,
|
|
62
|
+
tableRow,
|
|
57
63
|
text,
|
|
58
64
|
thematicBreak,
|
|
59
|
-
|
|
60
|
-
gtRow,
|
|
61
|
-
gtHeader: all,
|
|
62
|
-
gtFooter: all,
|
|
63
|
-
gtBody: all,
|
|
64
|
-
gtCell: tableCell,
|
|
65
|
+
underline: characterStyle('underline'),
|
|
65
66
|
};
|
|
@@ -9,7 +9,9 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
-
import {
|
|
12
|
+
import {
|
|
13
|
+
ExternalHyperlink, ImageRun, InternalHyperlink, TextRun,
|
|
14
|
+
} from 'docx';
|
|
13
15
|
import all from '../all.js';
|
|
14
16
|
|
|
15
17
|
/**
|
|
@@ -49,6 +51,12 @@ export default async function link(ctx, node) {
|
|
|
49
51
|
}));
|
|
50
52
|
}
|
|
51
53
|
}
|
|
54
|
+
} else if (node.url.startsWith('#')) {
|
|
55
|
+
// Link to the headings
|
|
56
|
+
result.push(new InternalHyperlink({
|
|
57
|
+
children,
|
|
58
|
+
anchor: node.anchor,
|
|
59
|
+
}));
|
|
52
60
|
} else {
|
|
53
61
|
result.push(new ExternalHyperlink({
|
|
54
62
|
children,
|
package/src/mdast2docx/index.js
CHANGED
|
@@ -21,6 +21,7 @@ import sanitizeHtml from './mdast-sanitize-html.js';
|
|
|
21
21
|
// import { openArrayBuffer } from '../zipfile.js';
|
|
22
22
|
import { findXMLComponent } from './utils.js';
|
|
23
23
|
import downloadImages from './mdast-download-images.js';
|
|
24
|
+
import { buildAnchors } from './mdast-docx-anchors.js';
|
|
24
25
|
|
|
25
26
|
export default async function mdast2docx(mdast, opts = {}) {
|
|
26
27
|
const {
|
|
@@ -54,7 +55,7 @@ export default async function mdast2docx(mdast, opts = {}) {
|
|
|
54
55
|
// process.stdout.write('==================================================\n');
|
|
55
56
|
|
|
56
57
|
await downloadImages(ctx, mdast);
|
|
57
|
-
|
|
58
|
+
await buildAnchors(mdast);
|
|
58
59
|
const children = await all(ctx, mdast);
|
|
59
60
|
|
|
60
61
|
if (!stylesXML) {
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2023 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import { visit } from 'unist-util-visit';
|
|
13
|
+
import { toString } from 'mdast-util-to-string';
|
|
14
|
+
import { slug } from 'github-slugger';
|
|
15
|
+
|
|
16
|
+
/**
 * Produces slugs via `slug()` from github-slugger and keeps them unique per instance
 * by appending a numeric suffix to repeated values.
 */
class GHSlugger {
  constructor() {
    // Prototype-less dictionary (slug -> number of suffixes handed out so far).
    // With a plain `{}` the `in` check below would also match inherited keys such as
    // `constructor` or `__proto__` and corrupt the generated slug.
    this.occurrences = Object.create(null);
  }

  /**
   * Generate a unique slug.
   * @param {string} value String of text to slugify
   * @return {string} A unique slug string
   */
  slug(value) {
    const original = slug(value)
      // remove a leading run of digits together with the hyphen(s) that follow it
      .replace(/^\d+-+/, '');

    // resolve collisions: bump the counter of the base slug until the candidate is unused
    let id = original;
    while (id in this.occurrences) {
      this.occurrences[original] += 1;
      id = `${original}-${this.occurrences[original]}`;
    }
    this.occurrences[id] = 0;
    return id;
  }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Prepares in-document links for the docx conversion. The tree is modified in place:
 *
 * - `link` nodes whose url starts with `#` are collected per url; a link to `#` gets
 *   the anchor `_top`.
 * - other `link` nodes that carry an `anchor` are turned into `bookmark` nodes.
 * - every `heading` is registered under `#<slug of its text>` and receives an `anchor`
 *   built from its first three words; all links collected for that url get the same
 *   anchor.
 * - links that point to a bookmark (and not to a heading) get the bookmark's anchor.
 *
 * Links whose target is neither a heading nor a bookmark are left without an anchor.
 *
 * @param {object} tree mdast tree to process
 */
export function buildAnchors(tree) {
  // "#..." url -> { links, heading, bookmark }
  const tracking = new Map();
  const slugger = new GHSlugger();

  const track = (url) => {
    let ref = tracking.get(url);
    if (!ref) {
      ref = { links: [], heading: null, bookmark: null };
      tracking.set(url, ref);
    }
    return ref;
  };

  // The visitor returns nothing, which lets the traversal continue normally.
  visit(tree, (node) => {
    if (node.type === 'link' && node.url.startsWith('#')) {
      const ref = track(node.url);
      ref.links.push(node);
      // special case: link to top of page
      if (node.url === '#') {
        // eslint-disable-next-line no-param-reassign
        node.anchor = '_top';
      }
    } else if (node.type === 'link' && node.anchor) {
      // eslint-disable-next-line no-param-reassign
      node.type = 'bookmark';
      track(`#${node.anchor}`).bookmark = node;
    } else if (node.type === 'heading') {
      const anchor = `#${slugger.slug(toString(node))}`;
      track(anchor).heading = node;
    }
  });

  // heading anchor -> number of suffixes handed out so far. A Map is used so that
  // anchors equal to inherited object keys (e.g. `__proto__`) are not taken for
  // collisions.
  const anchors = new Map();
  tracking.forEach((ref) => {
    if (ref.heading) {
      // ms-word heading bookmark algorithm
      const words = toString(ref.heading).split(/\s+/).slice(0, 3);
      let anchor = `_${words.join('_')}`.substring(0, 36);

      // resolve collisions
      const original = anchor;
      while (anchors.has(anchor)) {
        const count = anchors.get(original) + 1;
        anchors.set(original, count);
        anchor = `${original}${count}`;
      }
      anchors.set(anchor, 0);

      // eslint-disable-next-line no-param-reassign
      ref.heading.anchor = anchor;
      for (const link of ref.links) {
        link.anchor = anchor;
      }
    } else if (ref.bookmark) {
      const { anchor } = ref.bookmark;
      for (const link of ref.links) {
        link.anchor = anchor;
      }
    }
  });
}
|
|
@@ -29,6 +29,32 @@ function formatHandler(type) {
|
|
|
29
29
|
};
|
|
30
30
|
}
|
|
31
31
|
|
|
32
|
+
/**
 * Builds an mdast `link` node from a hast element. In addition to `url` and `title`,
 * the element's `name` property (or, when that is null or undefined, its `id`) is
 * stored on the result as `anchor`.
 *
 * @param {State} state
 *   State.
 * @param {Readonly<Element>} node
 *   hast element to transform.
 * @returns {Link}
 *   mdast node.
 */
export function linkHandler(state, node) {
  const props = node.properties || {};
  // Potentially "invalid" child nodes are allowed here; they might be unknown.
  const content = /** @type {Array<PhrasingContent>} */ (state.all(node));

  // an absent or empty href is handed to the resolver as `null`
  const href = String(props.href || '');
  const title = props.title ? String(props.title) : null;

  /** @type {Link} */
  const mdastLink = {
    type: 'link',
    url: state.resolve(href || null),
    title,
    anchor: props.name ?? props.id,
    children: content,
  };
  state.patch(node, mdastLink);
  return mdastLink;
}
|
|
57
|
+
|
|
32
58
|
/**
|
|
33
59
|
* removes paragraphs from the child nodes recursively.
|
|
34
60
|
* @param node
|
|
@@ -134,6 +160,7 @@ export default function sanitizeHtml(tree) {
|
|
|
134
160
|
document: false,
|
|
135
161
|
handlers: {
|
|
136
162
|
...defaultHandlers,
|
|
163
|
+
a: linkHandler,
|
|
137
164
|
u: formatHandler('underline'),
|
|
138
165
|
sub: formatHandler('subscript'),
|
|
139
166
|
sup: formatHandler('superscript'),
|