@adobe/helix-md2docx 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/CODE_OF_CONDUCT.md +74 -0
- package/CONTRIBUTING.md +74 -0
- package/LICENSE.txt +264 -0
- package/README.md +33 -0
- package/package.json +70 -0
- package/src/cli/convert2docx.js +96 -0
- package/src/index.d.ts +13 -0
- package/src/index.js +13 -0
- package/src/mdast2docx/all.js +31 -0
- package/src/mdast2docx/default-numbering.js +79 -0
- package/src/mdast2docx/handlers/break.js +22 -0
- package/src/mdast2docx/handlers/characterStyle.js +29 -0
- package/src/mdast2docx/handlers/code.js +27 -0
- package/src/mdast2docx/handlers/heading.js +32 -0
- package/src/mdast2docx/handlers/html.js +35 -0
- package/src/mdast2docx/handlers/image.js +90 -0
- package/src/mdast2docx/handlers/index.js +56 -0
- package/src/mdast2docx/handlers/inlineCode.js +21 -0
- package/src/mdast2docx/handlers/link.js +63 -0
- package/src/mdast2docx/handlers/list.js +39 -0
- package/src/mdast2docx/handlers/listItem.js +16 -0
- package/src/mdast2docx/handlers/paragraph.js +52 -0
- package/src/mdast2docx/handlers/paragraphStyle.js +21 -0
- package/src/mdast2docx/handlers/root.js +16 -0
- package/src/mdast2docx/handlers/table.js +54 -0
- package/src/mdast2docx/handlers/tableCell.js +51 -0
- package/src/mdast2docx/handlers/tableRow.js +28 -0
- package/src/mdast2docx/handlers/text.js +24 -0
- package/src/mdast2docx/handlers/thematicBreak.js +24 -0
- package/src/mdast2docx/hast-table-handler.js +145 -0
- package/src/mdast2docx/index.d.ts +21 -0
- package/src/mdast2docx/index.js +88 -0
- package/src/mdast2docx/mdast-download-images.js +92 -0
- package/src/mdast2docx/mdast-sanitize-html.js +112 -0
- package/src/mdast2docx/template/[Content_Types].xml +41 -0
- package/src/mdast2docx/template/docProps/app.xml +20 -0
- package/src/mdast2docx/template/docProps/core.xml +12 -0
- package/src/mdast2docx/template/word/_rels/document.xml.rels +51 -0
- package/src/mdast2docx/template/word/_rels/settings.xml.rels +7 -0
- package/src/mdast2docx/template/word/document.xml +1116 -0
- package/src/mdast2docx/template/word/endnotes.xml +56 -0
- package/src/mdast2docx/template/word/fontTable.xml +58 -0
- package/src/mdast2docx/template/word/footer1.xml +39 -0
- package/src/mdast2docx/template/word/footer2.xml +39 -0
- package/src/mdast2docx/template/word/footer3.xml +39 -0
- package/src/mdast2docx/template/word/footnotes.xml +56 -0
- package/src/mdast2docx/template/word/header1.xml +39 -0
- package/src/mdast2docx/template/word/header2.xml +39 -0
- package/src/mdast2docx/template/word/header3.xml +39 -0
- package/src/mdast2docx/template/word/media/image1.png +0 -0
- package/src/mdast2docx/template/word/numbering.xml +277 -0
- package/src/mdast2docx/template/word/settings.xml +91 -0
- package/src/mdast2docx/template/word/styles.xml +1084 -0
- package/src/mdast2docx/template/word/theme/theme1.xml +296 -0
- package/src/mdast2docx/template/word/webSettings.xml +40 -0
- package/src/mdast2docx/template.docx +0 -0
- package/src/mdast2docx/utils.js +22 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import docx from 'docx';
|
|
13
|
+
import all from '../all.js';
|
|
14
|
+
|
|
15
|
+
const { Paragraph, Table, WidthType } = docx;
|
|
16
|
+
|
|
17
|
+
// see http://officeopenxml.com/WPtableWidth.php
|
|
18
|
+
// Note: The 2006 version of the OOXML standard specified that the value was to be a decimal.
|
|
19
|
+
// When type="pct", the value was interpreted as fifths of a percent, so 4975=99.5%,
|
|
20
|
+
// and no % symbol was included in the attribute. In the 2011 version the value can be either a
|
|
21
|
+
// decimal or a percent, so a % symbol should be included when type="pct".
|
|
22
|
+
|
|
23
|
+
/**
 * Converts an mdast `table` node into a docx `Table`, followed by an empty paragraph.
 *
 * While the rows are processed, `ctx.table` holds the state of this table (`width` and
 * `align`); the previous value is put back afterwards so that nested tables work.
 *
 * @param {object} ctx conversion context; `ctx.table` is read and temporarily replaced
 * @param {object} node the mdast table node; `node.align` is used when present
 * @returns {Promise<Array>} the docx table and a trailing empty paragraph
 */
export default async function table(ctx, node) {
  const enclosingTable = ctx.table;

  // A nested table may only use the column width of the enclosing table. A top level table
  // uses the default width: Letter Width - Margin = 8.5" - 2" = 6.5" (unit: 1/1440 inches).
  // The width of a single column is derived from this value by the tableRow handler.
  const availableWidth = enclosingTable ? enclosingTable.columnWidth : 1440 * 6.5;
  ctx.table = {
    width: availableWidth,
    align: node.align || [],
  };

  const rows = await all(ctx, node);

  // pick up the column width stored on the context during row processing, then restore
  const { columnWidth } = ctx.table;
  ctx.table = enclosingTable;

  // every column gets the same width; the column count is taken from the first row
  let numCols = 0;
  if (rows.length) {
    numCols = rows[0].CellCount;
  }
  const roundedWidth = Math.round(columnWidth);
  const columnWidths = new Array(numCols).fill(roundedWidth);

  const width = { size: 100, type: WidthType.PERCENTAGE };
  const docxTable = new Table({
    style: 'PageBlock',
    rows,
    columnWidths,
    width,
  });

  // the empty paragraph gives better separation in word
  const separator = new Paragraph([]);
  return [docxTable, separator];
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import docx from 'docx';
|
|
13
|
+
import all from '../all.js';
|
|
14
|
+
|
|
15
|
+
const {
|
|
16
|
+
AlignmentType, Paragraph, Table, TableCell,
|
|
17
|
+
} = docx;
|
|
18
|
+
|
|
19
|
+
// Maps the mdast column alignment to the docx paragraph alignment.
const ALIGN = {
  left: null,
  right: AlignmentType.RIGHT,
  center: AlignmentType.CENTER,
};

/**
 * Converts an mdast `tableCell` node into a docx `TableCell`.
 *
 * Converted children that are a `Paragraph` or a `Table` are added to the cell as they are.
 * Consecutive children of any other kind are collected and wrapped in a new `Paragraph`
 * that carries the alignment of the cell's column.
 *
 * @param {object} ctx conversion context; `ctx.table.align` holds the column alignments
 * @param {object} node the mdast table cell node
 * @param {object} parent the parent node (not used)
 * @param {Array} siblings the siblings handled so far; its length is used as column index
 * @returns {Promise<object>} the docx table cell
 */
export default async function tableCell(ctx, node, parent, siblings) {
  const converted = await all(ctx, node);
  const alignment = ALIGN[ctx.table.align[siblings.length]];

  const blocks = [];
  let inlineRun = [];

  // wraps the pending non block elements with a paragraph
  const flushInlineRun = () => {
    if (inlineRun.length > 0) {
      blocks.push(new Paragraph({ alignment, children: inlineRun }));
    }
    inlineRun = [];
  };

  for (const item of converted) {
    const isBlock = item instanceof Paragraph || item instanceof Table;
    if (isBlock) {
      flushInlineRun();
      blocks.push(item);
    } else {
      inlineRun.push(item);
    }
  }
  flushInlineRun();

  return new TableCell({ children: blocks });
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import docx from 'docx';
|
|
13
|
+
import all from '../all.js';
|
|
14
|
+
|
|
15
|
+
const { TableRow } = docx;
|
|
16
|
+
|
|
17
|
+
/**
 * Converts an mdast `tableRow` node into a docx `TableRow`.
 *
 * The first row that is handled while `ctx.table.columnWidth` is still unset defines it:
 * the table width divided by the number of children of that row.
 *
 * @param {object} ctx conversion context; `ctx.table` is read and updated
 * @param {object} node the mdast table row node
 * @param {object} parent the parent node (not used)
 * @param {Array} siblings the siblings handled so far; an empty list marks the header row
 * @returns {Promise<object>} the docx table row
 */
export default async function tableRow(ctx, node, parent, siblings) {
  const tableState = ctx.table;
  if (!tableState.columnWidth) {
    const cellCount = node.children.length;
    tableState.columnWidth = tableState.width / cellCount;
  }

  const cells = await all(ctx, node);
  const isFirstRow = siblings.length === 0;
  return new TableRow({
    children: cells,
    tableHeader: isFirstRow,
  });
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import docx from 'docx';
|
|
13
|
+
|
|
14
|
+
const { TextRun } = docx;
|
|
15
|
+
|
|
16
|
+
/**
 * Converts an mdast `text` node into docx text runs, one run per line of the text.
 * Every run except the first one is created with `break: 1`.
 *
 * @param {object} ctx conversion context; `ctx.style` is spread into every run
 * @param {object} node the mdast text node; `node.value` is split at `\n`
 * @returns {Array} the docx text runs
 */
export default function textNode(ctx, node) {
  const lines = node.value.split('\n');
  const runs = [];
  for (let lineNo = 0; lineNo < lines.length; lineNo += 1) {
    const options = {
      ...ctx.style,
      text: lines[lineNo],
      break: lineNo > 0 ? 1 : 0,
    };
    runs.push(new TextRun(options));
  }
  return runs;
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import docx from 'docx';
|
|
13
|
+
|
|
14
|
+
const { Paragraph } = docx;
|
|
15
|
+
|
|
16
|
+
/**
 * Creates the docx representation of a thematic break: a paragraph with the text `---`
 * and a spacing of 250 before and after.
 *
 * @returns {object} the docx paragraph
 */
export default function thematicBreak() {
  const spacing = { before: 250, after: 250 };
  return new Paragraph({ text: '---', spacing });
}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
'use strict';
|
|
14
|
+
|
|
15
|
+
import { convertElement } from 'hast-util-is-element';
|
|
16
|
+
import { visit } from 'unist-util-visit';
|
|
17
|
+
import { all } from 'hast-util-to-mdast/lib/all.js';
|
|
18
|
+
|
|
19
|
+
const thead = convertElement('thead');
|
|
20
|
+
const tr = convertElement('tr');
|
|
21
|
+
const cell = convertElement(['th', 'td']);
|
|
22
|
+
|
|
23
|
+
/*
|
|
24
|
+
copied and adapted from
|
|
25
|
+
https://github.com/syntax-tree/hast-util-to-mdast/blob/7.1.3/lib/handlers/table.js
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
/**
 * Infers whether the HTML table has a head and how its columns align.
 *
 * The table is treated as having a header when it contains a `thead` element or when a
 * `th` cell is found in its first row. The alignment of a column is taken from the `align`
 * property of the first cell seen at that column index (`null` if the cell has none).
 *
 * @param {object} node the hast table element
 * @returns {{align: Array, headless: boolean}} the column alignments and the header flag
 */
function inspect(node) {
  let headless = true;
  const align = [];
  let rowIndex = 0;
  let cellIndex = 0;

  function visitor(child) {
    // If there is a `thead`, assume there is a header row.
    if (thead(child)) {
      headless = false;
    } else if (tr(child)) {
      rowIndex += 1;
      cellIndex = 0;
    } else if (cell(child)) {
      if (align[cellIndex] === undefined) {
        align[cellIndex] = child.properties.align || null;
      }

      // If there is a th in the first row, assume there is a header row.
      if (headless && rowIndex < 2 && child.tagName === 'th') {
        headless = false;
      }

      cellIndex += 1;
      // Do not descend into the cell: rows and cells of a table nested in this cell must
      // not change the row/cell counters or the alignment of this table.
      // The traversal action is returned as its documented literal value ('skip') instead
      // of `visit.SKIP`: when `visit` is the named export of the package, `SKIP` is not
      // necessarily a property of it, and `undefined` would mean "continue".
      return 'skip';
    }
    // `true` is the documented value of the "continue" action.
    return true;
  }

  visit(node, 'element', visitor);

  return { align, headless };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Groups `children` into containers of the given `type`.
 *
 * Children that already are of that type are kept. Any other child is moved into a
 * container: into the next container that follows it, or, for trailing children, into
 * the last container. If there is no container at all, an empty one is created so that
 * no content is lost.
 *
 * @param {Array} children nodes to group; the kept containers are modified in place
 * @param {string} type node type of the containers (`tableRow` or `tableCell`)
 * @returns {Array} the containers
 */
function adoptStrays(children, type) {
  const containers = [];
  let strays = [];

  for (const child of children) {
    if (child.type === type) {
      if (strays.length) {
        // content seen before this container is prepended to it
        child.children = strays.concat(child.children);
        strays = [];
      }
      containers.push(child);
    } else {
      strays.push(child);
    }
  }

  if (strays.length) {
    // trailing content is appended to the last container
    let last = containers[containers.length - 1];
    if (!last) {
      last = { type, children: [] };
      containers.push(last);
    }
    last.children = last.children.concat(strays);
  }

  return containers;
}

/**
 * Ensures the cells in a row are properly structured: content outside of a cell is moved
 * into a cell, and empty cells are added until the row has as many cells as the table
 * has columns (`info.align.length`).
 *
 * @param {Array} children the children of a table row
 * @param {{align: Array}} info the table info
 * @returns {Array} the table cells of the row
 */
function toCells(children, info) {
  const cells = adoptStrays(children, 'tableCell');

  // add empty cells if there are more in the table
  for (let index = cells.length; index < info.align.length; index += 1) {
    cells.push({ type: 'tableCell', children: [] });
  }

  return cells;
}

/**
 * Ensures the rows are properly structured: content outside of a row is moved into a
 * row, and the cells of every row are normalized with `toCells`.
 *
 * No empty header row is added for tables without a header (`info.headless` is not
 * used). Content of a table that has no row at all is put into a newly created row
 * instead of failing with a TypeError.
 *
 * @param {Array} children the converted children of the table
 * @param {{align: Array}} info the table info
 * @returns {Array} the table rows
 */
function toRows(children, info) {
  const rows = adoptStrays(children, 'tableRow');

  for (const row of rows) {
    row.children = toCells(row.children, info);
  }

  return rows;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Handler that converts a hast `table` element into an mdast `table` node. The column
 * alignment found in the html table is set as `align` of the created node.
 *
 * @param {Function} h the node builder passed to the handler
 * @param {object} node the hast table element
 * @returns {object} the node created by `h`
 */
export default function table(h, node) {
  const info = inspect(node);
  const props = { align: info.align };
  const rows = toRows(all(h, node), info);
  return h(node, 'table', props, rows);
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2022 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
declare interface Logger {}

/**
 * Converts the mdast to a word document (docx).
 *
 * @param {Node} mdast The mdast
 * @param {Logger} [log] a console like logger. Optional; `console` is used if omitted.
 * @returns {Promise<Buffer>} the docx
 */
export default function mdast2docx(mdast: object, log?: Logger): Promise<Buffer>;
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import { readFile } from 'fs/promises';
|
|
13
|
+
import path from 'path';
|
|
14
|
+
import { dirname } from 'dirname-filename-esm';
|
|
15
|
+
import docx from 'docx';
|
|
16
|
+
|
|
17
|
+
import all from './all.js';
|
|
18
|
+
import handlers from './handlers/index.js';
|
|
19
|
+
import numbering from './default-numbering.js';
|
|
20
|
+
import sanitizeHtml from './mdast-sanitize-html.js';
|
|
21
|
+
// import { openArrayBuffer } from '../zipfile.js';
|
|
22
|
+
import { findXMLComponent } from './utils.js';
|
|
23
|
+
import downloadImages from './mdast-download-images.js';
|
|
24
|
+
|
|
25
|
+
const { Document, Packer } = docx;
|
|
26
|
+
|
|
27
|
+
// eslint-disable-next-line no-underscore-dangle
|
|
28
|
+
const __dirname = dirname(import.meta);
|
|
29
|
+
|
|
30
|
+
/**
 * Converts the mdast to a word document (docx).
 *
 * The html nodes of the tree are sanitized, the images are downloaded, the tree is
 * converted with the registered handlers and the result is packed into a document that
 * uses the styles of `template/word/styles.xml`.
 *
 * @param {object} mdast the mdast
 * @param {object} [log] a console like logger
 * @returns {Promise<Buffer>} the docx
 */
export default async function mdast2docx(mdast, log = console) {
  const ctx = {
    handlers,
    style: {},
    paragraphStyle: '',
    images: {},
    listLevel: -1,
    lists: [],
    log,
  };

  const tree = sanitizeHtml(mdast);
  await downloadImages(ctx, tree);
  const children = await all(ctx, tree);

  // the styles are read from the unpacked template next to this module
  const stylesPath = path.resolve(__dirname, 'template', 'word', 'styles.xml');
  const styleXML = await readFile(stylesPath, 'utf-8');

  const sections = [{ children }];
  const doc = new Document({
    numbering,
    externalStyles: styleXML,
    sections,
  });

  // temporary hack for problems with online word:
  // force the numbering id of the default bullet numbering to 1
  const bulletNumbering = doc.numbering.concreteNumberingMap.get('default-bullet-numbering');
  bulletNumbering.root[0].root.numId = 1;
  bulletNumbering.numId = 1;

  // temporary hack for problems with lists in online word:
  // move the `w:lvlJc` component to the end of every `w:lvl` element
  for (const abstractNumbering of doc.numbering.abstractNumberingMap.values()) {
    abstractNumbering.root
      .filter((component) => component.rootKey === 'w:lvl')
      .forEach((level) => {
        const jc = findXMLComponent(level, 'w:lvlJc');
        if (!jc) {
          return;
        }
        const position = level.root.indexOf(jc);
        level.root.splice(position, 1);
        level.root.push(jc);
      });
  }

  return Packer.toBuffer(doc);
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2020 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
/* eslint-disable no-param-reassign */
|
|
13
|
+
import crypto from 'crypto';
|
|
14
|
+
import { context as fetchAPI, h1 } from '@adobe/helix-fetch';
|
|
15
|
+
import processQueue from '@adobe/helix-shared-process-queue';
|
|
16
|
+
import { visit } from 'unist-util-visit';
|
|
17
|
+
import getDimensions from 'image-size';
|
|
18
|
+
|
|
19
|
+
/**
 * Creates the fetch context used for the image downloads. The `h1()` context is used when
 * the environment variable `HELIX_FETCH_FORCE_HTTP1` is set to a non-empty value, the
 * default context otherwise.
 *
 * @returns {object} the fetch context
 */
function createFetchContext() {
  if (process.env.HELIX_FETCH_FORCE_HTTP1) {
    return h1();
  }
  return fetchAPI();
}
|
|
24
|
+
|
|
25
|
+
/**
 * Formats a number of bytes as human readable size, using units based on 1024.
 *
 * The unit index is clamped to the available units, so that values below 1 (including
 * negative values) and values beyond the largest unit never produce `undefined` in the
 * result. Negative values keep their sign and use the unit of their magnitude.
 *
 * @param {number} bytes the number of bytes
 * @param {number} [decimals=2] maximum number of decimals; negative values count as 0
 * @returns {string} the formatted size, e.g. `1.5 KB`
 */
function hsize(bytes, decimals = 2) {
  if (bytes === 0) {
    return '0 ';
  }
  const k = 1024;
  const dm = decimals < 0 ? 0 : decimals;
  const sizes = [' ', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'];

  // the exponent is computed from the magnitude, the logarithm of a negative number is NaN
  const exponent = Math.floor(Math.log(Math.abs(bytes)) / Math.log(k));
  // non finite exponents (NaN or infinite input) fall back to the smallest unit
  const i = Number.isFinite(exponent)
    ? Math.min(Math.max(exponent, 0), sizes.length - 1)
    : 0;

  // parseFloat removes the trailing zeros produced by toFixed
  return `${parseFloat((bytes / k ** i).toFixed(dm))} ${sizes[i]}`;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Downloads the images referenced in the tree and stores the result on the image nodes.
 *
 * For every `image` node with a `url`, `node.data` is set to `{ key, buffer, dimensions }`.
 * The key is the sha1 of the url with a `.png` suffix. The data is also stored in
 * `ctx.images`, so that an url that was processed before is not downloaded again.
 * `data:image/png;base64,` urls are decoded, all other urls are fetched. A failure for a
 * single image is logged and leaves `node.data` of that node unset.
 *
 * The fetch context is always reset when the function is done, also on failure, and the
 * reset is awaited so that it is finished (and its errors are reported) before returning.
 *
 * @param {object} ctx conversion context; uses `ctx.images` and `ctx.log`
 * @param {object} tree the mdast
 * @returns {Promise<void>}
 */
export default async function downloadImages(ctx, tree) {
  const context = createFetchContext();
  const { fetch } = context;

  try {
    // gather all image nodes
    const images = [];
    visit(tree, (node) => {
      if (node.type === 'image' && node.url) {
        images.push(node);
      }
    });
    let count = 0;

    // download images
    // NOTE(review): the last argument (8) is presumably the concurrency of the queue - confirm
    await processQueue(images, async (node) => {
      try {
        const ref = crypto.createHash('sha1')
          .update(node.url)
          .digest('hex');
        const key = `${ref}.png`;

        // reuse the data if the same url was processed before
        node.data = ctx.images[key];
        if (node.data) {
          return;
        }

        let buffer;
        if (node.url.startsWith('data:image/png;base64,')) {
          buffer = Buffer.from(node.url.split(',').pop(), 'base64');
        } else {
          // the index is only used to correlate the log messages of a download
          const idx = String(count).padStart(2, ' ');
          count += 1;
          ctx.log.info(`[${idx}] GET ${node.url}`);
          const ret = await fetch(node.url);
          if (!ret.ok) {
            const text = await ret.text();
            ctx.log.error(`[${idx}] ${ret.status} ${text}`);
            return;
          }
          buffer = await ret.buffer();
          ctx.log.info(`[${idx}] ${ret.status} ${hsize(buffer.length).padStart(10)} ${ret.headers.get('content-type')}`);
        }

        node.data = {
          key,
          buffer,
          dimensions: getDimensions(buffer),
        };
        ctx.images[key] = node.data;
      } catch (error) {
        // best effort: a failing image must not abort the conversion
        ctx.log.error(`Cannot download image ${node.url}: ${error.message}`);
      }
    }, 8);
  } finally {
    // reset fetch context
    await context.reset();
  }
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2020 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import { visit } from 'unist-util-visit';
|
|
13
|
+
import { unified } from 'unified';
|
|
14
|
+
import parse from 'rehype-parse';
|
|
15
|
+
import { toMdast } from 'hast-util-to-mdast';
|
|
16
|
+
// import inspect from 'unist-util-inspect';
|
|
17
|
+
import tableHandler from './hast-table-handler.js';
|
|
18
|
+
|
|
19
|
+
/**
 * Creates a simple format handler: a handler that calls `h` with the node, the given
 * type and the children of the node, and returns the result.
 *
 * NOTE(review): the children are passed on as they are, they are not converted here.
 *
 * @param {string} type type of the node to create
 * @returns {Function} the handler `(h, node)`
 */
function formatHandler(type) {
  return function handleFormat(h, node) {
    const { children } = node;
    return h(node, type, children);
  };
}
|
|
26
|
+
|
|
27
|
+
/**
 * Creates the handler for `<markdown>` elements. The handler returns the mdast sub tree
 * that is stored in `mdasts` at the index given by the `idx` property of the element.
 *
 * @param {[]} mdasts array of mdast sub trees
 * @returns {Function} the handler `(h, node)`
 */
function mdHandler(mdasts) {
  return function handleMarkdown(h, node) {
    const position = node.properties.idx;
    return mdasts[position];
  };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Sanitizes the html nodes of the tree:
 * - all siblings from an html node up to the last html sibling are collapsed into one
 *   html string. The values of html and text nodes are concatenated; any other node is
 *   represented by a `<markdown idx="...">` marker element.
 * - the html is parsed and converted to mdast again. The marker elements are replaced
 *   with the original nodes by the handler for `<markdown>` elements.
 * - the html node is replaced with the children of the converted tree.
 *
 * @param {object} tree the mdast
 * @returns {object} The modified (original) tree.
 */
export default function sanitizeHtml(tree) {
  // the nodes that were removed while collapsing; the markers refer to them by index
  const mdInserts = [];

  // extra handlers used when the parsed html is converted to mdast
  const handlers = {
    u: formatHandler('underline'),
    sub: formatHandler('subScript'),
    sup: formatHandler('superScript'),
    table: tableHandler,
    markdown: mdHandler(mdInserts),
  };

  visit(tree, (node, index, parent) => {
    if (node.type !== 'html') {
      return visit.CONTINUE;
    }
    const siblings = parent && parent.children !== undefined ? parent.children : [];

    // find the last html sibling, searching backwards down to the node itself
    let lastHtml = siblings.length - 1;
    while (lastHtml >= index && siblings[lastHtml].type !== 'html') {
      lastHtml -= 1;
    }

    let html = node.value;
    if (lastHtml > index) {
      // remove the siblings that follow the node, up to and including the last html sibling
      const removed = siblings.splice(index + 1, lastHtml - index);
      for (const sibling of removed) {
        const isTextual = sibling.type === 'html' || sibling.type === 'text';
        if (isTextual) {
          html += sibling.value;
        } else {
          html += `<markdown idx="${mdInserts.length}"></markdown>`;
        }
        // every removed node is recorded, so a marker index is always the position of
        // its node in `mdInserts`
        mdInserts.push(sibling);
      }
    }

    // parse the html as fragment
    const parser = unified().use(parse, { fragment: true });
    const hast = parser.parse(html);

    // convert to mdast with the extra handlers
    const mdast = toMdast(hast, { handlers });

    // replace the html node with the children of the converted tree
    siblings.splice(index, 1, ...mdast.children);

    // continue after the injected nodes
    return index + mdast.children.length;
  });

  return tree;
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
2
|
+
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
|
|
3
|
+
<Default Extension="png" ContentType="image/png"/>
|
|
4
|
+
<Default Extension="rels"
|
|
5
|
+
ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
|
|
6
|
+
<Default Extension="xml" ContentType="application/xml"/>
|
|
7
|
+
<Override PartName="/word/document.xml"
|
|
8
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
|
|
9
|
+
<Override PartName="/word/numbering.xml"
|
|
10
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/>
|
|
11
|
+
<Override PartName="/word/styles.xml"
|
|
12
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>
|
|
13
|
+
<Override PartName="/word/settings.xml"
|
|
14
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml"/>
|
|
15
|
+
<Override PartName="/word/webSettings.xml"
|
|
16
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml"/>
|
|
17
|
+
<Override PartName="/word/footnotes.xml"
|
|
18
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml"/>
|
|
19
|
+
<Override PartName="/word/endnotes.xml"
|
|
20
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml"/>
|
|
21
|
+
<Override PartName="/word/header1.xml"
|
|
22
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/>
|
|
23
|
+
<Override PartName="/word/header2.xml"
|
|
24
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/>
|
|
25
|
+
<Override PartName="/word/footer1.xml"
|
|
26
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/>
|
|
27
|
+
<Override PartName="/word/footer2.xml"
|
|
28
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/>
|
|
29
|
+
<Override PartName="/word/header3.xml"
|
|
30
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/>
|
|
31
|
+
<Override PartName="/word/footer3.xml"
|
|
32
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/>
|
|
33
|
+
<Override PartName="/word/fontTable.xml"
|
|
34
|
+
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml"/>
|
|
35
|
+
<Override PartName="/word/theme/theme1.xml"
|
|
36
|
+
ContentType="application/vnd.openxmlformats-officedocument.theme+xml"/>
|
|
37
|
+
<Override PartName="/docProps/core.xml"
|
|
38
|
+
ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>
|
|
39
|
+
<Override PartName="/docProps/app.xml"
|
|
40
|
+
ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>
|
|
41
|
+
</Types>
|