@adobe/helix-md2docx 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/CODE_OF_CONDUCT.md +74 -0
  3. package/CONTRIBUTING.md +74 -0
  4. package/LICENSE.txt +264 -0
  5. package/README.md +33 -0
  6. package/package.json +70 -0
  7. package/src/cli/convert2docx.js +96 -0
  8. package/src/index.d.ts +13 -0
  9. package/src/index.js +13 -0
  10. package/src/mdast2docx/all.js +31 -0
  11. package/src/mdast2docx/default-numbering.js +79 -0
  12. package/src/mdast2docx/handlers/break.js +22 -0
  13. package/src/mdast2docx/handlers/characterStyle.js +29 -0
  14. package/src/mdast2docx/handlers/code.js +27 -0
  15. package/src/mdast2docx/handlers/heading.js +32 -0
  16. package/src/mdast2docx/handlers/html.js +35 -0
  17. package/src/mdast2docx/handlers/image.js +90 -0
  18. package/src/mdast2docx/handlers/index.js +56 -0
  19. package/src/mdast2docx/handlers/inlineCode.js +21 -0
  20. package/src/mdast2docx/handlers/link.js +63 -0
  21. package/src/mdast2docx/handlers/list.js +39 -0
  22. package/src/mdast2docx/handlers/listItem.js +16 -0
  23. package/src/mdast2docx/handlers/paragraph.js +52 -0
  24. package/src/mdast2docx/handlers/paragraphStyle.js +21 -0
  25. package/src/mdast2docx/handlers/root.js +16 -0
  26. package/src/mdast2docx/handlers/table.js +54 -0
  27. package/src/mdast2docx/handlers/tableCell.js +51 -0
  28. package/src/mdast2docx/handlers/tableRow.js +28 -0
  29. package/src/mdast2docx/handlers/text.js +24 -0
  30. package/src/mdast2docx/handlers/thematicBreak.js +24 -0
  31. package/src/mdast2docx/hast-table-handler.js +145 -0
  32. package/src/mdast2docx/index.d.ts +21 -0
  33. package/src/mdast2docx/index.js +88 -0
  34. package/src/mdast2docx/mdast-download-images.js +92 -0
  35. package/src/mdast2docx/mdast-sanitize-html.js +112 -0
  36. package/src/mdast2docx/template/[Content_Types].xml +41 -0
  37. package/src/mdast2docx/template/docProps/app.xml +20 -0
  38. package/src/mdast2docx/template/docProps/core.xml +12 -0
  39. package/src/mdast2docx/template/word/_rels/document.xml.rels +51 -0
  40. package/src/mdast2docx/template/word/_rels/settings.xml.rels +7 -0
  41. package/src/mdast2docx/template/word/document.xml +1116 -0
  42. package/src/mdast2docx/template/word/endnotes.xml +56 -0
  43. package/src/mdast2docx/template/word/fontTable.xml +58 -0
  44. package/src/mdast2docx/template/word/footer1.xml +39 -0
  45. package/src/mdast2docx/template/word/footer2.xml +39 -0
  46. package/src/mdast2docx/template/word/footer3.xml +39 -0
  47. package/src/mdast2docx/template/word/footnotes.xml +56 -0
  48. package/src/mdast2docx/template/word/header1.xml +39 -0
  49. package/src/mdast2docx/template/word/header2.xml +39 -0
  50. package/src/mdast2docx/template/word/header3.xml +39 -0
  51. package/src/mdast2docx/template/word/media/image1.png +0 -0
  52. package/src/mdast2docx/template/word/numbering.xml +277 -0
  53. package/src/mdast2docx/template/word/settings.xml +91 -0
  54. package/src/mdast2docx/template/word/styles.xml +1084 -0
  55. package/src/mdast2docx/template/word/theme/theme1.xml +296 -0
  56. package/src/mdast2docx/template/word/webSettings.xml +40 -0
  57. package/src/mdast2docx/template.docx +0 -0
  58. package/src/mdast2docx/utils.js +22 -0
@@ -0,0 +1,54 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import docx from 'docx';
13
+ import all from '../all.js';
14
+
15
+ const { Paragraph, Table, WidthType } = docx;
16
+
17
+ // see http://officeopenxml.com/WPtableWidth.php
18
+ // Note: The 2006 version of the OOXML standard specified that the value was to be a decimal.
19
+ // When type="pct", the value was interpreted as fifths of a percent, so 4975=99.5%,
20
+ // and no % symbol was included in the attribute. In the 2011 version the value can be either a
21
+ // decimal or a percent, so a % symbol should be included when type="pct".
22
+
23
/**
 * Converts an mdast `table` node into a docx table.
 *
 * While the rows are processed, `ctx.table` holds the state of the table being
 * built (available width and column alignment). The previous value is restored
 * afterwards so that tables nested inside cells do not clobber their parent.
 *
 * @param {object} ctx the conversion context
 * @param {object} node the mdast table node
 * @returns {Promise<Array>} the docx table followed by an empty paragraph
 */
export default async function table(ctx, node) {
  const parentTable = ctx.table;

  // A nested table is limited to the column width of its parent table.
  // default width: Letter Width - Margin = 8.5" - 2" = 6.5". the unit is 1/1440 inches.
  const width = parentTable ? parentTable.columnWidth : 1440 * 6.5;
  const align = node.align || [];
  ctx.table = { width, align };

  // the tableRow handler computes `columnWidth` while the rows are processed
  const rows = await all(ctx, node);
  const { columnWidth } = ctx.table;
  ctx.table = parentTable;

  // every column gets the same width; the column count is taken from the first row
  const numCols = rows.length ? rows[0].CellCount : 0;
  const columnWidths = new Array(numCols).fill(Math.round(columnWidth));

  const options = {
    style: 'PageBlock',
    rows,
    columnWidths,
    width: {
      size: 100,
      type: WidthType.PERCENTAGE,
    },
  };

  // add empty paragraph for better separation in word
  return [new Table(options), new Paragraph([])];
}
@@ -0,0 +1,51 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import docx from 'docx';
13
+ import all from '../all.js';
14
+
15
+ const {
16
+ AlignmentType, Paragraph, Table, TableCell,
17
+ } = docx;
18
+
19
+ const ALIGN = {
20
+ left: null,
21
+ right: AlignmentType.RIGHT,
22
+ center: AlignmentType.CENTER,
23
+ };
24
+
25
/**
 * Converts an mdast `tableCell` node into a docx table cell.
 *
 * Paragraphs and tables produced by the child handlers are kept as they are.
 * Consecutive runs of any other children are wrapped in a paragraph that
 * carries the alignment of the cell's column.
 *
 * @param {object} ctx the conversion context
 * @param {object} node the mdast table cell node
 * @param {object} parent the parent node (unused)
 * @param {Array} siblings the results produced so far for the preceding cells;
 *                         its length is used as the column index
 * @returns {Promise<TableCell>} the docx table cell
 */
export default async function tableCell(ctx, node, parent, siblings) {
  const children = await all(ctx, node);
  const alignment = ALIGN[ctx.table.align[siblings.length]];

  const content = [];
  let inline = [];

  // moves the collected non-block children into a paragraph of their own
  const flushInline = () => {
    if (inline.length) {
      content.push(new Paragraph({ alignment, children: inline }));
      inline = [];
    }
  };

  for (const child of children) {
    const isBlock = (child instanceof Paragraph) || (child instanceof Table);
    if (isBlock) {
      flushInline();
      content.push(child);
    } else {
      inline.push(child);
    }
  }
  flushInline();

  return new TableCell({ children: content });
}
@@ -0,0 +1,28 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import docx from 'docx';
13
+ import all from '../all.js';
14
+
15
+ const { TableRow } = docx;
16
+
17
/**
 * Converts an mdast `tableRow` node into a docx table row.
 *
 * The first row that is processed for a table also determines the column
 * width: the table width is divided evenly by the number of cells in that row.
 *
 * @param {object} ctx the conversion context
 * @param {object} node the mdast table row node
 * @param {object} parent the parent node (unused)
 * @param {Array} siblings the results produced so far for the preceding rows
 * @returns {Promise<TableRow>} the docx table row
 */
export default async function tableRow(ctx, node, parent, siblings) {
  const state = ctx.table;
  if (!state.columnWidth) {
    state.columnWidth = state.width / node.children.length;
  }

  const cells = await all(ctx, node);

  // the row without preceding siblings is the header row
  const isFirstRow = siblings.length === 0;
  return new TableRow({ children: cells, tableHeader: isFirstRow });
}
@@ -0,0 +1,24 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import docx from 'docx';
13
+
14
+ const { TextRun } = docx;
15
+
16
/**
 * Converts an mdast `text` node into docx text runs.
 *
 * The value is split at newlines; each line becomes its own run and every run
 * but the first is preceded by one line break. The current `ctx.style` is
 * applied to all runs.
 *
 * @param {object} ctx the conversion context
 * @param {object} node the mdast text node
 * @returns {TextRun[]} one text run per line
 */
export default function textNode(ctx, node) {
  const lines = node.value.split('\n');
  return lines.map((text, idx) => {
    const lineBreak = idx === 0 ? 0 : 1;
    return new TextRun({ ...ctx.style, text, break: lineBreak });
  });
}
@@ -0,0 +1,24 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import docx from 'docx';
13
+
14
+ const { Paragraph } = docx;
15
+
16
/**
 * Converts an mdast `thematicBreak` node into a paragraph containing the
 * text `---`, with spacing before and after it.
 *
 * @returns {Paragraph} the docx paragraph
 */
export default function thematicBreak() {
  const spacing = { before: 250, after: 250 };
  return new Paragraph({ text: '---', spacing });
}
@@ -0,0 +1,145 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ 'use strict';
14
+
15
+ import { convertElement } from 'hast-util-is-element';
16
+ import { visit } from 'unist-util-visit';
17
+ import { all } from 'hast-util-to-mdast/lib/all.js';
18
+
19
+ const thead = convertElement('thead');
20
+ const tr = convertElement('tr');
21
+ const cell = convertElement(['th', 'td']);
22
+
23
+ /*
24
+ copied and adapted from
25
+ https://github.com/syntax-tree/hast-util-to-mdast/blob/7.1.3/lib/handlers/table.js
26
+ */
27
+
28
+ // Infer whether the HTML table has a head and how it aligns.
29
/**
 * Infers whether the HTML table has a head and how its columns align.
 *
 * @param {object} node the hast `table` element
 * @returns {{align: Array, headless: boolean}} the alignment per column
 *          (`null` if a cell has no `align` property) and whether no header
 *          row was detected
 */
function inspect(node) {
  // Visitor actions as documented by unist-util-visit: 'skip' does not
  // traverse the children of the current node, `true` continues as usual.
  // The literals are used because `visit` is consumed as a named export here,
  // and with that API the actions are separate exports rather than properties
  // of `visit` - `visit.SKIP` would be `undefined` and never skip anything.
  const SKIP = 'skip';
  const CONTINUE = true;

  let headless = true;
  const align = [];
  let rowIndex = 0;
  let cellIndex = 0;

  function visitor(child) {
    // If there is a `thead`, assume there is a header row.
    if (thead(child)) {
      headless = false;
      return CONTINUE;
    }

    if (tr(child)) {
      rowIndex += 1;
      cellIndex = 0;
      return CONTINUE;
    }

    if (cell(child)) {
      // the first cell seen for a column defines its alignment
      if (align[cellIndex] === undefined) {
        align[cellIndex] = child.properties.align || null;
      }

      // If there is a th in the first row, assume there is a header row.
      if (headless && rowIndex < 2 && child.tagName === 'th') {
        headless = false;
      }

      cellIndex += 1;
      // do not descend into the cell: rows and cells of a nested table must
      // not influence the column layout of this table
      return SKIP;
    }

    return CONTINUE;
  }

  visit(node, 'element', visitor);

  return { align, headless };
}
62
+
63
+ // Ensure the cells in a row are properly structured.
64
/**
 * Ensures the cells in a row are properly structured.
 *
 * Nodes that are not table cells are moved into the next table cell (in front
 * of its existing children). Stray nodes after the last cell are appended to
 * that cell, and a new cell is created for them if the row has none. Finally
 * the row is padded with empty cells up to the number of columns in
 * `info.align`.
 *
 * Note that the `children` of the given table cell nodes are reassigned.
 *
 * @param {Array} children the mdast nodes of the row
 * @param {{align: Array}} info the table info
 * @returns {Array} the table cell nodes
 */
function toCells(children, info) {
  const cells = [];
  let pending = [];

  for (const child of children) {
    if (child.type !== 'tableCell') {
      pending.push(child);
    } else {
      if (pending.length > 0) {
        child.children = pending.concat(child.children);
        pending = [];
      }
      cells.push(child);
    }
  }

  if (pending.length > 0) {
    let last = cells[cells.length - 1];
    if (!last) {
      last = { type: 'tableCell', children: [] };
      cells.push(last);
    }
    last.children = last.children.concat(pending);
  }

  // add empty cells if there are more in the table
  while (cells.length < info.align.length) {
    cells.push({ type: 'tableCell', children: [] });
  }

  return cells;
}
103
+
104
+ // Ensure the rows are properly structured.
105
/**
 * Ensures the rows of a table are properly structured.
 *
 * Nodes that are not table rows are moved into the next table row (in front of
 * its existing children). Stray nodes after the last row are appended to that
 * row; if the table has no row at all, a row is created for them instead of
 * failing on the missing row. Afterwards the children of every row are
 * normalized with `toCells`.
 *
 * No extra (empty) header row is added for headless tables.
 *
 * Note that the `children` of the given table row nodes are reassigned.
 *
 * @param {Array} children the mdast nodes of the table
 * @param {{align: Array}} info the table info, passed on to `toCells`
 * @returns {Array} the table row nodes
 */
function toRows(children, info) {
  const nodes = [];
  let queue;

  children.forEach((node) => {
    if (node.type === 'tableRow') {
      if (queue) {
        // eslint-disable-next-line no-param-reassign
        node.children = queue.concat(node.children);
        queue = undefined;
      }

      nodes.push(node);
    } else {
      if (!queue) {
        queue = [];
      }
      queue.push(node);
    }
  });

  if (queue) {
    let node = nodes[nodes.length - 1];

    // there is no row the stray content could be attached to
    if (!node) {
      node = { type: 'tableRow', children: [] };
      nodes.push(node);
    }

    node.children = node.children.concat(queue);
  }

  nodes.forEach((node) => {
    // eslint-disable-next-line no-param-reassign
    node.children = toCells(node.children, info);
  });

  return nodes;
}
141
+
142
/**
 * hast-util-to-mdast handler for `table` elements.
 *
 * @param {Function} h the node builder passed in by hast-util-to-mdast
 * @param {object} node the hast `table` element
 * @returns {object} the mdast table node with the inferred column alignment
 */
export default function table(h, node) {
  const info = inspect(node);
  const { align } = info;
  const rows = toRows(all(h, node), info);
  return h(node, 'table', { align }, rows);
}
@@ -0,0 +1,21 @@
1
+ /*
2
+ * Copyright 2022 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ declare interface Logger {}
13
+
14
/**
 * Converts the mdast to a word document (docx).
 *
 * @param {Node} mdast The mdast
 * @param {Logger} [log] a console like logger. Optional; the implementation
 *                       falls back to `console` when it is omitted.
 * @returns {Promise<Buffer>} the docx
 */
export default function mdast2docx(mdast: object, log?: Logger): Promise<Buffer>;
@@ -0,0 +1,88 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import { readFile } from 'fs/promises';
13
+ import path from 'path';
14
+ import { dirname } from 'dirname-filename-esm';
15
+ import docx from 'docx';
16
+
17
+ import all from './all.js';
18
+ import handlers from './handlers/index.js';
19
+ import numbering from './default-numbering.js';
20
+ import sanitizeHtml from './mdast-sanitize-html.js';
21
+ // import { openArrayBuffer } from '../zipfile.js';
22
+ import { findXMLComponent } from './utils.js';
23
+ import downloadImages from './mdast-download-images.js';
24
+
25
+ const { Document, Packer } = docx;
26
+
27
+ // eslint-disable-next-line no-underscore-dangle
28
+ const __dirname = dirname(import.meta);
29
+
30
/**
 * Converts the mdast to a word document (docx).
 *
 * The tree is first passed through `sanitizeHtml`, then the images it
 * references are downloaded and the tree is converted with the registered
 * handlers. The styles are read from the `template/word/styles.xml` file next
 * to this module.
 *
 * @param {object} mdast The mdast
 * @param {object} [log] a console like logger
 * @returns {Promise<Buffer>} the docx
 */
export default async function mdast2docx(mdast, log = console) {
  const ctx = {
    handlers,
    style: {},
    paragraphStyle: '',
    images: {},
    listLevel: -1,
    lists: [],
    log,
  };

  const tree = sanitizeHtml(mdast);
  await downloadImages(ctx, tree);
  const children = await all(ctx, tree);

  // read the styles from the unpacked template
  const stylesPath = path.resolve(__dirname, 'template', 'word', 'styles.xml');
  const styleXML = await readFile(stylesPath, 'utf-8');

  const sections = [{ children }];
  const doc = new Document({ numbering, externalStyles: styleXML, sections });

  // temporary hack for problems with online word
  const bulletNumbering = doc.numbering.concreteNumberingMap.get('default-bullet-numbering');
  bulletNumbering.root[0].root.numId = 1;
  bulletNumbering.numId = 1;

  // temporary hack for problems with lists in online word:
  // move the `w:lvlJc` component to the end of every `w:lvl` element
  for (const abstractNumbering of doc.numbering.abstractNumberingMap.values()) {
    const levels = abstractNumbering.root.filter((attr) => attr.rootKey === 'w:lvl');
    for (const level of levels) {
      const jc = findXMLComponent(level, 'w:lvlJc');
      if (jc) {
        const position = level.root.indexOf(jc);
        level.root.splice(position, 1);
        level.root.push(jc);
      }
    }
  }

  return Packer.toBuffer(doc);
}
@@ -0,0 +1,92 @@
1
+ /*
2
+ * Copyright 2020 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ /* eslint-disable no-param-reassign */
13
+ import crypto from 'crypto';
14
+ import { context as fetchAPI, h1 } from '@adobe/helix-fetch';
15
+ import processQueue from '@adobe/helix-shared-process-queue';
16
+ import { visit } from 'unist-util-visit';
17
+ import getDimensions from 'image-size';
18
+
19
/**
 * Creates the fetch context used for downloading images. The HTTP/1 only
 * context is used if the `HELIX_FETCH_FORCE_HTTP1` environment variable is set.
 *
 * @returns {object} the fetch context
 */
function createFetchContext() {
  if (process.env.HELIX_FETCH_FORCE_HTTP1) {
    return h1();
  }
  return fetchAPI();
}
24
+
25
/**
 * Formats a number of bytes as human readable size using units of 1024.
 *
 * Plain byte values have a blank as unit, i.e. they end with two spaces
 * (`'500  '`), and `0` is returned as `'0 '`.
 *
 * The unit is clamped to the known range: values below 1 are reported in
 * bytes and values beyond the largest unit are reported in `YB`, so that the
 * result never contains `undefined`.
 *
 * @param {number} bytes the size in bytes
 * @param {number} [decimals=2] maximum number of decimals; negative values are
 *                              treated as 0
 * @returns {string} the formatted size
 */
function hsize(bytes, decimals = 2) {
  if (bytes === 0) {
    return '0 ';
  }
  const k = 1024;
  const dm = decimals < 0 ? 0 : decimals;
  const sizes = [' ', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'];

  // use the magnitude, so that negative values get a unit as well
  const exponent = Math.floor(Math.log(Math.abs(bytes)) / Math.log(k));
  const i = Number.isFinite(exponent)
    ? Math.min(Math.max(exponent, 0), sizes.length - 1)
    : 0;
  return `${parseFloat((bytes / k ** i).toFixed(dm))} ${sizes[i]}`;
}
35
+
36
/**
 * Downloads the images referenced in the tree.
 *
 * For every `image` node with a `url`, the image data is stored in `node.data`
 * as `{ key, buffer, dimensions }` and cached in `ctx.images` under `key`,
 * which is the SHA-1 hash of the url followed by `.png`. `data:image/png`
 * urls are decoded directly, everything else is fetched. Failed downloads are
 * logged to `ctx.log` and leave the node without data; they do not abort the
 * conversion.
 *
 * The fetch context is always reset when the function ends, also if
 * processing the queue fails.
 *
 * @param {object} ctx the conversion context (uses `ctx.images` and `ctx.log`)
 * @param {object} tree the mdast tree
 * @returns {Promise<void>}
 */
export default async function downloadImages(ctx, tree) {
  const context = createFetchContext();
  const { fetch } = context;

  // gather all image nodes
  const images = [];
  visit(tree, (node) => {
    if (node.type === 'image' && node.url) {
      images.push(node);
    }
  });
  let count = 0;

  try {
    // download images
    await processQueue(images, async (node) => {
      try {
        const ref = crypto.createHash('sha1')
          .update(node.url)
          .digest('hex');
        const key = `${ref}.png`;

        // reuse the data of an image that was already loaded
        node.data = ctx.images[key];
        if (node.data) {
          return;
        }

        let buffer;
        if (node.url.startsWith('data:image/png;base64,')) {
          buffer = Buffer.from(node.url.split(',').pop(), 'base64');
        } else {
          const idx = String(count).padStart(2, ' ');
          count += 1;
          ctx.log.info(`[${idx}] GET ${node.url}`);
          const ret = await fetch(node.url);
          if (!ret.ok) {
            const text = await ret.text();
            ctx.log.error(`[${idx}] ${ret.status} ${text}`);
            return;
          }
          buffer = await ret.buffer();
          ctx.log.info(`[${idx}] ${ret.status} ${hsize(buffer.length).padStart(10)} ${ret.headers.get('content-type')}`);
        }

        node.data = {
          key,
          buffer,
          dimensions: getDimensions(buffer),
        };
        ctx.images[key] = node.data;
      } catch (error) {
        ctx.log.error(`Cannot download image ${node.url}: ${error.message}`);
      }
    }, 8);
  } finally {
    // reset fetch context; awaited so that a failing reset is not left behind
    // as an unhandled rejection
    await context.reset();
  }
}
@@ -0,0 +1,112 @@
1
+ /*
2
+ * Copyright 2020 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import { visit } from 'unist-util-visit';
13
+ import { unified } from 'unified';
14
+ import parse from 'rehype-parse';
15
+ import { toMdast } from 'hast-util-to-mdast';
16
+ // import inspect from 'unist-util-inspect';
17
+ import tableHandler from './hast-table-handler.js';
18
+
19
/**
 * Creates a simple format handler that maps an element to an mdast node of
 * the given type, using the children of the element as its children.
 *
 * NOTE(review): the children are handed over as they are, without being
 * converted by the handlers - confirm that this is sufficient for elements
 * nested inside the formatted element.
 *
 * @param {string} type the type of the mdast node to create
 * @returns {Function} the handler
 */
function formatHandler(type) {
  return function handleFormat(h, node) {
    const { children } = node;
    return h(node, type, children);
  };
}
26
+
27
/**
 * Creates the handler for `<markdown>` elements. The handler returns the
 * mdast sub tree stored at the position given by the `idx` property of the
 * element.
 *
 * @param {Array} mdasts array of mdast sub trees
 * @returns {Function} the handler
 */
function mdHandler(mdasts) {
  return (h, node) => mdasts[node.properties.idx];
}
37
+
38
/**
 * Sanitizes html:
 * - collapses the content between the first and the last html sibling into a
 *   single html string. The values of html and text nodes are concatenated;
 *   every other node is replaced by a `<markdown idx="...">` marker element
 *   that refers to the removed node.
 * - parses the html and converts it to mdast again. The marker elements are
 *   replaced with the nodes they refer to.
 *
 * @param {object} tree
 * @returns {object} The modified (original) tree.
 */
export default function sanitizeHtml(tree) {
  // nodes removed from the tree; the markers refer to them by position
  const mdInserts = [];

  visit(tree, (node, index, parent) => {
    if (node.type !== 'html') {
      return visit.CONTINUE;
    }
    const { children: siblings = [] } = parent || {};

    // find last html block
    let lastHtml = siblings.length - 1;
    while (lastHtml >= index && siblings[lastHtml].type !== 'html') {
      lastHtml -= 1;
    }

    let html = node.value;
    if (lastHtml > index) {
      // remove all nodes up to and including the last html node
      const removed = siblings.splice(index + 1, lastHtml - index);

      // and append them to the html. non-html content is added as special markdown element
      // marker which is then handled in the mdHandler for the `<markdown>` elements.
      for (const n of removed) {
        const isMarkup = n.type === 'html' || n.type === 'text';
        html += isMarkup ? n.value : `<markdown idx="${mdInserts.length}"></markdown>`;
        mdInserts.push(n);
      }
    }

    // try parse html
    const processor = unified().use(parse, { fragment: true });
    const hast = processor.parse(html);

    // convert to mdast with extra handlers
    const handlers = {
      u: formatHandler('underline'),
      sub: formatHandler('subScript'),
      sup: formatHandler('superScript'),
      table: tableHandler,
      markdown: mdHandler(mdInserts),
    };
    const mdast = toMdast(hast, { handlers });

    // inject children of parsed tree
    siblings.splice(index, 1, ...mdast.children);

    // continue after
    return index + mdast.children.length;
  });

  return tree;
}
@@ -0,0 +1,41 @@
1
+ <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2
+ <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
3
+ <Default Extension="png" ContentType="image/png"/>
4
+ <Default Extension="rels"
5
+ ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
6
+ <Default Extension="xml" ContentType="application/xml"/>
7
+ <Override PartName="/word/document.xml"
8
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
9
+ <Override PartName="/word/numbering.xml"
10
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/>
11
+ <Override PartName="/word/styles.xml"
12
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>
13
+ <Override PartName="/word/settings.xml"
14
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml"/>
15
+ <Override PartName="/word/webSettings.xml"
16
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml"/>
17
+ <Override PartName="/word/footnotes.xml"
18
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml"/>
19
+ <Override PartName="/word/endnotes.xml"
20
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml"/>
21
+ <Override PartName="/word/header1.xml"
22
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/>
23
+ <Override PartName="/word/header2.xml"
24
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/>
25
+ <Override PartName="/word/footer1.xml"
26
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/>
27
+ <Override PartName="/word/footer2.xml"
28
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/>
29
+ <Override PartName="/word/header3.xml"
30
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/>
31
+ <Override PartName="/word/footer3.xml"
32
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/>
33
+ <Override PartName="/word/fontTable.xml"
34
+ ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml"/>
35
+ <Override PartName="/word/theme/theme1.xml"
36
+ ContentType="application/vnd.openxmlformats-officedocument.theme+xml"/>
37
+ <Override PartName="/docProps/core.xml"
38
+ ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>
39
+ <Override PartName="/docProps/app.xml"
40
+ ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>
41
+ </Types>