@adobe/helix-docx2md 1.0.22 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/package.json +10 -10
- package/src/dast2mdast/dast2mdast.d.ts +2 -1
- package/src/dast2mdast/dast2mdast.js +4 -1
- package/src/dast2mdast/handlers/table-cell.js +36 -0
- package/src/dast2mdast/handlers/table-row.js +12 -0
- package/src/dast2mdast/handlers/table.js +23 -0
- package/src/docx2md.d.ts +2 -0
- package/src/docx2md.js +1 -1
- package/src/mdast2md/mdast2md.d.ts +2 -0
- package/src/mdast2md/mdast2md.js +42 -30
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,24 @@
|
|
|
1
|
+
## [1.1.1](https://github.com/adobe/helix-docx2md/compare/v1.1.0...v1.1.1) (2022-09-11)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Bug Fixes
|
|
5
|
+
|
|
6
|
+
* **deps:** update dependency @adobe/helix-mediahandler to v1.2.2 ([#120](https://github.com/adobe/helix-docx2md/issues/120)) ([42b3cd5](https://github.com/adobe/helix-docx2md/commit/42b3cd5b9d450cbb62a1f6f11bb7fb021186551c))
|
|
7
|
+
|
|
8
|
+
# [1.1.0](https://github.com/adobe/helix-docx2md/compare/v1.0.23...v1.1.0) (2022-09-09)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Features
|
|
12
|
+
|
|
13
|
+
* add support for gridtables ([aff5f95](https://github.com/adobe/helix-docx2md/commit/aff5f951e1172f7f9ca4e14c8935c6e4b0890dd9))
|
|
14
|
+
|
|
15
|
+
## [1.0.23](https://github.com/adobe/helix-docx2md/compare/v1.0.22...v1.0.23) (2022-08-29)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
### Bug Fixes
|
|
19
|
+
|
|
20
|
+
* **deps:** update dependency @adobe/helix-mediahandler to v1.0.42 ([#108](https://github.com/adobe/helix-docx2md/issues/108)) ([ae7726c](https://github.com/adobe/helix-docx2md/commit/ae7726ccce4b26802c42b515288930ce2c328218))
|
|
21
|
+
|
|
1
22
|
## [1.0.22](https://github.com/adobe/helix-docx2md/compare/v1.0.21...v1.0.22) (2022-07-28)
|
|
2
23
|
|
|
3
24
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adobe/helix-docx2md",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"description": "Helix library that converts word documents to markdown",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.js",
|
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
},
|
|
14
14
|
"scripts": {
|
|
15
15
|
"test": "c8 mocha",
|
|
16
|
-
"test-ci": "c8 mocha && codecov",
|
|
17
16
|
"lint": "eslint .",
|
|
18
17
|
"semantic-release": "semantic-release"
|
|
19
18
|
},
|
|
@@ -34,9 +33,9 @@
|
|
|
34
33
|
},
|
|
35
34
|
"homepage": "https://github.com/adobe/helix-docx2md#readme",
|
|
36
35
|
"dependencies": {
|
|
37
|
-
"@adobe/helix-markdown-support": "
|
|
36
|
+
"@adobe/helix-markdown-support": "5.0.7",
|
|
38
37
|
"@adobe/helix-shared-process-queue": "1.1.5",
|
|
39
|
-
"@adobe/mammoth": "1.
|
|
38
|
+
"@adobe/mammoth": "1.5.1-bleeding.1",
|
|
40
39
|
"dirname-filename-esm": "1.1.1",
|
|
41
40
|
"mdast-util-to-markdown": "1.3.0",
|
|
42
41
|
"mdast-util-to-string": "3.1.0",
|
|
@@ -45,26 +44,27 @@
|
|
|
45
44
|
"remark-stringify": "10.0.2",
|
|
46
45
|
"unified": "10.1.2",
|
|
47
46
|
"unist-util-find": "1.0.2",
|
|
48
|
-
"unist-util-visit": "4.1.
|
|
47
|
+
"unist-util-visit": "4.1.1",
|
|
49
48
|
"yauzl": "2.10.0"
|
|
50
49
|
},
|
|
51
50
|
"devDependencies": {
|
|
52
51
|
"@adobe/eslint-config-helix": "1.3.2",
|
|
53
|
-
"@adobe/helix-mediahandler": "1.
|
|
52
|
+
"@adobe/helix-mediahandler": "1.2.2",
|
|
54
53
|
"@semantic-release/changelog": "6.0.1",
|
|
55
54
|
"@semantic-release/exec": "6.0.3",
|
|
56
55
|
"@semantic-release/git": "10.0.1",
|
|
57
56
|
"c8": "7.12.0",
|
|
58
|
-
"dotenv": "16.0.
|
|
59
|
-
"eslint": "8.
|
|
57
|
+
"dotenv": "16.0.2",
|
|
58
|
+
"eslint": "8.23.0",
|
|
59
|
+
"eslint-import-resolver-exports": "1.0.0-beta.3",
|
|
60
60
|
"eslint-plugin-header": "3.1.1",
|
|
61
61
|
"eslint-plugin-import": "2.26.0",
|
|
62
62
|
"husky": "8.0.1",
|
|
63
|
-
"junit-report-builder": "3.0.
|
|
63
|
+
"junit-report-builder": "3.0.1",
|
|
64
64
|
"lint-staged": "13.0.3",
|
|
65
65
|
"mocha": "10.0.0",
|
|
66
66
|
"mocha-multi-reporters": "1.5.1",
|
|
67
|
-
"semantic-release": "19.0.
|
|
67
|
+
"semantic-release": "19.0.5",
|
|
68
68
|
"unist-util-inspect": "7.0.1"
|
|
69
69
|
},
|
|
70
70
|
"lint-staged": {
|
|
package/src/dast2mdast/dast2mdast.d.ts
CHANGED
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
/**
|
|
14
14
|
* Converts the docx AST to markdown ast.
|
|
15
15
|
* @param {object} tree the docx ast
|
|
16
|
+
* @param {boolean} [opts.gridtables = false] generate gridtables
|
|
16
17
|
* @return {object} the markdown ast
|
|
17
18
|
*/
|
|
18
|
-
export default function dast2mdast(tree: object): object;
|
|
19
|
+
export default function dast2mdast(tree: object, opts?: object): object;
|
|
package/src/dast2mdast/dast2mdast.js
CHANGED
|
@@ -30,9 +30,11 @@ import handlers from './handlers/index.js';
|
|
|
30
30
|
*
|
|
31
31
|
* Converts the docx AST to markdown ast.
|
|
32
32
|
* @param {object} tree the docx ast
|
|
33
|
+
* @param {object} opts options
|
|
34
|
+
* @param {boolean} [opts.gridtables = false] generate gridtables
|
|
33
35
|
* @return {object} the markdown ast
|
|
34
36
|
*/
|
|
35
|
-
export default async function dast2mdast(tree) {
|
|
37
|
+
export default async function dast2mdast(tree, opts = {}) {
|
|
36
38
|
const byId = {};
|
|
37
39
|
|
|
38
40
|
const h = (type, props, children) => {
|
|
@@ -59,6 +61,7 @@ export default async function dast2mdast(tree) {
|
|
|
59
61
|
h.frozenBaseUrl = null;
|
|
60
62
|
h.handlers = handlers;
|
|
61
63
|
h.numbering = {};
|
|
64
|
+
h.gridtables = opts.gridtables;
|
|
62
65
|
|
|
63
66
|
/** @type {ListContainers} */
|
|
64
67
|
h.listContainers = [[]];
|
|
package/src/dast2mdast/handlers/table-cell.js
CHANGED
|
@@ -9,9 +9,45 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
+
import { TYPE_CELL } from '@adobe/helix-markdown-support/gridtable';
|
|
12
13
|
import all from '../all.js';
|
|
13
14
|
|
|
15
|
+
const V_ALIGN = {
|
|
16
|
+
top: '',
|
|
17
|
+
center: 'middle',
|
|
18
|
+
bottom: 'bottom',
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
const H_ALIGN = {
|
|
22
|
+
left: '',
|
|
23
|
+
right: 'right',
|
|
24
|
+
center: 'center',
|
|
25
|
+
distribute: 'justify',
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
function gridTableCell(h, node) {
|
|
29
|
+
const props = {};
|
|
30
|
+
if (node.children.length > 0) {
|
|
31
|
+
props.align = H_ALIGN[node.children[0].alignment];
|
|
32
|
+
}
|
|
33
|
+
props.valign = V_ALIGN[node.verticalAlignment];
|
|
34
|
+
|
|
35
|
+
if (node.rowSpan > 1) {
|
|
36
|
+
props.rowSpan = node.rowSpan;
|
|
37
|
+
}
|
|
38
|
+
if (node.colSpan > 1) {
|
|
39
|
+
props.colSpan = node.colSpan;
|
|
40
|
+
}
|
|
41
|
+
h.listContainers.unshift([]);
|
|
42
|
+
const c = h(TYPE_CELL, props, all(h, node));
|
|
43
|
+
h.listContainers.shift();
|
|
44
|
+
return c;
|
|
45
|
+
}
|
|
46
|
+
|
|
14
47
|
export default function cell(h, node) {
|
|
48
|
+
if (h.gridtables) {
|
|
49
|
+
return gridTableCell(h, node);
|
|
50
|
+
}
|
|
15
51
|
// remember alignment for table
|
|
16
52
|
const props = {};
|
|
17
53
|
if (node.children.length > 0) {
|
|
package/src/dast2mdast/handlers/table-row.js
CHANGED
|
@@ -9,8 +9,20 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
+
import { TYPE_ROW } from '@adobe/helix-markdown-support/gridtable';
|
|
12
13
|
import all from '../all.js';
|
|
13
14
|
|
|
15
|
+
function gridTableRow(h, node) {
|
|
16
|
+
const props = {};
|
|
17
|
+
if (node.isHeader) {
|
|
18
|
+
props.isHeader = true;
|
|
19
|
+
}
|
|
20
|
+
return h(TYPE_ROW, props, all(h, node));
|
|
21
|
+
}
|
|
22
|
+
|
|
14
23
|
export default function row(h, node) {
|
|
24
|
+
if (h.gridtables) {
|
|
25
|
+
return gridTableRow(h, node);
|
|
26
|
+
}
|
|
15
27
|
return h('tableRow', all(h, node));
|
|
16
28
|
}
|
|
package/src/dast2mdast/handlers/table.js
CHANGED
|
@@ -9,9 +9,32 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
+
import { TYPE_TABLE, TYPE_HEADER, TYPE_BODY } from '@adobe/helix-markdown-support/gridtable';
|
|
12
13
|
import all from '../all.js';
|
|
13
14
|
|
|
15
|
+
function gridTable(h, node) {
|
|
16
|
+
let rows = all(h, node);
|
|
17
|
+
if (rows[0]?.isHeader) {
|
|
18
|
+
const header = [];
|
|
19
|
+
while (rows[0]?.isHeader) {
|
|
20
|
+
const row = rows.shift();
|
|
21
|
+
delete row.isHeader;
|
|
22
|
+
header.push(row);
|
|
23
|
+
}
|
|
24
|
+
rows = [
|
|
25
|
+
h(TYPE_HEADER, header),
|
|
26
|
+
h(TYPE_BODY, rows),
|
|
27
|
+
];
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
return h(TYPE_TABLE, rows);
|
|
31
|
+
}
|
|
32
|
+
|
|
14
33
|
export default function table(h, node) {
|
|
34
|
+
if (h.gridtables) {
|
|
35
|
+
return gridTable(h, node);
|
|
36
|
+
}
|
|
37
|
+
|
|
15
38
|
const nodes = all(h, node);
|
|
16
39
|
|
|
17
40
|
// get alignment of first row, if any
|
package/src/docx2md.d.ts
CHANGED
package/src/docx2md.js
CHANGED
package/src/mdast2md/mdast2md.js
CHANGED
|
@@ -21,12 +21,15 @@ import {
|
|
|
21
21
|
robustTables,
|
|
22
22
|
suppressSpaceCode,
|
|
23
23
|
sanitizeHeading,
|
|
24
|
-
remarkMatter,
|
|
25
24
|
breaksAsSpaces,
|
|
26
25
|
sanitizeFormats,
|
|
27
26
|
sanitizeText,
|
|
27
|
+
sanitizeTextAndFormats,
|
|
28
28
|
sanitizeLinks,
|
|
29
|
+
imageReferences,
|
|
29
30
|
} from '@adobe/helix-markdown-support';
|
|
31
|
+
import { remarkMatter } from '@adobe/helix-markdown-support/matter';
|
|
32
|
+
import { remarkGridTable } from '@adobe/helix-markdown-support/gridtable';
|
|
30
33
|
|
|
31
34
|
import processImages from './mdast-process-images.js';
|
|
32
35
|
import sanitizeAutoEmbeds from './mdast-sanitize-autoembeds.js';
|
|
@@ -45,18 +48,46 @@ export default async function mdast2md(mdast, opts = {}) {
|
|
|
45
48
|
listener('beforePostProcess', mdast);
|
|
46
49
|
}
|
|
47
50
|
|
|
51
|
+
const processor = unified()
|
|
52
|
+
.use(stringify, {
|
|
53
|
+
strong: '*',
|
|
54
|
+
emphasis: '_',
|
|
55
|
+
bullet: '-',
|
|
56
|
+
fence: '`',
|
|
57
|
+
fences: true,
|
|
58
|
+
incrementListMarker: true,
|
|
59
|
+
rule: '-',
|
|
60
|
+
ruleRepetition: 3,
|
|
61
|
+
ruleSpaces: false,
|
|
62
|
+
})
|
|
63
|
+
.use(gfm)
|
|
64
|
+
.use(remarkMatter)
|
|
65
|
+
.use(orderedListPlugin);
|
|
66
|
+
|
|
48
67
|
// process.stdout.write(inspect(mdast));
|
|
49
68
|
// process.stdout.write('\n');
|
|
50
69
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
70
|
+
if (opts.gridtables) {
|
|
71
|
+
await sanitizeHeading(mdast);
|
|
72
|
+
await sanitizeLinks(mdast);
|
|
73
|
+
await sanitizeTextAndFormats(mdast);
|
|
74
|
+
await suppressSpaceCode(mdast);
|
|
75
|
+
await sanitizeAutoEmbeds(mdast);
|
|
76
|
+
await processImages(log, mdast, opts.mediaHandler, opts.source);
|
|
77
|
+
await imageReferences(mdast);
|
|
78
|
+
processor.use(remarkGridTable);
|
|
79
|
+
} else {
|
|
80
|
+
await sanitizeHeading(mdast);
|
|
81
|
+
await sanitizeFormats(mdast); // collapse formats once
|
|
82
|
+
await sanitizeLinks(mdast);
|
|
83
|
+
await sanitizeFormats(mdast); // and again for sanitized links
|
|
84
|
+
await sanitizeText(mdast);
|
|
85
|
+
await suppressSpaceCode(mdast);
|
|
86
|
+
await sanitizeAutoEmbeds(mdast);
|
|
87
|
+
await processImages(log, mdast, opts.mediaHandler, opts.source);
|
|
88
|
+
await robustTables(mdast);
|
|
89
|
+
processor.use(breaksAsSpaces);
|
|
90
|
+
}
|
|
60
91
|
|
|
61
92
|
// process.stdout.write(inspect(mdast));
|
|
62
93
|
// process.stdout.write('\n');
|
|
@@ -67,26 +98,7 @@ export default async function mdast2md(mdast, opts = {}) {
|
|
|
67
98
|
}
|
|
68
99
|
|
|
69
100
|
// noinspection JSVoidFunctionReturnValueUsed
|
|
70
|
-
const md = unified()
|
|
71
|
-
.use(stringify, {
|
|
72
|
-
strong: '*',
|
|
73
|
-
emphasis: '_',
|
|
74
|
-
bullet: '-',
|
|
75
|
-
fence: '`',
|
|
76
|
-
fences: true,
|
|
77
|
-
incrementListMarker: true,
|
|
78
|
-
rule: '-',
|
|
79
|
-
ruleRepetition: 3,
|
|
80
|
-
ruleSpaces: false,
|
|
81
|
-
})
|
|
82
|
-
.use(gfm, {
|
|
83
|
-
// tableCellPadding: false,
|
|
84
|
-
// tablePipeAlign: false,
|
|
85
|
-
})
|
|
86
|
-
.use(breaksAsSpaces)
|
|
87
|
-
.use(remarkMatter)
|
|
88
|
-
.use(orderedListPlugin)
|
|
89
|
-
.stringify(mdast);
|
|
101
|
+
const md = processor.stringify(mdast);
|
|
90
102
|
|
|
91
103
|
if (listener) {
|
|
92
104
|
listener('afterToMarkdown', md);
|