@adobe/helix-docx2md 1.1.4 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/package.json +5 -4
- package/src/dast2mdast/dast2mdast.js +56 -1
- package/src/dast2mdast/handlers/bookmark.js +24 -0
- package/src/dast2mdast/handlers/hyperlink.js +11 -1
- package/src/dast2mdast/handlers/index.js +2 -0
- package/src/dast2mdast/handlers/paragraph.js +9 -2
- package/src/dast2mdast/id-slugger.js +38 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
# [1.2.0](https://github.com/adobe/helix-docx2md/compare/v1.1.4...v1.2.0) (2022-10-25)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* add support for document internal links and bookmarks ([#140](https://github.com/adobe/helix-docx2md/issues/140)) ([90bc3af](https://github.com/adobe/helix-docx2md/commit/90bc3afb3c05819ed285bb6deedf0fff89ba52b3)), closes [#134](https://github.com/adobe/helix-docx2md/issues/134)
|
|
7
|
+
|
|
1
8
|
## [1.1.4](https://github.com/adobe/helix-docx2md/compare/v1.1.3...v1.1.4) (2022-10-05)
|
|
2
9
|
|
|
3
10
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adobe/helix-docx2md",
|
|
3
|
-
"version": "1.1.4",
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "Helix library that converts word documents to markdown",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.js",
|
|
@@ -37,6 +37,7 @@
|
|
|
37
37
|
"@adobe/helix-shared-process-queue": "1.1.5",
|
|
38
38
|
"@adobe/mammoth": "1.5.1-bleeding.1",
|
|
39
39
|
"dirname-filename-esm": "1.1.1",
|
|
40
|
+
"github-slugger": "1.4.0",
|
|
40
41
|
"mdast-util-to-markdown": "1.3.0",
|
|
41
42
|
"mdast-util-to-string": "3.1.0",
|
|
42
43
|
"remark-gfm": "3.0.1",
|
|
@@ -49,20 +50,20 @@
|
|
|
49
50
|
},
|
|
50
51
|
"devDependencies": {
|
|
51
52
|
"@adobe/eslint-config-helix": "1.3.2",
|
|
52
|
-
"@adobe/helix-mediahandler": "1.2.
|
|
53
|
+
"@adobe/helix-mediahandler": "1.2.12",
|
|
53
54
|
"@semantic-release/changelog": "6.0.1",
|
|
54
55
|
"@semantic-release/exec": "6.0.3",
|
|
55
56
|
"@semantic-release/git": "10.0.1",
|
|
56
57
|
"c8": "7.12.0",
|
|
57
58
|
"dotenv": "16.0.3",
|
|
58
|
-
"eslint": "8.
|
|
59
|
+
"eslint": "8.26.0",
|
|
59
60
|
"eslint-import-resolver-exports": "1.0.0-beta.3",
|
|
60
61
|
"eslint-plugin-header": "3.1.1",
|
|
61
62
|
"eslint-plugin-import": "2.26.0",
|
|
62
63
|
"husky": "8.0.1",
|
|
63
64
|
"junit-report-builder": "3.0.1",
|
|
64
65
|
"lint-staged": "13.0.3",
|
|
65
|
-
"mocha": "10.
|
|
66
|
+
"mocha": "10.1.0",
|
|
66
67
|
"mocha-multi-reporters": "1.5.1",
|
|
67
68
|
"semantic-release": "19.0.5",
|
|
68
69
|
"unist-util-inspect": "7.0.1"
|
|
package/src/dast2mdast/dast2mdast.js
CHANGED
|
@@ -11,8 +11,10 @@
|
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
13
|
/* eslint-disable no-param-reassign */
|
|
14
|
+
import { toString } from 'mdast-util-to-string';
|
|
14
15
|
import one from './one.js';
|
|
15
16
|
import handlers from './handlers/index.js';
|
|
17
|
+
import IDSlugger from './id-slugger.js';
|
|
16
18
|
|
|
17
19
|
/**
|
|
18
20
|
* @typedef {Node} List
|
|
@@ -28,6 +30,11 @@ import handlers from './handlers/index.js';
|
|
|
28
30
|
* Note: that the stack is reversed, i.e. the first is the deepest one.
|
|
29
31
|
* @typedef {ListStack[]} ListContainers
|
|
30
32
|
*
|
|
33
|
+
* @typedef Bookmark
|
|
34
|
+
* @property {string} name
|
|
35
|
+
* @property {Node} target
|
|
36
|
+
* @property {Node[]} links
|
|
37
|
+
*
|
|
31
38
|
* Converts the docx AST to markdown ast.
|
|
32
39
|
* @param {object} tree the docx ast
|
|
33
40
|
* @param {object} opts options
|
|
@@ -62,9 +69,57 @@ export default async function dast2mdast(tree, opts = {}) {
|
|
|
62
69
|
h.handlers = handlers;
|
|
63
70
|
h.numbering = {};
|
|
64
71
|
h.gridtables = opts.gridtables;
|
|
72
|
+
h.bookmarks = {};
|
|
65
73
|
|
|
66
74
|
/** @type {ListContainers} */
|
|
67
75
|
h.listContainers = [[]];
|
|
68
76
|
|
|
69
|
-
|
|
77
|
+
/**
|
|
78
|
+
* @param {string} name
|
|
79
|
+
* @returns {Bookmark}
|
|
80
|
+
*/
|
|
81
|
+
h.getBookmark = (name) => {
|
|
82
|
+
let bm = h.bookmarks[name];
|
|
83
|
+
if (!bm) {
|
|
84
|
+
bm = {
|
|
85
|
+
name,
|
|
86
|
+
target: null,
|
|
87
|
+
links: [],
|
|
88
|
+
};
|
|
89
|
+
h.bookmarks[name] = bm;
|
|
90
|
+
}
|
|
91
|
+
return bm;
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
const mdast = one(h, tree, null);
|
|
95
|
+
|
|
96
|
+
// process bookmarks. note that we _should_ re-slug them after the headings are sanitized in
|
|
97
|
+
// mdast2md. another option would be to keep the `bookmark` nodes in the mdast and only
|
|
98
|
+
// process them in mdast2md. but then, the dast2mdast would produce non standard mdast.
|
|
99
|
+
let slugger;
|
|
100
|
+
for (const bm of Object.values(h.bookmarks)) {
|
|
101
|
+
if (!bm.target) {
|
|
102
|
+
// eslint-disable-next-line no-continue
|
|
103
|
+
continue;
|
|
104
|
+
}
|
|
105
|
+
if (!slugger) {
|
|
106
|
+
slugger = new IDSlugger();
|
|
107
|
+
}
|
|
108
|
+
// if heading, create an ID from its text
|
|
109
|
+
if (bm.target.type === 'heading') {
|
|
110
|
+
const text = toString(bm.target).trim();
|
|
111
|
+
bm.id = slugger.slug(text || 'heading');
|
|
112
|
+
bm.target.id = bm.id;
|
|
113
|
+
} else {
|
|
114
|
+
// create an anchor node for non-heading bookmarks
|
|
115
|
+
bm.id = slugger.slug('bookmark');
|
|
116
|
+
bm.target.type = 'html';
|
|
117
|
+
bm.target.value = `<a id="${bm.id}"></a>`;
|
|
118
|
+
}
|
|
119
|
+
// adjust all links uris to the id
|
|
120
|
+
for (const link of bm.links) {
|
|
121
|
+
link.url = `#${bm.id}`;
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
return mdast;
|
|
70
125
|
}
|
|
package/src/dast2mdast/handlers/bookmark.js
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2019 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Bookmarks are inserted by word when one creates a document internal link. At this point they
|
|
15
|
+
* are just remembered. If they are a child of a heading node it will later be adjusted to
|
|
16
|
+
* reflect the expected ID of that heading (by slugging the heading text). if they are just normal
|
|
17
|
+
* paragraph bookmarks, they will later be turned into an anchor link.
|
|
18
|
+
*/
|
|
19
|
+
export default function bookmark(h, node) {
|
|
20
|
+
const bm = h.getBookmark(node.name);
|
|
21
|
+
bm.target = h('bookmark', node.name);
|
|
22
|
+
bm.target.bookmark = bm;
|
|
23
|
+
return bm.target;
|
|
24
|
+
}
|
|
package/src/dast2mdast/handlers/hyperlink.js
CHANGED
|
@@ -12,8 +12,18 @@
|
|
|
12
12
|
import all from '../all.js';
|
|
13
13
|
|
|
14
14
|
export default function hyperlink(h, node) {
|
|
15
|
-
|
|
15
|
+
const link = h('link', {
|
|
16
16
|
url: node.href || '',
|
|
17
17
|
title: node.title,
|
|
18
18
|
}, all(h, node));
|
|
19
|
+
|
|
20
|
+
// document internal links will have an `anchor` property and are managed as bookmarks
|
|
21
|
+
// after the document is processed, the uris will be adjusted to point to the correct bookmark id.
|
|
22
|
+
if (node.anchor) {
|
|
23
|
+
// only bookmark links have anchors
|
|
24
|
+
const bm = h.getBookmark(node.anchor);
|
|
25
|
+
bm.links.push(link);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
return link;
|
|
19
29
|
}
|
|
package/src/dast2mdast/handlers/index.js
CHANGED
|
@@ -20,6 +20,7 @@ import tableRow from './table-row.js';
|
|
|
20
20
|
import tableCell from './table-cell.js';
|
|
21
21
|
import text from './text.js';
|
|
22
22
|
import run from './run.js';
|
|
23
|
+
import bookmarkStart from './bookmark.js';
|
|
23
24
|
|
|
24
25
|
export default {
|
|
25
26
|
document,
|
|
@@ -32,4 +33,5 @@ export default {
|
|
|
32
33
|
tableRow,
|
|
33
34
|
tableCell,
|
|
34
35
|
image,
|
|
36
|
+
bookmarkStart,
|
|
35
37
|
};
|
|
package/src/dast2mdast/handlers/paragraph.js
CHANGED
|
@@ -172,8 +172,15 @@ export default function paragraph(h, node, parent, siblings) {
|
|
|
172
172
|
if (toString(nodes).trim() === '---') {
|
|
173
173
|
return h('thematicBreak');
|
|
174
174
|
}
|
|
175
|
-
|
|
176
|
-
|
|
175
|
+
const heading = h('heading', { depth }, nodes);
|
|
176
|
+
// check if any of the children is a bookmark
|
|
177
|
+
const idx = nodes.findIndex((n) => n.type === 'bookmark');
|
|
178
|
+
if (idx >= 0) {
|
|
179
|
+
// replace the bookmark node with this heading and remove the child
|
|
180
|
+
nodes[idx].bookmark.target = heading;
|
|
181
|
+
nodes.splice(idx, 1);
|
|
182
|
+
}
|
|
183
|
+
return heading;
|
|
177
184
|
}
|
|
178
185
|
|
|
179
186
|
// check for codeblock
|
|
package/src/dast2mdast/id-slugger.js
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2022 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
import GithubSlugger from 'github-slugger';
|
|
13
|
+
|
|
14
|
+
/**
 * Creates unique ids from arbitrary text. The text is slugged with `GithubSlugger.slug()`,
 * a leading run of digits followed by dashes is stripped, and ids that were already handed
 * out by this instance get a numeric suffix (`-1`, `-2`, ...).
 */
export default class IDSlugger {
  constructor() {
    // Use a prototype-less dictionary: with a plain `{}` the `in` check below would also match
    // inherited members such as `constructor` or `toString`, so a heading with such a text
    // would be treated as a collision and receive a garbage id.
    this.occurrences = Object.create(null);
  }

  /**
   * Generate a unique slug.
   * @param {string} value String of text to slugify
   * @return {string} A unique slug string
   */
  slug(value) {
    let id = GithubSlugger.slug(value)
      // remove leading numbers
      .replace(/^\d+-+/, '');

    // resolve collisions: bump the counter of the base slug until an unused id is found
    const original = id;
    while (id in this.occurrences) {
      this.occurrences[original] += 1;
      id = `${original}-${this.occurrences[original]}`;
    }
    // remember the id that is handed out, so it can act as a base slug itself later on
    this.occurrences[id] = 0;
    return id;
  }
}
|