@adobe/helix-docx2md 1.5.11 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
# [1.6.0](https://github.com/adobe/helix-docx2md/compare/v1.5.12...v1.6.0) (2024-04-22)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* detect paragraph border and insert thematic breaks ([#444](https://github.com/adobe/helix-docx2md/issues/444)) ([75e013c](https://github.com/adobe/helix-docx2md/commit/75e013c8afcafd5bf1b664c0529a72d84919e6ec))
|
|
7
|
+
|
|
8
|
+
## [1.5.12](https://github.com/adobe/helix-docx2md/compare/v1.5.11...v1.5.12) (2024-04-22)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Bug Fixes
|
|
12
|
+
|
|
13
|
+
* restart numbering on sublists ([#443](https://github.com/adobe/helix-docx2md/issues/443)) ([21d522f](https://github.com/adobe/helix-docx2md/commit/21d522f2c97c9993ca393aee606306e67c70170a))
|
|
14
|
+
|
|
1
15
|
## [1.5.11](https://github.com/adobe/helix-docx2md/compare/v1.5.10...v1.5.11) (2024-04-20)
|
|
2
16
|
|
|
3
17
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adobe/helix-docx2md",
|
|
3
|
-
"version": "1.5.11",
|
|
3
|
+
"version": "1.6.0",
|
|
4
4
|
"description": "Helix library that converts word documents to markdown",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.js",
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"dependencies": {
|
|
37
37
|
"@adobe/helix-markdown-support": "7.1.2",
|
|
38
38
|
"@adobe/helix-shared-process-queue": "3.0.4",
|
|
39
|
-
"@adobe/mammoth": "1.
|
|
39
|
+
"@adobe/mammoth": "1.7.1-bleeding.2",
|
|
40
40
|
"@adobe/mdast-util-gridtables": "4.0.4",
|
|
41
41
|
"@adobe/remark-gridtables": "3.0.4",
|
|
42
42
|
"dirname-filename-esm": "1.1.1",
|
|
@@ -20,10 +20,10 @@ export default function image(h, node) {
|
|
|
20
20
|
if (node.title && node.title !== node.altText) {
|
|
21
21
|
props.title = node.title;
|
|
22
22
|
}
|
|
23
|
-
if (node.
|
|
23
|
+
if (node.readAsBuffer) {
|
|
24
24
|
// we set the read function as non-enumerable, so that inspect doesn't trip over it.
|
|
25
|
-
Object.defineProperty(props, '
|
|
26
|
-
value: node.
|
|
25
|
+
Object.defineProperty(props, 'readAsBuffer', {
|
|
26
|
+
value: node.readAsBuffer,
|
|
27
27
|
enumerable: false,
|
|
28
28
|
});
|
|
29
29
|
props.contentType = node.contentType;
|
|
@@ -89,6 +89,12 @@ function findFrom(nodes, start, pred) {
|
|
|
89
89
|
return -1;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
+
function handleBorder(border, ret) {
|
|
93
|
+
if (border?.type === 'single' && border.size === '6' && border.space === '1') {
|
|
94
|
+
ret.push({ type: 'thematicBreak' });
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
92
98
|
/**
|
|
93
99
|
* For each paragraph, check if there is an (inlinecode br+ inlinecode) sequence and promote them
|
|
94
100
|
* to codeblocks at the container level.
|
|
@@ -182,13 +188,15 @@ export default function paragraph(h, node, parent, siblings) {
|
|
|
182
188
|
return undefined;
|
|
183
189
|
}
|
|
184
190
|
|
|
191
|
+
const ret = [];
|
|
192
|
+
handleBorder(node.border?.top, ret);
|
|
193
|
+
|
|
185
194
|
// check for list
|
|
186
195
|
const [lists] = h.listContainers;
|
|
187
196
|
if (isListParagraph(node)) {
|
|
188
197
|
const numbering = node.numbering || {};
|
|
189
198
|
const { numId = 0, isOrdered = false, level = '0' } = numbering;
|
|
190
199
|
const lvl = Number.parseInt(level, 10);
|
|
191
|
-
let result = null;
|
|
192
200
|
const listProps = {
|
|
193
201
|
ordered: isOrdered,
|
|
194
202
|
spread: false,
|
|
@@ -207,7 +215,7 @@ export default function paragraph(h, node, parent, siblings) {
|
|
|
207
215
|
}
|
|
208
216
|
if (!tail) {
|
|
209
217
|
tail = h('list', listProps, []);
|
|
210
|
-
|
|
218
|
+
ret.push(tail);
|
|
211
219
|
}
|
|
212
220
|
lists.push(tail);
|
|
213
221
|
while (lists.length <= lvl) {
|
|
@@ -236,23 +244,22 @@ export default function paragraph(h, node, parent, siblings) {
|
|
|
236
244
|
let numInfo = h.numbering[numId];
|
|
237
245
|
if (!numInfo) {
|
|
238
246
|
numInfo = {
|
|
239
|
-
levels:
|
|
247
|
+
levels: [],
|
|
240
248
|
};
|
|
241
249
|
// eslint-disable-next-line no-param-reassign
|
|
242
250
|
h.numbering[numId] = numInfo;
|
|
243
251
|
}
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
lvlInfo = {
|
|
247
|
-
num: 1,
|
|
248
|
-
};
|
|
249
|
-
numInfo.levels[level] = lvlInfo;
|
|
252
|
+
while (numInfo.levels.length <= lvl) {
|
|
253
|
+
numInfo.levels.push({ num: 1 });
|
|
250
254
|
}
|
|
255
|
+
// clear levels after current, as sub-lists always restart the numbering
|
|
256
|
+
numInfo.levels.splice(lvl + 1);
|
|
257
|
+
const lvlInfo = numInfo.levels[lvl];
|
|
251
258
|
listItem.bullet = `${lvlInfo.num}.`;
|
|
252
259
|
lvlInfo.num += 1;
|
|
253
260
|
}
|
|
254
261
|
tail.children.push(listItem);
|
|
255
|
-
return
|
|
262
|
+
return ret;
|
|
256
263
|
}
|
|
257
264
|
// clear lists list marker
|
|
258
265
|
// eslint-disable-next-line no-param-reassign
|
|
@@ -272,20 +279,23 @@ export default function paragraph(h, node, parent, siblings) {
|
|
|
272
279
|
if (depth) {
|
|
273
280
|
// check if no horizontal line in heading
|
|
274
281
|
if (isHorizontalLine(nodes)) {
|
|
275
|
-
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
282
|
+
ret.push(h('thematicBreak'));
|
|
283
|
+
} else {
|
|
284
|
+
const heading = h('heading', { depth }, nodes);
|
|
285
|
+
// check bookmark children (could have multiple)
|
|
286
|
+
for (let idx = 0; idx < nodes.length; idx += 1) {
|
|
287
|
+
const child = nodes[idx];
|
|
288
|
+
if (child.type === 'bookmark') {
|
|
289
|
+
// set the bookmark target to this heading and remove the child
|
|
290
|
+
child.bookmark.target = heading;
|
|
291
|
+
nodes.splice(idx, 1);
|
|
292
|
+
idx -= 1;
|
|
293
|
+
}
|
|
286
294
|
}
|
|
295
|
+
ret.push(heading);
|
|
287
296
|
}
|
|
288
|
-
|
|
297
|
+
handleBorder(node.border?.bottom, ret);
|
|
298
|
+
return ret;
|
|
289
299
|
}
|
|
290
300
|
|
|
291
301
|
// check for codeblock
|
|
@@ -298,7 +308,8 @@ export default function paragraph(h, node, parent, siblings) {
|
|
|
298
308
|
prev.value += `\n${text}`;
|
|
299
309
|
return undefined;
|
|
300
310
|
}
|
|
301
|
-
|
|
311
|
+
ret.push(h('code', text));
|
|
312
|
+
return ret;
|
|
302
313
|
}
|
|
303
314
|
|
|
304
315
|
// merge consecutive text blocks
|
|
@@ -313,7 +324,6 @@ export default function paragraph(h, node, parent, siblings) {
|
|
|
313
324
|
}
|
|
314
325
|
|
|
315
326
|
// check for thematicBreaks and frontmatter. they need to be block elements
|
|
316
|
-
const ret = [];
|
|
317
327
|
let prevBreak;
|
|
318
328
|
let idx = nodes.findIndex(isHorizontalLine);
|
|
319
329
|
while (idx >= 0) {
|
|
@@ -370,6 +380,6 @@ export default function paragraph(h, node, parent, siblings) {
|
|
|
370
380
|
if (isBlockquote(node)) {
|
|
371
381
|
return h('blockquote', ret);
|
|
372
382
|
}
|
|
373
|
-
|
|
383
|
+
handleBorder(node.border?.bottom, ret);
|
|
374
384
|
return ret;
|
|
375
385
|
}
|
|
@@ -67,7 +67,7 @@ export default async function processImages(log, tree, blobHandler, source) {
|
|
|
67
67
|
|
|
68
68
|
visit(tree, (node, index, parent) => {
|
|
69
69
|
if (node.type === 'image') {
|
|
70
|
-
if ((node.
|
|
70
|
+
if ((node.readAsBuffer && typeof node.readAsBuffer === 'function') || node.url) {
|
|
71
71
|
images.push({
|
|
72
72
|
node,
|
|
73
73
|
index,
|
|
@@ -83,9 +83,9 @@ export default async function processImages(log, tree, blobHandler, source) {
|
|
|
83
83
|
let blob;
|
|
84
84
|
|
|
85
85
|
// process inlined images first
|
|
86
|
-
if (node.
|
|
86
|
+
if (node.readAsBuffer) {
|
|
87
87
|
try {
|
|
88
|
-
const data = await node.
|
|
88
|
+
const data = await node.readAsBuffer();
|
|
89
89
|
blob = await getBlob(blobHandler, data, node.contentType, source);
|
|
90
90
|
} catch (e) {
|
|
91
91
|
log.error('Error reading blob data:', e.message);
|