@adobe/helix-docx2md 1.5.11 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [1.6.0](https://github.com/adobe/helix-docx2md/compare/v1.5.12...v1.6.0) (2024-04-22)
2
+
3
+
4
+ ### Features
5
+
6
+ * detect paragraph border and insert thematic breaks ([#444](https://github.com/adobe/helix-docx2md/issues/444)) ([75e013c](https://github.com/adobe/helix-docx2md/commit/75e013c8afcafd5bf1b664c0529a72d84919e6ec))
7
+
8
+ ## [1.5.12](https://github.com/adobe/helix-docx2md/compare/v1.5.11...v1.5.12) (2024-04-22)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * restart numbering on sublists ([#443](https://github.com/adobe/helix-docx2md/issues/443)) ([21d522f](https://github.com/adobe/helix-docx2md/commit/21d522f2c97c9993ca393aee606306e67c70170a))
14
+
1
15
  ## [1.5.11](https://github.com/adobe/helix-docx2md/compare/v1.5.10...v1.5.11) (2024-04-20)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/helix-docx2md",
3
- "version": "1.5.11",
3
+ "version": "1.6.0",
4
4
  "description": "Helix library that converts word documents to markdown",
5
5
  "type": "module",
6
6
  "main": "./src/index.js",
@@ -36,7 +36,7 @@
36
36
  "dependencies": {
37
37
  "@adobe/helix-markdown-support": "7.1.2",
38
38
  "@adobe/helix-shared-process-queue": "3.0.4",
39
- "@adobe/mammoth": "1.5.1-bleeding.2",
39
+ "@adobe/mammoth": "1.7.1-bleeding.2",
40
40
  "@adobe/mdast-util-gridtables": "4.0.4",
41
41
  "@adobe/remark-gridtables": "3.0.4",
42
42
  "dirname-filename-esm": "1.1.1",
@@ -20,10 +20,10 @@ export default function image(h, node) {
20
20
  if (node.title && node.title !== node.altText) {
21
21
  props.title = node.title;
22
22
  }
23
- if (node.read) {
23
+ if (node.readAsBuffer) {
24
24
  // we set the read function as non-enumerable, so that inspect doesn't trip over it.
25
- Object.defineProperty(props, 'read', {
26
- value: node.read,
25
+ Object.defineProperty(props, 'readAsBuffer', {
26
+ value: node.readAsBuffer,
27
27
  enumerable: false,
28
28
  });
29
29
  props.contentType = node.contentType;
@@ -89,6 +89,12 @@ function findFrom(nodes, start, pred) {
89
89
  return -1;
90
90
  }
91
91
 
92
+ function handleBorder(border, ret) {
93
+ if (border?.type === 'single' && border.size === '6' && border.space === '1') {
94
+ ret.push({ type: 'thematicBreak' });
95
+ }
96
+ }
97
+
92
98
  /**
93
99
  * For each paragraph, check if there is an (inlinecode br+ inlinecode) sequence and promote them
94
100
  * to codeblocks at the container level.
@@ -182,13 +188,15 @@ export default function paragraph(h, node, parent, siblings) {
182
188
  return undefined;
183
189
  }
184
190
 
191
+ const ret = [];
192
+ handleBorder(node.border?.top, ret);
193
+
185
194
  // check for list
186
195
  const [lists] = h.listContainers;
187
196
  if (isListParagraph(node)) {
188
197
  const numbering = node.numbering || {};
189
198
  const { numId = 0, isOrdered = false, level = '0' } = numbering;
190
199
  const lvl = Number.parseInt(level, 10);
191
- let result = null;
192
200
  const listProps = {
193
201
  ordered: isOrdered,
194
202
  spread: false,
@@ -207,7 +215,7 @@ export default function paragraph(h, node, parent, siblings) {
207
215
  }
208
216
  if (!tail) {
209
217
  tail = h('list', listProps, []);
210
- result = tail;
218
+ ret.push(tail);
211
219
  }
212
220
  lists.push(tail);
213
221
  while (lists.length <= lvl) {
@@ -236,23 +244,22 @@ export default function paragraph(h, node, parent, siblings) {
236
244
  let numInfo = h.numbering[numId];
237
245
  if (!numInfo) {
238
246
  numInfo = {
239
- levels: { },
247
+ levels: [],
240
248
  };
241
249
  // eslint-disable-next-line no-param-reassign
242
250
  h.numbering[numId] = numInfo;
243
251
  }
244
- let lvlInfo = numInfo.levels[level];
245
- if (!lvlInfo) {
246
- lvlInfo = {
247
- num: 1,
248
- };
249
- numInfo.levels[level] = lvlInfo;
252
+ while (numInfo.levels.length <= lvl) {
253
+ numInfo.levels.push({ num: 1 });
250
254
  }
255
+ // clear levels after current, as sub-lists always restart the numbering
256
+ numInfo.levels.splice(lvl + 1);
257
+ const lvlInfo = numInfo.levels[lvl];
251
258
  listItem.bullet = `${lvlInfo.num}.`;
252
259
  lvlInfo.num += 1;
253
260
  }
254
261
  tail.children.push(listItem);
255
- return result;
262
+ return ret;
256
263
  }
257
264
  // clear lists list marker
258
265
  // eslint-disable-next-line no-param-reassign
@@ -272,20 +279,23 @@ export default function paragraph(h, node, parent, siblings) {
272
279
  if (depth) {
273
280
  // check if no horizontal line in heading
274
281
  if (isHorizontalLine(nodes)) {
275
- return h('thematicBreak');
276
- }
277
- const heading = h('heading', { depth }, nodes);
278
- // check bookmark children (could have multiple)
279
- for (let idx = 0; idx < nodes.length; idx += 1) {
280
- const child = nodes[idx];
281
- if (child.type === 'bookmark') {
282
- // set the bookmark target to this heading and remove the child
283
- child.bookmark.target = heading;
284
- nodes.splice(idx, 1);
285
- idx -= 1;
282
+ ret.push(h('thematicBreak'));
283
+ } else {
284
+ const heading = h('heading', { depth }, nodes);
285
+ // check bookmark children (could have multiple)
286
+ for (let idx = 0; idx < nodes.length; idx += 1) {
287
+ const child = nodes[idx];
288
+ if (child.type === 'bookmark') {
289
+ // set the bookmark target to this heading and remove the child
290
+ child.bookmark.target = heading;
291
+ nodes.splice(idx, 1);
292
+ idx -= 1;
293
+ }
286
294
  }
295
+ ret.push(heading);
287
296
  }
288
- return heading;
297
+ handleBorder(node.border?.bottom, ret);
298
+ return ret;
289
299
  }
290
300
 
291
301
  // check for codeblock
@@ -298,7 +308,8 @@ export default function paragraph(h, node, parent, siblings) {
298
308
  prev.value += `\n${text}`;
299
309
  return undefined;
300
310
  }
301
- return h('code', text);
311
+ ret.push(h('code', text));
312
+ return ret;
302
313
  }
303
314
 
304
315
  // merge consecutive text blocks
@@ -313,7 +324,6 @@ export default function paragraph(h, node, parent, siblings) {
313
324
  }
314
325
 
315
326
  // check for thematicBreaks and frontmatter. they need to be block elements
316
- const ret = [];
317
327
  let prevBreak;
318
328
  let idx = nodes.findIndex(isHorizontalLine);
319
329
  while (idx >= 0) {
@@ -370,6 +380,6 @@ export default function paragraph(h, node, parent, siblings) {
370
380
  if (isBlockquote(node)) {
371
381
  return h('blockquote', ret);
372
382
  }
373
-
383
+ handleBorder(node.border?.bottom, ret);
374
384
  return ret;
375
385
  }
@@ -67,7 +67,7 @@ export default async function processImages(log, tree, blobHandler, source) {
67
67
 
68
68
  visit(tree, (node, index, parent) => {
69
69
  if (node.type === 'image') {
70
- if ((node.read && typeof node.read === 'function') || node.url) {
70
+ if ((node.readAsBuffer && typeof node.readAsBuffer === 'function') || node.url) {
71
71
  images.push({
72
72
  node,
73
73
  index,
@@ -83,9 +83,9 @@ export default async function processImages(log, tree, blobHandler, source) {
83
83
  let blob;
84
84
 
85
85
  // process inlined images first
86
- if (node.read) {
86
+ if (node.readAsBuffer) {
87
87
  try {
88
- const data = await node.read(null, !!blobHandler);
88
+ const data = await node.readAsBuffer();
89
89
  blob = await getBlob(blobHandler, data, node.contentType, source);
90
90
  } catch (e) {
91
91
  log.error('Error reading blob data:', e.message);