@adobe/helix-docx2md 1.5.12 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ ## [1.6.1](https://github.com/adobe/helix-docx2md/compare/v1.6.0...v1.6.1) (2024-06-22)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * **deps:** update dependency unified to v11.0.5 ([8ad9645](https://github.com/adobe/helix-docx2md/commit/8ad964588b31b7963bfd730c9474bdc9e3c263a0))
7
+
8
+ # [1.6.0](https://github.com/adobe/helix-docx2md/compare/v1.5.12...v1.6.0) (2024-04-22)
9
+
10
+
11
+ ### Features
12
+
13
+ * detect paragraph border and insert thematic breaks ([#444](https://github.com/adobe/helix-docx2md/issues/444)) ([75e013c](https://github.com/adobe/helix-docx2md/commit/75e013c8afcafd5bf1b664c0529a72d84919e6ec))
14
+
1
15
  ## [1.5.12](https://github.com/adobe/helix-docx2md/compare/v1.5.11...v1.5.12) (2024-04-22)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/helix-docx2md",
3
- "version": "1.5.12",
3
+ "version": "1.6.1",
4
4
  "description": "Helix library that converts word documents to markdown",
5
5
  "type": "module",
6
6
  "main": "./src/index.js",
@@ -34,11 +34,11 @@
34
34
  },
35
35
  "homepage": "https://github.com/adobe/helix-docx2md#readme",
36
36
  "dependencies": {
37
- "@adobe/helix-markdown-support": "7.1.2",
37
+ "@adobe/helix-markdown-support": "7.1.3",
38
38
  "@adobe/helix-shared-process-queue": "3.0.4",
39
- "@adobe/mammoth": "1.5.1-bleeding.2",
40
- "@adobe/mdast-util-gridtables": "4.0.4",
41
- "@adobe/remark-gridtables": "3.0.4",
39
+ "@adobe/mammoth": "1.7.1-bleeding.2",
40
+ "@adobe/mdast-util-gridtables": "4.0.6",
41
+ "@adobe/remark-gridtables": "3.0.6",
42
42
  "dirname-filename-esm": "1.1.1",
43
43
  "github-slugger": "2.0.0",
44
44
  "mdast-util-to-markdown": "2.1.0",
@@ -46,29 +46,29 @@
46
46
  "remark-gfm": "4.0.0",
47
47
  "remark-parse": "11.0.0",
48
48
  "remark-stringify": "11.0.0",
49
- "unified": "11.0.4",
49
+ "unified": "11.0.5",
50
50
  "unist-util-find": "3.0.0",
51
51
  "unist-util-visit": "5.0.0",
52
52
  "yauzl": "3.1.3"
53
53
  },
54
54
  "devDependencies": {
55
55
  "@adobe/eslint-config-helix": "2.0.6",
56
- "@adobe/helix-admin-support": "2.8.1",
57
- "@adobe/helix-mediahandler": "2.4.21",
58
- "@adobe/helix-onedrive-support": "11.3.19",
59
- "@adobe/helix-shared-tokencache": "1.4.2",
56
+ "@adobe/helix-admin-support": "2.8.15",
57
+ "@adobe/helix-mediahandler": "2.5.10",
58
+ "@adobe/helix-onedrive-support": "11.3.31",
59
+ "@adobe/helix-shared-tokencache": "1.4.10",
60
60
  "@semantic-release/changelog": "6.0.3",
61
61
  "@semantic-release/exec": "6.0.3",
62
62
  "@semantic-release/git": "10.0.1",
63
- "c8": "9.1.0",
63
+ "c8": "10.1.2",
64
64
  "dotenv": "16.4.5",
65
65
  "eslint": "8.57.0",
66
66
  "husky": "9.0.11",
67
67
  "junit-report-builder": "3.2.1",
68
- "lint-staged": "15.2.2",
68
+ "lint-staged": "15.2.7",
69
69
  "mocha": "10.4.0",
70
70
  "mocha-multi-reporters": "1.5.1",
71
- "semantic-release": "23.0.8",
71
+ "semantic-release": "24.0.0",
72
72
  "unist-util-inspect": "8.0.0"
73
73
  },
74
74
  "lint-staged": {
@@ -20,10 +20,10 @@ export default function image(h, node) {
20
20
  if (node.title && node.title !== node.altText) {
21
21
  props.title = node.title;
22
22
  }
23
- if (node.read) {
23
+ if (node.readAsBuffer) {
24
24
  // we set the read function as non-enumerable, so that inspect doesn't trip over it.
25
- Object.defineProperty(props, 'read', {
26
- value: node.read,
25
+ Object.defineProperty(props, 'readAsBuffer', {
26
+ value: node.readAsBuffer,
27
27
  enumerable: false,
28
28
  });
29
29
  props.contentType = node.contentType;
@@ -89,6 +89,12 @@ function findFrom(nodes, start, pred) {
89
89
  return -1;
90
90
  }
91
91
 
92
+ function handleBorder(border, ret) {
93
+ if (border?.type === 'single' && border.size === '6' && border.space === '1') {
94
+ ret.push({ type: 'thematicBreak' });
95
+ }
96
+ }
97
+
92
98
  /**
93
99
  * For each paragraph, check if there is an (inlinecode br+ inlincode) sequence and promote them
94
100
  * to codeblocks at the container level.
@@ -182,13 +188,15 @@ export default function paragraph(h, node, parent, siblings) {
182
188
  return undefined;
183
189
  }
184
190
 
191
+ const ret = [];
192
+ handleBorder(node.border?.top, ret);
193
+
185
194
  // check for list
186
195
  const [lists] = h.listContainers;
187
196
  if (isListParagraph(node)) {
188
197
  const numbering = node.numbering || {};
189
198
  const { numId = 0, isOrdered = false, level = '0' } = numbering;
190
199
  const lvl = Number.parseInt(level, 10);
191
- let result = null;
192
200
  const listProps = {
193
201
  ordered: isOrdered,
194
202
  spread: false,
@@ -207,7 +215,7 @@ export default function paragraph(h, node, parent, siblings) {
207
215
  }
208
216
  if (!tail) {
209
217
  tail = h('list', listProps, []);
210
- result = tail;
218
+ ret.push(tail);
211
219
  }
212
220
  lists.push(tail);
213
221
  while (lists.length <= lvl) {
@@ -251,7 +259,7 @@ export default function paragraph(h, node, parent, siblings) {
251
259
  lvlInfo.num += 1;
252
260
  }
253
261
  tail.children.push(listItem);
254
- return result;
262
+ return ret;
255
263
  }
256
264
  // clear lists list marker
257
265
  // eslint-disable-next-line no-param-reassign
@@ -271,20 +279,23 @@ export default function paragraph(h, node, parent, siblings) {
271
279
  if (depth) {
272
280
  // check if no horizontal line in heading
273
281
  if (isHorizontalLine(nodes)) {
274
- return h('thematicBreak');
275
- }
276
- const heading = h('heading', { depth }, nodes);
277
- // check bookmark children (could have multiple)
278
- for (let idx = 0; idx < nodes.length; idx += 1) {
279
- const child = nodes[idx];
280
- if (child.type === 'bookmark') {
281
- // set the bookmark target to this heading and remove the child
282
- child.bookmark.target = heading;
283
- nodes.splice(idx, 1);
284
- idx -= 1;
282
+ ret.push(h('thematicBreak'));
283
+ } else {
284
+ const heading = h('heading', { depth }, nodes);
285
+ // check bookmark children (could have multiple)
286
+ for (let idx = 0; idx < nodes.length; idx += 1) {
287
+ const child = nodes[idx];
288
+ if (child.type === 'bookmark') {
289
+ // set the bookmark target to this heading and remove the child
290
+ child.bookmark.target = heading;
291
+ nodes.splice(idx, 1);
292
+ idx -= 1;
293
+ }
285
294
  }
295
+ ret.push(heading);
286
296
  }
287
- return heading;
297
+ handleBorder(node.border?.bottom, ret);
298
+ return ret;
288
299
  }
289
300
 
290
301
  // check for codeblock
@@ -297,7 +308,8 @@ export default function paragraph(h, node, parent, siblings) {
297
308
  prev.value += `\n${text}`;
298
309
  return undefined;
299
310
  }
300
- return h('code', text);
311
+ ret.push(h('code', text));
312
+ return ret;
301
313
  }
302
314
 
303
315
  // merge consecutive text blocks
@@ -312,7 +324,6 @@ export default function paragraph(h, node, parent, siblings) {
312
324
  }
313
325
 
314
326
  // check for thematicBreaks and frontmatter. they need to be block elements
315
- const ret = [];
316
327
  let prevBreak;
317
328
  let idx = nodes.findIndex(isHorizontalLine);
318
329
  while (idx >= 0) {
@@ -369,6 +380,6 @@ export default function paragraph(h, node, parent, siblings) {
369
380
  if (isBlockquote(node)) {
370
381
  return h('blockquote', ret);
371
382
  }
372
-
383
+ handleBorder(node.border?.bottom, ret);
373
384
  return ret;
374
385
  }
@@ -67,7 +67,7 @@ export default async function processImages(log, tree, blobHandler, source) {
67
67
 
68
68
  visit(tree, (node, index, parent) => {
69
69
  if (node.type === 'image') {
70
- if ((node.read && typeof node.read === 'function') || node.url) {
70
+ if ((node.readAsBuffer && typeof node.readAsBuffer === 'function') || node.url) {
71
71
  images.push({
72
72
  node,
73
73
  index,
@@ -83,9 +83,9 @@ export default async function processImages(log, tree, blobHandler, source) {
83
83
  let blob;
84
84
 
85
85
  // process inlined images first
86
- if (node.read) {
86
+ if (node.readAsBuffer) {
87
87
  try {
88
- const data = await node.read(null, !!blobHandler);
88
+ const data = await node.readAsBuffer();
89
89
  blob = await getBlob(blobHandler, data, node.contentType, source);
90
90
  } catch (e) {
91
91
  log.error('Error reading blob data:', e.message);