markdown-to-confluence 0.4.4__py3-none-any.whl → 0.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/converter.py CHANGED
@@ -7,6 +7,7 @@ Copyright 2022-2025, Levente Hunyadi
7
7
  """
8
8
 
9
9
  import dataclasses
10
+ import enum
10
11
  import hashlib
11
12
  import logging
12
13
  import os.path
@@ -15,21 +16,25 @@ import uuid
15
16
  from abc import ABC, abstractmethod
16
17
  from dataclasses import dataclass
17
18
  from pathlib import Path
18
- from typing import Any, Literal, Optional, Union
19
- from urllib.parse import ParseResult, quote_plus, urlparse, urlunparse
19
+ from typing import ClassVar, Literal, Optional, Union
20
+ from urllib.parse import ParseResult, quote_plus, urlparse
20
21
 
21
22
  import lxml.etree as ET
22
23
  from strong_typing.core import JsonType
24
+ from strong_typing.exception import JsonTypeError
23
25
 
24
26
  from . import drawio, mermaid
25
27
  from .collection import ConfluencePageCollection
26
- from .csf import AC_ATTR, AC_ELEM, HTML, RI_ATTR, RI_ELEM, ParseError, elements_from_strings, elements_to_string
28
+ from .csf import AC_ATTR, AC_ELEM, HTML, RI_ATTR, RI_ELEM, ParseError, elements_from_strings, elements_to_string, normalize_inline
27
29
  from .domain import ConfluenceDocumentOptions, ConfluencePageID
30
+ from .emoticon import emoji_to_emoticon
31
+ from .environment import PageError
28
32
  from .extra import override, path_relative_to
33
+ from .latex import get_png_dimensions, remove_png_chunks, render_latex
29
34
  from .markdown import markdown_to_html
35
+ from .mermaid import MermaidConfigProperties
30
36
  from .metadata import ConfluenceSiteMetadata
31
- from .properties import PageError
32
- from .scanner import ScannedDocument, Scanner
37
+ from .scanner import MermaidScanner, ScannedDocument, Scanner
33
38
  from .toc import TableOfContentsBuilder
34
39
  from .uri import is_absolute_url, to_uuid_urn
35
40
  from .xml import element_to_text
@@ -88,86 +93,90 @@ def encode_title(text: str) -> str:
88
93
 
89
94
 
90
95
  # supported code block languages, for which syntax highlighting is available
91
- _LANGUAGES = [
92
- "abap",
93
- "actionscript3",
94
- "ada",
95
- "applescript",
96
- "arduino",
97
- "autoit",
98
- "bash",
99
- "c",
100
- "clojure",
101
- "coffeescript",
102
- "coldfusion",
103
- "cpp",
104
- "csharp",
105
- "css",
106
- "cuda",
107
- "d",
108
- "dart",
109
- "delphi",
110
- "diff",
111
- "elixir",
112
- "erlang",
113
- "fortran",
114
- "foxpro",
115
- "go",
116
- "graphql",
117
- "groovy",
118
- "haskell",
119
- "haxe",
120
- "html",
121
- "java",
122
- "javafx",
123
- "javascript",
124
- "json",
125
- "jsx",
126
- "julia",
127
- "kotlin",
128
- "livescript",
129
- "lua",
130
- "mermaid",
131
- "mathematica",
132
- "matlab",
133
- "objectivec",
134
- "objectivej",
135
- "ocaml",
136
- "octave",
137
- "pascal",
138
- "perl",
139
- "php",
140
- "powershell",
141
- "prolog",
142
- "puppet",
143
- "python",
144
- "qml",
145
- "r",
146
- "racket",
147
- "rst",
148
- "ruby",
149
- "rust",
150
- "sass",
151
- "scala",
152
- "scheme",
153
- "shell",
154
- "smalltalk",
155
- "splunk",
156
- "sql",
157
- "standardml",
158
- "swift",
159
- "tcl",
160
- "tex",
161
- "tsx",
162
- "typescript",
163
- "vala",
164
- "vb",
165
- "verilog",
166
- "vhdl",
167
- "xml",
168
- "xquery",
169
- "yaml",
170
- ]
96
+ _LANGUAGES = {
97
+ "abap": "abap",
98
+ "actionscript3": "actionscript3",
99
+ "ada": "ada",
100
+ "applescript": "applescript",
101
+ "arduino": "arduino",
102
+ "autoit": "autoit",
103
+ "bash": "bash",
104
+ "c": "c",
105
+ "c#": "c#",
106
+ "clojure": "clojure",
107
+ "coffeescript": "coffeescript",
108
+ "coldfusion": "coldfusion",
109
+ "cpp": "cpp",
110
+ "csharp": "c#",
111
+ "css": "css",
112
+ "cuda": "cuda",
113
+ "d": "d",
114
+ "dart": "dart",
115
+ "delphi": "delphi",
116
+ "diff": "diff",
117
+ "elixir": "elixir",
118
+ "erl": "erl",
119
+ "erlang": "erl",
120
+ "fortran": "fortran",
121
+ "foxpro": "foxpro",
122
+ "go": "go",
123
+ "graphql": "graphql",
124
+ "groovy": "groovy",
125
+ "haskell": "haskell",
126
+ "haxe": "haxe",
127
+ "html": "html",
128
+ "java": "java",
129
+ "javafx": "javafx",
130
+ "javascript": "js",
131
+ "js": "js",
132
+ "json": "json",
133
+ "jsx": "jsx",
134
+ "julia": "julia",
135
+ "kotlin": "kotlin",
136
+ "livescript": "livescript",
137
+ "lua": "lua",
138
+ "mermaid": "mermaid",
139
+ "mathematica": "mathematica",
140
+ "matlab": "matlab",
141
+ "objectivec": "objectivec",
142
+ "objectivej": "objectivej",
143
+ "ocaml": "ocaml",
144
+ "octave": "octave",
145
+ "pascal": "pascal",
146
+ "perl": "perl",
147
+ "php": "php",
148
+ "powershell": "powershell",
149
+ "prolog": "prolog",
150
+ "puppet": "puppet",
151
+ "py": "py",
152
+ "python": "py",
153
+ "qml": "qml",
154
+ "r": "r",
155
+ "racket": "racket",
156
+ "rst": "rst",
157
+ "ruby": "ruby",
158
+ "rust": "rust",
159
+ "sass": "sass",
160
+ "scala": "scala",
161
+ "scheme": "scheme",
162
+ "shell": "shell",
163
+ "smalltalk": "smalltalk",
164
+ "splunk": "splunk",
165
+ "sql": "sql",
166
+ "standardml": "standardml",
167
+ "swift": "swift",
168
+ "tcl": "tcl",
169
+ "tex": "tex",
170
+ "tsx": "tsx",
171
+ "typescript": "typescript",
172
+ "vala": "vala",
173
+ "vb": "vb",
174
+ "verilog": "verilog",
175
+ "vhdl": "vhdl",
176
+ "xml": "xml",
177
+ "xquery": "xquery",
178
+ "yaml": "yaml",
179
+ }
171
180
 
172
181
 
173
182
  class NodeVisitor(ABC):
@@ -181,6 +190,11 @@ class NodeVisitor(ABC):
181
190
  source = node[index]
182
191
  target = self.transform(source)
183
192
  if target is not None:
193
+ # chain sibling text node that immediately follows original element
194
+ target.tail = source.tail
195
+ source.tail = None
196
+
197
+ # replace original element with transformed element
184
198
  node[index] = target
185
199
  else:
186
200
  self.visit(source)
@@ -206,19 +220,99 @@ def element_text_starts_with_any(node: ET._Element, prefixes: list[str]) -> bool
206
220
  return starts_with_any(node.text, prefixes)
207
221
 
208
222
 
223
+ def is_placeholder_for(node: ET._Element, name: str) -> bool:
224
+ """
225
+ Identifies a Confluence widget placeholder, e.g. `[[_TOC_]]` or `[[_LISTING_]]`.
226
+
227
+ :param node: The element to check.
228
+ :param name: The placeholder name.
229
+ """
230
+
231
+ # `[[_TOC_]]` is represented in HTML as <p>[[<em>TOC</em>]]</p>
232
+ if node.text != "[[" or len(node) != 1:
233
+ return False
234
+
235
+ child = node[0]
236
+ if child.tag != "em" or child.text != name or child.tail != "]]":
237
+ return False
238
+
239
+ return True
240
+
241
+
242
+ @enum.unique
243
+ class FormattingContext(enum.Enum):
244
+ "Identifies the formatting context for the element."
245
+
246
+ BLOCK = "block"
247
+ INLINE = "inline"
248
+
249
+
209
250
  @dataclass
210
251
  class ImageAttributes:
211
252
  """
212
253
  Attributes applied to an `<img>` element.
213
254
 
214
- :param caption: Caption text (`alt` attribute).
255
+ :param context: Identifies the formatting context for the element (block or inline).
215
256
  :param width: Natural image width in pixels.
216
257
  :param height: Natural image height in pixels.
258
+ :param alt: Alternate text.
259
+ :param title: Title text (a.k.a. image tooltip).
260
+ :param caption: Caption text (shown below figure).
217
261
  """
218
262
 
263
+ context: FormattingContext
264
+ width: Optional[int]
265
+ height: Optional[int]
266
+ alt: Optional[str]
267
+ title: Optional[str]
219
268
  caption: Optional[str]
220
- width: Optional[str]
221
- height: Optional[str]
269
+
270
+ def __post_init__(self) -> None:
271
+ if self.caption is None and self.context is FormattingContext.BLOCK:
272
+ self.caption = self.title or self.alt
273
+
274
+ def as_dict(self) -> dict[str, str]:
275
+ attributes: dict[str, str] = {}
276
+ if self.context is FormattingContext.BLOCK:
277
+ attributes[AC_ATTR("align")] = "center"
278
+ attributes[AC_ATTR("layout")] = "center"
279
+ if self.width is not None:
280
+ attributes[AC_ATTR("original-width")] = str(self.width)
281
+ if self.height is not None:
282
+ attributes[AC_ATTR("original-height")] = str(self.height)
283
+ if self.width is not None:
284
+ attributes[AC_ATTR("custom-width")] = "true"
285
+ attributes[AC_ATTR("width")] = str(self.width)
286
+
287
+ elif self.context is FormattingContext.INLINE:
288
+ if self.width is not None:
289
+ attributes[AC_ATTR("width")] = str(self.width)
290
+ if self.height is not None:
291
+ attributes[AC_ATTR("height")] = str(self.height)
292
+ else:
293
+ raise NotImplementedError("match not exhaustive for enumeration")
294
+
295
+ if self.alt is not None:
296
+ attributes.update({AC_ATTR("alt"): self.alt})
297
+ if self.title is not None:
298
+ attributes.update({AC_ATTR("title"): self.title})
299
+ return attributes
300
+
301
+ EMPTY_BLOCK: ClassVar["ImageAttributes"]
302
+ EMPTY_INLINE: ClassVar["ImageAttributes"]
303
+
304
+ @classmethod
305
+ def empty(cls, context: FormattingContext) -> "ImageAttributes":
306
+ if context is FormattingContext.BLOCK:
307
+ return cls.EMPTY_BLOCK
308
+ elif context is FormattingContext.INLINE:
309
+ return cls.EMPTY_INLINE
310
+ else:
311
+ raise NotImplementedError("match not exhaustive for enumeration")
312
+
313
+
314
+ ImageAttributes.EMPTY_BLOCK = ImageAttributes(FormattingContext.BLOCK, None, None, None, None, None)
315
+ ImageAttributes.EMPTY_INLINE = ImageAttributes(FormattingContext.INLINE, None, None, None, None, None)
222
316
 
223
317
 
224
318
  @dataclass
@@ -233,6 +327,7 @@ class ConfluenceConverterOptions:
233
327
  :param prefer_raster: Whether to choose PNG files over SVG files when available.
234
328
  :param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
235
329
  :param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
330
+ :param render_latex: Whether to pre-render LaTeX formulas into PNG/SVG images.
236
331
  :param diagram_output_format: Target image format for diagrams.
237
332
  :param webui_links: When true, convert relative URLs to Confluence Web UI links.
238
333
  """
@@ -242,10 +337,23 @@ class ConfluenceConverterOptions:
242
337
  prefer_raster: bool = True
243
338
  render_drawio: bool = False
244
339
  render_mermaid: bool = False
340
+ render_latex: bool = False
245
341
  diagram_output_format: Literal["png", "svg"] = "png"
246
342
  webui_links: bool = False
247
343
 
248
344
 
345
+ @dataclass
346
+ class ImageData:
347
+ path: Path
348
+ description: Optional[str] = None
349
+
350
+
351
+ @dataclass
352
+ class EmbeddedFileData:
353
+ data: bytes
354
+ description: Optional[str] = None
355
+
356
+
249
357
  class ConfluenceStorageFormatConverter(NodeVisitor):
250
358
  "Transforms a plain HTML tree into Confluence Storage Format."
251
359
 
@@ -255,8 +363,8 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
255
363
  root_dir: Path
256
364
  toc: TableOfContentsBuilder
257
365
  links: list[str]
258
- images: list[Path]
259
- embedded_files: dict[str, bytes]
366
+ images: list[ImageData]
367
+ embedded_files: dict[str, EmbeddedFileData]
260
368
  site_metadata: ConfluenceSiteMetadata
261
369
  page_metadata: ConfluencePageCollection
262
370
 
@@ -285,7 +393,19 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
285
393
  self.page_metadata = page_metadata
286
394
 
287
395
  def _transform_heading(self, heading: ET._Element) -> None:
288
- "Adds anchors to headings in the same document (if *heading anchors* is enabled)."
396
+ """
397
+ Adds anchors to headings in the same document (if *heading anchors* is enabled).
398
+
399
+ Original:
400
+ ```
401
+ <h1>Heading text</h1>
402
+ ```
403
+
404
+ Transformed:
405
+ ```
406
+ <h1><structured-macro name="anchor">...</structured-macro>Heading text</h1>
407
+ ```
408
+ """
289
409
 
290
410
  for e in heading:
291
411
  self.visit(e)
@@ -308,11 +428,15 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
308
428
  anchor.tail = heading.text
309
429
  heading.text = None
310
430
 
311
- def _warn_or_raise(self, msg: str) -> None:
431
+ def _anchor_warn_or_raise(self, anchor: ET._Element, msg: str) -> None:
312
432
  "Emit a warning or raise an exception when a path points to a resource that doesn't exist or is outside of the permitted hierarchy."
313
433
 
314
434
  if self.options.ignore_invalid_url:
315
435
  LOGGER.warning(msg)
436
+ if anchor.text:
437
+ anchor.text = "❌ " + anchor.text
438
+ elif len(anchor) > 0:
439
+ anchor.text = "❌ "
316
440
  else:
317
441
  raise DocumentError(msg)
318
442
 
@@ -325,7 +449,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
325
449
  * Links to documents in the source hierarchy are mapped into full Confluence URLs.
326
450
  """
327
451
 
328
- url = anchor.attrib.get("href")
452
+ # Confluence doesn't support `title` attribute on `<a>` elements
453
+ anchor.attrib.pop("title", None)
454
+
455
+ url = anchor.get("href")
329
456
  if url is None or is_absolute_url(url):
330
457
  return None
331
458
 
@@ -333,7 +460,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
333
460
  relative_url: ParseResult = urlparse(url)
334
461
 
335
462
  if not relative_url.scheme and not relative_url.netloc and not relative_url.path and not relative_url.params and not relative_url.query:
336
- LOGGER.debug("Found local URL: %s", url)
463
+ LOGGER.debug("Found same-page URL: %s", url)
337
464
  if self.options.heading_anchors:
338
465
  # <ac:link ac:anchor="anchor"><ac:link-body>...</ac:link-body></ac:link>
339
466
  target = relative_url.fragment.lstrip("#")
@@ -346,33 +473,39 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
346
473
  },
347
474
  link_body,
348
475
  )
349
- link_wrapper.tail = anchor.tail
350
476
  return link_wrapper
351
477
  else:
352
478
  return None
353
479
 
354
- # convert the relative URL to absolute URL based on the base path value, then look up
355
- # the absolute path in the page metadata dictionary to discover the relative path
356
- # within Confluence that should be used
480
+ # discard original value: relative links always require transformation
481
+ anchor.attrib.pop("href")
482
+
483
+ # convert the relative URL to absolute path based on the base path value
357
484
  absolute_path = (self.base_dir / relative_url.path).resolve()
485
+
486
+ # look up the absolute path in the page metadata dictionary to discover the relative path within Confluence that should be used
358
487
  if not is_directory_within(absolute_path, self.root_dir):
359
- anchor.attrib.pop("href")
360
- self._warn_or_raise(f"relative URL {url} points to outside root path: {self.root_dir}")
488
+ self._anchor_warn_or_raise(anchor, f"relative URL {url} points to outside root path: {self.root_dir}")
361
489
  return None
362
490
 
491
+ if absolute_path.suffix == ".md":
492
+ return self._transform_page_link(anchor, relative_url, absolute_path)
493
+ else:
494
+ return self._transform_attachment_link(anchor, absolute_path)
495
+
496
+ def _transform_page_link(self, anchor: ET._Element, relative_url: ParseResult, absolute_path: Path) -> Optional[ET._Element]:
497
+ """
498
+ Transforms links to other Markdown documents (Confluence pages).
499
+ """
500
+
363
501
  link_metadata = self.page_metadata.get(absolute_path)
364
502
  if link_metadata is None:
365
- msg = f"unable to find matching page for URL: {url}"
366
- if self.options.ignore_invalid_url:
367
- LOGGER.warning(msg)
368
- anchor.attrib.pop("href")
369
- return None
370
- else:
371
- raise DocumentError(msg)
503
+ self._anchor_warn_or_raise(anchor, f"unable to find matching page for URL: {relative_url.geturl()}")
504
+ return None
372
505
 
373
506
  relative_path = os.path.relpath(absolute_path, self.base_dir)
374
507
  LOGGER.debug("Found link to page %s with metadata: %s", relative_path, link_metadata)
375
- self.links.append(url)
508
+ self.links.append(relative_url.geturl())
376
509
 
377
510
  if self.options.webui_links:
378
511
  page_url = f"{self.site_metadata.base_path}pages/viewpage.action?pageId={link_metadata.page_id}"
@@ -384,7 +517,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
384
517
 
385
518
  page_url = f"{self.site_metadata.base_path}spaces/{space_key}/pages/{link_metadata.page_id}/{encode_title(link_metadata.title)}"
386
519
 
387
- components = ParseResult(
520
+ transformed_url = ParseResult(
388
521
  scheme="https",
389
522
  netloc=self.site_metadata.domain,
390
523
  path=page_url,
@@ -392,47 +525,83 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
392
525
  query="",
393
526
  fragment=relative_url.fragment,
394
527
  )
395
- transformed_url = urlunparse(components)
396
528
 
397
- LOGGER.debug("Transformed relative URL: %s to URL: %s", url, transformed_url)
398
- anchor.attrib["href"] = transformed_url
529
+ LOGGER.debug("Transformed relative URL: %s to URL: %s", relative_url.geturl(), transformed_url.geturl())
530
+ anchor.set("href", transformed_url.geturl())
399
531
  return None
400
532
 
533
+ def _transform_attachment_link(self, anchor: ET._Element, absolute_path: Path) -> Optional[ET._Element]:
534
+ """
535
+ Transforms links to document binaries such as PDF, DOCX or XLSX.
536
+ """
537
+
538
+ if not absolute_path.exists():
539
+ self._anchor_warn_or_raise(anchor, f"relative URL points to non-existing file: {absolute_path}")
540
+ return None
541
+
542
+ file_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
543
+ self.images.append(ImageData(absolute_path))
544
+
545
+ link_body = AC_ELEM("link-body", {}, *list(anchor))
546
+ link_body.text = anchor.text
547
+ link_wrapper = AC_ELEM(
548
+ "link",
549
+ {},
550
+ RI_ELEM("attachment", {RI_ATTR("filename"): file_name}),
551
+ link_body,
552
+ )
553
+ return link_wrapper
554
+
401
555
  def _transform_status(self, color: str, caption: str) -> ET._Element:
402
556
  macro_id = str(uuid.uuid4())
403
- return AC_ELEM(
404
- "structured-macro",
405
- {
406
- AC_ATTR("name"): "status",
407
- AC_ATTR("schema-version"): "1",
408
- AC_ATTR("macro-id"): macro_id,
409
- },
410
- AC_ELEM(
411
- "parameter",
412
- {AC_ATTR("name"): "colour"},
413
- color.title(),
414
- ),
415
- AC_ELEM(
416
- "parameter",
417
- {AC_ATTR("name"): "title"},
418
- caption,
419
- ),
420
- )
557
+ attributes = {
558
+ AC_ATTR("name"): "status",
559
+ AC_ATTR("schema-version"): "1",
560
+ AC_ATTR("macro-id"): macro_id,
561
+ }
562
+ if color != "gray":
563
+ return AC_ELEM(
564
+ "structured-macro",
565
+ attributes,
566
+ AC_ELEM(
567
+ "parameter",
568
+ {AC_ATTR("name"): "colour"},
569
+ color.title(),
570
+ ),
571
+ AC_ELEM(
572
+ "parameter",
573
+ {AC_ATTR("name"): "title"},
574
+ caption,
575
+ ),
576
+ )
577
+ else:
578
+ return AC_ELEM(
579
+ "structured-macro",
580
+ attributes,
581
+ AC_ELEM(
582
+ "parameter",
583
+ {AC_ATTR("name"): "title"},
584
+ caption,
585
+ ),
586
+ )
421
587
 
422
- def _transform_image(self, image: ET._Element) -> ET._Element:
588
+ def _transform_image(self, context: FormattingContext, image: ET._Element) -> ET._Element:
423
589
  "Inserts an attached or external image."
424
590
 
425
- src = image.attrib.get("src")
591
+ src = image.get("src")
426
592
  if not src:
427
593
  raise DocumentError("image lacks `src` attribute")
428
594
 
429
- caption = image.attrib.get("alt")
430
- if caption is not None and src.startswith("urn:uuid:") and (color := status_images.get(src)) is not None:
431
- return self._transform_status(color, caption)
595
+ alt = image.get("alt")
596
+ if alt is not None and src.startswith("urn:uuid:") and (color := status_images.get(src)) is not None:
597
+ return self._transform_status(color, alt)
432
598
 
433
- width = image.attrib.get("width")
434
- height = image.attrib.get("height")
435
- attrs = ImageAttributes(caption, width, height)
599
+ title = image.get("title")
600
+ width = image.get("width")
601
+ height = image.get("height")
602
+ pixel_width = int(width) if width is not None and width.isdecimal() else None
603
+ pixel_height = int(height) if height is not None and height.isdecimal() else None
604
+ attrs = ImageAttributes(context, pixel_width, pixel_height, alt, title, None)
436
605
 
437
606
  if is_absolute_url(src):
438
607
  return self._transform_external_image(src, attrs)
@@ -441,7 +610,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
441
610
 
442
611
  absolute_path = self._verify_image_path(path)
443
612
  if absolute_path is None:
444
- return self._create_missing(path, caption)
613
+ return self._create_missing(path, attrs)
445
614
 
446
615
  if absolute_path.name.endswith(".drawio.png") or absolute_path.name.endswith(".drawio.svg"):
447
616
  return self._transform_drawio_image(absolute_path, attrs)
@@ -455,15 +624,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
455
624
  def _transform_external_image(self, url: str, attrs: ImageAttributes) -> ET._Element:
456
625
  "Emits Confluence Storage Format XHTML for an external image."
457
626
 
458
- attributes: dict[str, Any] = {
459
- AC_ATTR("align"): "center",
460
- AC_ATTR("layout"): "center",
461
- }
462
- if attrs.width is not None:
463
- attributes.update({AC_ATTR("width"): attrs.width})
464
- if attrs.height is not None:
465
- attributes.update({AC_ATTR("height"): attrs.height})
466
-
467
627
  elements: list[ET._Element] = []
468
628
  elements.append(
469
629
  RI_ELEM(
@@ -472,10 +632,18 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
472
632
  {RI_ATTR("value"): url},
473
633
  )
474
634
  )
475
- if attrs.caption is not None:
476
- elements.append(AC_ELEM("caption", HTML.p(attrs.caption)))
635
+ if attrs.caption:
636
+ elements.append(AC_ELEM("caption", attrs.caption))
637
+
638
+ return AC_ELEM("image", attrs.as_dict(), *elements)
477
639
 
478
- return AC_ELEM("image", attributes, *elements)
640
+ def _warn_or_raise(self, msg: str) -> None:
641
+ "Emit a warning or raise an exception when a path points to a resource that doesn't exist or is outside of the permitted hierarchy."
642
+
643
+ if self.options.ignore_invalid_url:
644
+ LOGGER.warning(msg)
645
+ else:
646
+ raise DocumentError(msg)
479
647
 
480
648
  def _verify_image_path(self, path: Path) -> Optional[Path]:
481
649
  "Checks whether an image path is safe to use."
@@ -496,13 +664,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
496
664
  def _transform_attached_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
497
665
  "Emits Confluence Storage Format XHTML for an attached raster or vector image."
498
666
 
499
- if self.options.prefer_raster and absolute_path.name.endswith(".svg"):
667
+ if self.options.prefer_raster and absolute_path.suffix == ".svg":
500
668
  # prefer PNG over SVG; Confluence displays SVG in wrong size, and text labels are truncated
501
669
  png_file = absolute_path.with_suffix(".png")
502
670
  if png_file.exists():
503
671
  absolute_path = png_file
504
672
 
505
- self.images.append(absolute_path)
673
+ self.images.append(ImageData(absolute_path, attrs.alt))
506
674
  image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
507
675
  return self._create_attached_image(image_name, attrs)
508
676
 
@@ -512,15 +680,15 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
512
680
  if not absolute_path.name.endswith(".drawio.xml") and not absolute_path.name.endswith(".drawio"):
513
681
  raise DocumentError("invalid image format; expected: `*.drawio.xml` or `*.drawio`")
514
682
 
683
+ relative_path = path_relative_to(absolute_path, self.base_dir)
515
684
  if self.options.render_drawio:
516
685
  image_data = drawio.render_diagram(absolute_path, self.options.diagram_output_format)
517
- image_hash = hashlib.md5(image_data).hexdigest()
518
- image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
519
- self.embedded_files[image_filename] = image_data
686
+ image_filename = attachment_name(relative_path.with_suffix(f".{self.options.diagram_output_format}"))
687
+ self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
520
688
  return self._create_attached_image(image_filename, attrs)
521
689
  else:
522
- self.images.append(absolute_path)
523
- image_filename = attachment_name(path_relative_to(absolute_path, self.base_dir))
690
+ self.images.append(ImageData(absolute_path, attrs.alt))
691
+ image_filename = attachment_name(relative_path)
524
692
  return self._create_drawio(image_filename, attrs)
525
693
 
526
694
  def _transform_drawio_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
@@ -535,22 +703,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
535
703
  # extract embedded editable diagram and upload as *.drawio
536
704
  image_data = drawio.extract_diagram(absolute_path)
537
705
  image_filename = attachment_name(path_relative_to(absolute_path.with_suffix(".xml"), self.base_dir))
538
- self.embedded_files[image_filename] = image_data
706
+ self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
539
707
 
540
708
  return self._create_drawio(image_filename, attrs)
541
709
 
542
710
  def _create_attached_image(self, image_name: str, attrs: ImageAttributes) -> ET._Element:
543
711
  "An image embedded into the page, linking to an attachment."
544
712
 
545
- attributes: dict[str, Any] = {
546
- AC_ATTR("align"): "center",
547
- AC_ATTR("layout"): "center",
548
- }
549
- if attrs.width is not None:
550
- attributes.update({AC_ATTR("width"): attrs.width})
551
- if attrs.height is not None:
552
- attributes.update({AC_ATTR("height"): attrs.height})
553
-
554
713
  elements: list[ET._Element] = []
555
714
  elements.append(
556
715
  RI_ELEM(
@@ -559,10 +718,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
559
718
  {RI_ATTR("filename"): image_name},
560
719
  )
561
720
  )
562
- if attrs.caption is not None:
563
- elements.append(AC_ELEM("caption", HTML.p(attrs.caption)))
721
+ if attrs.caption:
722
+ elements.append(AC_ELEM("caption", attrs.caption))
564
723
 
565
- return AC_ELEM("image", attributes, *elements)
724
+ return AC_ELEM("image", attrs.as_dict(), *elements)
566
725
 
567
726
  def _create_drawio(self, filename: str, attrs: ImageAttributes) -> ET._Element:
568
727
  "A draw.io diagram embedded into the page, linking to an attachment."
@@ -579,7 +738,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
579
738
  AC_ELEM(
580
739
  "parameter",
581
740
  {AC_ATTR("name"): "width"},
582
- attrs.width,
741
+ str(attrs.width),
583
742
  ),
584
743
  )
585
744
  if attrs.height is not None:
@@ -587,7 +746,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
587
746
  AC_ELEM(
588
747
  "parameter",
589
748
  {AC_ATTR("name"): "height"},
590
- attrs.height,
749
+ str(attrs.height),
591
750
  ),
592
751
  )
593
752
 
@@ -605,48 +764,56 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
605
764
  *parameters,
606
765
  )
607
766
 
608
- def _create_missing(self, path: Path, caption: Optional[str]) -> ET._Element:
767
+ def _create_missing(self, path: Path, attrs: ImageAttributes) -> ET._Element:
609
768
  "A warning panel for a missing image."
610
769
 
611
- message = HTML.p("Missing image: ", HTML.code(path.as_posix()))
612
- if caption is not None:
613
- content = [
614
- AC_ELEM(
615
- "parameter",
616
- {AC_ATTR("name"): "title"},
617
- caption,
618
- ),
619
- AC_ELEM("rich-text-body", {}, message),
620
- ]
621
- else:
622
- content = [AC_ELEM("rich-text-body", {}, message)]
770
+ if attrs.context is FormattingContext.BLOCK:
771
+ message = HTML.p("❌ Missing image: ", HTML.code(path.as_posix()))
772
+ if attrs.caption is not None:
773
+ content = [
774
+ AC_ELEM(
775
+ "parameter",
776
+ {AC_ATTR("name"): "title"},
777
+ attrs.caption,
778
+ ),
779
+ AC_ELEM("rich-text-body", {}, message),
780
+ ]
781
+ else:
782
+ content = [AC_ELEM("rich-text-body", {}, message)]
623
783
 
624
- return AC_ELEM(
625
- "structured-macro",
626
- {
627
- AC_ATTR("name"): "warning",
628
- AC_ATTR("schema-version"): "1",
629
- },
630
- *content,
631
- )
784
+ return AC_ELEM(
785
+ "structured-macro",
786
+ {
787
+ AC_ATTR("name"): "warning",
788
+ AC_ATTR("schema-version"): "1",
789
+ },
790
+ *content,
791
+ )
792
+ else:
793
+ return HTML.span({"style": "color: rgb(255,86,48);"}, "❌ ", HTML.code(path.as_posix()))
632
794
 
633
795
  def _transform_code_block(self, code: ET._Element) -> ET._Element:
634
796
  "Transforms a code block."
635
797
 
636
- language = code.attrib.get("class")
637
- if language:
638
- m = re.match("^language-(.*)$", language)
639
- if m:
640
- language = m.group(1)
798
+ if language_class := code.get("class"):
799
+ if m := re.match("^language-(.*)$", language_class):
800
+ language_name = m.group(1)
641
801
  else:
642
- language = "none"
643
- if language not in _LANGUAGES:
644
- language = "none"
802
+ language_name = None
803
+ else:
804
+ language_name = None
805
+
806
+ # translate name to standard name for (programming) language
807
+ if language_name is not None:
808
+ language_id = _LANGUAGES.get(language_name)
809
+ else:
810
+ language_id = None
811
+
645
812
  content: str = code.text or ""
646
813
  content = content.rstrip()
647
814
 
648
- if language == "mermaid":
649
- return self._transform_inline_mermaid(content)
815
+ if language_id == "mermaid":
816
+ return self._transform_fenced_mermaid(content)
650
817
 
651
818
  return AC_ELEM(
652
819
  "structured-macro",
@@ -654,55 +821,60 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
654
821
  AC_ATTR("name"): "code",
655
822
  AC_ATTR("schema-version"): "1",
656
823
  },
657
- AC_ELEM(
658
- "parameter",
659
- {AC_ATTR("name"): "theme"},
660
- "Default",
661
- ),
662
824
  AC_ELEM(
663
825
  "parameter",
664
826
  {AC_ATTR("name"): "language"},
665
- language,
827
+ language_id or "none",
666
828
  ),
667
829
  AC_ELEM("plain-text-body", ET.CDATA(content)),
668
830
  )
669
831
 
832
+ def _extract_mermaid_config(self, content: str) -> Optional[MermaidConfigProperties]:
833
+ """Extract scale from Mermaid YAML front matter configuration."""
834
+ try:
835
+ properties = MermaidScanner().read(content)
836
+ return properties.config
837
+ except JsonTypeError as ex:
838
+ LOGGER.warning("Failed to extract Mermaid properties: %s", ex)
839
+ return None
840
+
670
841
  def _transform_external_mermaid(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
671
842
  "Emits Confluence Storage Format XHTML for a Mermaid diagram read from an external file."
672
843
 
673
844
  if not absolute_path.name.endswith(".mmd") and not absolute_path.name.endswith(".mermaid"):
674
845
  raise DocumentError("invalid image format; expected: `*.mmd` or `*.mermaid`")
675
846
 
847
+ relative_path = path_relative_to(absolute_path, self.base_dir)
676
848
  if self.options.render_mermaid:
677
849
  with open(absolute_path, "r", encoding="utf-8") as f:
678
850
  content = f.read()
679
- return self._create_mermaid_image(content, attrs)
851
+ config = self._extract_mermaid_config(content)
852
+ image_data = mermaid.render_diagram(content, self.options.diagram_output_format, config=config)
853
+ image_filename = attachment_name(relative_path.with_suffix(f".{self.options.diagram_output_format}"))
854
+ self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
855
+ return self._create_attached_image(image_filename, attrs)
680
856
  else:
681
- self.images.append(absolute_path)
682
- mermaid_filename = attachment_name(path_relative_to(absolute_path, self.base_dir))
857
+ self.images.append(ImageData(absolute_path, attrs.alt))
858
+ mermaid_filename = attachment_name(relative_path)
683
859
  return self._create_mermaid_embed(mermaid_filename)
684
860
 
685
- def _transform_inline_mermaid(self, content: str) -> ET._Element:
686
- "Emits Confluence Storage Format XHTML for a Mermaid diagram defined in a code block."
861
+ def _transform_fenced_mermaid(self, content: str) -> ET._Element:
862
+ "Emits Confluence Storage Format XHTML for a Mermaid diagram defined in a fenced code block."
687
863
 
688
864
  if self.options.render_mermaid:
689
- return self._create_mermaid_image(content, ImageAttributes(None, None, None))
865
+ config = self._extract_mermaid_config(content)
866
+ image_data = mermaid.render_diagram(content, self.options.diagram_output_format, config=config)
867
+ image_hash = hashlib.md5(image_data).hexdigest()
868
+ image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
869
+ self.embedded_files[image_filename] = EmbeddedFileData(image_data)
870
+ return self._create_attached_image(image_filename, ImageAttributes.EMPTY_BLOCK)
690
871
  else:
691
872
  mermaid_data = content.encode("utf-8")
692
873
  mermaid_hash = hashlib.md5(mermaid_data).hexdigest()
693
874
  mermaid_filename = attachment_name(f"embedded_{mermaid_hash}.mmd")
694
- self.embedded_files[mermaid_filename] = mermaid_data
875
+ self.embedded_files[mermaid_filename] = EmbeddedFileData(mermaid_data)
695
876
  return self._create_mermaid_embed(mermaid_filename)
696
877
 
697
- def _create_mermaid_image(self, content: str, attrs: ImageAttributes) -> ET._Element:
698
- "A rendered Mermaid diagram, linking to an attachment uploaded as an image."
699
-
700
- image_data = mermaid.render_diagram(content, self.options.diagram_output_format)
701
- image_hash = hashlib.md5(image_data).hexdigest()
702
- image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
703
- self.embedded_files[image_filename] = image_data
704
- return self._create_attached_image(image_filename, attrs)
705
-
706
878
  def _create_mermaid_embed(self, filename: str) -> ET._Element:
707
879
  "A Mermaid diagram, linking to an attachment that captures the Mermaid source."
708
880
 
@@ -743,6 +915,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
743
915
  {
744
916
  AC_ATTR("name"): "toc",
745
917
  AC_ATTR("schema-version"): "1",
918
+ "data-layout": "default",
746
919
  },
747
920
  AC_ELEM("parameter", {AC_ATTR("name"): "outline"}, "clear"),
748
921
  AC_ELEM("parameter", {AC_ATTR("name"): "style"}, "default"),
@@ -769,8 +942,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
769
942
  syntax into one of the Confluence structured macros *info*, *tip*, *note*, or *warning*.
770
943
  """
771
944
 
945
+ if len(elem) < 1:
946
+ raise DocumentError("empty admonition")
947
+
772
948
  # <div class="admonition note">
773
- class_list = elem.attrib.get("class", "").split(" ")
949
+ class_list = elem.get("class", "").split(" ")
774
950
  class_name: Optional[str] = None
775
951
  if "info" in class_list:
776
952
  class_name = "info"
@@ -788,7 +964,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
788
964
  self.visit(e)
789
965
 
790
966
  # <p class="admonition-title">Note</p>
791
- if "admonition-title" in elem[0].attrib.get("class", "").split(" "):
967
+ if "admonition-title" in elem[0].get("class", "").split(" "):
792
968
  content = [
793
969
  AC_ELEM(
794
970
  "parameter",
@@ -809,12 +985,15 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
809
985
  *content,
810
986
  )
811
987
 
812
- def _transform_github_alert(self, elem: ET._Element) -> ET._Element:
988
+ def _transform_github_alert(self, blockquote: ET._Element) -> ET._Element:
813
989
  """
814
990
  Creates a GitHub-style panel, normally triggered with a block-quote starting with a capitalized string such as `[!TIP]`.
815
991
  """
816
992
 
817
- content = elem[0]
993
+ if len(blockquote) < 1:
994
+ raise DocumentError("empty GitHub alert")
995
+
996
+ content = blockquote[0]
818
997
  if content.text is None:
819
998
  raise DocumentError("empty content")
820
999
 
@@ -839,9 +1018,9 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
839
1018
  else:
840
1019
  raise DocumentError(f"unsupported GitHub alert: {alert}")
841
1020
 
842
- return self._transform_alert(elem, class_name, skip)
1021
+ return self._transform_alert(blockquote, class_name, skip)
843
1022
 
844
- def _transform_gitlab_alert(self, elem: ET._Element) -> ET._Element:
1023
+ def _transform_gitlab_alert(self, blockquote: ET._Element) -> ET._Element:
845
1024
  """
846
1025
  Creates a classic GitLab-style panel.
847
1026
 
@@ -849,7 +1028,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
849
1028
  This syntax does not use Hugo shortcode.
850
1029
  """
851
1030
 
852
- content = elem[0]
1031
+ if len(blockquote) < 1:
1032
+ raise DocumentError("empty GitLab alert")
1033
+
1034
+ content = blockquote[0]
853
1035
  if content.text is None:
854
1036
  raise DocumentError("empty content")
855
1037
 
@@ -872,9 +1054,9 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
872
1054
  else:
873
1055
  raise DocumentError(f"unsupported GitLab alert: {alert}")
874
1056
 
875
- return self._transform_alert(elem, class_name, skip)
1057
+ return self._transform_alert(blockquote, class_name, skip)
876
1058
 
877
- def _transform_alert(self, elem: ET._Element, class_name: Optional[str], skip: int) -> ET._Element:
1059
+ def _transform_alert(self, blockquote: ET._Element, class_name: Optional[str], skip: int) -> ET._Element:
878
1060
  """
879
1061
  Creates an info, tip, note or warning panel from a GitHub or GitLab alert.
880
1062
 
@@ -884,14 +1066,14 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
884
1066
  :see: https://docs.gitlab.com/ee/development/documentation/styleguide/#alert-boxes
885
1067
  """
886
1068
 
887
- content = elem[0]
1069
+ content = blockquote[0]
888
1070
  if content.text is None:
889
1071
  raise DocumentError("empty content")
890
1072
 
891
1073
  if class_name is None:
892
1074
  raise DocumentError("not an alert")
893
1075
 
894
- for e in elem:
1076
+ for e in blockquote:
895
1077
  self.visit(e)
896
1078
 
897
1079
  content.text = content.text[skip:]
@@ -901,10 +1083,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
901
1083
  AC_ATTR("name"): class_name,
902
1084
  AC_ATTR("schema-version"): "1",
903
1085
  },
904
- AC_ELEM("rich-text-body", {}, *list(elem)),
1086
+ AC_ELEM("rich-text-body", {}, *list(blockquote)),
905
1087
  )
906
1088
 
907
- def _transform_section(self, elem: ET._Element) -> ET._Element:
1089
+ def _transform_collapsed(self, details: ET._Element) -> ET._Element:
908
1090
  """
909
1091
  Creates a collapsed section.
910
1092
 
@@ -913,16 +1095,31 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
913
1095
  :see: https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-collapsed-sections
914
1096
  """
915
1097
 
916
- if elem[0].tag != "summary":
1098
+ summary = details[0]
1099
+ if summary.tag != "summary":
917
1100
  raise DocumentError("expected: `<summary>` as first direct child of `<details>`")
918
- if elem[0].tail is not None:
1101
+ if details.text is not None or summary.tail is not None:
1102
+ # when `<details>` has attribute `markdown=1`, content is parsed as Markdown:
1103
+ # ```
1104
+ # <details>
1105
+ # <summary>...</summary>
1106
+ # <p>Text with <em>emphasis</em>.</p>
1107
+ # </details>
1108
+ # ```
1109
+ #
1110
+ # when `<details>` lacks attribute `markdown=1`, content is passed down as raw HTML, partly as `text` of `<details>` or `tail` of `<summary>`:
1111
+ # ```
1112
+ # <details>
1113
+ # <summary>...</summary>
1114
+ # Text with *emphasis*.
1115
+ # </details>
919
1116
  raise DocumentError('expected: attribute `markdown="1"` on `<details>`')
920
1117
 
921
- summary = element_to_text(elem[0])
922
- elem.remove(elem[0])
1118
+ summary_text = element_to_text(summary)
1119
+ details.remove(summary)
923
1120
 
924
1121
  # transform Markdown to Confluence within collapsed section content
925
- self.visit(elem)
1122
+ self.visit(details)
926
1123
 
927
1124
  return AC_ELEM(
928
1125
  "structured-macro",
@@ -933,9 +1130,9 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
933
1130
  AC_ELEM(
934
1131
  "parameter",
935
1132
  {AC_ATTR("name"): "title"},
936
- summary,
1133
+ summary_text,
937
1134
  ),
938
- AC_ELEM("rich-text-body", {}, *list(elem)),
1135
+ AC_ELEM("rich-text-body", {}, *list(details)),
939
1136
  )
940
1137
 
941
1138
  def _transform_emoji(self, elem: ET._Element) -> ET._Element:
@@ -943,21 +1140,63 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
943
1140
  Inserts an inline emoji character.
944
1141
  """
945
1142
 
946
- shortname = elem.attrib.get("data-shortname", "")
947
- unicode = elem.attrib.get("data-unicode", None)
1143
+ shortname = elem.get("data-shortname", "")
1144
+ unicode = elem.get("data-unicode", None)
948
1145
  alt = elem.text or ""
949
1146
 
1147
+ # emoji with a matching emoticon:
950
1148
  # <ac:emoticon ac:name="wink" ac:emoji-shortname=":wink:" ac:emoji-id="1f609" ac:emoji-fallback="&#128521;"/>
1149
+ #
1150
+ # emoji without a corresponding emoticon:
1151
+ # <ac:emoticon ac:name="blue-star" ac:emoji-shortname=":shield:" ac:emoji-id="1f6e1" ac:emoji-fallback="&#128737;"/>
951
1152
  return AC_ELEM(
952
1153
  "emoticon",
953
1154
  {
954
- AC_ATTR("name"): shortname,
1155
+ AC_ATTR("name"): emoji_to_emoticon(shortname),
955
1156
  AC_ATTR("emoji-shortname"): f":{shortname}:",
956
1157
  AC_ATTR("emoji-id"): unicode,
957
1158
  AC_ATTR("emoji-fallback"): alt,
958
1159
  },
959
1160
  )
960
1161
 
1162
+ def _transform_mark(self, mark: ET._Element) -> ET._Element:
1163
+ """
1164
+ Adds inline highlighting to text.
1165
+ """
1166
+
1167
+ attrs = dict(mark.items())
1168
+ old_style = attrs.get("style")
1169
+ new_style = "background-color: rgb(254,222,200);"
1170
+ if old_style is not None:
1171
+ new_style += f" {old_style}"
1172
+ attrs["style"] = new_style
1173
+ span = HTML("span", attrs, *list(mark))
1174
+ span.text = mark.text
1175
+ return span
1176
+
1177
+ def _transform_latex(self, elem: ET._Element, context: FormattingContext) -> ET._Element:
1178
+ """
1179
+ Creates an image rendering of a LaTeX formula with Matplotlib.
1180
+ """
1181
+
1182
+ content = elem.text
1183
+ if not content:
1184
+ raise DocumentError("empty LaTeX formula")
1185
+
1186
+ image_data = render_latex(content, format=self.options.diagram_output_format)
1187
+ if self.options.diagram_output_format == "png":
1188
+ width, height = get_png_dimensions(data=image_data)
1189
+ image_data = remove_png_chunks(["pHYs"], source_data=image_data)
1190
+ attrs = ImageAttributes(context, width, height, content, None, "")
1191
+ else:
1192
+ attrs = ImageAttributes.empty(context)
1193
+
1194
+ image_hash = hashlib.md5(image_data).hexdigest()
1195
+ image_filename = attachment_name(f"formula_{image_hash}.{self.options.diagram_output_format}")
1196
+ self.embedded_files[image_filename] = EmbeddedFileData(image_data, content)
1197
+ image = self._create_attached_image(image_filename, attrs)
1198
+ return image
1199
+
961
1200
  def _transform_inline_math(self, elem: ET._Element) -> ET._Element:
962
1201
  """
963
1202
  Creates an inline LaTeX formula using the Confluence extension "LaTeX Math for Confluence - Math Formula & Equations".
@@ -965,12 +1204,15 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
965
1204
  :see: https://help.narva.net/latex-math-for-confluence/
966
1205
  """
967
1206
 
968
- content = elem.text or ""
1207
+ content = elem.text
969
1208
  if not content:
970
1209
  raise DocumentError("empty inline LaTeX formula")
971
1210
 
972
1211
  LOGGER.debug("Found inline LaTeX formula: %s", content)
973
1212
 
1213
+ if self.options.render_latex:
1214
+ return self._transform_latex(elem, FormattingContext.INLINE)
1215
+
974
1216
  local_id = str(uuid.uuid4())
975
1217
  macro_id = str(uuid.uuid4())
976
1218
  macro = AC_ELEM(
@@ -988,7 +1230,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
988
1230
  ),
989
1231
  AC_ELEM("parameter", {AC_ATTR("name"): "align"}, "center"),
990
1232
  )
991
- macro.tail = elem.tail # chain sibling text node that immediately follows original element
992
1233
  return macro
993
1234
 
994
1235
  def _transform_block_math(self, elem: ET._Element) -> ET._Element:
@@ -998,12 +1239,15 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
998
1239
  :see: https://help.narva.net/latex-math-for-confluence/
999
1240
  """
1000
1241
 
1001
- content = elem.text or ""
1242
+ content = elem.text
1002
1243
  if not content:
1003
1244
  raise DocumentError("empty block-level LaTeX formula")
1004
1245
 
1005
1246
  LOGGER.debug("Found block-level LaTeX formula: %s", content)
1006
1247
 
1248
+ if self.options.render_latex:
1249
+ return self._transform_latex(elem, FormattingContext.BLOCK)
1250
+
1007
1251
  local_id = str(uuid.uuid4())
1008
1252
  macro_id = str(uuid.uuid4())
1009
1253
 
@@ -1029,7 +1273,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1029
1273
  Transforms a footnote reference.
1030
1274
 
1031
1275
  ```
1032
- <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
1276
+ <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">REF</a></sup>
1033
1277
  ```
1034
1278
  """
1035
1279
 
@@ -1041,7 +1285,9 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1041
1285
  raise DocumentError("expected: attribute `id` of format `fnref:NAME` applied on `<sup>` for a footnote reference")
1042
1286
  footnote_ref = ref_id.removeprefix("fnref:")
1043
1287
 
1044
- link = elem[0]
1288
+ link = next((elem.iterchildren(tag="a")), None)
1289
+ if link is None:
1290
+ raise DocumentError("expected: `<a>` as the first HTML element in a footnote reference")
1045
1291
  def_href = link.attrib.pop("href", "")
1046
1292
  if not def_href.startswith("#fn:"):
1047
1293
  raise DocumentError("expected: attribute `href` of format `#fn:NAME` applied on `<a>` for a footnote reference")
@@ -1095,18 +1341,28 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1095
1341
  ```
1096
1342
  """
1097
1343
 
1098
- for list_item in elem[1]:
1344
+ ordered_list = next((elem.iterchildren(tag="ol")), None)
1345
+ if ordered_list is None:
1346
+ raise DocumentError("expected: `<ol>` as direct child of footnote definition block")
1347
+
1348
+ for list_item in ordered_list:
1349
+ if list_item.tag != "li":
1350
+ raise DocumentError("expected: `<li>` as children of `<ol>` in footnote definition block")
1351
+
1099
1352
  def_id = list_item.attrib.pop("id", "")
1100
1353
  if not def_id.startswith("fn:"):
1101
1354
  raise DocumentError("expected: attribute `id` of format `fn:NAME` applied on `<li>` for a footnote definition")
1102
1355
  footnote_def = def_id.removeprefix("fn:")
1103
1356
 
1104
- paragraph = list_item[0]
1105
- ref_anchor = paragraph[-1]
1106
- if ref_anchor.tag != "a":
1357
+ paragraph = next((list_item.iterchildren(tag="p")), None)
1358
+ if paragraph is None:
1359
+ raise DocumentError("expected: `<p>` as a child of `<li>` in a footnote definition")
1360
+
1361
+ ref_anchor = next((paragraph.iterchildren(tag="a", reversed=True)), None)
1362
+ if ref_anchor is None:
1107
1363
  raise DocumentError("expected: `<a>` as the last HTML element in a footnote definition")
1108
1364
 
1109
- ref_href = ref_anchor.attrib.get("href", "")
1365
+ ref_href = ref_anchor.get("href", "")
1110
1366
  if not ref_href.startswith("#fnref:"):
1111
1367
  raise DocumentError("expected: attribute `href` of format `#fnref:NAME` applied on last element `<a>` for a footnote definition")
1112
1368
  footnote_ref = ref_href.removeprefix("#fnref:")
@@ -1159,9 +1415,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1159
1415
  if not element_text_starts_with_any(item, ["[ ]", "[x]", "[X]"]):
1160
1416
  raise DocumentError("expected: each `<li>` in a task list starting with [ ] or [x]")
1161
1417
 
1162
- # transform Markdown to Confluence within tasklist content
1163
- self.visit(elem)
1164
-
1165
1418
  tasks: list[ET._Element] = []
1166
1419
  for index, item in enumerate(elem, start=1):
1167
1420
  if item.text is None:
@@ -1171,11 +1424,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1171
1424
  raise NotImplementedError("pre-condition check not exhaustive")
1172
1425
 
1173
1426
  status = "incomplete" if match.group(1).isspace() else "complete"
1427
+ item.text = item.text[3:]
1174
1428
 
1175
- body = AC_ELEM("task-body")
1176
- body.text = item.text[3:]
1177
- for child in item:
1178
- body.append(child)
1429
+ # transform Markdown to Confluence within tasklist content
1430
+ self.visit(item)
1431
+
1432
+ body = AC_ELEM("task-body", *list(item))
1433
+ body.text = item.text
1179
1434
  tasks.append(
1180
1435
  AC_ELEM(
1181
1436
  "task",
@@ -1194,47 +1449,32 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1194
1449
  Transforms an HTML element tree obtained from a Markdown document into a Confluence Storage Format element tree.
1195
1450
  """
1196
1451
 
1197
- # normalize line breaks to regular space in element text
1452
+ # replace line breaks with regular space in element text to minimize phantom changes
1198
1453
  if child.text:
1199
- text: str = child.text
1200
- child.text = text.replace("\n", " ")
1454
+ child.text = child.text.replace("\n", " ")
1201
1455
  if child.tail:
1202
- tail: str = child.tail
1203
- child.tail = tail.replace("\n", " ")
1456
+ child.tail = child.tail.replace("\n", " ")
1204
1457
 
1205
1458
  if not isinstance(child.tag, str):
1206
1459
  return None
1207
1460
 
1208
- # <h1>...</h1>
1209
- # <h2>...</h2> ...
1210
- m = re.match(r"^h([1-6])$", child.tag, flags=re.IGNORECASE)
1211
- if m is not None:
1212
- level = int(m.group(1))
1213
- title = element_to_text(child)
1214
- self.toc.add(level, title)
1215
-
1216
- if self.options.heading_anchors:
1217
- self._transform_heading(child)
1218
- return None
1219
-
1220
1461
  # <p>...</p>
1221
1462
  if child.tag == "p":
1222
1463
  # <p><img src="..." /></p>
1223
- if len(child) == 1 and child[0].tag == "img":
1224
- return self._transform_image(child[0])
1464
+ if len(child) == 1 and not child.text and child[0].tag == "img" and not child[0].tail:
1465
+ return self._transform_image(FormattingContext.BLOCK, child[0])
1225
1466
 
1226
- # <p>[[_TOC_]]</p> (represented as <p>[[<em>TOC</em>]]</p>)
1227
- # <p>[TOC]</p>
1228
- elif element_to_text(child) in ["[[TOC]]", "[TOC]"]:
1467
+ # <p>[[<em>TOC</em>]]</p> (represented in Markdown as `[[_TOC_]]`)
1468
+ elif is_placeholder_for(child, "TOC"):
1229
1469
  return self._transform_toc(child)
1230
1470
 
1231
- # <p>[[_LISTING_]]</p> (represented as <p>[[<em>LISTING</em>]]</p>)
1232
- elif element_to_text(child) in ["[[LISTING]]", "[LISTING]"]:
1471
+ # <p>[[<em>LISTING</em>]]</p> (represented in Markdown as `[[_LISTING_]]`)
1472
+ elif is_placeholder_for(child, "LISTING"):
1233
1473
  return self._transform_listing(child)
1234
1474
 
1235
1475
  # <div>...</div>
1236
1476
  elif child.tag == "div":
1237
- classes = child.attrib.get("class", "").split(" ")
1477
+ classes = child.get("class", "").split(" ")
1238
1478
 
1239
1479
  # <div class="arithmatex">...</div>
1240
1480
  if "arithmatex" in classes:
@@ -1291,48 +1531,89 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1291
1531
  # ...
1292
1532
  # </details>
1293
1533
  elif child.tag == "details" and len(child) > 1 and child[0].tag == "summary":
1294
- return self._transform_section(child)
1534
+ return self._transform_collapsed(child)
1535
+
1536
+ # <ol>...</ol>
1537
+ elif child.tag == "ol":
1538
+ # Confluence adds the attribute `start` for every ordered list
1539
+ child.set("start", "1")
1540
+ return None
1295
1541
 
1296
1542
  # <ul>
1297
1543
  # <li>[ ] ...</li>
1298
1544
  # <li>[x] ...</li>
1299
1545
  # </ul>
1300
- elif child.tag == "ul" and len(child) > 0 and element_text_starts_with_any(child[0], ["[ ]", "[x]", "[X]"]):
1301
- return self._transform_tasklist(child)
1546
+ elif child.tag == "ul":
1547
+ if len(child) > 0 and element_text_starts_with_any(child[0], ["[ ]", "[x]", "[X]"]):
1548
+ return self._transform_tasklist(child)
1549
+
1550
+ return None
1551
+
1552
+ elif child.tag == "li":
1553
+ normalize_inline(child)
1554
+ return None
1302
1555
 
1303
1556
  # <pre><code class="language-java"> ... </code></pre>
1304
1557
  elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code":
1305
1558
  return self._transform_code_block(child[0])
1306
1559
 
1560
+ # <table>...</table>
1561
+ elif child.tag == "table":
1562
+ for td in child.iterdescendants("td", "th"):
1563
+ normalize_inline(td)
1564
+ child.set("data-layout", "default")
1565
+ return None
1566
+
1307
1567
  # <img src="..." alt="..." />
1308
1568
  elif child.tag == "img":
1309
- return self._transform_image(child)
1569
+ return self._transform_image(FormattingContext.INLINE, child)
1310
1570
 
1311
1571
  # <a href="..."> ... </a>
1312
1572
  elif child.tag == "a":
1313
1573
  return self._transform_link(child)
1314
1574
 
1575
+ # <mark>...</mark>
1576
+ elif child.tag == "mark":
1577
+ return self._transform_mark(child)
1578
+
1315
1579
  # <span>...</span>
1316
1580
  elif child.tag == "span":
1317
- classes = child.attrib.get("class", "").split(" ")
1581
+ classes = child.get("class", "").split(" ")
1318
1582
 
1319
1583
  # <span class="arithmatex">...</span>
1320
1584
  if "arithmatex" in classes:
1321
1585
  return self._transform_inline_math(child)
1322
1586
 
1323
1587
  # <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
1324
- elif child.tag == "sup" and child.attrib.get("id", "").startswith("fnref:"):
1588
+ elif child.tag == "sup" and child.get("id", "").startswith("fnref:"):
1325
1589
  self._transform_footnote_ref(child)
1326
1590
  return None
1327
1591
 
1328
1592
  # <input type="date" value="1984-01-01" />
1329
- elif child.tag == "input" and child.attrib.get("type", "") == "date":
1330
- return HTML("time", {"datetime": child.attrib.get("value", "")})
1593
+ elif child.tag == "input" and child.get("type", "") == "date":
1594
+ return HTML("time", {"datetime": child.get("value", "")})
1595
+
1596
+ # <ins>...</ins>
1597
+ elif child.tag == "ins":
1598
+ # Confluence prefers <u> over <ins> for underline, and replaces <ins> with <u>
1599
+ child.tag = "u"
1331
1600
 
1332
1601
  # <x-emoji data-shortname="wink" data-unicode="1f609">😉</x-emoji>
1333
1602
  elif child.tag == "x-emoji":
1334
1603
  return self._transform_emoji(child)
1335
1604
 
1605
+ # <h1>...</h1>
1606
+ # <h2>...</h2> ...
1607
+ m = re.match(r"^h([1-6])$", child.tag, flags=re.IGNORECASE)
1608
+ if m is not None:
1609
+ level = int(m.group(1))
1610
+ title = element_to_text(child)
1611
+ self.toc.add(level, title)
1612
+
1613
+ if self.options.heading_anchors:
1614
+ self._transform_heading(child)
1615
+ return None
1616
+
1336
1617
  return None
1337
1618
 
1338
1619
 
@@ -1345,11 +1626,15 @@ class ConversionError(RuntimeError):
1345
1626
 
1346
1627
 
1347
1628
  class ConfluenceDocument:
1629
+ "Encapsulates an element tree for a Confluence document created by parsing a Markdown document."
1630
+
1348
1631
  title: Optional[str]
1349
1632
  labels: Optional[list[str]]
1350
1633
  properties: Optional[dict[str, JsonType]]
1634
+
1351
1635
  links: list[str]
1352
- images: list[Path]
1636
+ images: list[ImageData]
1637
+ embedded_files: dict[str, EmbeddedFileData]
1353
1638
 
1354
1639
  options: ConfluenceDocumentOptions
1355
1640
  root: ET._Element