markdown-to-confluence 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/converter.py CHANGED
@@ -25,6 +25,8 @@ import markdown
25
25
  from lxml.builder import ElementMaker
26
26
  from strong_typing.core import JsonType
27
27
 
28
+ from md2conf.drawio import extract_diagram
29
+
28
30
  from .collection import ConfluencePageCollection
29
31
  from .extra import path_relative_to
30
32
  from .mermaid import render_diagram
@@ -39,6 +41,17 @@ namespaces = {
39
41
  for key, value in namespaces.items():
40
42
  ET.register_namespace(key, value)
41
43
 
44
+
45
+ def get_volatile_attributes() -> list[ET.QName]:
46
+ "Returns a list of volatile attributes that frequently change as a Confluence storage format XHTML document is updated."
47
+
48
+ return [
49
+ ET.QName(namespaces["ac"], "local-id"),
50
+ ET.QName(namespaces["ac"], "macro-id"),
51
+ ET.QName(namespaces["ri"], "version-at-save"),
52
+ ]
53
+
54
+
42
55
  HTML = ElementMaker()
43
56
  AC = ElementMaker(namespace=namespaces["ac"])
44
57
  RI = ElementMaker(namespace=namespaces["ri"])
@@ -99,6 +112,10 @@ def emoji_generator(
99
112
  options: dict[str, Any],
100
113
  md: markdown.Markdown,
101
114
  ) -> xml.etree.ElementTree.Element:
115
+ """
116
+ Custom generator for `pymdownx.emoji`.
117
+ """
118
+
102
119
  name = (alias or shortname).strip(":")
103
120
  span = xml.etree.ElementTree.Element("span", {"data-emoji-shortname": name})
104
121
  if uc is not None:
@@ -111,28 +128,59 @@ def emoji_generator(
111
128
  return span
112
129
 
113
130
 
131
+ def math_formatter(
132
+ source: str,
133
+ language: str,
134
+ css_class: str,
135
+ options: dict[str, Any],
136
+ md: markdown.Markdown,
137
+ classes: Optional[list[str]] = None,
138
+ id_value: str = "",
139
+ attrs: Optional[dict[str, str]] = None,
140
+ **kwargs: Any,
141
+ ) -> str:
142
+ """
143
+ Custom formatter for language `math` in `pymdownx.superfences`.
144
+ """
145
+
146
+ if classes is None:
147
+ classes = [css_class]
148
+ else:
149
+ classes.insert(0, css_class)
150
+
151
+ html_id = f' id="{id_value}"' if id_value else ""
152
+ html_class = ' class="{}"'.format(" ".join(classes))
153
+ html_attrs = " " + " ".join(f'{k}="{v}"' for k, v in attrs.items()) if attrs else ""
154
+
155
+ return f"<div{html_id}{html_class}{html_attrs}>{source}</div>"
156
+
157
+
114
158
  def markdown_to_html(content: str) -> str:
115
159
  return markdown.markdown(
116
160
  content,
117
161
  extensions=[
118
162
  "admonition",
163
+ "footnotes",
119
164
  "markdown.extensions.tables",
120
- # "markdown.extensions.fenced_code",
165
+ "md_in_html",
166
+ "pymdownx.arithmatex",
121
167
  "pymdownx.emoji",
122
168
  "pymdownx.highlight", # required by `pymdownx.superfences`
123
169
  "pymdownx.magiclink",
124
170
  "pymdownx.superfences",
125
171
  "pymdownx.tilde",
126
172
  "sane_lists",
127
- "md_in_html",
128
173
  ],
129
174
  extension_configs={
175
+ "footnotes": {"BACKLINK_TITLE": ""},
176
+ "pymdownx.arithmatex": {"generic": True, "preview": False, "tex_inline_wrap": ["", ""], "tex_block_wrap": ["", ""]},
130
177
  "pymdownx.emoji": {
131
178
  "emoji_generator": emoji_generator,
132
179
  },
133
180
  "pymdownx.highlight": {
134
181
  "use_pygments": False,
135
182
  },
183
+ "pymdownx.superfences": {"custom_fences": [{"name": "math", "class": "arithmatex", "format": math_formatter}]},
136
184
  },
137
185
  )
138
186
 
@@ -296,6 +344,13 @@ def element_to_text(node: ET._Element) -> str:
296
344
  return "".join(node.itertext()).strip()
297
345
 
298
346
 
347
+ @dataclass
348
+ class ImageAttributes:
349
+ caption: Optional[str]
350
+ width: Optional[str]
351
+ height: Optional[str]
352
+
353
+
299
354
  @dataclass
300
355
  class TableOfContentsEntry:
301
356
  level: int
@@ -346,6 +401,8 @@ class ConfluenceConverterOptions:
346
401
  plain text; when false, raise an exception.
347
402
  :param heading_anchors: When true, emit a structured macro *anchor* for each section heading using GitHub
348
403
  conversion rules for the identifier.
404
+ :param prefer_raster: Whether to choose PNG files over SVG files when available.
405
+ :param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
349
406
  :param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
350
407
  :param diagram_output_format: Target image format for diagrams.
351
408
  :param webui_links: When true, convert relative URLs to Confluence Web UI links.
@@ -353,13 +410,15 @@ class ConfluenceConverterOptions:
353
410
 
354
411
  ignore_invalid_url: bool = False
355
412
  heading_anchors: bool = False
413
+ prefer_raster: bool = True
414
+ render_drawio: bool = False
356
415
  render_mermaid: bool = False
357
416
  diagram_output_format: Literal["png", "svg"] = "png"
358
417
  webui_links: bool = False
359
418
 
360
419
 
361
420
  class ConfluenceStorageFormatConverter(NodeVisitor):
362
- "Transforms a plain HTML tree into the Confluence storage format."
421
+ "Transforms a plain HTML tree into Confluence Storage Format."
363
422
 
364
423
  options: ConfluenceConverterOptions
365
424
  path: Path
@@ -397,6 +456,8 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
397
456
  self.page_metadata = page_metadata
398
457
 
399
458
  def _transform_heading(self, heading: ET._Element) -> None:
459
+ "Adds anchors to headings in the same document (if *heading anchors* is enabled)."
460
+
400
461
  for e in heading:
401
462
  self.visit(e)
402
463
 
@@ -427,6 +488,14 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
427
488
  raise DocumentError(msg)
428
489
 
429
490
  def _transform_link(self, anchor: ET._Element) -> Optional[ET._Element]:
491
+ """
492
+ Transforms links (HTML anchor `<a>`).
493
+
494
+ * Absolute URLs are left intact.
495
+ * Links to headings in the same document are transformed into `<ac:link>` (if *heading anchors* is enabled).
496
+ * Links to documents in the source hierarchy are mapped into full Confluence URLs.
497
+ """
498
+
430
499
  url = anchor.attrib.get("href")
431
500
  if url is None or is_absolute_url(url):
432
501
  return None
@@ -451,7 +520,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
451
520
  link_wrapper.tail = anchor.tail
452
521
  return link_wrapper
453
522
  else:
454
- anchor.attrib["href"] = url
455
523
  return None
456
524
 
457
525
  # convert the relative URL to absolute URL based on the base path value, then look up
@@ -474,7 +542,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
474
542
  raise DocumentError(msg)
475
543
 
476
544
  relative_path = os.path.relpath(absolute_path, self.base_dir)
477
- LOGGER.debug("found link to page %s with metadata: %s", relative_path, link_metadata)
545
+ LOGGER.debug("Found link to page %s with metadata: %s", relative_path, link_metadata)
478
546
  self.links.append(url)
479
547
 
480
548
  if self.options.webui_links:
@@ -502,32 +570,48 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
502
570
  return None
503
571
 
504
572
  def _transform_image(self, image: ET._Element) -> ET._Element:
573
+ "Inserts an attached or external image."
574
+
505
575
  src = image.attrib.get("src")
506
576
 
507
577
  if not src:
508
578
  raise DocumentError("image lacks `src` attribute")
509
579
 
510
- attributes: dict[str, Any] = {
511
- ET.QName(namespaces["ac"], "align"): "center",
512
- ET.QName(namespaces["ac"], "layout"): "center",
513
- }
580
+ caption = image.attrib.get("alt")
514
581
  width = image.attrib.get("width")
515
- if width is not None:
516
- attributes.update({ET.QName(namespaces["ac"], "width"): width})
517
582
  height = image.attrib.get("height")
518
- if height is not None:
519
- attributes.update({ET.QName(namespaces["ac"], "height"): height})
520
-
521
- caption = image.attrib.get("alt")
583
+ attrs = ImageAttributes(caption, width, height)
522
584
 
523
585
  if is_absolute_url(src):
524
- return self._transform_external_image(src, caption, attributes)
586
+ return self._transform_external_image(src, attrs)
525
587
  else:
526
- return self._transform_attached_image(Path(src), caption, attributes)
588
+ path = Path(src)
527
589
 
528
- def _transform_external_image(self, url: str, caption: Optional[str], attributes: dict[str, Any]) -> ET._Element:
590
+ absolute_path = self._verify_image_path(path)
591
+ if absolute_path is None:
592
+ return self._create_missing(path, caption)
593
+
594
+ if absolute_path.name.endswith(".drawio.png") or absolute_path.name.endswith(".drawio.svg"):
595
+ return self._transform_drawio_image(absolute_path, attrs)
596
+ elif absolute_path.name.endswith(".drawio.xml") or absolute_path.name.endswith(".drawio"):
597
+ self.images.append(absolute_path)
598
+ image_filename = attachment_name(path_relative_to(absolute_path, self.base_dir))
599
+ return self._create_drawio(image_filename, attrs)
600
+ else:
601
+ return self._transform_attached_image(absolute_path, attrs)
602
+
603
+ def _transform_external_image(self, url: str, attrs: ImageAttributes) -> ET._Element:
529
604
  "Emits Confluence Storage Format XHTML for an external image."
530
605
 
606
+ attributes: dict[str, Any] = {
607
+ ET.QName(namespaces["ac"], "align"): "center",
608
+ ET.QName(namespaces["ac"], "layout"): "center",
609
+ }
610
+ if attrs.width is not None:
611
+ attributes.update({ET.QName(namespaces["ac"], "width"): attrs.width})
612
+ if attrs.height is not None:
613
+ attributes.update({ET.QName(namespaces["ac"], "height"): attrs.height})
614
+
531
615
  elements: list[ET._Element] = []
532
616
  elements.append(
533
617
  RI(
@@ -536,33 +620,68 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
536
620
  {ET.QName(namespaces["ri"], "value"): url},
537
621
  )
538
622
  )
539
- if caption is not None:
540
- elements.append(AC("caption", HTML.p(caption)))
623
+ if attrs.caption is not None:
624
+ elements.append(AC("caption", HTML.p(attrs.caption)))
541
625
 
542
626
  return AC("image", attributes, *elements)
543
627
 
544
- def _transform_attached_image(self, path: Path, caption: Optional[str], attributes: dict[str, Any]) -> ET._Element:
545
- "Emits Confluence Storage Format XHTML for an attached image."
628
+ def _verify_image_path(self, path: Path) -> Optional[Path]:
629
+ "Checks whether an image path is safe to use."
546
630
 
547
631
  # resolve relative path into absolute path w.r.t. base dir
548
632
  absolute_path = (self.base_dir / path).resolve()
549
633
 
550
- if absolute_path.exists():
634
+ if not absolute_path.exists():
635
+ self._warn_or_raise(f"path to image {path} does not exist")
636
+ return None
637
+
638
+ if not is_directory_within(absolute_path, self.root_dir):
639
+ self._warn_or_raise(f"path to image {path} points to outside root path {self.root_dir}")
640
+ return None
641
+
642
+ return absolute_path
643
+
644
+ def _transform_attached_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
645
+ "Emits Confluence Storage Format XHTML for an attached raster or vector image."
646
+
647
+ if self.options.prefer_raster and absolute_path.name.endswith(".svg"):
551
648
  # prefer PNG over SVG; Confluence displays SVG in wrong size, and text labels are truncated
552
- if absolute_path.suffix == ".svg":
553
- png_file = absolute_path.with_suffix(".png")
554
- if png_file.exists():
555
- absolute_path = png_file
649
+ png_file = absolute_path.with_suffix(".png")
650
+ if png_file.exists():
651
+ absolute_path = png_file
556
652
 
557
- if is_directory_within(absolute_path, self.root_dir):
558
- self.images.append(absolute_path)
559
- image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
560
- else:
561
- image_name = ""
562
- self._warn_or_raise(f"path to image {path} points to outside root path {self.root_dir}")
653
+ self.images.append(absolute_path)
654
+ return self._create_image(absolute_path, attrs)
655
+
656
+ def _transform_drawio_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
657
+ "Emits Confluence Storage Format XHTML for a draw.io image."
658
+
659
+ if not absolute_path.name.endswith(".drawio.png") and not absolute_path.name.endswith(".drawio.svg"):
660
+ raise DocumentError("invalid image format; expected: `*.drawio.png` or `*.drawio.svg`")
661
+
662
+ if self.options.render_drawio:
663
+ return self._transform_attached_image(absolute_path, attrs)
563
664
  else:
564
- image_name = ""
565
- self._warn_or_raise(f"path to image {path} does not exist")
665
+ # extract embedded editable diagram and upload as *.drawio
666
+ image_data = extract_diagram(absolute_path)
667
+ image_filename = attachment_name(path_relative_to(absolute_path.with_suffix(".xml"), self.base_dir))
668
+ self.embedded_images[image_filename] = image_data
669
+
670
+ return self._create_drawio(image_filename, attrs)
671
+
672
+ def _create_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
673
+ "An image embedded into the page, linking to an attachment."
674
+
675
+ image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
676
+
677
+ attributes: dict[str, Any] = {
678
+ ET.QName(namespaces["ac"], "align"): "center",
679
+ ET.QName(namespaces["ac"], "layout"): "center",
680
+ }
681
+ if attrs.width is not None:
682
+ attributes.update({ET.QName(namespaces["ac"], "width"): attrs.width})
683
+ if attrs.height is not None:
684
+ attributes.update({ET.QName(namespaces["ac"], "height"): attrs.height})
566
685
 
567
686
  elements: list[ET._Element] = []
568
687
  elements.append(
@@ -572,12 +691,80 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
572
691
  {ET.QName(namespaces["ri"], "filename"): image_name},
573
692
  )
574
693
  )
575
- if caption is not None:
576
- elements.append(AC("caption", HTML.p(caption)))
694
+ if attrs.caption is not None:
695
+ elements.append(AC("caption", HTML.p(attrs.caption)))
577
696
 
578
697
  return AC("image", attributes, *elements)
579
698
 
580
- def _transform_block(self, code: ET._Element) -> ET._Element:
699
+ def _create_drawio(self, filename: str, attrs: ImageAttributes) -> ET._Element:
700
+ "A draw.io diagram embedded into the page, linking to an attachment."
701
+
702
+ parameters: list[ET._Element] = [
703
+ AC(
704
+ "parameter",
705
+ {ET.QName(namespaces["ac"], "name"): "diagramName"},
706
+ filename,
707
+ ),
708
+ ]
709
+ if attrs.width is not None:
710
+ parameters.append(
711
+ AC(
712
+ "parameter",
713
+ {ET.QName(namespaces["ac"], "name"): "width"},
714
+ attrs.width,
715
+ ),
716
+ )
717
+ if attrs.height is not None:
718
+ parameters.append(
719
+ AC(
720
+ "parameter",
721
+ {ET.QName(namespaces["ac"], "name"): "height"},
722
+ attrs.height,
723
+ ),
724
+ )
725
+
726
+ local_id = str(uuid.uuid4())
727
+ macro_id = str(uuid.uuid4())
728
+ return AC(
729
+ "structured-macro",
730
+ {
731
+ ET.QName(namespaces["ac"], "name"): "drawio",
732
+ ET.QName(namespaces["ac"], "schema-version"): "1",
733
+ "data-layout": "default",
734
+ ET.QName(namespaces["ac"], "local-id"): local_id,
735
+ ET.QName(namespaces["ac"], "macro-id"): macro_id,
736
+ },
737
+ *parameters,
738
+ )
739
+
740
+ def _create_missing(self, path: Path, caption: Optional[str]) -> ET._Element:
741
+ "A warning panel for a missing image."
742
+
743
+ message = HTML.p("Missing image: ", HTML.code(path.as_posix()))
744
+ if caption is not None:
745
+ content = [
746
+ AC(
747
+ "parameter",
748
+ {ET.QName(namespaces["ac"], "name"): "title"},
749
+ caption,
750
+ ),
751
+ AC("rich-text-body", {}, message),
752
+ ]
753
+ else:
754
+ content = [AC("rich-text-body", {}, message)]
755
+
756
+ return AC(
757
+ "structured-macro",
758
+ {
759
+ ET.QName(namespaces["ac"], "name"): "warning",
760
+ ET.QName(namespaces["ac"], "schema-version"): "1",
761
+ },
762
+ *content,
763
+ )
764
+
765
+ def _transform_code_block(self, code: ET._Element) -> ET._Element:
766
+ "Transforms a code block."
767
+
581
768
  language = code.attrib.get("class")
582
769
  if language:
583
770
  m = re.match("^language-(.*)$", language)
@@ -639,7 +826,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
639
826
  {
640
827
  ET.QName(namespaces["ac"], "name"): "macro-diagram",
641
828
  ET.QName(namespaces["ac"], "schema-version"): "1",
642
- ET.QName(namespaces["ac"], "data-layout"): "default",
829
+ "data-layout": "default",
643
830
  ET.QName(namespaces["ac"], "local-id"): local_id,
644
831
  ET.QName(namespaces["ac"], "macro-id"): macro_id,
645
832
  },
@@ -666,6 +853,8 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
666
853
  )
667
854
 
668
855
  def _transform_toc(self, code: ET._Element) -> ET._Element:
856
+ "Creates a table of contents, constructed from headings in the document."
857
+
669
858
  return AC(
670
859
  "structured-macro",
671
860
  {
@@ -676,6 +865,19 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
676
865
  AC("parameter", {ET.QName(namespaces["ac"], "name"): "style"}, "default"),
677
866
  )
678
867
 
868
+ def _transform_listing(self, code: ET._Element) -> ET._Element:
869
+ "Creates a list of child pages."
870
+
871
+ return AC(
872
+ "structured-macro",
873
+ {
874
+ ET.QName(namespaces["ac"], "name"): "children",
875
+ ET.QName(namespaces["ac"], "schema-version"): "2",
876
+ "data-layout": "default",
877
+ },
878
+ AC("parameter", {ET.QName(namespaces["ac"], "name"): "allChildren"}, "true"),
879
+ )
880
+
679
881
  def _transform_admonition(self, elem: ET._Element) -> ET._Element:
680
882
  """
681
883
  Creates an info, tip, note or warning panel from a Markdown admonition.
@@ -725,6 +927,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
725
927
  )
726
928
 
727
929
  def _transform_github_alert(self, elem: ET._Element) -> ET._Element:
930
+ """
931
+ Creates a GitHub-style panel, normally triggered with a block-quote starting with a capitalized string such as `[!TIP]`.
932
+ """
933
+
728
934
  content = elem[0]
729
935
  if content.text is None:
730
936
  raise DocumentError("empty content")
@@ -753,6 +959,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
753
959
  return self._transform_alert(elem, class_name, skip)
754
960
 
755
961
  def _transform_gitlab_alert(self, elem: ET._Element) -> ET._Element:
962
+ """
963
+ Creates a classic GitLab-style panel.
964
+
965
+ Classic panels are defined with a block-quote and text starting with a capitalized string such as `DISCLAIMER:`.
966
+ This syntax does not use Hugo shortcode.
967
+ """
968
+
756
969
  content = elem[0]
757
970
  if content.text is None:
758
971
  raise DocumentError("empty content")
@@ -842,6 +1055,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
842
1055
  )
843
1056
 
844
1057
  def _transform_emoji(self, elem: ET._Element) -> ET._Element:
1058
+ """
1059
+ Inserts an inline emoji character.
1060
+ """
1061
+
845
1062
  shortname = elem.attrib.get("data-emoji-shortname", "")
846
1063
  unicode = elem.attrib.get("data-emoji-unicode", None)
847
1064
  alt = elem.text or ""
@@ -852,7 +1069,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
852
1069
  return AC(
853
1070
  "emoticon",
854
1071
  {
855
- # use "blue-star" as a placeholder name to ensure wiki page loads in timely manner
856
1072
  ET.QName(namespaces["ac"], "name"): shortname,
857
1073
  ET.QName(namespaces["ac"], "emoji-shortname"): f":{shortname}:",
858
1074
  ET.QName(namespaces["ac"], "emoji-id"): unicode,
@@ -860,7 +1076,196 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
860
1076
  },
861
1077
  )
862
1078
 
1079
+ def _transform_inline_math(self, elem: ET._Element) -> ET._Element:
1080
+ """
1081
+ Creates an inline LaTeX formula using the Confluence extension "LaTeX Math for Confluence - Math Formula & Equations".
1082
+
1083
+ :see: https://help.narva.net/latex-math-for-confluence/
1084
+ """
1085
+
1086
+ content = elem.text or ""
1087
+ if not content:
1088
+ raise DocumentError("empty inline LaTeX formula")
1089
+
1090
+ LOGGER.debug("Found inline LaTeX formula: %s", content)
1091
+
1092
+ local_id = str(uuid.uuid4())
1093
+ macro_id = str(uuid.uuid4())
1094
+ macro = AC(
1095
+ "structured-macro",
1096
+ {
1097
+ ET.QName(namespaces["ac"], "name"): "eazy-math-inline",
1098
+ ET.QName(namespaces["ac"], "schema-version"): "1",
1099
+ ET.QName(namespaces["ac"], "local-id"): local_id,
1100
+ ET.QName(namespaces["ac"], "macro-id"): macro_id,
1101
+ },
1102
+ AC(
1103
+ "parameter",
1104
+ {ET.QName(namespaces["ac"], "name"): "body"},
1105
+ content,
1106
+ ),
1107
+ AC("parameter", {ET.QName(namespaces["ac"], "name"): "align"}, "center"),
1108
+ )
1109
+ macro.tail = elem.tail # chain sibling text node that immediately follows original element
1110
+ return macro
1111
+
1112
+ def _transform_block_math(self, elem: ET._Element) -> ET._Element:
1113
+ """
1114
+ Creates a block-level LaTeX formula using the Confluence extension "LaTeX Math for Confluence - Math Formula & Equations".
1115
+
1116
+ :see: https://help.narva.net/latex-math-for-confluence/
1117
+ """
1118
+
1119
+ content = elem.text or ""
1120
+ if not content:
1121
+ raise DocumentError("empty block-level LaTeX formula")
1122
+
1123
+ LOGGER.debug("Found block-level LaTeX formula: %s", content)
1124
+
1125
+ local_id = str(uuid.uuid4())
1126
+ macro_id = str(uuid.uuid4())
1127
+
1128
+ return AC(
1129
+ "structured-macro",
1130
+ {
1131
+ ET.QName(namespaces["ac"], "name"): "easy-math-block",
1132
+ ET.QName(namespaces["ac"], "schema-version"): "1",
1133
+ "data-layout": "default",
1134
+ ET.QName(namespaces["ac"], "local-id"): local_id,
1135
+ ET.QName(namespaces["ac"], "macro-id"): macro_id,
1136
+ },
1137
+ AC(
1138
+ "parameter",
1139
+ {ET.QName(namespaces["ac"], "name"): "body"},
1140
+ content,
1141
+ ),
1142
+ AC("parameter", {ET.QName(namespaces["ac"], "name"): "align"}, "center"),
1143
+ )
1144
+
1145
+ def _transform_footnote_ref(self, elem: ET._Element) -> None:
1146
+ """
1147
+ Transforms a footnote reference.
1148
+
1149
+ ```
1150
+ <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
1151
+ ```
1152
+ """
1153
+
1154
+ if elem.tag != "sup":
1155
+ raise DocumentError("expected: `<sup>` as the HTML element for a footnote reference")
1156
+
1157
+ ref_id = elem.attrib.pop("id", "")
1158
+ if not ref_id.startswith("fnref:"):
1159
+ raise DocumentError("expected: attribute `id` of format `fnref:NAME` applied on `<sup>` for a footnote reference")
1160
+ footnote_ref = ref_id.removeprefix("fnref:")
1161
+
1162
+ link = elem[0]
1163
+ def_href = link.attrib.pop("href", "")
1164
+ if not def_href.startswith("#fn:"):
1165
+ raise DocumentError("expected: attribute `href` of format `#fn:NAME` applied on `<a>` for a footnote reference")
1166
+ footnote_def = def_href.removeprefix("#fn:")
1167
+
1168
+ text = link.text or ""
1169
+
1170
+ # remove link generated by Python-Markdown
1171
+ elem.remove(link)
1172
+
1173
+ # build new anchor for footnote reference
1174
+ ref_anchor = AC(
1175
+ "structured-macro",
1176
+ {
1177
+ ET.QName(namespaces["ac"], "name"): "anchor",
1178
+ ET.QName(namespaces["ac"], "schema-version"): "1",
1179
+ },
1180
+ AC(
1181
+ "parameter",
1182
+ {ET.QName(namespaces["ac"], "name"): ""},
1183
+ f"footnote-ref-{footnote_ref}",
1184
+ ),
1185
+ )
1186
+
1187
+ # build new link to footnote definition at the end of page
1188
+ def_link = AC(
1189
+ "link",
1190
+ {
1191
+ ET.QName(namespaces["ac"], "anchor"): f"footnote-def-{footnote_def}",
1192
+ },
1193
+ AC("link-body", ET.CDATA(text)),
1194
+ )
1195
+
1196
+ # append children synthesized for Confluence
1197
+ elem.append(ref_anchor)
1198
+ elem.append(def_link)
1199
+
1200
+ def _transform_footnote_def(self, elem: ET._Element) -> None:
1201
+ """
1202
+ Transforms the footnote definition block.
1203
+
1204
+ ```
1205
+ <div class="footnote">
1206
+ <hr/>
1207
+ <ol>
1208
+ <li id="fn:NAME">
1209
+ <p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p>
1210
+ </li>
1211
+ </ol>
1212
+ </div>
1213
+ ```
1214
+ """
1215
+
1216
+ for list_item in elem[1]:
1217
+ def_id = list_item.attrib.pop("id", "")
1218
+ if not def_id.startswith("fn:"):
1219
+ raise DocumentError("expected: attribute `id` of format `fn:NAME` applied on `<li>` for a footnote definition")
1220
+ footnote_def = def_id.removeprefix("fn:")
1221
+
1222
+ paragraph = list_item[0]
1223
+ ref_anchor = paragraph[-1]
1224
+ if ref_anchor.tag != "a":
1225
+ raise DocumentError("expected: `<a>` as the last HTML element in a footnote definition")
1226
+
1227
+ ref_href = ref_anchor.attrib.get("href", "")
1228
+ if not ref_href.startswith("#fnref:"):
1229
+ raise DocumentError("expected: attribute `href` of format `#fnref:NAME` applied on last element `<a>` for a footnote definition")
1230
+ footnote_ref = ref_href.removeprefix("#fnref:")
1231
+
1232
+ # remove back-link generated by Python-Markdown
1233
+ paragraph.remove(ref_anchor)
1234
+
1235
+ # build new anchor for footnote definition
1236
+ def_anchor = AC(
1237
+ "structured-macro",
1238
+ {
1239
+ ET.QName(namespaces["ac"], "name"): "anchor",
1240
+ ET.QName(namespaces["ac"], "schema-version"): "1",
1241
+ },
1242
+ AC(
1243
+ "parameter",
1244
+ {ET.QName(namespaces["ac"], "name"): ""},
1245
+ f"footnote-def-{footnote_def}",
1246
+ ),
1247
+ )
1248
+
1249
+ # build new link to footnote reference in page body
1250
+ ref_link = AC(
1251
+ "link",
1252
+ {
1253
+ ET.QName(namespaces["ac"], "anchor"): f"footnote-ref-{footnote_ref}",
1254
+ },
1255
+ AC("link-body", ET.CDATA("↩")),
1256
+ )
1257
+
1258
+ # append children synthesized for Confluence
1259
+ paragraph.insert(0, def_anchor)
1260
+ def_anchor.tail = paragraph.text
1261
+ paragraph.text = None
1262
+ paragraph.append(ref_link)
1263
+
863
1264
  def transform(self, child: ET._Element) -> Optional[ET._Element]:
1265
+ """
1266
+ Transforms an HTML element tree obtained from a Markdown document into a Confluence Storage Format element tree.
1267
+ """
1268
+
864
1269
  # normalize line breaks to regular space in element text
865
1270
  if child.text:
866
1271
  text: str = child.text
@@ -893,6 +1298,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
893
1298
  elif child.tag == "p" and "".join(child.itertext()) in ["[[TOC]]", "[TOC]"]:
894
1299
  return self._transform_toc(child)
895
1300
 
1301
+ # <p>[[_LISTING_]]</p>
1302
+ elif child.tag == "p" and "".join(child.itertext()) in ["[[LISTING]]", "[LISTING]"]:
1303
+ return self._transform_listing(child)
1304
+
896
1305
  # <div class="admonition note">
897
1306
  # <p class="admonition-title">Note</p>
898
1307
  # <p>...</p>
@@ -943,20 +1352,35 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
943
1352
 
944
1353
  # <pre><code class="language-java"> ... </code></pre>
945
1354
  elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code":
946
- return self._transform_block(child[0])
1355
+ return self._transform_code_block(child[0])
947
1356
 
1357
+ # <span data-emoji-shortname="..." data-emoji-unicode="...">...</span>
948
1358
  elif child.tag == "span" and child.attrib.has_key("data-emoji-shortname"):
949
1359
  return self._transform_emoji(child)
950
1360
 
951
- return None
1361
+ # <div class="arithmatex">...</div>
1362
+ elif child.tag == "div" and "arithmatex" in child.attrib.get("class", "").split(" "):
1363
+ return self._transform_block_math(child)
952
1364
 
1365
+ # <span class="arithmatex">...</span>
1366
+ elif child.tag == "span" and "arithmatex" in child.attrib.get("class", "").split(" "):
1367
+ return self._transform_inline_math(child)
953
1368
 
954
- class ConfluenceStorageFormatCleaner(NodeVisitor):
955
- "Removes volatile attributes from a Confluence storage format XHTML document."
1369
+ # <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
1370
+ elif child.tag == "sup" and child.attrib.get("id", "").startswith("fnref:"):
1371
+ self._transform_footnote_ref(child)
1372
+ return None
1373
+
1374
+ # <div class="footnote">
1375
+ # <hr/>
1376
+ # <ol>
1377
+ # <li id="fn:NAME"><p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p></li>
1378
+ # </ol>
1379
+ # </div>
1380
+ elif child.tag == "div" and "footnote" in child.attrib.get("class", "").split(" "):
1381
+ self._transform_footnote_def(child)
1382
+ return None
956
1383
 
957
- def transform(self, child: ET._Element) -> Optional[ET._Element]:
958
- child.attrib.pop(ET.QName(namespaces["ac"], "macro-id"), None)
959
- child.attrib.pop(ET.QName(namespaces["ri"], "version-at-save"), None)
960
1384
  return None
961
1385
 
962
1386
 
@@ -987,6 +1411,8 @@ class ConfluenceDocumentOptions:
987
1411
  :param generated_by: Text to use as the generated-by prompt (or `None` to omit a prompt).
988
1412
  :param root_page_id: Confluence page to assume root page role for publishing a directory of Markdown files.
989
1413
  :param keep_hierarchy: Whether to maintain source directory structure when exporting to Confluence.
1414
+ :param prefer_raster: Whether to choose PNG files over SVG files when available.
1415
+ :param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
990
1416
  :param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
991
1417
  :param diagram_output_format: Target image format for diagrams.
992
1418
  :param webui_links: When true, convert relative URLs to Confluence Web UI links.
@@ -997,6 +1423,8 @@ class ConfluenceDocumentOptions:
997
1423
  generated_by: Optional[str] = "This page has been generated with a tool."
998
1424
  root_page_id: Optional[ConfluencePageID] = None
999
1425
  keep_hierarchy: bool = False
1426
+ prefer_raster: bool = True
1427
+ render_drawio: bool = False
1000
1428
  render_mermaid: bool = False
1001
1429
  diagram_output_format: Literal["png", "svg"] = "png"
1002
1430
  webui_links: bool = False
@@ -1082,6 +1510,8 @@ class ConfluenceDocument:
1082
1510
  ConfluenceConverterOptions(
1083
1511
  ignore_invalid_url=self.options.ignore_invalid_url,
1084
1512
  heading_anchors=self.options.heading_anchors,
1513
+ prefer_raster=self.options.prefer_raster,
1514
+ render_drawio=self.options.render_drawio,
1085
1515
  render_mermaid=self.options.render_mermaid,
1086
1516
  diagram_output_format=self.options.diagram_output_format,
1087
1517
  webui_links=self.options.webui_links,
@@ -1136,17 +1566,6 @@ def attachment_name(ref: Union[Path, str]) -> str:
1136
1566
  return Path(*parts).as_posix().replace("/", "_")
1137
1567
 
1138
1568
 
1139
- def sanitize_confluence(html: str) -> str:
1140
- "Generates a sanitized version of a Confluence storage format XHTML document with no volatile attributes."
1141
-
1142
- if not html:
1143
- return ""
1144
-
1145
- root = elements_from_strings([html])
1146
- ConfluenceStorageFormatCleaner().visit(root)
1147
- return elements_to_string(root)
1148
-
1149
-
1150
1569
  def elements_to_string(root: ET._Element) -> str:
1151
1570
  xml = ET.tostring(root, encoding="utf8", method="xml").decode("utf8")
1152
1571
  m = re.match(r"^<root\s+[^>]*>(.*)</root>\s*$", xml, re.DOTALL)