markdown-to-confluence 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/converter.py CHANGED
@@ -23,8 +23,12 @@ from urllib.parse import ParseResult, quote_plus, urlparse, urlunparse
23
23
  import lxml.etree as ET
24
24
  import markdown
25
25
  from lxml.builder import ElementMaker
26
+ from strong_typing.core import JsonType
27
+
28
+ from md2conf.drawio import extract_diagram
26
29
 
27
30
  from .collection import ConfluencePageCollection
31
+ from .extra import path_relative_to
28
32
  from .mermaid import render_diagram
29
33
  from .metadata import ConfluenceSiteMetadata
30
34
  from .properties import PageError
@@ -37,6 +41,17 @@ namespaces = {
37
41
  for key, value in namespaces.items():
38
42
  ET.register_namespace(key, value)
39
43
 
44
+
45
def get_volatile_attributes() -> list[ET.QName]:
    """
    Returns the attributes that frequently change as a Confluence storage format XHTML document is updated.

    :returns: Qualified names for `ac:local-id`, `ac:macro-id` and `ri:version-at-save`, in this order.
    """

    # (namespace prefix, local attribute name) pairs; prefixes are looked up in `namespaces`
    volatile_names = (
        ("ac", "local-id"),
        ("ac", "macro-id"),
        ("ri", "version-at-save"),
    )
    return [ET.QName(namespaces[prefix], local_name) for prefix, local_name in volatile_names]
53
+
54
+
40
55
  HTML = ElementMaker()
41
56
  AC = ElementMaker(namespace=namespaces["ac"])
42
57
  RI = ElementMaker(namespace=namespaces["ri"])
@@ -67,6 +82,12 @@ def is_relative_url(url: str) -> bool:
67
82
  return not bool(urlparts.scheme) and not bool(urlparts.netloc)
68
83
 
69
84
 
85
def is_directory_within(absolute_path: Path, base_path: Path) -> bool:
    """
    True if the absolute path is the base path itself or is nested within the base path.

    The comparison is made on whole path components rather than on string prefixes, such that a sibling that
    merely shares a name prefix with the base path (e.g. `/docs-private` vs. `/docs`) is not mistaken for a
    descendant. The check is purely lexical: no file system access takes place and symbolic links are not
    followed, hence callers should pass paths that have already been resolved.

    :param absolute_path: The path to test.
    :param base_path: The directory that is expected to contain the path.
    :returns: True if the path equals the base path or lies below it.
    """

    return absolute_path == base_path or base_path in absolute_path.parents
89
+
90
+
70
91
  def encode_title(text: str) -> str:
71
92
  "Converts a title string such that it is safe to embed into a Confluence URL."
72
93
 
@@ -91,6 +112,10 @@ def emoji_generator(
91
112
  options: dict[str, Any],
92
113
  md: markdown.Markdown,
93
114
  ) -> xml.etree.ElementTree.Element:
115
+ """
116
+ Custom generator for `pymdownx.emoji`.
117
+ """
118
+
94
119
  name = (alias or shortname).strip(":")
95
120
  span = xml.etree.ElementTree.Element("span", {"data-emoji-shortname": name})
96
121
  if uc is not None:
@@ -103,28 +128,59 @@ def emoji_generator(
103
128
  return span
104
129
 
105
130
 
131
def math_formatter(
    source: str,
    language: str,
    css_class: str,
    options: dict[str, Any],
    md: "markdown.Markdown",
    classes: Optional[list[str]] = None,
    id_value: str = "",
    attrs: Optional[dict[str, str]] = None,
    **kwargs: Any,
) -> str:
    """
    Custom formatter for language `math` in `pymdownx.superfences`.

    Wraps the text of the fenced block in a `<div>` element that carries the CSS class, and any identifier,
    extra classes and extra attributes passed by the caller.

    :param source: Text of the fenced block; escaped before it is placed into the element.
    :param language: Language of the fence (unused; part of the formatter signature).
    :param css_class: CSS class to put first in the `class` attribute.
    :param options: Formatter options (unused; part of the formatter signature).
    :param md: Markdown instance (unused; part of the formatter signature).
    :param classes: Additional CSS classes to emit after `css_class`; the list is not modified.
    :param id_value: Value for the `id` attribute; omitted when empty.
    :param attrs: Additional attributes to emit verbatim as `key="value"` pairs.
    :param kwargs: Further keyword arguments (ignored).
    :returns: An HTML fragment with a single `<div>` element.
    """

    from html import escape  # function-scope import keeps the block self-contained

    # build a fresh list rather than inserting into the list owned by the caller
    css_classes = [css_class] if classes is None else [css_class, *classes]

    html_id = f' id="{id_value}"' if id_value else ""
    html_class = ' class="{}"'.format(" ".join(css_classes))
    html_attrs = ""
    if attrs:
        html_attrs = " " + " ".join(f'{k}="{v}"' for k, v in attrs.items())

    # LaTeX routinely contains `&`, `<` and `>` (e.g. alignment tabs, inequalities); escape them such that
    # the fragment remains well-formed markup and the formula text survives parsing unchanged
    return f"<div{html_id}{html_class}{html_attrs}>{escape(source, quote=False)}</div>"
156
+
157
+
106
158
  def markdown_to_html(content: str) -> str:
107
159
  return markdown.markdown(
108
160
  content,
109
161
  extensions=[
110
162
  "admonition",
163
+ "footnotes",
111
164
  "markdown.extensions.tables",
112
- # "markdown.extensions.fenced_code",
165
+ "md_in_html",
166
+ "pymdownx.arithmatex",
113
167
  "pymdownx.emoji",
114
168
  "pymdownx.highlight", # required by `pymdownx.superfences`
115
169
  "pymdownx.magiclink",
116
170
  "pymdownx.superfences",
117
171
  "pymdownx.tilde",
118
172
  "sane_lists",
119
- "md_in_html",
120
173
  ],
121
174
  extension_configs={
175
+ "footnotes": {"BACKLINK_TITLE": ""},
176
+ "pymdownx.arithmatex": {"generic": True, "preview": False, "tex_inline_wrap": ["", ""], "tex_block_wrap": ["", ""]},
122
177
  "pymdownx.emoji": {
123
178
  "emoji_generator": emoji_generator,
124
179
  },
125
180
  "pymdownx.highlight": {
126
181
  "use_pygments": False,
127
182
  },
183
+ "pymdownx.superfences": {"custom_fences": [{"name": "math", "class": "arithmatex", "format": math_formatter}]},
128
184
  },
129
185
  )
130
186
 
@@ -145,14 +201,11 @@ def _elements_from_strings(dtd_path: Path, items: list[str]) -> ET._Element:
145
201
  load_dtd=True,
146
202
  )
147
203
 
148
- ns_attr_list = "".join(
149
- f' xmlns:{key}="{value}"' for key, value in namespaces.items()
150
- )
204
+ ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in namespaces.items())
151
205
 
152
206
  data = [
153
207
  '<?xml version="1.0"?>',
154
- f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path.as_posix()}">'
155
- f"<root{ns_attr_list}>",
208
+ f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path.as_posix()}"><root{ns_attr_list}>',
156
209
  ]
157
210
  data.extend(items)
158
211
  data.append("</root>")
@@ -291,6 +344,13 @@ def element_to_text(node: ET._Element) -> str:
291
344
  return "".join(node.itertext()).strip()
292
345
 
293
346
 
347
@dataclass
class ImageAttributes:
    "Presentation properties of an image that are carried over to the generated Confluence markup."

    # text shown as the image caption; `None` if no caption is to be emitted
    caption: Optional[str]
    # requested display width, passed through as a string; `None` if unspecified
    width: Optional[str]
    # requested display height, passed through as a string; `None` if unspecified
    height: Optional[str]
352
+
353
+
294
354
  @dataclass
295
355
  class TableOfContentsEntry:
296
356
  level: int
@@ -341,6 +401,8 @@ class ConfluenceConverterOptions:
341
401
  plain text; when false, raise an exception.
342
402
  :param heading_anchors: When true, emit a structured macro *anchor* for each section heading using GitHub
343
403
  conversion rules for the identifier.
404
+ :param prefer_raster: Whether to choose PNG files over SVG files when available.
405
+ :param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
344
406
  :param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
345
407
  :param diagram_output_format: Target image format for diagrams.
346
408
  :param webui_links: When true, convert relative URLs to Confluence Web UI links.
@@ -348,13 +410,15 @@ class ConfluenceConverterOptions:
348
410
 
349
411
  ignore_invalid_url: bool = False
350
412
  heading_anchors: bool = False
413
+ prefer_raster: bool = True
414
+ render_drawio: bool = False
351
415
  render_mermaid: bool = False
352
416
  diagram_output_format: Literal["png", "svg"] = "png"
353
417
  webui_links: bool = False
354
418
 
355
419
 
356
420
  class ConfluenceStorageFormatConverter(NodeVisitor):
357
- "Transforms a plain HTML tree into the Confluence storage format."
421
+ "Transforms a plain HTML tree into Confluence Storage Format."
358
422
 
359
423
  options: ConfluenceConverterOptions
360
424
  path: Path
@@ -376,6 +440,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
376
440
  page_metadata: ConfluencePageCollection,
377
441
  ) -> None:
378
442
  super().__init__()
443
+
444
+ path = path.resolve(True)
445
+ root_dir = root_dir.resolve(True)
446
+
379
447
  self.options = options
380
448
  self.path = path
381
449
  self.base_dir = path.parent
@@ -388,6 +456,8 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
388
456
  self.page_metadata = page_metadata
389
457
 
390
458
  def _transform_heading(self, heading: ET._Element) -> None:
459
+ "Adds anchors to headings in the same document (if *heading anchors* is enabled)."
460
+
391
461
  for e in heading:
392
462
  self.visit(e)
393
463
 
@@ -409,7 +479,23 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
409
479
  anchor.tail = heading.text
410
480
  heading.text = None
411
481
 
482
def _warn_or_raise(self, msg: str) -> None:
    """
    Reports a path that points to a resource that doesn't exist or cannot be used.

    :param msg: Description of the problem.
    :raises DocumentError: Raised with the message unless the option *ignore invalid URL* is enabled,
        in which case the message is logged as a warning instead.
    """

    if not self.options.ignore_invalid_url:
        raise DocumentError(msg)

    LOGGER.warning(msg)
489
+
412
490
  def _transform_link(self, anchor: ET._Element) -> Optional[ET._Element]:
491
+ """
492
+ Transforms links (HTML anchor `<a>`).
493
+
494
+ * Absolute URLs are left intact.
495
+ * Links to headings in the same document are transformed into `<ac:link>` (if *heading anchors* is enabled).
496
+ * Links to documents in the source hierarchy are mapped into full Confluence URLs.
497
+ """
498
+
413
499
  url = anchor.attrib.get("href")
414
500
  if url is None or is_absolute_url(url):
415
501
  return None
@@ -417,13 +503,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
417
503
  LOGGER.debug("Found link %s relative to %s", url, self.path)
418
504
  relative_url: ParseResult = urlparse(url)
419
505
 
420
- if (
421
- not relative_url.scheme
422
- and not relative_url.netloc
423
- and not relative_url.path
424
- and not relative_url.params
425
- and not relative_url.query
426
- ):
506
+ if not relative_url.scheme and not relative_url.netloc and not relative_url.path and not relative_url.params and not relative_url.query:
427
507
  LOGGER.debug("Found local URL: %s", url)
428
508
  if self.options.heading_anchors:
429
509
  # <ac:link ac:anchor="anchor"><ac:link-body>...</ac:link-body></ac:link>
@@ -440,21 +520,16 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
440
520
  link_wrapper.tail = anchor.tail
441
521
  return link_wrapper
442
522
  else:
443
- anchor.attrib["href"] = url
444
523
  return None
445
524
 
446
525
  # convert the relative URL to absolute URL based on the base path value, then look up
447
526
  # the absolute path in the page metadata dictionary to discover the relative path
448
527
  # within Confluence that should be used
449
- absolute_path = (self.base_dir / relative_url.path).resolve(True)
450
- if not str(absolute_path).startswith(str(self.root_dir)):
451
- msg = f"relative URL {url} points to outside root path: {self.root_dir}"
452
- if self.options.ignore_invalid_url:
453
- LOGGER.warning(msg)
454
- anchor.attrib.pop("href")
455
- return None
456
- else:
457
- raise DocumentError(msg)
528
+ absolute_path = (self.base_dir / relative_url.path).resolve()
529
+ if not is_directory_within(absolute_path, self.root_dir):
530
+ anchor.attrib.pop("href")
531
+ self._warn_or_raise(f"relative URL {url} points to outside root path: {self.root_dir}")
532
+ return None
458
533
 
459
534
  link_metadata = self.page_metadata.get(absolute_path)
460
535
  if link_metadata is None:
@@ -467,9 +542,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
467
542
  raise DocumentError(msg)
468
543
 
469
544
  relative_path = os.path.relpath(absolute_path, self.base_dir)
470
- LOGGER.debug(
471
- "found link to page %s with metadata: %s", relative_path, link_metadata
472
- )
545
+ LOGGER.debug("Found link to page %s with metadata: %s", relative_path, link_metadata)
473
546
  self.links.append(url)
474
547
 
475
548
  if self.options.webui_links:
@@ -478,9 +551,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
478
551
  space_key = link_metadata.space_key or self.site_metadata.space_key
479
552
 
480
553
  if space_key is None:
481
- raise DocumentError(
482
- "Confluence space key required for building full web URLs"
483
- )
554
+ raise DocumentError("Confluence space key required for building full web URLs")
484
555
 
485
556
  page_url = f"{self.site_metadata.base_path}spaces/{space_key}/pages/{link_metadata.page_id}/{encode_title(link_metadata.title)}"
486
557
 
@@ -499,34 +570,48 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
499
570
  return None
500
571
 
501
572
  def _transform_image(self, image: ET._Element) -> ET._Element:
573
+ "Inserts an attached or external image."
574
+
502
575
  src = image.attrib.get("src")
503
576
 
504
577
  if not src:
505
578
  raise DocumentError("image lacks `src` attribute")
506
579
 
507
- attributes: dict[str, Any] = {
508
- ET.QName(namespaces["ac"], "align"): "center",
509
- ET.QName(namespaces["ac"], "layout"): "center",
510
- }
580
+ caption = image.attrib.get("alt")
511
581
  width = image.attrib.get("width")
512
- if width is not None:
513
- attributes.update({ET.QName(namespaces["ac"], "width"): width})
514
582
  height = image.attrib.get("height")
515
- if height is not None:
516
- attributes.update({ET.QName(namespaces["ac"], "height"): height})
517
-
518
- caption = image.attrib.get("alt")
583
+ attrs = ImageAttributes(caption, width, height)
519
584
 
520
585
  if is_absolute_url(src):
521
- return self._transform_external_image(src, caption, attributes)
586
+ return self._transform_external_image(src, attrs)
522
587
  else:
523
- return self._transform_attached_image(Path(src), caption, attributes)
588
+ path = Path(src)
589
+
590
+ absolute_path = self._verify_image_path(path)
591
+ if absolute_path is None:
592
+ return self._create_missing(path, caption)
593
+
594
+ if absolute_path.name.endswith(".drawio.png") or absolute_path.name.endswith(".drawio.svg"):
595
+ return self._transform_drawio_image(absolute_path, attrs)
596
+ elif absolute_path.name.endswith(".drawio.xml") or absolute_path.name.endswith(".drawio"):
597
+ self.images.append(absolute_path)
598
+ image_filename = attachment_name(path_relative_to(absolute_path, self.base_dir))
599
+ return self._create_drawio(image_filename, attrs)
600
+ else:
601
+ return self._transform_attached_image(absolute_path, attrs)
524
602
 
525
- def _transform_external_image(
526
- self, url: str, caption: Optional[str], attributes: dict[str, Any]
527
- ) -> ET._Element:
603
+ def _transform_external_image(self, url: str, attrs: ImageAttributes) -> ET._Element:
528
604
  "Emits Confluence Storage Format XHTML for an external image."
529
605
 
606
+ attributes: dict[str, Any] = {
607
+ ET.QName(namespaces["ac"], "align"): "center",
608
+ ET.QName(namespaces["ac"], "layout"): "center",
609
+ }
610
+ if attrs.width is not None:
611
+ attributes.update({ET.QName(namespaces["ac"], "width"): attrs.width})
612
+ if attrs.height is not None:
613
+ attributes.update({ET.QName(namespaces["ac"], "height"): attrs.height})
614
+
530
615
  elements: list[ET._Element] = []
531
616
  elements.append(
532
617
  RI(
@@ -535,23 +620,68 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
535
620
  {ET.QName(namespaces["ri"], "value"): url},
536
621
  )
537
622
  )
538
- if caption is not None:
539
- elements.append(AC("caption", HTML.p(caption)))
623
+ if attrs.caption is not None:
624
+ elements.append(AC("caption", HTML.p(attrs.caption)))
540
625
 
541
626
  return AC("image", attributes, *elements)
542
627
 
543
- def _transform_attached_image(
544
- self, path: Path, caption: Optional[str], attributes: dict[str, Any]
545
- ) -> ET._Element:
546
- "Emits Confluence Storage Format XHTML for an attached image."
628
def _verify_image_path(self, path: Path) -> Optional[Path]:
    """
    Checks whether an image path is safe to use.

    :param path: Path to the image, interpreted relative to the directory of the document.
    :returns: The resolved absolute path, or `None` if the image does not exist or lies outside the root directory
        (and the problem was reported as a warning rather than an exception).
    """

    # anchor the path to the directory of the document, then normalize it
    candidate = self.base_dir.joinpath(path).resolve()

    if candidate.exists():
        if is_directory_within(candidate, self.root_dir):
            return candidate
        self._warn_or_raise(f"path to image {path} points to outside root path {self.root_dir}")
    else:
        self._warn_or_raise(f"path to image {path} does not exist")

    # reached only when the problem was downgraded to a warning
    return None
643
+
644
def _transform_attached_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
    """
    Emits Confluence Storage Format XHTML for an attached raster or vector image.

    :param absolute_path: Absolute path to the image file.
    :param attrs: Caption and dimensions to apply to the image.
    :returns: The element created for the image that is registered as an attachment.
    """

    chosen_path = absolute_path

    # prefer PNG over SVG; Confluence displays SVG in wrong size, and text labels are truncated
    is_vector = absolute_path.name.endswith(".svg")
    if is_vector and self.options.prefer_raster:
        raster_path = absolute_path.with_suffix(".png")
        if raster_path.exists():
            chosen_path = raster_path

    # register the file to be uploaded as an attachment
    self.images.append(chosen_path)
    return self._create_image(chosen_path, attrs)
655
+
656
+ def _transform_drawio_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
657
+ "Emits Confluence Storage Format XHTML for a draw.io image."
658
+
659
+ if not absolute_path.name.endswith(".drawio.png") and not absolute_path.name.endswith(".drawio.svg"):
660
+ raise DocumentError("invalid image format; expected: `*.drawio.png` or `*.drawio.svg`")
661
+
662
+ if self.options.render_drawio:
663
+ return self._transform_attached_image(absolute_path, attrs)
664
+ else:
665
+ # extract embedded editable diagram and upload as *.drawio
666
+ image_data = extract_diagram(absolute_path)
667
+ image_filename = attachment_name(path_relative_to(absolute_path.with_suffix(".xml"), self.base_dir))
668
+ self.embedded_images[image_filename] = image_data
547
669
 
548
- # prefer PNG over SVG; Confluence displays SVG in wrong size, and text labels are truncated
549
- png_file = path.with_suffix(".png")
550
- if path.suffix == ".svg" and (self.base_dir / png_file).exists():
551
- path = png_file
670
+ return self._create_drawio(image_filename, attrs)
552
671
 
553
- self.images.append(path)
554
- image_name = attachment_name(path)
672
+ def _create_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
673
+ "An image embedded into the page, linking to an attachment."
674
+
675
+ image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
676
+
677
+ attributes: dict[str, Any] = {
678
+ ET.QName(namespaces["ac"], "align"): "center",
679
+ ET.QName(namespaces["ac"], "layout"): "center",
680
+ }
681
+ if attrs.width is not None:
682
+ attributes.update({ET.QName(namespaces["ac"], "width"): attrs.width})
683
+ if attrs.height is not None:
684
+ attributes.update({ET.QName(namespaces["ac"], "height"): attrs.height})
555
685
 
556
686
  elements: list[ET._Element] = []
557
687
  elements.append(
@@ -561,12 +691,80 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
561
691
  {ET.QName(namespaces["ri"], "filename"): image_name},
562
692
  )
563
693
  )
564
- if caption is not None:
565
- elements.append(AC("caption", HTML.p(caption)))
694
+ if attrs.caption is not None:
695
+ elements.append(AC("caption", HTML.p(attrs.caption)))
566
696
 
567
697
  return AC("image", attributes, *elements)
568
698
 
569
- def _transform_block(self, code: ET._Element) -> ET._Element:
699
def _create_drawio(self, filename: str, attrs: ImageAttributes) -> ET._Element:
    """
    A draw.io diagram embedded into the page, linking to an attachment.

    :param filename: Name of the attachment that holds the diagram.
    :param attrs: Image attributes; width and height are emitted as macro parameters when set.
    :returns: A structured macro element *drawio* with freshly generated local and macro identifiers.
    """

    def make_parameter(key: str, value: str) -> ET._Element:
        "Creates a macro parameter element with the given name and text."

        return AC(
            "parameter",
            {ET.QName(namespaces["ac"], "name"): key},
            value,
        )

    parameters: list[ET._Element] = [make_parameter("diagramName", filename)]

    # dimensions are optional; emit only those that have been specified
    for key, value in (("width", attrs.width), ("height", attrs.height)):
        if value is not None:
            parameters.append(make_parameter(key, value))

    local_id = str(uuid.uuid4())
    macro_id = str(uuid.uuid4())
    macro_attributes = {
        ET.QName(namespaces["ac"], "name"): "drawio",
        ET.QName(namespaces["ac"], "schema-version"): "1",
        "data-layout": "default",
        ET.QName(namespaces["ac"], "local-id"): local_id,
        ET.QName(namespaces["ac"], "macro-id"): macro_id,
    }
    return AC("structured-macro", macro_attributes, *parameters)
739
+
740
def _create_missing(self, path: Path, caption: Optional[str]) -> ET._Element:
    """
    A warning panel for a missing image.

    :param path: Path to the image as it should be displayed in the panel.
    :param caption: Title for the panel; no title is emitted if `None`.
    :returns: A structured macro element *warning* whose body names the missing image.
    """

    body = AC(
        "rich-text-body",
        {},
        HTML.p("Missing image: ", HTML.code(path.as_posix())),
    )

    content = [body]
    if caption is not None:
        # the title parameter precedes the panel body
        title = AC(
            "parameter",
            {ET.QName(namespaces["ac"], "name"): "title"},
            caption,
        )
        content.insert(0, title)

    macro_attributes = {
        ET.QName(namespaces["ac"], "name"): "warning",
        ET.QName(namespaces["ac"], "schema-version"): "1",
    }
    return AC("structured-macro", macro_attributes, *content)
764
+
765
+ def _transform_code_block(self, code: ET._Element) -> ET._Element:
766
+ "Transforms a code block."
767
+
570
768
  language = code.attrib.get("class")
571
769
  if language:
572
770
  m = re.match("^language-(.*)$", language)
@@ -607,9 +805,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
607
805
  if self.options.render_mermaid:
608
806
  image_data = render_diagram(content, self.options.diagram_output_format)
609
807
  image_hash = hashlib.md5(image_data).hexdigest()
610
- image_filename = attachment_name(
611
- f"embedded_{image_hash}.{self.options.diagram_output_format}"
612
- )
808
+ image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
613
809
  self.embedded_images[image_filename] = image_data
614
810
  return AC(
615
811
  "image",
@@ -630,7 +826,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
630
826
  {
631
827
  ET.QName(namespaces["ac"], "name"): "macro-diagram",
632
828
  ET.QName(namespaces["ac"], "schema-version"): "1",
633
- ET.QName(namespaces["ac"], "data-layout"): "default",
829
+ "data-layout": "default",
634
830
  ET.QName(namespaces["ac"], "local-id"): local_id,
635
831
  ET.QName(namespaces["ac"], "macro-id"): macro_id,
636
832
  },
@@ -657,6 +853,8 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
657
853
  )
658
854
 
659
855
  def _transform_toc(self, code: ET._Element) -> ET._Element:
856
+ "Creates a table of contents, constructed from headings in the document."
857
+
660
858
  return AC(
661
859
  "structured-macro",
662
860
  {
@@ -667,6 +865,19 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
667
865
  AC("parameter", {ET.QName(namespaces["ac"], "name"): "style"}, "default"),
668
866
  )
669
867
 
868
def _transform_listing(self, code: ET._Element) -> ET._Element:
    """
    Creates a list of child pages.

    :param code: The element to be replaced; its content is not inspected.
    :returns: A structured macro element *children* with the parameter `allChildren` set to `true`.
    """

    macro_attributes = {
        ET.QName(namespaces["ac"], "name"): "children",
        ET.QName(namespaces["ac"], "schema-version"): "2",
        "data-layout": "default",
    }
    all_children = AC("parameter", {ET.QName(namespaces["ac"], "name"): "allChildren"}, "true")
    return AC("structured-macro", macro_attributes, all_children)
880
+
670
881
  def _transform_admonition(self, elem: ET._Element) -> ET._Element:
671
882
  """
672
883
  Creates an info, tip, note or warning panel from a Markdown admonition.
@@ -716,6 +927,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
716
927
  )
717
928
 
718
929
  def _transform_github_alert(self, elem: ET._Element) -> ET._Element:
930
+ """
931
+ Creates a GitHub-style panel, normally triggered with a block-quote starting with a capitalized string such as `[!TIP]`.
932
+ """
933
+
719
934
  content = elem[0]
720
935
  if content.text is None:
721
936
  raise DocumentError("empty content")
@@ -744,6 +959,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
744
959
  return self._transform_alert(elem, class_name, skip)
745
960
 
746
961
  def _transform_gitlab_alert(self, elem: ET._Element) -> ET._Element:
962
+ """
963
+ Creates a classic GitLab-style panel.
964
+
965
+ Classic panels are defined with a block-quote and text starting with a capitalized string such as `DISCLAIMER:`.
966
+ This syntax does not use Hugo shortcode.
967
+ """
968
+
747
969
  content = elem[0]
748
970
  if content.text is None:
749
971
  raise DocumentError("empty content")
@@ -769,9 +991,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
769
991
 
770
992
  return self._transform_alert(elem, class_name, skip)
771
993
 
772
- def _transform_alert(
773
- self, elem: ET._Element, class_name: Optional[str], skip: int
774
- ) -> ET._Element:
994
+ def _transform_alert(self, elem: ET._Element, class_name: Optional[str], skip: int) -> ET._Element:
775
995
  """
776
996
  Creates an info, tip, note or warning panel from a GitHub or GitLab alert.
777
997
 
@@ -806,14 +1026,12 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
806
1026
  Creates a collapsed section.
807
1027
 
808
1028
  Transforms
809
- [GitHub collapsed section](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-collapsed-sections) # noqa: E501 # no way to make this link shorter
1029
+ [GitHub collapsed section](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-collapsed-sections)
810
1030
  syntax into the Confluence structured macro *expand*.
811
1031
  """
812
1032
 
813
1033
  if elem[0].tag != "summary":
814
- raise DocumentError(
815
- "expected: `<summary>` as first direct child of `<details>`"
816
- )
1034
+ raise DocumentError("expected: `<summary>` as first direct child of `<details>`")
817
1035
  if elem[0].tail is not None:
818
1036
  raise DocumentError('expected: attribute `markdown="1"` on `<details>`')
819
1037
 
@@ -837,6 +1055,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
837
1055
  )
838
1056
 
839
1057
  def _transform_emoji(self, elem: ET._Element) -> ET._Element:
1058
+ """
1059
+ Inserts an inline emoji character.
1060
+ """
1061
+
840
1062
  shortname = elem.attrib.get("data-emoji-shortname", "")
841
1063
  unicode = elem.attrib.get("data-emoji-unicode", None)
842
1064
  alt = elem.text or ""
@@ -847,7 +1069,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
847
1069
  return AC(
848
1070
  "emoticon",
849
1071
  {
850
- # use "blue-star" as a placeholder name to ensure wiki page loads in timely manner
851
1072
  ET.QName(namespaces["ac"], "name"): shortname,
852
1073
  ET.QName(namespaces["ac"], "emoji-shortname"): f":{shortname}:",
853
1074
  ET.QName(namespaces["ac"], "emoji-id"): unicode,
@@ -855,7 +1076,196 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
855
1076
  },
856
1077
  )
857
1078
 
1079
def _transform_inline_math(self, elem: ET._Element) -> ET._Element:
    """
    Creates an inline LaTeX formula using the Confluence extension "LaTeX Math for Confluence - Math Formula & Equations".

    :param elem: The element whose text is the LaTeX source of the formula.
    :returns: A structured macro element that carries the formula, followed by the text that trailed the original element.
    :raises DocumentError: Raised when the element has no text.
    :see: https://help.narva.net/latex-math-for-confluence/
    """

    content = elem.text
    if not content:
        raise DocumentError("empty inline LaTeX formula")

    LOGGER.debug("Found inline LaTeX formula: %s", content)

    local_id = str(uuid.uuid4())
    macro_id = str(uuid.uuid4())

    # NOTE(review): the macro name is spelled `eazy-...` here but `easy-...` for block formulas;
    # presumably dictated by the extension -- confirm before normalizing
    macro_attributes = {
        ET.QName(namespaces["ac"], "name"): "eazy-math-inline",
        ET.QName(namespaces["ac"], "schema-version"): "1",
        ET.QName(namespaces["ac"], "local-id"): local_id,
        ET.QName(namespaces["ac"], "macro-id"): macro_id,
    }
    body = AC(
        "parameter",
        {ET.QName(namespaces["ac"], "name"): "body"},
        content,
    )
    align = AC("parameter", {ET.QName(namespaces["ac"], "name"): "align"}, "center")

    macro = AC("structured-macro", macro_attributes, body, align)

    # chain sibling text node that immediately follows original element
    macro.tail = elem.tail
    return macro
1111
+
1112
def _transform_block_math(self, elem: ET._Element) -> ET._Element:
    """
    Creates a block-level LaTeX formula using the Confluence extension "LaTeX Math for Confluence - Math Formula & Equations".

    :param elem: The element whose text is the LaTeX source of the formula.
    :returns: A structured macro element that carries the formula.
    :raises DocumentError: Raised when the element has no text.
    :see: https://help.narva.net/latex-math-for-confluence/
    """

    content = elem.text
    if not content:
        raise DocumentError("empty block-level LaTeX formula")

    LOGGER.debug("Found block-level LaTeX formula: %s", content)

    local_id = str(uuid.uuid4())
    macro_id = str(uuid.uuid4())

    macro_attributes = {
        ET.QName(namespaces["ac"], "name"): "easy-math-block",
        ET.QName(namespaces["ac"], "schema-version"): "1",
        "data-layout": "default",
        ET.QName(namespaces["ac"], "local-id"): local_id,
        ET.QName(namespaces["ac"], "macro-id"): macro_id,
    }
    body = AC(
        "parameter",
        {ET.QName(namespaces["ac"], "name"): "body"},
        content,
    )
    align = AC("parameter", {ET.QName(namespaces["ac"], "name"): "align"}, "center")

    return AC("structured-macro", macro_attributes, body, align)
1144
+
1145
def _transform_footnote_ref(self, elem: ET._Element) -> None:
    """
    Transforms a footnote reference.

    ```
    <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
    ```

    The element is modified in place: the attribute `id` and the generated link are removed, and an anchor macro
    (target for the back-link) followed by a link to the footnote definition are appended.

    :param elem: The `<sup>` element of the footnote reference.
    :raises DocumentError: Raised when the element does not have the structure shown above.
    """

    if elem.tag != "sup":
        raise DocumentError("expected: `<sup>` as the HTML element for a footnote reference")

    ref_id = elem.attrib.pop("id", "")
    if not ref_id.startswith("fnref:"):
        raise DocumentError("expected: attribute `id` of format `fnref:NAME` applied on `<sup>` for a footnote reference")
    footnote_ref = ref_id.removeprefix("fnref:")

    # report a malformed reference as a document error rather than letting an `IndexError` escape
    if len(elem) == 0:
        raise DocumentError("expected: `<a>` as a child of `<sup>` for a footnote reference")

    link = elem[0]
    def_href = link.attrib.pop("href", "")
    if not def_href.startswith("#fn:"):
        raise DocumentError("expected: attribute `href` of format `#fn:NAME` applied on `<a>` for a footnote reference")
    footnote_def = def_href.removeprefix("#fn:")

    # label of the footnote (e.g. its ordinal) as displayed in the text
    text = link.text or ""

    # remove link generated by Python-Markdown
    elem.remove(link)

    # build new anchor for footnote reference
    ref_anchor = AC(
        "structured-macro",
        {
            ET.QName(namespaces["ac"], "name"): "anchor",
            ET.QName(namespaces["ac"], "schema-version"): "1",
        },
        AC(
            "parameter",
            {ET.QName(namespaces["ac"], "name"): ""},
            f"footnote-ref-{footnote_ref}",
        ),
    )

    # build new link to footnote definition at the end of page
    def_link = AC(
        "link",
        {
            ET.QName(namespaces["ac"], "anchor"): f"footnote-def-{footnote_def}",
        },
        AC("link-body", ET.CDATA(text)),
    )

    # append children synthesized for Confluence
    elem.append(ref_anchor)
    elem.append(def_link)
1199
+
1200
def _transform_footnote_def(self, elem: ET._Element) -> None:
    """
    Transforms the footnote definition block.

    ```
    <div class="footnote">
        <hr/>
        <ol>
            <li id="fn:NAME">
                <p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p>
            </li>
        </ol>
    </div>
    ```

    Each list item is modified in place: the attribute `id` and the generated back-link are removed, an anchor macro
    is inserted at the start of the paragraph, and a link back to the footnote reference is appended to its end.

    :param elem: The `<div>` element that wraps the footnote definitions.
    :raises DocumentError: Raised when a list item does not have the structure shown above.
    """

    # the second child of the block is the list of definitions (the first is the separator)
    definitions = elem[1]

    for item in definitions:
        item_id = item.attrib.pop("id", "")
        if not item_id.startswith("fn:"):
            raise DocumentError("expected: attribute `id` of format `fn:NAME` applied on `<li>` for a footnote definition")
        def_name = item_id.removeprefix("fn:")

        # NOTE(review): only the first child of the list item is inspected for the back-link;
        # presumably a footnote with several paragraphs places it elsewhere -- verify
        para = item[0]
        backref = para[-1]
        if backref.tag != "a":
            raise DocumentError("expected: `<a>` as the last HTML element in a footnote definition")

        backref_href = backref.attrib.get("href", "")
        if not backref_href.startswith("#fnref:"):
            raise DocumentError("expected: attribute `href` of format `#fnref:NAME` applied on last element `<a>` for a footnote definition")
        ref_name = backref_href.removeprefix("#fnref:")

        # remove back-link generated by Python-Markdown
        para.remove(backref)

        # build new anchor for footnote definition
        target = AC(
            "structured-macro",
            {
                ET.QName(namespaces["ac"], "name"): "anchor",
                ET.QName(namespaces["ac"], "schema-version"): "1",
            },
            AC(
                "parameter",
                {ET.QName(namespaces["ac"], "name"): ""},
                f"footnote-def-{def_name}",
            ),
        )

        # build new link to footnote reference in page body
        back_link = AC(
            "link",
            {
                ET.QName(namespaces["ac"], "anchor"): f"footnote-ref-{ref_name}",
            },
            AC("link-body", ET.CDATA("↩")),
        )

        # put the anchor first; the leading text of the paragraph becomes the text that follows the anchor
        para.insert(0, target)
        target.tail = para.text
        para.text = None
        para.append(back_link)
1263
+
858
1264
  def transform(self, child: ET._Element) -> Optional[ET._Element]:
1265
+ """
1266
+ Transforms an HTML element tree obtained from a Markdown document into a Confluence Storage Format element tree.
1267
+ """
1268
+
859
1269
  # normalize line breaks to regular space in element text
860
1270
  if child.text:
861
1271
  text: str = child.text
@@ -888,6 +1298,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
888
1298
  elif child.tag == "p" and "".join(child.itertext()) in ["[[TOC]]", "[TOC]"]:
889
1299
  return self._transform_toc(child)
890
1300
 
1301
+ # <p>[[_LISTING_]]</p>
1302
+ elif child.tag == "p" and "".join(child.itertext()) in ["[[LISTING]]", "[LISTING]"]:
1303
+ return self._transform_listing(child)
1304
+
891
1305
  # <div class="admonition note">
892
1306
  # <p class="admonition-title">Note</p>
893
1307
  # <p>...</p>
@@ -905,13 +1319,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
905
1319
  # <blockquote>
906
1320
  # <p>[!TIP] ...</p>
907
1321
  # </blockquote>
908
- elif (
909
- child.tag == "blockquote"
910
- and len(child) > 0
911
- and child[0].tag == "p"
912
- and child[0].text is not None
913
- and child[0].text.startswith("[!")
914
- ):
1322
+ elif child.tag == "blockquote" and len(child) > 0 and child[0].tag == "p" and child[0].text is not None and child[0].text.startswith("[!"):
915
1323
  return self._transform_github_alert(child)
916
1324
 
917
1325
  # Alerts in GitLab
@@ -923,9 +1331,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
923
1331
  and len(child) > 0
924
1332
  and child[0].tag == "p"
925
1333
  and child[0].text is not None
926
- and starts_with_any(
927
- child[0].text, ["FLAG:", "NOTE:", "WARNING:", "DISCLAIMER:"]
928
- )
1334
+ and starts_with_any(child[0].text, ["FLAG:", "NOTE:", "WARNING:", "DISCLAIMER:"])
929
1335
  ):
930
1336
  return self._transform_gitlab_alert(child)
931
1337
 
@@ -946,20 +1352,35 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
946
1352
 
947
1353
  # <pre><code class="language-java"> ... </code></pre>
948
1354
  elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code":
949
- return self._transform_block(child[0])
1355
+ return self._transform_code_block(child[0])
950
1356
 
1357
+ # <span data-emoji-shortname="..." data-emoji-unicode="...">...</span>
951
1358
  elif child.tag == "span" and child.attrib.has_key("data-emoji-shortname"):
952
1359
  return self._transform_emoji(child)
953
1360
 
954
- return None
1361
+ # <div class="arithmatex">...</div>
1362
+ elif child.tag == "div" and "arithmatex" in child.attrib.get("class", "").split(" "):
1363
+ return self._transform_block_math(child)
955
1364
 
1365
+ # <span class="arithmatex">...</span>
1366
+ elif child.tag == "span" and "arithmatex" in child.attrib.get("class", "").split(" "):
1367
+ return self._transform_inline_math(child)
956
1368
 
957
- class ConfluenceStorageFormatCleaner(NodeVisitor):
958
- "Removes volatile attributes from a Confluence storage format XHTML document."
1369
+ # <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
1370
+ elif child.tag == "sup" and child.attrib.get("id", "").startswith("fnref:"):
1371
+ self._transform_footnote_ref(child)
1372
+ return None
1373
+
1374
+ # <div class="footnote">
1375
+ # <hr/>
1376
+ # <ol>
1377
+ # <li id="fn:NAME"><p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p></li>
1378
+ # </ol>
1379
+ # </div>
1380
+ elif child.tag == "div" and "footnote" in child.attrib.get("class", "").split(" "):
1381
+ self._transform_footnote_def(child)
1382
+ return None
959
1383
 
960
- def transform(self, child: ET._Element) -> Optional[ET._Element]:
961
- child.attrib.pop(ET.QName(namespaces["ac"], "macro-id"), None)
962
- child.attrib.pop(ET.QName(namespaces["ri"], "version-at-save"), None)
963
1384
  return None
964
1385
 
965
1386
 
@@ -990,6 +1411,8 @@ class ConfluenceDocumentOptions:
990
1411
  :param generated_by: Text to use as the generated-by prompt (or `None` to omit a prompt).
991
1412
  :param root_page_id: Confluence page to assume root page role for publishing a directory of Markdown files.
992
1413
  :param keep_hierarchy: Whether to maintain source directory structure when exporting to Confluence.
1414
+ :param prefer_raster: Whether to choose PNG files over SVG files when available.
1415
+ :param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
993
1416
  :param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
994
1417
  :param diagram_output_format: Target image format for diagrams.
995
1418
  :param webui_links: When true, convert relative URLs to Confluence Web UI links.
@@ -1000,6 +1423,8 @@ class ConfluenceDocumentOptions:
1000
1423
  generated_by: Optional[str] = "This page has been generated with a tool."
1001
1424
  root_page_id: Optional[ConfluencePageID] = None
1002
1425
  keep_hierarchy: bool = False
1426
+ prefer_raster: bool = True
1427
+ render_drawio: bool = False
1003
1428
  render_mermaid: bool = False
1004
1429
  diagram_output_format: Literal["png", "svg"] = "png"
1005
1430
  webui_links: bool = False
@@ -1012,6 +1437,7 @@ class ConversionError(RuntimeError):
1012
1437
  class ConfluenceDocument:
1013
1438
  title: Optional[str]
1014
1439
  labels: Optional[list[str]]
1440
+ properties: Optional[dict[str, JsonType]]
1015
1441
  links: list[str]
1016
1442
  images: list[Path]
1017
1443
 
@@ -1041,9 +1467,7 @@ class ConfluenceDocument:
1041
1467
  else:
1042
1468
  raise PageError("missing Confluence page ID")
1043
1469
 
1044
- return page_id, ConfluenceDocument(
1045
- path, document, options, root_dir, site_metadata, page_metadata
1046
- )
1470
+ return page_id, ConfluenceDocument(path, document, options, root_dir, site_metadata, page_metadata)
1047
1471
 
1048
1472
  def __init__(
1049
1473
  self,
@@ -1086,6 +1510,8 @@ class ConfluenceDocument:
1086
1510
  ConfluenceConverterOptions(
1087
1511
  ignore_invalid_url=self.options.ignore_invalid_url,
1088
1512
  heading_anchors=self.options.heading_anchors,
1513
+ prefer_raster=self.options.prefer_raster,
1514
+ render_drawio=self.options.render_drawio,
1089
1515
  render_mermaid=self.options.render_mermaid,
1090
1516
  diagram_output_format=self.options.diagram_output_format,
1091
1517
  webui_links=self.options.webui_links,
@@ -1102,32 +1528,42 @@ class ConfluenceDocument:
1102
1528
 
1103
1529
  self.title = document.title or converter.toc.get_title()
1104
1530
  self.labels = document.tags
1531
+ self.properties = document.properties
1105
1532
 
1106
1533
  def xhtml(self) -> str:
1107
1534
  return elements_to_string(self.root)
1108
1535
 
1109
1536
 
1110
- def attachment_name(name: Union[Path, str]) -> str:
1537
+ def attachment_name(ref: Union[Path, str]) -> str:
1111
1538
  """
1112
1539
  Safe name for use with attachment uploads.
1113
1540
 
1541
+ Mutates a relative path such that it meets Confluence's attachment naming requirements.
1542
+
1114
1543
  Allowed characters:
1544
+
1115
1545
  * Alphanumeric characters: 0-9, a-z, A-Z
1116
1546
  * Special characters: hyphen (-), underscore (_), period (.)
1117
1547
  """
1118
1548
 
1119
- return re.sub(r"[^\-0-9A-Za-z_.]", "_", str(name))
1549
+ if isinstance(ref, Path):
1550
+ path = ref
1551
+ else:
1552
+ path = Path(ref)
1120
1553
 
1554
+ if path.drive or path.root:
1555
+ raise ValueError(f"required: relative path; got: {ref}")
1121
1556
 
1122
- def sanitize_confluence(html: str) -> str:
1123
- "Generates a sanitized version of a Confluence storage format XHTML document with no volatile attributes."
1557
+ regexp = re.compile(r"[^\-0-9A-Za-z_.]", re.UNICODE)
1124
1558
 
1125
- if not html:
1126
- return ""
1559
+ def replace_part(part: str) -> str:
1560
+ if part == "..":
1561
+ return "PAR"
1562
+ else:
1563
+ return regexp.sub("_", part)
1127
1564
 
1128
- root = elements_from_strings([html])
1129
- ConfluenceStorageFormatCleaner().visit(root)
1130
- return elements_to_string(root)
1565
+ parts = [replace_part(p) for p in path.parts]
1566
+ return Path(*parts).as_posix().replace("/", "_")
1131
1567
 
1132
1568
 
1133
1569
  def elements_to_string(root: ET._Element) -> str:
@@ -1147,14 +1583,11 @@ def _content_to_string(dtd_path: Path, content: str) -> str:
1147
1583
  load_dtd=True,
1148
1584
  )
1149
1585
 
1150
- ns_attr_list = "".join(
1151
- f' xmlns:{key}="{value}"' for key, value in namespaces.items()
1152
- )
1586
+ ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in namespaces.items())
1153
1587
 
1154
1588
  data = [
1155
1589
  '<?xml version="1.0"?>',
1156
- f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path.as_posix()}">'
1157
- f"<root{ns_attr_list}>",
1590
+ f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path.as_posix()}"><root{ns_attr_list}>',
1158
1591
  ]
1159
1592
  data.append(content)
1160
1593
  data.append("</root>")