markdown-to-confluence 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
md2conf/converter.py CHANGED
@@ -8,26 +8,27 @@ Copyright 2022-2025, Levente Hunyadi
8
8
 
9
9
  # mypy: disable-error-code="dict-item"
10
10
 
11
+ import dataclasses
11
12
  import hashlib
12
13
  import importlib.resources as resources
13
14
  import logging
14
15
  import os.path
15
16
  import re
16
17
  import uuid
17
- import xml.etree.ElementTree
18
18
  from dataclasses import dataclass
19
19
  from pathlib import Path
20
20
  from typing import Any, Literal, Optional, Union
21
21
  from urllib.parse import ParseResult, quote_plus, urlparse, urlunparse
22
22
 
23
23
  import lxml.etree as ET
24
- import markdown
25
24
  from lxml.builder import ElementMaker
26
25
  from strong_typing.core import JsonType
27
26
 
27
+ from . import drawio, mermaid
28
28
  from .collection import ConfluencePageCollection
29
+ from .domain import ConfluenceDocumentOptions, ConfluencePageID
29
30
  from .extra import path_relative_to
30
- from .mermaid import render_diagram
31
+ from .markdown import markdown_to_html
31
32
  from .metadata import ConfluenceSiteMetadata
32
33
  from .properties import PageError
33
34
  from .scanner import ScannedDocument, Scanner
@@ -39,6 +40,17 @@ namespaces = {
39
40
  for key, value in namespaces.items():
40
41
  ET.register_namespace(key, value)
41
42
 
43
+
44
+ def get_volatile_attributes() -> list[ET.QName]:
45
+ "Returns a list of volatile attributes that frequently change as a Confluence storage format XHTML document is updated."
46
+
47
+ return [
48
+ ET.QName(namespaces["ac"], "local-id"),
49
+ ET.QName(namespaces["ac"], "macro-id"),
50
+ ET.QName(namespaces["ri"], "version-at-save"),
51
+ ]
52
+
53
+
42
54
  HTML = ElementMaker()
43
55
  AC = ElementMaker(namespace=namespaces["ac"])
44
56
  RI = ElementMaker(namespace=namespaces["ri"])
@@ -88,55 +100,6 @@ def encode_title(text: str) -> str:
88
100
  return quote_plus(text.strip())
89
101
 
90
102
 
91
- def emoji_generator(
92
- index: str,
93
- shortname: str,
94
- alias: Optional[str],
95
- uc: Optional[str],
96
- alt: str,
97
- title: Optional[str],
98
- category: Optional[str],
99
- options: dict[str, Any],
100
- md: markdown.Markdown,
101
- ) -> xml.etree.ElementTree.Element:
102
- name = (alias or shortname).strip(":")
103
- span = xml.etree.ElementTree.Element("span", {"data-emoji-shortname": name})
104
- if uc is not None:
105
- span.attrib["data-emoji-unicode"] = uc
106
-
107
- # convert series of Unicode code point hexadecimal values into characters
108
- span.text = "".join(chr(int(item, base=16)) for item in uc.split("-"))
109
- else:
110
- span.text = alt
111
- return span
112
-
113
-
114
- def markdown_to_html(content: str) -> str:
115
- return markdown.markdown(
116
- content,
117
- extensions=[
118
- "admonition",
119
- "markdown.extensions.tables",
120
- # "markdown.extensions.fenced_code",
121
- "pymdownx.emoji",
122
- "pymdownx.highlight", # required by `pymdownx.superfences`
123
- "pymdownx.magiclink",
124
- "pymdownx.superfences",
125
- "pymdownx.tilde",
126
- "sane_lists",
127
- "md_in_html",
128
- ],
129
- extension_configs={
130
- "pymdownx.emoji": {
131
- "emoji_generator": emoji_generator,
132
- },
133
- "pymdownx.highlight": {
134
- "use_pygments": False,
135
- },
136
- },
137
- )
138
-
139
-
140
103
  def _elements_from_strings(dtd_path: Path, items: list[str]) -> ET._Element:
141
104
  """
142
105
  Creates a fragment of several XML nodes from their string representation wrapped in a root element.
@@ -285,8 +248,8 @@ def title_to_identifier(title: str) -> str:
285
248
  "Converts a section heading title to a GitHub-style Markdown same-page anchor."
286
249
 
287
250
  s = title.strip().lower()
288
- s = re.sub("[^ A-Za-z0-9]", "", s)
289
- s = s.replace(" ", "-")
251
+ s = re.sub(r"[^\sA-Za-z0-9_\-]", "", s)
252
+ s = re.sub(r"\s+", "-", s)
290
253
  return s
291
254
 
292
255
 
@@ -296,6 +259,13 @@ def element_to_text(node: ET._Element) -> str:
296
259
  return "".join(node.itertext()).strip()
297
260
 
298
261
 
262
+ @dataclass
263
+ class ImageAttributes:
264
+ caption: Optional[str]
265
+ width: Optional[str]
266
+ height: Optional[str]
267
+
268
+
299
269
  @dataclass
300
270
  class TableOfContentsEntry:
301
271
  level: int
@@ -346,6 +316,8 @@ class ConfluenceConverterOptions:
346
316
  plain text; when false, raise an exception.
347
317
  :param heading_anchors: When true, emit a structured macro *anchor* for each section heading using GitHub
348
318
  conversion rules for the identifier.
319
+ :param prefer_raster: Whether to choose PNG files over SVG files when available.
320
+ :param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
349
321
  :param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
350
322
  :param diagram_output_format: Target image format for diagrams.
351
323
  :param webui_links: When true, convert relative URLs to Confluence Web UI links.
@@ -353,13 +325,15 @@ class ConfluenceConverterOptions:
353
325
 
354
326
  ignore_invalid_url: bool = False
355
327
  heading_anchors: bool = False
328
+ prefer_raster: bool = True
329
+ render_drawio: bool = False
356
330
  render_mermaid: bool = False
357
331
  diagram_output_format: Literal["png", "svg"] = "png"
358
332
  webui_links: bool = False
359
333
 
360
334
 
361
335
  class ConfluenceStorageFormatConverter(NodeVisitor):
362
- "Transforms a plain HTML tree into the Confluence storage format."
336
+ "Transforms a plain HTML tree into Confluence Storage Format."
363
337
 
364
338
  options: ConfluenceConverterOptions
365
339
  path: Path
@@ -397,6 +371,8 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
397
371
  self.page_metadata = page_metadata
398
372
 
399
373
  def _transform_heading(self, heading: ET._Element) -> None:
374
+ "Adds anchors to headings in the same document (if *heading anchors* is enabled)."
375
+
400
376
  for e in heading:
401
377
  self.visit(e)
402
378
 
@@ -427,6 +403,14 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
427
403
  raise DocumentError(msg)
428
404
 
429
405
  def _transform_link(self, anchor: ET._Element) -> Optional[ET._Element]:
406
+ """
407
+ Transforms links (HTML anchor `<a>`).
408
+
409
+ * Absolute URLs are left intact.
410
+ * Links to headings in the same document are transformed into `<ac:link>` (if *heading anchors* is enabled).
411
+ * Links to documents in the source hierarchy are mapped into full Confluence URLs.
412
+ """
413
+
430
414
  url = anchor.attrib.get("href")
431
415
  if url is None or is_absolute_url(url):
432
416
  return None
@@ -451,7 +435,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
451
435
  link_wrapper.tail = anchor.tail
452
436
  return link_wrapper
453
437
  else:
454
- anchor.attrib["href"] = url
455
438
  return None
456
439
 
457
440
  # convert the relative URL to absolute URL based on the base path value, then look up
@@ -474,7 +457,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
474
457
  raise DocumentError(msg)
475
458
 
476
459
  relative_path = os.path.relpath(absolute_path, self.base_dir)
477
- LOGGER.debug("found link to page %s with metadata: %s", relative_path, link_metadata)
460
+ LOGGER.debug("Found link to page %s with metadata: %s", relative_path, link_metadata)
478
461
  self.links.append(url)
479
462
 
480
463
  if self.options.webui_links:
@@ -502,32 +485,46 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
502
485
  return None
503
486
 
504
487
  def _transform_image(self, image: ET._Element) -> ET._Element:
488
+ "Inserts an attached or external image."
489
+
505
490
  src = image.attrib.get("src")
506
491
 
507
492
  if not src:
508
493
  raise DocumentError("image lacks `src` attribute")
509
494
 
510
- attributes: dict[str, Any] = {
511
- ET.QName(namespaces["ac"], "align"): "center",
512
- ET.QName(namespaces["ac"], "layout"): "center",
513
- }
495
+ caption = image.attrib.get("alt")
514
496
  width = image.attrib.get("width")
515
- if width is not None:
516
- attributes.update({ET.QName(namespaces["ac"], "width"): width})
517
497
  height = image.attrib.get("height")
518
- if height is not None:
519
- attributes.update({ET.QName(namespaces["ac"], "height"): height})
520
-
521
- caption = image.attrib.get("alt")
498
+ attrs = ImageAttributes(caption, width, height)
522
499
 
523
500
  if is_absolute_url(src):
524
- return self._transform_external_image(src, caption, attributes)
501
+ return self._transform_external_image(src, attrs)
525
502
  else:
526
- return self._transform_attached_image(Path(src), caption, attributes)
503
+ path = Path(src)
504
+
505
+ absolute_path = self._verify_image_path(path)
506
+ if absolute_path is None:
507
+ return self._create_missing(path, caption)
527
508
 
528
- def _transform_external_image(self, url: str, caption: Optional[str], attributes: dict[str, Any]) -> ET._Element:
509
+ if absolute_path.name.endswith(".drawio.png") or absolute_path.name.endswith(".drawio.svg"):
510
+ return self._transform_drawio_image(absolute_path, attrs)
511
+ elif absolute_path.name.endswith(".drawio.xml") or absolute_path.name.endswith(".drawio"):
512
+ return self._transform_drawio(absolute_path, attrs)
513
+ else:
514
+ return self._transform_attached_image(absolute_path, attrs)
515
+
516
+ def _transform_external_image(self, url: str, attrs: ImageAttributes) -> ET._Element:
529
517
  "Emits Confluence Storage Format XHTML for an external image."
530
518
 
519
+ attributes: dict[str, Any] = {
520
+ ET.QName(namespaces["ac"], "align"): "center",
521
+ ET.QName(namespaces["ac"], "layout"): "center",
522
+ }
523
+ if attrs.width is not None:
524
+ attributes.update({ET.QName(namespaces["ac"], "width"): attrs.width})
525
+ if attrs.height is not None:
526
+ attributes.update({ET.QName(namespaces["ac"], "height"): attrs.height})
527
+
531
528
  elements: list[ET._Element] = []
532
529
  elements.append(
533
530
  RI(
@@ -536,33 +533,84 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
536
533
  {ET.QName(namespaces["ri"], "value"): url},
537
534
  )
538
535
  )
539
- if caption is not None:
540
- elements.append(AC("caption", HTML.p(caption)))
536
+ if attrs.caption is not None:
537
+ elements.append(AC("caption", HTML.p(attrs.caption)))
541
538
 
542
539
  return AC("image", attributes, *elements)
543
540
 
544
- def _transform_attached_image(self, path: Path, caption: Optional[str], attributes: dict[str, Any]) -> ET._Element:
545
- "Emits Confluence Storage Format XHTML for an attached image."
541
+ def _verify_image_path(self, path: Path) -> Optional[Path]:
542
+ "Checks whether an image path is safe to use."
546
543
 
547
544
  # resolve relative path into absolute path w.r.t. base dir
548
545
  absolute_path = (self.base_dir / path).resolve()
549
546
 
550
- if absolute_path.exists():
547
+ if not absolute_path.exists():
548
+ self._warn_or_raise(f"path to image {path} does not exist")
549
+ return None
550
+
551
+ if not is_directory_within(absolute_path, self.root_dir):
552
+ self._warn_or_raise(f"path to image {path} points to outside root path {self.root_dir}")
553
+ return None
554
+
555
+ return absolute_path
556
+
557
+ def _transform_attached_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
558
+ "Emits Confluence Storage Format XHTML for an attached raster or vector image."
559
+
560
+ if self.options.prefer_raster and absolute_path.name.endswith(".svg"):
551
561
  # prefer PNG over SVG; Confluence displays SVG in wrong size, and text labels are truncated
552
- if absolute_path.suffix == ".svg":
553
- png_file = absolute_path.with_suffix(".png")
554
- if png_file.exists():
555
- absolute_path = png_file
556
-
557
- if is_directory_within(absolute_path, self.root_dir):
558
- self.images.append(absolute_path)
559
- image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
560
- else:
561
- image_name = ""
562
- self._warn_or_raise(f"path to image {path} points to outside root path {self.root_dir}")
562
+ png_file = absolute_path.with_suffix(".png")
563
+ if png_file.exists():
564
+ absolute_path = png_file
565
+
566
+ self.images.append(absolute_path)
567
+ image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
568
+ return self._create_attached_image(image_name, attrs)
569
+
570
+ def _transform_drawio(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
571
+ "Emits Confluence Storage Format XHTML for a draw.io diagram."
572
+
573
+ if not absolute_path.name.endswith(".drawio.xml") and not absolute_path.name.endswith(".drawio"):
574
+ raise DocumentError("invalid image format; expected: `*.drawio.xml` or `*.drawio`")
575
+
576
+ if self.options.render_drawio:
577
+ image_data = drawio.render_diagram(absolute_path, self.options.diagram_output_format)
578
+ image_hash = hashlib.md5(image_data).hexdigest()
579
+ image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
580
+ self.embedded_images[image_filename] = image_data
581
+ return self._create_attached_image(image_filename, attrs)
563
582
  else:
564
- image_name = ""
565
- self._warn_or_raise(f"path to image {path} does not exist")
583
+ self.images.append(absolute_path)
584
+ image_filename = attachment_name(path_relative_to(absolute_path, self.base_dir))
585
+ return self._create_drawio(image_filename, attrs)
586
+
587
+ def _transform_drawio_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
588
+ "Emits Confluence Storage Format XHTML for a draw.io diagram embedded in a PNG or SVG image."
589
+
590
+ if not absolute_path.name.endswith(".drawio.png") and not absolute_path.name.endswith(".drawio.svg"):
591
+ raise DocumentError("invalid image format; expected: `*.drawio.png` or `*.drawio.svg`")
592
+
593
+ if self.options.render_drawio:
594
+ return self._transform_attached_image(absolute_path, attrs)
595
+ else:
596
+ # extract embedded editable diagram and upload as *.drawio
597
+ image_data = drawio.extract_diagram(absolute_path)
598
+ image_filename = attachment_name(path_relative_to(absolute_path.with_suffix(".xml"), self.base_dir))
599
+ self.embedded_images[image_filename] = image_data
600
+
601
+ return self._create_drawio(image_filename, attrs)
602
+
603
+ def _create_attached_image(self, image_name: str, attrs: ImageAttributes) -> ET._Element:
604
+ "An image embedded into the page, linking to an attachment."
605
+
606
+ attributes: dict[str, Any] = {
607
+ ET.QName(namespaces["ac"], "align"): "center",
608
+ ET.QName(namespaces["ac"], "layout"): "center",
609
+ }
610
+ if attrs.width is not None:
611
+ attributes.update({ET.QName(namespaces["ac"], "width"): attrs.width})
612
+ if attrs.height is not None:
613
+ attributes.update({ET.QName(namespaces["ac"], "height"): attrs.height})
566
614
 
567
615
  elements: list[ET._Element] = []
568
616
  elements.append(
@@ -572,12 +620,80 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
572
620
  {ET.QName(namespaces["ri"], "filename"): image_name},
573
621
  )
574
622
  )
575
- if caption is not None:
576
- elements.append(AC("caption", HTML.p(caption)))
623
+ if attrs.caption is not None:
624
+ elements.append(AC("caption", HTML.p(attrs.caption)))
577
625
 
578
626
  return AC("image", attributes, *elements)
579
627
 
580
- def _transform_block(self, code: ET._Element) -> ET._Element:
628
+ def _create_drawio(self, filename: str, attrs: ImageAttributes) -> ET._Element:
629
+ "A draw.io diagram embedded into the page, linking to an attachment."
630
+
631
+ parameters: list[ET._Element] = [
632
+ AC(
633
+ "parameter",
634
+ {ET.QName(namespaces["ac"], "name"): "diagramName"},
635
+ filename,
636
+ ),
637
+ ]
638
+ if attrs.width is not None:
639
+ parameters.append(
640
+ AC(
641
+ "parameter",
642
+ {ET.QName(namespaces["ac"], "name"): "width"},
643
+ attrs.width,
644
+ ),
645
+ )
646
+ if attrs.height is not None:
647
+ parameters.append(
648
+ AC(
649
+ "parameter",
650
+ {ET.QName(namespaces["ac"], "name"): "height"},
651
+ attrs.height,
652
+ ),
653
+ )
654
+
655
+ local_id = str(uuid.uuid4())
656
+ macro_id = str(uuid.uuid4())
657
+ return AC(
658
+ "structured-macro",
659
+ {
660
+ ET.QName(namespaces["ac"], "name"): "drawio",
661
+ ET.QName(namespaces["ac"], "schema-version"): "1",
662
+ "data-layout": "default",
663
+ ET.QName(namespaces["ac"], "local-id"): local_id,
664
+ ET.QName(namespaces["ac"], "macro-id"): macro_id,
665
+ },
666
+ *parameters,
667
+ )
668
+
669
+ def _create_missing(self, path: Path, caption: Optional[str]) -> ET._Element:
670
+ "A warning panel for a missing image."
671
+
672
+ message = HTML.p("Missing image: ", HTML.code(path.as_posix()))
673
+ if caption is not None:
674
+ content = [
675
+ AC(
676
+ "parameter",
677
+ {ET.QName(namespaces["ac"], "name"): "title"},
678
+ caption,
679
+ ),
680
+ AC("rich-text-body", {}, message),
681
+ ]
682
+ else:
683
+ content = [AC("rich-text-body", {}, message)]
684
+
685
+ return AC(
686
+ "structured-macro",
687
+ {
688
+ ET.QName(namespaces["ac"], "name"): "warning",
689
+ ET.QName(namespaces["ac"], "schema-version"): "1",
690
+ },
691
+ *content,
692
+ )
693
+
694
+ def _transform_code_block(self, code: ET._Element) -> ET._Element:
695
+ "Transforms a code block."
696
+
581
697
  language = code.attrib.get("class")
582
698
  if language:
583
699
  m = re.match("^language-(.*)$", language)
@@ -616,21 +732,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
616
732
  "Transforms a Mermaid diagram code block."
617
733
 
618
734
  if self.options.render_mermaid:
619
- image_data = render_diagram(content, self.options.diagram_output_format)
735
+ image_data = mermaid.render_diagram(content, self.options.diagram_output_format)
620
736
  image_hash = hashlib.md5(image_data).hexdigest()
621
737
  image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
622
738
  self.embedded_images[image_filename] = image_data
623
- return AC(
624
- "image",
625
- {
626
- ET.QName(namespaces["ac"], "align"): "center",
627
- ET.QName(namespaces["ac"], "layout"): "center",
628
- },
629
- RI(
630
- "attachment",
631
- {ET.QName(namespaces["ri"], "filename"): image_filename},
632
- ),
633
- )
739
+ return self._create_attached_image(image_filename, ImageAttributes(None, None, None))
634
740
  else:
635
741
  local_id = str(uuid.uuid4())
636
742
  macro_id = str(uuid.uuid4())
@@ -639,7 +745,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
639
745
  {
640
746
  ET.QName(namespaces["ac"], "name"): "macro-diagram",
641
747
  ET.QName(namespaces["ac"], "schema-version"): "1",
642
- ET.QName(namespaces["ac"], "data-layout"): "default",
748
+ "data-layout": "default",
643
749
  ET.QName(namespaces["ac"], "local-id"): local_id,
644
750
  ET.QName(namespaces["ac"], "macro-id"): macro_id,
645
751
  },
@@ -666,6 +772,8 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
666
772
  )
667
773
 
668
774
  def _transform_toc(self, code: ET._Element) -> ET._Element:
775
+ "Creates a table of contents, constructed from headings in the document."
776
+
669
777
  return AC(
670
778
  "structured-macro",
671
779
  {
@@ -676,6 +784,19 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
676
784
  AC("parameter", {ET.QName(namespaces["ac"], "name"): "style"}, "default"),
677
785
  )
678
786
 
787
+ def _transform_listing(self, code: ET._Element) -> ET._Element:
788
+ "Creates a list of child pages."
789
+
790
+ return AC(
791
+ "structured-macro",
792
+ {
793
+ ET.QName(namespaces["ac"], "name"): "children",
794
+ ET.QName(namespaces["ac"], "schema-version"): "2",
795
+ "data-layout": "default",
796
+ },
797
+ AC("parameter", {ET.QName(namespaces["ac"], "name"): "allChildren"}, "true"),
798
+ )
799
+
679
800
  def _transform_admonition(self, elem: ET._Element) -> ET._Element:
680
801
  """
681
802
  Creates an info, tip, note or warning panel from a Markdown admonition.
@@ -725,6 +846,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
725
846
  )
726
847
 
727
848
  def _transform_github_alert(self, elem: ET._Element) -> ET._Element:
849
+ """
850
+ Creates a GitHub-style panel, normally triggered with a block-quote starting with a capitalized string such as `[!TIP]`.
851
+ """
852
+
728
853
  content = elem[0]
729
854
  if content.text is None:
730
855
  raise DocumentError("empty content")
@@ -753,6 +878,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
753
878
  return self._transform_alert(elem, class_name, skip)
754
879
 
755
880
  def _transform_gitlab_alert(self, elem: ET._Element) -> ET._Element:
881
+ """
882
+ Creates a classic GitLab-style panel.
883
+
884
+ Classic panels are defined with a block-quote and text starting with a capitalized string such as `DISCLAIMER:`.
885
+ This syntax does not use Hugo shortcode.
886
+ """
887
+
756
888
  content = elem[0]
757
889
  if content.text is None:
758
890
  raise DocumentError("empty content")
@@ -842,6 +974,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
842
974
  )
843
975
 
844
976
  def _transform_emoji(self, elem: ET._Element) -> ET._Element:
977
+ """
978
+ Inserts an inline emoji character.
979
+ """
980
+
845
981
  shortname = elem.attrib.get("data-emoji-shortname", "")
846
982
  unicode = elem.attrib.get("data-emoji-unicode", None)
847
983
  alt = elem.text or ""
@@ -852,7 +988,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
852
988
  return AC(
853
989
  "emoticon",
854
990
  {
855
- # use "blue-star" as a placeholder name to ensure wiki page loads in timely manner
856
991
  ET.QName(namespaces["ac"], "name"): shortname,
857
992
  ET.QName(namespaces["ac"], "emoji-shortname"): f":{shortname}:",
858
993
  ET.QName(namespaces["ac"], "emoji-id"): unicode,
@@ -860,7 +995,196 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
860
995
  },
861
996
  )
862
997
 
998
+ def _transform_inline_math(self, elem: ET._Element) -> ET._Element:
999
+ """
1000
+ Creates an inline LaTeX formula using the Confluence extension "LaTeX Math for Confluence - Math Formula & Equations".
1001
+
1002
+ :see: https://help.narva.net/latex-math-for-confluence/
1003
+ """
1004
+
1005
+ content = elem.text or ""
1006
+ if not content:
1007
+ raise DocumentError("empty inline LaTeX formula")
1008
+
1009
+ LOGGER.debug("Found inline LaTeX formula: %s", content)
1010
+
1011
+ local_id = str(uuid.uuid4())
1012
+ macro_id = str(uuid.uuid4())
1013
+ macro = AC(
1014
+ "structured-macro",
1015
+ {
1016
+ ET.QName(namespaces["ac"], "name"): "eazy-math-inline",
1017
+ ET.QName(namespaces["ac"], "schema-version"): "1",
1018
+ ET.QName(namespaces["ac"], "local-id"): local_id,
1019
+ ET.QName(namespaces["ac"], "macro-id"): macro_id,
1020
+ },
1021
+ AC(
1022
+ "parameter",
1023
+ {ET.QName(namespaces["ac"], "name"): "body"},
1024
+ content,
1025
+ ),
1026
+ AC("parameter", {ET.QName(namespaces["ac"], "name"): "align"}, "center"),
1027
+ )
1028
+ macro.tail = elem.tail # chain sibling text node that immediately follows original element
1029
+ return macro
1030
+
1031
+ def _transform_block_math(self, elem: ET._Element) -> ET._Element:
1032
+ """
1033
+ Creates a block-level LaTeX formula using the Confluence extension "LaTeX Math for Confluence - Math Formula & Equations".
1034
+
1035
+ :see: https://help.narva.net/latex-math-for-confluence/
1036
+ """
1037
+
1038
+ content = elem.text or ""
1039
+ if not content:
1040
+ raise DocumentError("empty block-level LaTeX formula")
1041
+
1042
+ LOGGER.debug("Found block-level LaTeX formula: %s", content)
1043
+
1044
+ local_id = str(uuid.uuid4())
1045
+ macro_id = str(uuid.uuid4())
1046
+
1047
+ return AC(
1048
+ "structured-macro",
1049
+ {
1050
+ ET.QName(namespaces["ac"], "name"): "easy-math-block",
1051
+ ET.QName(namespaces["ac"], "schema-version"): "1",
1052
+ "data-layout": "default",
1053
+ ET.QName(namespaces["ac"], "local-id"): local_id,
1054
+ ET.QName(namespaces["ac"], "macro-id"): macro_id,
1055
+ },
1056
+ AC(
1057
+ "parameter",
1058
+ {ET.QName(namespaces["ac"], "name"): "body"},
1059
+ content,
1060
+ ),
1061
+ AC("parameter", {ET.QName(namespaces["ac"], "name"): "align"}, "center"),
1062
+ )
1063
+
1064
+ def _transform_footnote_ref(self, elem: ET._Element) -> None:
1065
+ """
1066
+ Transforms a footnote reference.
1067
+
1068
+ ```
1069
+ <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
1070
+ ```
1071
+ """
1072
+
1073
+ if elem.tag != "sup":
1074
+ raise DocumentError("expected: `<sup>` as the HTML element for a footnote reference")
1075
+
1076
+ ref_id = elem.attrib.pop("id", "")
1077
+ if not ref_id.startswith("fnref:"):
1078
+ raise DocumentError("expected: attribute `id` of format `fnref:NAME` applied on `<sup>` for a footnote reference")
1079
+ footnote_ref = ref_id.removeprefix("fnref:")
1080
+
1081
+ link = elem[0]
1082
+ def_href = link.attrib.pop("href", "")
1083
+ if not def_href.startswith("#fn:"):
1084
+ raise DocumentError("expected: attribute `href` of format `#fn:NAME` applied on `<a>` for a footnote reference")
1085
+ footnote_def = def_href.removeprefix("#fn:")
1086
+
1087
+ text = link.text or ""
1088
+
1089
+ # remove link generated by Python-Markdown
1090
+ elem.remove(link)
1091
+
1092
+ # build new anchor for footnote reference
1093
+ ref_anchor = AC(
1094
+ "structured-macro",
1095
+ {
1096
+ ET.QName(namespaces["ac"], "name"): "anchor",
1097
+ ET.QName(namespaces["ac"], "schema-version"): "1",
1098
+ },
1099
+ AC(
1100
+ "parameter",
1101
+ {ET.QName(namespaces["ac"], "name"): ""},
1102
+ f"footnote-ref-{footnote_ref}",
1103
+ ),
1104
+ )
1105
+
1106
+ # build new link to footnote definition at the end of page
1107
+ def_link = AC(
1108
+ "link",
1109
+ {
1110
+ ET.QName(namespaces["ac"], "anchor"): f"footnote-def-{footnote_def}",
1111
+ },
1112
+ AC("link-body", ET.CDATA(text)),
1113
+ )
1114
+
1115
+ # append children synthesized for Confluence
1116
+ elem.append(ref_anchor)
1117
+ elem.append(def_link)
1118
+
1119
+ def _transform_footnote_def(self, elem: ET._Element) -> None:
1120
+ """
1121
+ Transforms the footnote definition block.
1122
+
1123
+ ```
1124
+ <div class="footnote">
1125
+ <hr/>
1126
+ <ol>
1127
+ <li id="fn:NAME">
1128
+ <p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p>
1129
+ </li>
1130
+ </ol>
1131
+ </div>
1132
+ ```
1133
+ """
1134
+
1135
+ for list_item in elem[1]:
1136
+ def_id = list_item.attrib.pop("id", "")
1137
+ if not def_id.startswith("fn:"):
1138
+ raise DocumentError("expected: attribute `id` of format `fn:NAME` applied on `<li>` for a footnote definition")
1139
+ footnote_def = def_id.removeprefix("fn:")
1140
+
1141
+ paragraph = list_item[0]
1142
+ ref_anchor = paragraph[-1]
1143
+ if ref_anchor.tag != "a":
1144
+ raise DocumentError("expected: `<a>` as the last HTML element in a footnote definition")
1145
+
1146
+ ref_href = ref_anchor.attrib.get("href", "")
1147
+ if not ref_href.startswith("#fnref:"):
1148
+ raise DocumentError("expected: attribute `href` of format `#fnref:NAME` applied on last element `<a>` for a footnote definition")
1149
+ footnote_ref = ref_href.removeprefix("#fnref:")
1150
+
1151
+ # remove back-link generated by Python-Markdown
1152
+ paragraph.remove(ref_anchor)
1153
+
1154
+ # build new anchor for footnote definition
1155
+ def_anchor = AC(
1156
+ "structured-macro",
1157
+ {
1158
+ ET.QName(namespaces["ac"], "name"): "anchor",
1159
+ ET.QName(namespaces["ac"], "schema-version"): "1",
1160
+ },
1161
+ AC(
1162
+ "parameter",
1163
+ {ET.QName(namespaces["ac"], "name"): ""},
1164
+ f"footnote-def-{footnote_def}",
1165
+ ),
1166
+ )
1167
+
1168
+ # build new link to footnote reference in page body
1169
+ ref_link = AC(
1170
+ "link",
1171
+ {
1172
+ ET.QName(namespaces["ac"], "anchor"): f"footnote-ref-{footnote_ref}",
1173
+ },
1174
+ AC("link-body", ET.CDATA("↩")),
1175
+ )
1176
+
1177
+ # append children synthesized for Confluence
1178
+ paragraph.insert(0, def_anchor)
1179
+ def_anchor.tail = paragraph.text
1180
+ paragraph.text = None
1181
+ paragraph.append(ref_link)
1182
+
863
1183
  def transform(self, child: ET._Element) -> Optional[ET._Element]:
1184
+ """
1185
+ Transforms an HTML element tree obtained from a Markdown document into a Confluence Storage Format element tree.
1186
+ """
1187
+
864
1188
  # normalize line breaks to regular space in element text
865
1189
  if child.text:
866
1190
  text: str = child.text
@@ -893,6 +1217,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
893
1217
  elif child.tag == "p" and "".join(child.itertext()) in ["[[TOC]]", "[TOC]"]:
894
1218
  return self._transform_toc(child)
895
1219
 
1220
+ # <p>[[_LISTING_]]</p>
1221
+ elif child.tag == "p" and "".join(child.itertext()) in ["[[LISTING]]", "[LISTING]"]:
1222
+ return self._transform_listing(child)
1223
+
896
1224
  # <div class="admonition note">
897
1225
  # <p class="admonition-title">Note</p>
898
1226
  # <p>...</p>
@@ -943,20 +1271,35 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
943
1271
 
944
1272
  # <pre><code class="language-java"> ... </code></pre>
945
1273
  elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code":
946
- return self._transform_block(child[0])
1274
+ return self._transform_code_block(child[0])
947
1275
 
1276
+ # <span data-emoji-shortname="..." data-emoji-unicode="...">...</span>
948
1277
  elif child.tag == "span" and child.attrib.has_key("data-emoji-shortname"):
949
1278
  return self._transform_emoji(child)
950
1279
 
951
- return None
1280
+ # <div class="arithmatex">...</div>
1281
+ elif child.tag == "div" and "arithmatex" in child.attrib.get("class", "").split(" "):
1282
+ return self._transform_block_math(child)
952
1283
 
1284
+ # <span class="arithmatex">...</span>
1285
+ elif child.tag == "span" and "arithmatex" in child.attrib.get("class", "").split(" "):
1286
+ return self._transform_inline_math(child)
953
1287
 
954
- class ConfluenceStorageFormatCleaner(NodeVisitor):
955
- "Removes volatile attributes from a Confluence storage format XHTML document."
1288
+ # <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
1289
+ elif child.tag == "sup" and child.attrib.get("id", "").startswith("fnref:"):
1290
+ self._transform_footnote_ref(child)
1291
+ return None
1292
+
1293
+ # <div class="footnote">
1294
+ # <hr/>
1295
+ # <ol>
1296
+ # <li id="fn:NAME"><p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p></li>
1297
+ # </ol>
1298
+ # </div>
1299
+ elif child.tag == "div" and "footnote" in child.attrib.get("class", "").split(" "):
1300
+ self._transform_footnote_def(child)
1301
+ return None
956
1302
 
957
- def transform(self, child: ET._Element) -> Optional[ET._Element]:
958
- child.attrib.pop(ET.QName(namespaces["ac"], "macro-id"), None)
959
- child.attrib.pop(ET.QName(namespaces["ri"], "version-at-save"), None)
960
1303
  return None
961
1304
 
962
1305
 
@@ -964,44 +1307,6 @@ class DocumentError(RuntimeError):
964
1307
  "Raised when a converted Markdown document has an unexpected element or attribute."
965
1308
 
966
1309
 
967
- @dataclass
968
- class ConfluencePageID:
969
- page_id: str
970
-
971
-
972
- @dataclass
973
- class ConfluenceQualifiedID:
974
- page_id: str
975
- space_key: str
976
-
977
-
978
- @dataclass
979
- class ConfluenceDocumentOptions:
980
- """
981
- Options that control the generated page content.
982
-
983
- :param ignore_invalid_url: When true, ignore invalid URLs in input, emit a warning and replace the anchor with
984
- plain text; when false, raise an exception.
985
- :param heading_anchors: When true, emit a structured macro *anchor* for each section heading using GitHub
986
- conversion rules for the identifier.
987
- :param generated_by: Text to use as the generated-by prompt (or `None` to omit a prompt).
988
- :param root_page_id: Confluence page to assume root page role for publishing a directory of Markdown files.
989
- :param keep_hierarchy: Whether to maintain source directory structure when exporting to Confluence.
990
- :param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
991
- :param diagram_output_format: Target image format for diagrams.
992
- :param webui_links: When true, convert relative URLs to Confluence Web UI links.
993
- """
994
-
995
- ignore_invalid_url: bool = False
996
- heading_anchors: bool = False
997
- generated_by: Optional[str] = "This page has been generated with a tool."
998
- root_page_id: Optional[ConfluencePageID] = None
999
- keep_hierarchy: bool = False
1000
- render_mermaid: bool = False
1001
- diagram_output_format: Literal["png", "svg"] = "png"
1002
- webui_links: bool = False
1003
-
1004
-
1005
1310
  class ConversionError(RuntimeError):
1006
1311
  "Raised when a Markdown document cannot be converted to Confluence Storage Format."
1007
1312
 
@@ -1079,13 +1384,7 @@ class ConfluenceDocument:
1079
1384
  raise ConversionError(path) from ex
1080
1385
 
1081
1386
  converter = ConfluenceStorageFormatConverter(
1082
- ConfluenceConverterOptions(
1083
- ignore_invalid_url=self.options.ignore_invalid_url,
1084
- heading_anchors=self.options.heading_anchors,
1085
- render_mermaid=self.options.render_mermaid,
1086
- diagram_output_format=self.options.diagram_output_format,
1087
- webui_links=self.options.webui_links,
1088
- ),
1387
+ ConfluenceConverterOptions(**{field.name: getattr(self.options, field.name) for field in dataclasses.fields(ConfluenceConverterOptions)}),
1089
1388
  path,
1090
1389
  root_dir,
1091
1390
  site_metadata,
@@ -1136,17 +1435,6 @@ def attachment_name(ref: Union[Path, str]) -> str:
1136
1435
  return Path(*parts).as_posix().replace("/", "_")
1137
1436
 
1138
1437
 
1139
- def sanitize_confluence(html: str) -> str:
1140
- "Generates a sanitized version of a Confluence storage format XHTML document with no volatile attributes."
1141
-
1142
- if not html:
1143
- return ""
1144
-
1145
- root = elements_from_strings([html])
1146
- ConfluenceStorageFormatCleaner().visit(root)
1147
- return elements_to_string(root)
1148
-
1149
-
1150
1438
  def elements_to_string(root: ET._Element) -> str:
1151
1439
  xml = ET.tostring(root, encoding="utf8", method="xml").decode("utf8")
1152
1440
  m = re.match(r"^<root\s+[^>]*>(.*)</root>\s*$", xml, re.DOTALL)