markdown-to-confluence 0.4.3__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
md2conf/converter.py CHANGED
@@ -6,60 +6,57 @@ Copyright 2022-2025, Levente Hunyadi
6
6
  :see: https://github.com/hunyadi/md2conf
7
7
  """
8
8
 
9
- # mypy: disable-error-code="dict-item"
10
-
11
9
  import dataclasses
12
10
  import hashlib
13
- import importlib.resources as resources
14
11
  import logging
15
12
  import os.path
16
13
  import re
17
14
  import uuid
15
+ from abc import ABC, abstractmethod
18
16
  from dataclasses import dataclass
19
17
  from pathlib import Path
20
18
  from typing import Any, Literal, Optional, Union
21
19
  from urllib.parse import ParseResult, quote_plus, urlparse, urlunparse
22
20
 
23
21
  import lxml.etree as ET
24
- from lxml.builder import ElementMaker
25
22
  from strong_typing.core import JsonType
26
23
 
27
24
  from . import drawio, mermaid
28
25
  from .collection import ConfluencePageCollection
26
+ from .csf import AC_ATTR, AC_ELEM, HTML, RI_ATTR, RI_ELEM, ParseError, elements_from_strings, elements_to_string
29
27
  from .domain import ConfluenceDocumentOptions, ConfluencePageID
30
- from .extra import path_relative_to
28
+ from .extra import override, path_relative_to
31
29
  from .markdown import markdown_to_html
32
30
  from .metadata import ConfluenceSiteMetadata
33
31
  from .properties import PageError
34
32
  from .scanner import ScannedDocument, Scanner
35
-
36
- namespaces = {
37
- "ac": "http://atlassian.com/content",
38
- "ri": "http://atlassian.com/resource/identifier",
39
- }
40
- for key, value in namespaces.items():
41
- ET.register_namespace(key, value)
33
+ from .toc import TableOfContentsBuilder
34
+ from .uri import is_absolute_url, to_uuid_urn
35
+ from .xml import element_to_text
42
36
 
43
37
 
44
- def get_volatile_attributes() -> list[ET.QName]:
38
+ def get_volatile_attributes() -> list[str]:
45
39
  "Returns a list of volatile attributes that frequently change as a Confluence storage format XHTML document is updated."
46
40
 
47
41
  return [
48
- ET.QName(namespaces["ac"], "local-id"),
49
- ET.QName(namespaces["ac"], "macro-id"),
50
- ET.QName(namespaces["ri"], "version-at-save"),
42
+ AC_ATTR("local-id"),
43
+ AC_ATTR("macro-id"),
44
+ RI_ATTR("version-at-save"),
51
45
  ]
52
46
 
53
47
 
54
- HTML = ElementMaker()
55
- AC = ElementMaker(namespace=namespaces["ac"])
56
- RI = ElementMaker(namespace=namespaces["ri"])
48
+ def get_volatile_elements() -> list[str]:
49
+ "Returns a list of volatile elements whose content frequently changes as a Confluence storage format XHTML document is updated."
50
+
51
+ return [AC_ATTR("task-uuid")]
57
52
 
58
- LOGGER = logging.getLogger(__name__)
59
53
 
54
+ status_images: dict[str, str] = {
55
+ to_uuid_urn(f'<svg height="10" width="10" xmlns="http://www.w3.org/2000/svg"><circle r="5" cx="5" cy="5" fill="{color}" /></svg>'): color
56
+ for color in ["gray", "purple", "blue", "red", "yellow", "green"]
57
+ }
60
58
 
61
- class ParseError(RuntimeError):
62
- pass
59
+ LOGGER = logging.getLogger(__name__)
63
60
 
64
61
 
65
62
  def starts_with_any(text: str, prefixes: list[str]) -> bool:
@@ -71,16 +68,6 @@ def starts_with_any(text: str, prefixes: list[str]) -> bool:
71
68
  return False
72
69
 
73
70
 
74
- def is_absolute_url(url: str) -> bool:
75
- urlparts = urlparse(url)
76
- return bool(urlparts.scheme) or bool(urlparts.netloc)
77
-
78
-
79
- def is_relative_url(url: str) -> bool:
80
- urlparts = urlparse(url)
81
- return not bool(urlparts.scheme) and not bool(urlparts.netloc)
82
-
83
-
84
71
  def is_directory_within(absolute_path: Path, base_path: Path) -> bool:
85
72
  "True if the absolute path is nested within the base path."
86
73
 
@@ -100,50 +87,8 @@ def encode_title(text: str) -> str:
100
87
  return quote_plus(text.strip())
101
88
 
102
89
 
103
- def _elements_from_strings(dtd_path: Path, items: list[str]) -> ET._Element:
104
- """
105
- Creates a fragment of several XML nodes from their string representation wrapped in a root element.
106
-
107
- :param dtd_path: Path to a DTD document that defines entities like &cent; or &copy;.
108
- :param items: Strings to parse into XML fragments.
109
- :returns: An XML document as an element tree.
110
- """
111
-
112
- parser = ET.XMLParser(
113
- remove_blank_text=True,
114
- remove_comments=True,
115
- strip_cdata=False,
116
- load_dtd=True,
117
- )
118
-
119
- ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in namespaces.items())
120
-
121
- data = [
122
- '<?xml version="1.0"?>',
123
- f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path.as_posix()}"><root{ns_attr_list}>',
124
- ]
125
- data.extend(items)
126
- data.append("</root>")
127
-
128
- try:
129
- return ET.fromstringlist(data, parser=parser)
130
- except ET.XMLSyntaxError as ex:
131
- raise ParseError() from ex
132
-
133
-
134
- def elements_from_strings(items: list[str]) -> ET._Element:
135
- "Creates a fragment of several XML nodes from their string representation wrapped in a root element."
136
-
137
- resource_path = resources.files(__package__).joinpath("entities.dtd")
138
- with resources.as_file(resource_path) as dtd_path:
139
- return _elements_from_strings(dtd_path, items)
140
-
141
-
142
- def elements_from_string(content: str) -> ET._Element:
143
- return elements_from_strings([content])
144
-
145
-
146
- _languages = [
90
+ # supported code block languages, for which syntax highlighting is available
91
+ _LANGUAGES = [
147
92
  "abap",
148
93
  "actionscript3",
149
94
  "ada",
@@ -225,7 +170,7 @@ _languages = [
225
170
  ]
226
171
 
227
172
 
228
- class NodeVisitor:
173
+ class NodeVisitor(ABC):
229
174
  def visit(self, node: ET._Element) -> None:
230
175
  "Recursively visits all descendants of this node."
231
176
 
@@ -240,8 +185,8 @@ class NodeVisitor:
240
185
  else:
241
186
  self.visit(source)
242
187
 
243
- def transform(self, child: ET._Element) -> Optional[ET._Element]:
244
- pass
188
+ @abstractmethod
189
+ def transform(self, child: ET._Element) -> Optional[ET._Element]: ...
245
190
 
246
191
 
247
192
  def title_to_identifier(title: str) -> str:
@@ -253,60 +198,29 @@ def title_to_identifier(title: str) -> str:
253
198
  return s
254
199
 
255
200
 
256
- def element_to_text(node: ET._Element) -> str:
257
- "Returns all text contained in an element as a concatenated string."
201
+ def element_text_starts_with_any(node: ET._Element, prefixes: list[str]) -> bool:
202
+ "True if the text contained in an element starts with any of the specified prefix strings."
258
203
 
259
- return "".join(node.itertext()).strip()
204
+ if node.text is None:
205
+ return False
206
+ return starts_with_any(node.text, prefixes)
260
207
 
261
208
 
262
209
  @dataclass
263
210
  class ImageAttributes:
211
+ """
212
+ Attributes applied to an `<img>` element.
213
+
214
+ :param caption: Caption text (`alt` attribute).
215
+ :param width: Natural image width in pixels.
216
+ :param height: Natural image height in pixels.
217
+ """
218
+
264
219
  caption: Optional[str]
265
220
  width: Optional[str]
266
221
  height: Optional[str]
267
222
 
268
223
 
269
- @dataclass
270
- class TableOfContentsEntry:
271
- level: int
272
- text: str
273
-
274
-
275
- class TableOfContents:
276
- "Builds a table of contents from Markdown headings."
277
-
278
- headings: list[TableOfContentsEntry]
279
-
280
- def __init__(self) -> None:
281
- self.headings = []
282
-
283
- def add(self, level: int, text: str) -> None:
284
- """
285
- Adds a heading to the table of contents.
286
-
287
- :param level: Markdown heading level (e.g. `1` for first-level heading).
288
- :param text: Markdown heading text.
289
- """
290
-
291
- self.headings.append(TableOfContentsEntry(level, text))
292
-
293
- def get_title(self) -> Optional[str]:
294
- """
295
- Returns a proposed document title (if unique).
296
-
297
- :returns: Title text, or `None` if no unique title can be inferred.
298
- """
299
-
300
- for level in range(1, 7):
301
- try:
302
- (title,) = (item.text for item in self.headings if item.level == level)
303
- return title
304
- except ValueError:
305
- pass
306
-
307
- return None
308
-
309
-
310
224
  @dataclass
311
225
  class ConfluenceConverterOptions:
312
226
  """
@@ -339,10 +253,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
339
253
  path: Path
340
254
  base_dir: Path
341
255
  root_dir: Path
342
- toc: TableOfContents
256
+ toc: TableOfContentsBuilder
343
257
  links: list[str]
344
258
  images: list[Path]
345
- embedded_images: dict[str, bytes]
259
+ embedded_files: dict[str, bytes]
346
260
  site_metadata: ConfluenceSiteMetadata
347
261
  page_metadata: ConfluencePageCollection
348
262
 
@@ -363,10 +277,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
363
277
  self.path = path
364
278
  self.base_dir = path.parent
365
279
  self.root_dir = root_dir
366
- self.toc = TableOfContents()
280
+ self.toc = TableOfContentsBuilder()
367
281
  self.links = []
368
282
  self.images = []
369
- self.embedded_images = {}
283
+ self.embedded_files = {}
370
284
  self.site_metadata = site_metadata
371
285
  self.page_metadata = page_metadata
372
286
 
@@ -376,15 +290,15 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
376
290
  for e in heading:
377
291
  self.visit(e)
378
292
 
379
- anchor = AC(
293
+ anchor = AC_ELEM(
380
294
  "structured-macro",
381
295
  {
382
- ET.QName(namespaces["ac"], "name"): "anchor",
383
- ET.QName(namespaces["ac"], "schema-version"): "1",
296
+ AC_ATTR("name"): "anchor",
297
+ AC_ATTR("schema-version"): "1",
384
298
  },
385
- AC(
299
+ AC_ELEM(
386
300
  "parameter",
387
- {ET.QName(namespaces["ac"], "name"): ""},
301
+ {AC_ATTR("name"): ""},
388
302
  title_to_identifier(element_to_text(heading)),
389
303
  ),
390
304
  )
@@ -395,7 +309,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
395
309
  heading.text = None
396
310
 
397
311
  def _warn_or_raise(self, msg: str) -> None:
398
- "Emit a warning or raise an exception when a path points to a resource that doesn't exist."
312
+ "Emit a warning or raise an exception when a path points to a resource that doesn't exist or is outside of the permitted hierarchy."
399
313
 
400
314
  if self.options.ignore_invalid_url:
401
315
  LOGGER.warning(msg)
@@ -423,12 +337,12 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
423
337
  if self.options.heading_anchors:
424
338
  # <ac:link ac:anchor="anchor"><ac:link-body>...</ac:link-body></ac:link>
425
339
  target = relative_url.fragment.lstrip("#")
426
- link_body = AC("link-body", {}, *list(anchor))
340
+ link_body = AC_ELEM("link-body", {}, *list(anchor))
427
341
  link_body.text = anchor.text
428
- link_wrapper = AC(
342
+ link_wrapper = AC_ELEM(
429
343
  "link",
430
344
  {
431
- ET.QName(namespaces["ac"], "anchor"): target,
345
+ AC_ATTR("anchor"): target,
432
346
  },
433
347
  link_body,
434
348
  )
@@ -484,15 +398,38 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
484
398
  anchor.attrib["href"] = transformed_url
485
399
  return None
486
400
 
401
+ def _transform_status(self, color: str, caption: str) -> ET._Element:
402
+ macro_id = str(uuid.uuid4())
403
+ return AC_ELEM(
404
+ "structured-macro",
405
+ {
406
+ AC_ATTR("name"): "status",
407
+ AC_ATTR("schema-version"): "1",
408
+ AC_ATTR("macro-id"): macro_id,
409
+ },
410
+ AC_ELEM(
411
+ "parameter",
412
+ {AC_ATTR("name"): "colour"},
413
+ color.title(),
414
+ ),
415
+ AC_ELEM(
416
+ "parameter",
417
+ {AC_ATTR("name"): "title"},
418
+ caption,
419
+ ),
420
+ )
421
+
487
422
  def _transform_image(self, image: ET._Element) -> ET._Element:
488
423
  "Inserts an attached or external image."
489
424
 
490
425
  src = image.attrib.get("src")
491
-
492
426
  if not src:
493
427
  raise DocumentError("image lacks `src` attribute")
494
428
 
495
429
  caption = image.attrib.get("alt")
430
+ if caption is not None and src.startswith("urn:uuid:") and (color := status_images.get(src)) is not None:
431
+ return self._transform_status(color, caption)
432
+
496
433
  width = image.attrib.get("width")
497
434
  height = image.attrib.get("height")
498
435
  attrs = ImageAttributes(caption, width, height)
@@ -510,6 +447,8 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
510
447
  return self._transform_drawio_image(absolute_path, attrs)
511
448
  elif absolute_path.name.endswith(".drawio.xml") or absolute_path.name.endswith(".drawio"):
512
449
  return self._transform_drawio(absolute_path, attrs)
450
+ elif absolute_path.name.endswith(".mmd") or absolute_path.name.endswith(".mermaid"):
451
+ return self._transform_external_mermaid(absolute_path, attrs)
513
452
  else:
514
453
  return self._transform_attached_image(absolute_path, attrs)
515
454
 
@@ -517,26 +456,26 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
517
456
  "Emits Confluence Storage Format XHTML for an external image."
518
457
 
519
458
  attributes: dict[str, Any] = {
520
- ET.QName(namespaces["ac"], "align"): "center",
521
- ET.QName(namespaces["ac"], "layout"): "center",
459
+ AC_ATTR("align"): "center",
460
+ AC_ATTR("layout"): "center",
522
461
  }
523
462
  if attrs.width is not None:
524
- attributes.update({ET.QName(namespaces["ac"], "width"): attrs.width})
463
+ attributes.update({AC_ATTR("width"): attrs.width})
525
464
  if attrs.height is not None:
526
- attributes.update({ET.QName(namespaces["ac"], "height"): attrs.height})
465
+ attributes.update({AC_ATTR("height"): attrs.height})
527
466
 
528
467
  elements: list[ET._Element] = []
529
468
  elements.append(
530
- RI(
469
+ RI_ELEM(
531
470
  "url",
532
471
  # refers to an external image
533
- {ET.QName(namespaces["ri"], "value"): url},
472
+ {RI_ATTR("value"): url},
534
473
  )
535
474
  )
536
475
  if attrs.caption is not None:
537
- elements.append(AC("caption", HTML.p(attrs.caption)))
476
+ elements.append(AC_ELEM("caption", HTML.p(attrs.caption)))
538
477
 
539
- return AC("image", attributes, *elements)
478
+ return AC_ELEM("image", attributes, *elements)
540
479
 
541
480
  def _verify_image_path(self, path: Path) -> Optional[Path]:
542
481
  "Checks whether an image path is safe to use."
@@ -577,7 +516,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
577
516
  image_data = drawio.render_diagram(absolute_path, self.options.diagram_output_format)
578
517
  image_hash = hashlib.md5(image_data).hexdigest()
579
518
  image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
580
- self.embedded_images[image_filename] = image_data
519
+ self.embedded_files[image_filename] = image_data
581
520
  return self._create_attached_image(image_filename, attrs)
582
521
  else:
583
522
  self.images.append(absolute_path)
@@ -596,7 +535,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
596
535
  # extract embedded editable diagram and upload as *.drawio
597
536
  image_data = drawio.extract_diagram(absolute_path)
598
537
  image_filename = attachment_name(path_relative_to(absolute_path.with_suffix(".xml"), self.base_dir))
599
- self.embedded_images[image_filename] = image_data
538
+ self.embedded_files[image_filename] = image_data
600
539
 
601
540
  return self._create_drawio(image_filename, attrs)
602
541
 
@@ -604,64 +543,64 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
604
543
  "An image embedded into the page, linking to an attachment."
605
544
 
606
545
  attributes: dict[str, Any] = {
607
- ET.QName(namespaces["ac"], "align"): "center",
608
- ET.QName(namespaces["ac"], "layout"): "center",
546
+ AC_ATTR("align"): "center",
547
+ AC_ATTR("layout"): "center",
609
548
  }
610
549
  if attrs.width is not None:
611
- attributes.update({ET.QName(namespaces["ac"], "width"): attrs.width})
550
+ attributes.update({AC_ATTR("width"): attrs.width})
612
551
  if attrs.height is not None:
613
- attributes.update({ET.QName(namespaces["ac"], "height"): attrs.height})
552
+ attributes.update({AC_ATTR("height"): attrs.height})
614
553
 
615
554
  elements: list[ET._Element] = []
616
555
  elements.append(
617
- RI(
556
+ RI_ELEM(
618
557
  "attachment",
619
558
  # refers to an attachment uploaded alongside the page
620
- {ET.QName(namespaces["ri"], "filename"): image_name},
559
+ {RI_ATTR("filename"): image_name},
621
560
  )
622
561
  )
623
562
  if attrs.caption is not None:
624
- elements.append(AC("caption", HTML.p(attrs.caption)))
563
+ elements.append(AC_ELEM("caption", HTML.p(attrs.caption)))
625
564
 
626
- return AC("image", attributes, *elements)
565
+ return AC_ELEM("image", attributes, *elements)
627
566
 
628
567
  def _create_drawio(self, filename: str, attrs: ImageAttributes) -> ET._Element:
629
568
  "A draw.io diagram embedded into the page, linking to an attachment."
630
569
 
631
570
  parameters: list[ET._Element] = [
632
- AC(
571
+ AC_ELEM(
633
572
  "parameter",
634
- {ET.QName(namespaces["ac"], "name"): "diagramName"},
573
+ {AC_ATTR("name"): "diagramName"},
635
574
  filename,
636
575
  ),
637
576
  ]
638
577
  if attrs.width is not None:
639
578
  parameters.append(
640
- AC(
579
+ AC_ELEM(
641
580
  "parameter",
642
- {ET.QName(namespaces["ac"], "name"): "width"},
581
+ {AC_ATTR("name"): "width"},
643
582
  attrs.width,
644
583
  ),
645
584
  )
646
585
  if attrs.height is not None:
647
586
  parameters.append(
648
- AC(
587
+ AC_ELEM(
649
588
  "parameter",
650
- {ET.QName(namespaces["ac"], "name"): "height"},
589
+ {AC_ATTR("name"): "height"},
651
590
  attrs.height,
652
591
  ),
653
592
  )
654
593
 
655
594
  local_id = str(uuid.uuid4())
656
595
  macro_id = str(uuid.uuid4())
657
- return AC(
596
+ return AC_ELEM(
658
597
  "structured-macro",
659
598
  {
660
- ET.QName(namespaces["ac"], "name"): "drawio",
661
- ET.QName(namespaces["ac"], "schema-version"): "1",
599
+ AC_ATTR("name"): "drawio",
600
+ AC_ATTR("schema-version"): "1",
662
601
  "data-layout": "default",
663
- ET.QName(namespaces["ac"], "local-id"): local_id,
664
- ET.QName(namespaces["ac"], "macro-id"): macro_id,
602
+ AC_ATTR("local-id"): local_id,
603
+ AC_ATTR("macro-id"): macro_id,
665
604
  },
666
605
  *parameters,
667
606
  )
@@ -672,21 +611,21 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
672
611
  message = HTML.p("Missing image: ", HTML.code(path.as_posix()))
673
612
  if caption is not None:
674
613
  content = [
675
- AC(
614
+ AC_ELEM(
676
615
  "parameter",
677
- {ET.QName(namespaces["ac"], "name"): "title"},
616
+ {AC_ATTR("name"): "title"},
678
617
  caption,
679
618
  ),
680
- AC("rich-text-body", {}, message),
619
+ AC_ELEM("rich-text-body", {}, message),
681
620
  ]
682
621
  else:
683
- content = [AC("rich-text-body", {}, message)]
622
+ content = [AC_ELEM("rich-text-body", {}, message)]
684
623
 
685
- return AC(
624
+ return AC_ELEM(
686
625
  "structured-macro",
687
626
  {
688
- ET.QName(namespaces["ac"], "name"): "warning",
689
- ET.QName(namespaces["ac"], "schema-version"): "1",
627
+ AC_ATTR("name"): "warning",
628
+ AC_ATTR("schema-version"): "1",
690
629
  },
691
630
  *content,
692
631
  )
@@ -701,100 +640,125 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
701
640
  language = m.group(1)
702
641
  else:
703
642
  language = "none"
704
- if language not in _languages:
643
+ if language not in _LANGUAGES:
705
644
  language = "none"
706
645
  content: str = code.text or ""
707
646
  content = content.rstrip()
708
647
 
709
648
  if language == "mermaid":
710
- return self._transform_mermaid(content)
649
+ return self._transform_inline_mermaid(content)
711
650
 
712
- return AC(
651
+ return AC_ELEM(
713
652
  "structured-macro",
714
653
  {
715
- ET.QName(namespaces["ac"], "name"): "code",
716
- ET.QName(namespaces["ac"], "schema-version"): "1",
654
+ AC_ATTR("name"): "code",
655
+ AC_ATTR("schema-version"): "1",
717
656
  },
718
- AC(
657
+ AC_ELEM(
719
658
  "parameter",
720
- {ET.QName(namespaces["ac"], "name"): "theme"},
659
+ {AC_ATTR("name"): "theme"},
721
660
  "Default",
722
661
  ),
723
- AC(
662
+ AC_ELEM(
724
663
  "parameter",
725
- {ET.QName(namespaces["ac"], "name"): "language"},
664
+ {AC_ATTR("name"): "language"},
726
665
  language,
727
666
  ),
728
- AC("plain-text-body", ET.CDATA(content)),
667
+ AC_ELEM("plain-text-body", ET.CDATA(content)),
729
668
  )
730
669
 
731
- def _transform_mermaid(self, content: str) -> ET._Element:
732
- "Transforms a Mermaid diagram code block."
670
+ def _transform_external_mermaid(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
671
+ "Emits Confluence Storage Format XHTML for a Mermaid diagram read from an external file."
672
+
673
+ if not absolute_path.name.endswith(".mmd") and not absolute_path.name.endswith(".mermaid"):
674
+ raise DocumentError("invalid image format; expected: `*.mmd` or `*.mermaid`")
733
675
 
734
676
  if self.options.render_mermaid:
735
- image_data = mermaid.render_diagram(content, self.options.diagram_output_format)
736
- image_hash = hashlib.md5(image_data).hexdigest()
737
- image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
738
- self.embedded_images[image_filename] = image_data
739
- return self._create_attached_image(image_filename, ImageAttributes(None, None, None))
677
+ with open(absolute_path, "r", encoding="utf-8") as f:
678
+ content = f.read()
679
+ return self._create_mermaid_image(content, attrs)
740
680
  else:
741
- local_id = str(uuid.uuid4())
742
- macro_id = str(uuid.uuid4())
743
- return AC(
744
- "structured-macro",
745
- {
746
- ET.QName(namespaces["ac"], "name"): "macro-diagram",
747
- ET.QName(namespaces["ac"], "schema-version"): "1",
748
- "data-layout": "default",
749
- ET.QName(namespaces["ac"], "local-id"): local_id,
750
- ET.QName(namespaces["ac"], "macro-id"): macro_id,
751
- },
752
- AC(
753
- "parameter",
754
- {ET.QName(namespaces["ac"], "name"): "sourceType"},
755
- "MacroBody",
756
- ),
757
- AC(
758
- "parameter",
759
- {ET.QName(namespaces["ac"], "name"): "attachmentPageId"},
760
- ),
761
- AC(
762
- "parameter",
763
- {ET.QName(namespaces["ac"], "name"): "syntax"},
764
- "Mermaid",
765
- ),
766
- AC(
767
- "parameter",
768
- {ET.QName(namespaces["ac"], "name"): "attachmentId"},
769
- ),
770
- AC("parameter", {ET.QName(namespaces["ac"], "name"): "url"}),
771
- AC("plain-text-body", ET.CDATA(content)),
772
- )
681
+ self.images.append(absolute_path)
682
+ mermaid_filename = attachment_name(path_relative_to(absolute_path, self.base_dir))
683
+ return self._create_mermaid_embed(mermaid_filename)
684
+
685
+ def _transform_inline_mermaid(self, content: str) -> ET._Element:
686
+ "Emits Confluence Storage Format XHTML for a Mermaid diagram defined in a code block."
687
+
688
+ if self.options.render_mermaid:
689
+ return self._create_mermaid_image(content, ImageAttributes(None, None, None))
690
+ else:
691
+ mermaid_data = content.encode("utf-8")
692
+ mermaid_hash = hashlib.md5(mermaid_data).hexdigest()
693
+ mermaid_filename = attachment_name(f"embedded_{mermaid_hash}.mmd")
694
+ self.embedded_files[mermaid_filename] = mermaid_data
695
+ return self._create_mermaid_embed(mermaid_filename)
696
+
697
+ def _create_mermaid_image(self, content: str, attrs: ImageAttributes) -> ET._Element:
698
+ "A rendered Mermaid diagram, linking to an attachment uploaded as an image."
699
+
700
+ image_data = mermaid.render_diagram(content, self.options.diagram_output_format)
701
+ image_hash = hashlib.md5(image_data).hexdigest()
702
+ image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
703
+ self.embedded_files[image_filename] = image_data
704
+ return self._create_attached_image(image_filename, attrs)
705
+
706
+ def _create_mermaid_embed(self, filename: str) -> ET._Element:
707
+ "A Mermaid diagram, linking to an attachment that captures the Mermaid source."
708
+
709
+ local_id = str(uuid.uuid4())
710
+ macro_id = str(uuid.uuid4())
711
+ return AC_ELEM(
712
+ "structured-macro",
713
+ {
714
+ AC_ATTR("name"): "mermaid-cloud",
715
+ AC_ATTR("schema-version"): "1",
716
+ "data-layout": "default",
717
+ AC_ATTR("local-id"): local_id,
718
+ AC_ATTR("macro-id"): macro_id,
719
+ },
720
+ AC_ELEM(
721
+ "parameter",
722
+ {AC_ATTR("name"): "filename"},
723
+ filename,
724
+ ),
725
+ AC_ELEM(
726
+ "parameter",
727
+ {AC_ATTR("name"): "toolbar"},
728
+ "bottom",
729
+ ),
730
+ AC_ELEM(
731
+ "parameter",
732
+ {AC_ATTR("name"): "zoom"},
733
+ "fit",
734
+ ),
735
+ AC_ELEM("parameter", {AC_ATTR("name"): "revision"}, "1"),
736
+ )
773
737
 
774
738
  def _transform_toc(self, code: ET._Element) -> ET._Element:
775
739
  "Creates a table of contents, constructed from headings in the document."
776
740
 
777
- return AC(
741
+ return AC_ELEM(
778
742
  "structured-macro",
779
743
  {
780
- ET.QName(namespaces["ac"], "name"): "toc",
781
- ET.QName(namespaces["ac"], "schema-version"): "1",
744
+ AC_ATTR("name"): "toc",
745
+ AC_ATTR("schema-version"): "1",
782
746
  },
783
- AC("parameter", {ET.QName(namespaces["ac"], "name"): "outline"}, "clear"),
784
- AC("parameter", {ET.QName(namespaces["ac"], "name"): "style"}, "default"),
747
+ AC_ELEM("parameter", {AC_ATTR("name"): "outline"}, "clear"),
748
+ AC_ELEM("parameter", {AC_ATTR("name"): "style"}, "default"),
785
749
  )
786
750
 
787
751
  def _transform_listing(self, code: ET._Element) -> ET._Element:
788
752
  "Creates a list of child pages."
789
753
 
790
- return AC(
754
+ return AC_ELEM(
791
755
  "structured-macro",
792
756
  {
793
- ET.QName(namespaces["ac"], "name"): "children",
794
- ET.QName(namespaces["ac"], "schema-version"): "2",
757
+ AC_ATTR("name"): "children",
758
+ AC_ATTR("schema-version"): "2",
795
759
  "data-layout": "default",
796
760
  },
797
- AC("parameter", {ET.QName(namespaces["ac"], "name"): "allChildren"}, "true"),
761
+ AC_ELEM("parameter", {AC_ATTR("name"): "allChildren"}, "true"),
798
762
  )
799
763
 
800
764
  def _transform_admonition(self, elem: ET._Element) -> ET._Element:
@@ -826,21 +790,21 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
826
790
  # <p class="admonition-title">Note</p>
827
791
  if "admonition-title" in elem[0].attrib.get("class", "").split(" "):
828
792
  content = [
829
- AC(
793
+ AC_ELEM(
830
794
  "parameter",
831
- {ET.QName(namespaces["ac"], "name"): "title"},
795
+ {AC_ATTR("name"): "title"},
832
796
  elem[0].text or "",
833
797
  ),
834
- AC("rich-text-body", {}, *list(elem[1:])),
798
+ AC_ELEM("rich-text-body", {}, *list(elem[1:])),
835
799
  ]
836
800
  else:
837
- content = [AC("rich-text-body", {}, *list(elem))]
801
+ content = [AC_ELEM("rich-text-body", {}, *list(elem))]
838
802
 
839
- return AC(
803
+ return AC_ELEM(
840
804
  "structured-macro",
841
805
  {
842
- ET.QName(namespaces["ac"], "name"): class_name,
843
- ET.QName(namespaces["ac"], "schema-version"): "1",
806
+ AC_ATTR("name"): class_name,
807
+ AC_ATTR("schema-version"): "1",
844
808
  },
845
809
  *content,
846
810
  )
@@ -914,10 +878,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
914
878
  """
915
879
  Creates an info, tip, note or warning panel from a GitHub or GitLab alert.
916
880
 
917
- Transforms
918
- [GitHub alert](https://docs.github.com/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts)
919
- or [GitLab alert](https://docs.gitlab.com/ee/development/documentation/styleguide/#alert-boxes)
920
- syntax into one of the Confluence structured macros *info*, *tip*, *note*, or *warning*.
881
+ Transforms GitHub alert or GitLab alert syntax into one of the Confluence structured macros *info*, *tip*, *note*, or *warning*.
882
+
883
+ :see: https://docs.github.com/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts
884
+ :see: https://docs.gitlab.com/ee/development/documentation/styleguide/#alert-boxes
921
885
  """
922
886
 
923
887
  content = elem[0]
@@ -931,22 +895,22 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
931
895
  self.visit(e)
932
896
 
933
897
  content.text = content.text[skip:]
934
- return AC(
898
+ return AC_ELEM(
935
899
  "structured-macro",
936
900
  {
937
- ET.QName(namespaces["ac"], "name"): class_name,
938
- ET.QName(namespaces["ac"], "schema-version"): "1",
901
+ AC_ATTR("name"): class_name,
902
+ AC_ATTR("schema-version"): "1",
939
903
  },
940
- AC("rich-text-body", {}, *list(elem)),
904
+ AC_ELEM("rich-text-body", {}, *list(elem)),
941
905
  )
942
906
 
943
907
  def _transform_section(self, elem: ET._Element) -> ET._Element:
944
908
  """
945
909
  Creates a collapsed section.
946
910
 
947
- Transforms
948
- [GitHub collapsed section](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-collapsed-sections)
949
- syntax into the Confluence structured macro *expand*.
911
+ Transforms a GitHub collapsed section syntax into the Confluence structured macro *expand*.
912
+
913
+ :see: https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-collapsed-sections
950
914
  """
951
915
 
952
916
  if elem[0].tag != "summary":
@@ -954,23 +918,24 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
954
918
  if elem[0].tail is not None:
955
919
  raise DocumentError('expected: attribute `markdown="1"` on `<details>`')
956
920
 
957
- summary = "".join(elem[0].itertext()).strip()
921
+ summary = element_to_text(elem[0])
958
922
  elem.remove(elem[0])
959
923
 
924
+ # transform Markdown to Confluence within collapsed section content
960
925
  self.visit(elem)
961
926
 
962
- return AC(
927
+ return AC_ELEM(
963
928
  "structured-macro",
964
929
  {
965
- ET.QName(namespaces["ac"], "name"): "expand",
966
- ET.QName(namespaces["ac"], "schema-version"): "1",
930
+ AC_ATTR("name"): "expand",
931
+ AC_ATTR("schema-version"): "1",
967
932
  },
968
- AC(
933
+ AC_ELEM(
969
934
  "parameter",
970
- {ET.QName(namespaces["ac"], "name"): "title"},
935
+ {AC_ATTR("name"): "title"},
971
936
  summary,
972
937
  ),
973
- AC("rich-text-body", {}, *list(elem)),
938
+ AC_ELEM("rich-text-body", {}, *list(elem)),
974
939
  )
975
940
 
976
941
  def _transform_emoji(self, elem: ET._Element) -> ET._Element:
@@ -978,20 +943,18 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
978
943
  Inserts an inline emoji character.
979
944
  """
980
945
 
981
- shortname = elem.attrib.get("data-emoji-shortname", "")
982
- unicode = elem.attrib.get("data-emoji-unicode", None)
946
+ shortname = elem.attrib.get("data-shortname", "")
947
+ unicode = elem.attrib.get("data-unicode", None)
983
948
  alt = elem.text or ""
984
949
 
985
950
  # <ac:emoticon ac:name="wink" ac:emoji-shortname=":wink:" ac:emoji-id="1f609" ac:emoji-fallback="&#128521;"/>
986
- # <ac:emoticon ac:name="blue-star" ac:emoji-shortname=":heavy_plus_sign:" ac:emoji-id="2795" ac:emoji-fallback="&#10133;"/>
987
- # <ac:emoticon ac:name="blue-star" ac:emoji-shortname=":heavy_minus_sign:" ac:emoji-id="2796" ac:emoji-fallback="&#10134;"/>
988
- return AC(
951
+ return AC_ELEM(
989
952
  "emoticon",
990
953
  {
991
- ET.QName(namespaces["ac"], "name"): shortname,
992
- ET.QName(namespaces["ac"], "emoji-shortname"): f":{shortname}:",
993
- ET.QName(namespaces["ac"], "emoji-id"): unicode,
994
- ET.QName(namespaces["ac"], "emoji-fallback"): alt,
954
+ AC_ATTR("name"): shortname,
955
+ AC_ATTR("emoji-shortname"): f":{shortname}:",
956
+ AC_ATTR("emoji-id"): unicode,
957
+ AC_ATTR("emoji-fallback"): alt,
995
958
  },
996
959
  )
997
960
 
@@ -1010,20 +973,20 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1010
973
 
1011
974
  local_id = str(uuid.uuid4())
1012
975
  macro_id = str(uuid.uuid4())
1013
- macro = AC(
976
+ macro = AC_ELEM(
1014
977
  "structured-macro",
1015
978
  {
1016
- ET.QName(namespaces["ac"], "name"): "eazy-math-inline",
1017
- ET.QName(namespaces["ac"], "schema-version"): "1",
1018
- ET.QName(namespaces["ac"], "local-id"): local_id,
1019
- ET.QName(namespaces["ac"], "macro-id"): macro_id,
979
+ AC_ATTR("name"): "eazy-math-inline",
980
+ AC_ATTR("schema-version"): "1",
981
+ AC_ATTR("local-id"): local_id,
982
+ AC_ATTR("macro-id"): macro_id,
1020
983
  },
1021
- AC(
984
+ AC_ELEM(
1022
985
  "parameter",
1023
- {ET.QName(namespaces["ac"], "name"): "body"},
986
+ {AC_ATTR("name"): "body"},
1024
987
  content,
1025
988
  ),
1026
- AC("parameter", {ET.QName(namespaces["ac"], "name"): "align"}, "center"),
989
+ AC_ELEM("parameter", {AC_ATTR("name"): "align"}, "center"),
1027
990
  )
1028
991
  macro.tail = elem.tail # chain sibling text node that immediately follows original element
1029
992
  return macro
@@ -1044,21 +1007,21 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1044
1007
  local_id = str(uuid.uuid4())
1045
1008
  macro_id = str(uuid.uuid4())
1046
1009
 
1047
- return AC(
1010
+ return AC_ELEM(
1048
1011
  "structured-macro",
1049
1012
  {
1050
- ET.QName(namespaces["ac"], "name"): "easy-math-block",
1051
- ET.QName(namespaces["ac"], "schema-version"): "1",
1013
+ AC_ATTR("name"): "easy-math-block",
1014
+ AC_ATTR("schema-version"): "1",
1052
1015
  "data-layout": "default",
1053
- ET.QName(namespaces["ac"], "local-id"): local_id,
1054
- ET.QName(namespaces["ac"], "macro-id"): macro_id,
1016
+ AC_ATTR("local-id"): local_id,
1017
+ AC_ATTR("macro-id"): macro_id,
1055
1018
  },
1056
- AC(
1019
+ AC_ELEM(
1057
1020
  "parameter",
1058
- {ET.QName(namespaces["ac"], "name"): "body"},
1021
+ {AC_ATTR("name"): "body"},
1059
1022
  content,
1060
1023
  ),
1061
- AC("parameter", {ET.QName(namespaces["ac"], "name"): "align"}, "center"),
1024
+ AC_ELEM("parameter", {AC_ATTR("name"): "align"}, "center"),
1062
1025
  )
1063
1026
 
1064
1027
  def _transform_footnote_ref(self, elem: ET._Element) -> None:
@@ -1090,26 +1053,26 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1090
1053
  elem.remove(link)
1091
1054
 
1092
1055
  # build new anchor for footnote reference
1093
- ref_anchor = AC(
1056
+ ref_anchor = AC_ELEM(
1094
1057
  "structured-macro",
1095
1058
  {
1096
- ET.QName(namespaces["ac"], "name"): "anchor",
1097
- ET.QName(namespaces["ac"], "schema-version"): "1",
1059
+ AC_ATTR("name"): "anchor",
1060
+ AC_ATTR("schema-version"): "1",
1098
1061
  },
1099
- AC(
1062
+ AC_ELEM(
1100
1063
  "parameter",
1101
- {ET.QName(namespaces["ac"], "name"): ""},
1064
+ {AC_ATTR("name"): ""},
1102
1065
  f"footnote-ref-{footnote_ref}",
1103
1066
  ),
1104
1067
  )
1105
1068
 
1106
1069
  # build new link to footnote definition at the end of page
1107
- def_link = AC(
1070
+ def_link = AC_ELEM(
1108
1071
  "link",
1109
1072
  {
1110
- ET.QName(namespaces["ac"], "anchor"): f"footnote-def-{footnote_def}",
1073
+ AC_ATTR("anchor"): f"footnote-def-{footnote_def}",
1111
1074
  },
1112
- AC("link-body", ET.CDATA(text)),
1075
+ AC_ELEM("link-body", ET.CDATA(text)),
1113
1076
  )
1114
1077
 
1115
1078
  # append children synthesized for Confluence
@@ -1152,26 +1115,26 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1152
1115
  paragraph.remove(ref_anchor)
1153
1116
 
1154
1117
  # build new anchor for footnote definition
1155
- def_anchor = AC(
1118
+ def_anchor = AC_ELEM(
1156
1119
  "structured-macro",
1157
1120
  {
1158
- ET.QName(namespaces["ac"], "name"): "anchor",
1159
- ET.QName(namespaces["ac"], "schema-version"): "1",
1121
+ AC_ATTR("name"): "anchor",
1122
+ AC_ATTR("schema-version"): "1",
1160
1123
  },
1161
- AC(
1124
+ AC_ELEM(
1162
1125
  "parameter",
1163
- {ET.QName(namespaces["ac"], "name"): ""},
1126
+ {AC_ATTR("name"): ""},
1164
1127
  f"footnote-def-{footnote_def}",
1165
1128
  ),
1166
1129
  )
1167
1130
 
1168
1131
  # build new link to footnote reference in page body
1169
- ref_link = AC(
1132
+ ref_link = AC_ELEM(
1170
1133
  "link",
1171
1134
  {
1172
- ET.QName(namespaces["ac"], "anchor"): f"footnote-ref-{footnote_ref}",
1135
+ AC_ATTR("anchor"): f"footnote-ref-{footnote_ref}",
1173
1136
  },
1174
- AC("link-body", ET.CDATA("↩")),
1137
+ AC_ELEM("link-body", ET.CDATA("↩")),
1175
1138
  )
1176
1139
 
1177
1140
  # append children synthesized for Confluence
@@ -1180,6 +1143,52 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1180
1143
  paragraph.text = None
1181
1144
  paragraph.append(ref_link)
1182
1145
 
1146
+ def _transform_tasklist(self, elem: ET._Element) -> ET._Element:
1147
+ """
1148
+ Transforms a list of tasks into an action widget.
1149
+
1150
+ :see: https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-tasklists
1151
+ """
1152
+
1153
+ if elem.tag != "ul":
1154
+ raise DocumentError("expected: `<ul>` as the HTML element for a tasklist")
1155
+
1156
+ for item in elem:
1157
+ if item.tag != "li":
1158
+ raise DocumentError("expected: `<li>` as the HTML element for a task")
1159
+ if not element_text_starts_with_any(item, ["[ ]", "[x]", "[X]"]):
1160
+ raise DocumentError("expected: each `<li>` in a task list starting with [ ] or [x]")
1161
+
1162
+ # transform Markdown to Confluence within tasklist content
1163
+ self.visit(elem)
1164
+
1165
+ tasks: list[ET._Element] = []
1166
+ for index, item in enumerate(elem, start=1):
1167
+ if item.text is None:
1168
+ raise NotImplementedError("pre-condition check not exhaustive")
1169
+ match = re.match(r"^\[([x X])\]", item.text)
1170
+ if match is None:
1171
+ raise NotImplementedError("pre-condition check not exhaustive")
1172
+
1173
+ status = "incomplete" if match.group(1).isspace() else "complete"
1174
+
1175
+ body = AC_ELEM("task-body")
1176
+ body.text = item.text[3:]
1177
+ for child in item:
1178
+ body.append(child)
1179
+ tasks.append(
1180
+ AC_ELEM(
1181
+ "task",
1182
+ {},
1183
+ AC_ELEM("task-id", str(index)),
1184
+ AC_ELEM("task-uuid", str(uuid.uuid4())),
1185
+ AC_ELEM("task-status", status),
1186
+ body,
1187
+ ),
1188
+ )
1189
+ return AC_ELEM("task-list", {}, *tasks)
1190
+
1191
+ @override
1183
1192
  def transform(self, child: ET._Element) -> Optional[ET._Element]:
1184
1193
  """
1185
1194
  Transforms an HTML element tree obtained from a Markdown document into a Confluence Storage Format element tree.
@@ -1208,51 +1217,74 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1208
1217
  self._transform_heading(child)
1209
1218
  return None
1210
1219
 
1211
- # <p><img src="..." /></p>
1212
- if child.tag == "p" and len(child) == 1 and child[0].tag == "img":
1213
- return self._transform_image(child[0])
1214
-
1215
- # <p>[[_TOC_]]</p>
1216
- # <p>[TOC]</p>
1217
- elif child.tag == "p" and "".join(child.itertext()) in ["[[TOC]]", "[TOC]"]:
1218
- return self._transform_toc(child)
1219
-
1220
- # <p>[[_LISTING_]]</p>
1221
- elif child.tag == "p" and "".join(child.itertext()) in ["[[LISTING]]", "[LISTING]"]:
1222
- return self._transform_listing(child)
1223
-
1224
- # <div class="admonition note">
1225
- # <p class="admonition-title">Note</p>
1226
- # <p>...</p>
1227
- # </div>
1228
- #
1229
- # --- OR ---
1230
- #
1231
- # <div class="admonition note">
1232
1220
  # <p>...</p>
1233
- # </div>
1234
- elif child.tag == "div" and "admonition" in child.attrib.get("class", ""):
1235
- return self._transform_admonition(child)
1236
-
1237
- # Alerts in GitHub
1238
- # <blockquote>
1239
- # <p>[!TIP] ...</p>
1240
- # </blockquote>
1241
- elif child.tag == "blockquote" and len(child) > 0 and child[0].tag == "p" and child[0].text is not None and child[0].text.startswith("[!"):
1242
- return self._transform_github_alert(child)
1243
-
1244
- # Alerts in GitLab
1245
- # <blockquote>
1246
- # <p>DISCLAIMER: ...</p>
1247
- # </blockquote>
1248
- elif (
1249
- child.tag == "blockquote"
1250
- and len(child) > 0
1251
- and child[0].tag == "p"
1252
- and child[0].text is not None
1253
- and starts_with_any(child[0].text, ["FLAG:", "NOTE:", "WARNING:", "DISCLAIMER:"])
1254
- ):
1255
- return self._transform_gitlab_alert(child)
1221
+ if child.tag == "p":
1222
+ # <p><img src="..." /></p>
1223
+ if len(child) == 1 and child[0].tag == "img":
1224
+ return self._transform_image(child[0])
1225
+
1226
+ # <p>[[_TOC_]]</p> (represented as <p>[[<em>TOC</em>]]</p>)
1227
+ # <p>[TOC]</p>
1228
+ elif element_to_text(child) in ["[[TOC]]", "[TOC]"]:
1229
+ return self._transform_toc(child)
1230
+
1231
+ # <p>[[_LISTING_]]</p> (represented as <p>[[<em>LISTING</em>]]</p>)
1232
+ elif element_to_text(child) in ["[[LISTING]]", "[LISTING]"]:
1233
+ return self._transform_listing(child)
1234
+
1235
+ # <div>...</div>
1236
+ elif child.tag == "div":
1237
+ classes = child.attrib.get("class", "").split(" ")
1238
+
1239
+ # <div class="arithmatex">...</div>
1240
+ if "arithmatex" in classes:
1241
+ return self._transform_block_math(child)
1242
+
1243
+ # <div><ac:structured-macro ...>...</ac:structured-macro></div>
1244
+ elif "csf" in classes:
1245
+ if len(child) != 1:
1246
+ raise DocumentError("expected: single child in Confluence Storage Format block")
1247
+
1248
+ return child[0]
1249
+
1250
+ # <div class="footnote">
1251
+ # <hr/>
1252
+ # <ol>
1253
+ # <li id="fn:NAME"><p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p></li>
1254
+ # </ol>
1255
+ # </div>
1256
+ elif "footnote" in classes:
1257
+ self._transform_footnote_def(child)
1258
+ return None
1259
+
1260
+ # <div class="admonition note">
1261
+ # <p class="admonition-title">Note</p>
1262
+ # <p>...</p>
1263
+ # </div>
1264
+ #
1265
+ # --- OR ---
1266
+ #
1267
+ # <div class="admonition note">
1268
+ # <p>...</p>
1269
+ # </div>
1270
+ elif "admonition" in classes:
1271
+ return self._transform_admonition(child)
1272
+
1273
+ # <blockquote>...</blockquote>
1274
+ elif child.tag == "blockquote":
1275
+ # Alerts in GitHub
1276
+ # <blockquote>
1277
+ # <p>[!TIP] ...</p>
1278
+ # </blockquote>
1279
+ if len(child) > 0 and child[0].tag == "p" and child[0].text is not None and child[0].text.startswith("[!"):
1280
+ return self._transform_github_alert(child)
1281
+
1282
+ # Alerts in GitLab
1283
+ # <blockquote>
1284
+ # <p>DISCLAIMER: ...</p>
1285
+ # </blockquote>
1286
+ elif len(child) > 0 and child[0].tag == "p" and element_text_starts_with_any(child[0], ["FLAG:", "NOTE:", "WARNING:", "DISCLAIMER:"]):
1287
+ return self._transform_gitlab_alert(child)
1256
1288
 
1257
1289
  # <details markdown="1">
1258
1290
  # <summary>...</summary>
@@ -1261,6 +1293,17 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1261
1293
  elif child.tag == "details" and len(child) > 1 and child[0].tag == "summary":
1262
1294
  return self._transform_section(child)
1263
1295
 
1296
+ # <ul>
1297
+ # <li>[ ] ...</li>
1298
+ # <li>[x] ...</li>
1299
+ # </ul>
1300
+ elif child.tag == "ul" and len(child) > 0 and element_text_starts_with_any(child[0], ["[ ]", "[x]", "[X]"]):
1301
+ return self._transform_tasklist(child)
1302
+
1303
+ # <pre><code class="language-java"> ... </code></pre>
1304
+ elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code":
1305
+ return self._transform_code_block(child[0])
1306
+
1264
1307
  # <img src="..." alt="..." />
1265
1308
  elif child.tag == "img":
1266
1309
  return self._transform_image(child)
@@ -1269,36 +1312,26 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1269
1312
  elif child.tag == "a":
1270
1313
  return self._transform_link(child)
1271
1314
 
1272
- # <pre><code class="language-java"> ... </code></pre>
1273
- elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code":
1274
- return self._transform_code_block(child[0])
1275
-
1276
- # <span data-emoji-shortname="..." data-emoji-unicode="...">...</span>
1277
- elif child.tag == "span" and child.attrib.has_key("data-emoji-shortname"):
1278
- return self._transform_emoji(child)
1279
-
1280
- # <div class="arithmatex">...</div>
1281
- elif child.tag == "div" and "arithmatex" in child.attrib.get("class", "").split(" "):
1282
- return self._transform_block_math(child)
1315
+ # <span>...</span>
1316
+ elif child.tag == "span":
1317
+ classes = child.attrib.get("class", "").split(" ")
1283
1318
 
1284
- # <span class="arithmatex">...</span>
1285
- elif child.tag == "span" and "arithmatex" in child.attrib.get("class", "").split(" "):
1286
- return self._transform_inline_math(child)
1319
+ # <span class="arithmatex">...</span>
1320
+ if "arithmatex" in classes:
1321
+ return self._transform_inline_math(child)
1287
1322
 
1288
1323
  # <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
1289
1324
  elif child.tag == "sup" and child.attrib.get("id", "").startswith("fnref:"):
1290
1325
  self._transform_footnote_ref(child)
1291
1326
  return None
1292
1327
 
1293
- # <div class="footnote">
1294
- # <hr/>
1295
- # <ol>
1296
- # <li id="fn:NAME"><p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p></li>
1297
- # </ol>
1298
- # </div>
1299
- elif child.tag == "div" and "footnote" in child.attrib.get("class", "").split(" "):
1300
- self._transform_footnote_def(child)
1301
- return None
1328
+ # <input type="date" value="1984-01-01" />
1329
+ elif child.tag == "input" and child.attrib.get("type", "") == "date":
1330
+ return HTML("time", {"datetime": child.attrib.get("value", "")})
1331
+
1332
+ # <x-emoji data-shortname="wink" data-unicode="1f609">😉</x-emoji>
1333
+ elif child.tag == "x-emoji":
1334
+ return self._transform_emoji(child)
1302
1335
 
1303
1336
  return None
1304
1337
 
@@ -1355,10 +1388,18 @@ class ConfluenceDocument:
1355
1388
  site_metadata: ConfluenceSiteMetadata,
1356
1389
  page_metadata: ConfluencePageCollection,
1357
1390
  ) -> None:
1391
+ "Converts a single Markdown document to Confluence Storage Format."
1392
+
1358
1393
  self.options = options
1359
1394
 
1395
+ # register auxiliary URL substitutions
1396
+ lines: list[str] = []
1397
+ for data_uri, color in status_images.items():
1398
+ lines.append(f"[STATUS-{color.upper()}]: {data_uri}")
1399
+ lines.append(document.text)
1400
+
1360
1401
  # convert to HTML
1361
- html = markdown_to_html(document.text)
1402
+ html = markdown_to_html("\n".join(lines))
1362
1403
 
1363
1404
  # parse Markdown document
1364
1405
  if self.options.generated_by is not None:
@@ -1390,10 +1431,13 @@ class ConfluenceDocument:
1390
1431
  site_metadata,
1391
1432
  page_metadata,
1392
1433
  )
1393
- converter.visit(self.root)
1434
+ try:
1435
+ converter.visit(self.root)
1436
+ except DocumentError as ex:
1437
+ raise ConversionError(path) from ex
1394
1438
  self.links = converter.links
1395
1439
  self.images = converter.images
1396
- self.embedded_images = converter.embedded_images
1440
+ self.embedded_files = converter.embedded_files
1397
1441
 
1398
1442
  self.title = document.title or converter.toc.get_title()
1399
1443
  self.labels = document.tags
@@ -1433,41 +1477,3 @@ def attachment_name(ref: Union[Path, str]) -> str:
1433
1477
 
1434
1478
  parts = [replace_part(p) for p in path.parts]
1435
1479
  return Path(*parts).as_posix().replace("/", "_")
1436
-
1437
-
1438
- def elements_to_string(root: ET._Element) -> str:
1439
- xml = ET.tostring(root, encoding="utf8", method="xml").decode("utf8")
1440
- m = re.match(r"^<root\s+[^>]*>(.*)</root>\s*$", xml, re.DOTALL)
1441
- if m:
1442
- return m.group(1)
1443
- else:
1444
- raise ValueError("expected: Confluence content")
1445
-
1446
-
1447
- def _content_to_string(dtd_path: Path, content: str) -> str:
1448
- parser = ET.XMLParser(
1449
- remove_blank_text=True,
1450
- remove_comments=True,
1451
- strip_cdata=False,
1452
- load_dtd=True,
1453
- )
1454
-
1455
- ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in namespaces.items())
1456
-
1457
- data = [
1458
- '<?xml version="1.0"?>',
1459
- f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path.as_posix()}"><root{ns_attr_list}>',
1460
- ]
1461
- data.append(content)
1462
- data.append("</root>")
1463
-
1464
- tree = ET.fromstringlist(data, parser=parser)
1465
- return ET.tostring(tree, pretty_print=True).decode("utf-8")
1466
-
1467
-
1468
- def content_to_string(content: str) -> str:
1469
- "Converts a Confluence Storage Format document returned by the API into a readable XML document."
1470
-
1471
- resource_path = resources.files(__package__).joinpath("entities.dtd")
1472
- with resources.as_file(resource_path) as dtd_path:
1473
- return _content_to_string(dtd_path, content)