markdown-to-confluence 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/converter.py CHANGED
@@ -6,61 +6,57 @@ Copyright 2022-2025, Levente Hunyadi
6
6
  :see: https://github.com/hunyadi/md2conf
7
7
  """
8
8
 
9
- # mypy: disable-error-code="dict-item"
10
-
9
+ import dataclasses
11
10
  import hashlib
12
- import importlib.resources as resources
13
11
  import logging
14
12
  import os.path
15
13
  import re
16
14
  import uuid
17
- import xml.etree.ElementTree
15
+ from abc import ABC, abstractmethod
18
16
  from dataclasses import dataclass
19
17
  from pathlib import Path
20
18
  from typing import Any, Literal, Optional, Union
21
19
  from urllib.parse import ParseResult, quote_plus, urlparse, urlunparse
22
20
 
23
21
  import lxml.etree as ET
24
- import markdown
25
- from lxml.builder import ElementMaker
26
22
  from strong_typing.core import JsonType
27
23
 
28
- from md2conf.drawio import extract_diagram
29
-
24
+ from . import drawio, mermaid
30
25
  from .collection import ConfluencePageCollection
31
- from .extra import path_relative_to
32
- from .mermaid import render_diagram
26
+ from .csf import AC_ATTR, AC_ELEM, HTML, RI_ATTR, RI_ELEM, ParseError, elements_from_strings, elements_to_string
27
+ from .domain import ConfluenceDocumentOptions, ConfluencePageID
28
+ from .extra import override, path_relative_to
29
+ from .markdown import markdown_to_html
33
30
  from .metadata import ConfluenceSiteMetadata
34
31
  from .properties import PageError
35
32
  from .scanner import ScannedDocument, Scanner
36
-
37
- namespaces = {
38
- "ac": "http://atlassian.com/content",
39
- "ri": "http://atlassian.com/resource/identifier",
40
- }
41
- for key, value in namespaces.items():
42
- ET.register_namespace(key, value)
33
+ from .toc import TableOfContentsBuilder
34
+ from .uri import is_absolute_url, to_uuid_urn
35
+ from .xml import element_to_text
43
36
 
44
37
 
45
- def get_volatile_attributes() -> list[ET.QName]:
38
+ def get_volatile_attributes() -> list[str]:
46
39
  "Returns a list of volatile attributes that frequently change as a Confluence storage format XHTML document is updated."
47
40
 
48
41
  return [
49
- ET.QName(namespaces["ac"], "local-id"),
50
- ET.QName(namespaces["ac"], "macro-id"),
51
- ET.QName(namespaces["ri"], "version-at-save"),
42
+ AC_ATTR("local-id"),
43
+ AC_ATTR("macro-id"),
44
+ RI_ATTR("version-at-save"),
52
45
  ]
53
46
 
54
47
 
55
- HTML = ElementMaker()
56
- AC = ElementMaker(namespace=namespaces["ac"])
57
- RI = ElementMaker(namespace=namespaces["ri"])
48
+ def get_volatile_elements() -> list[str]:
49
+ "Returns a list of volatile elements whose content frequently changes as a Confluence storage format XHTML document is updated."
50
+
51
+ return [AC_ATTR("task-uuid")]
58
52
 
59
- LOGGER = logging.getLogger(__name__)
60
53
 
54
+ status_images: dict[str, str] = {
55
+ to_uuid_urn(f'<svg height="10" width="10" xmlns="http://www.w3.org/2000/svg"><circle r="5" cx="5" cy="5" fill="{color}" /></svg>'): color
56
+ for color in ["gray", "purple", "blue", "red", "yellow", "green"]
57
+ }
61
58
 
62
- class ParseError(RuntimeError):
63
- pass
59
+ LOGGER = logging.getLogger(__name__)
64
60
 
65
61
 
66
62
  def starts_with_any(text: str, prefixes: list[str]) -> bool:
@@ -72,16 +68,6 @@ def starts_with_any(text: str, prefixes: list[str]) -> bool:
72
68
  return False
73
69
 
74
70
 
75
- def is_absolute_url(url: str) -> bool:
76
- urlparts = urlparse(url)
77
- return bool(urlparts.scheme) or bool(urlparts.netloc)
78
-
79
-
80
- def is_relative_url(url: str) -> bool:
81
- urlparts = urlparse(url)
82
- return not bool(urlparts.scheme) and not bool(urlparts.netloc)
83
-
84
-
85
71
  def is_directory_within(absolute_path: Path, base_path: Path) -> bool:
86
72
  "True if the absolute path is nested within the base path."
87
73
 
@@ -101,134 +87,8 @@ def encode_title(text: str) -> str:
101
87
  return quote_plus(text.strip())
102
88
 
103
89
 
104
- def emoji_generator(
105
- index: str,
106
- shortname: str,
107
- alias: Optional[str],
108
- uc: Optional[str],
109
- alt: str,
110
- title: Optional[str],
111
- category: Optional[str],
112
- options: dict[str, Any],
113
- md: markdown.Markdown,
114
- ) -> xml.etree.ElementTree.Element:
115
- """
116
- Custom generator for `pymdownx.emoji`.
117
- """
118
-
119
- name = (alias or shortname).strip(":")
120
- span = xml.etree.ElementTree.Element("span", {"data-emoji-shortname": name})
121
- if uc is not None:
122
- span.attrib["data-emoji-unicode"] = uc
123
-
124
- # convert series of Unicode code point hexadecimal values into characters
125
- span.text = "".join(chr(int(item, base=16)) for item in uc.split("-"))
126
- else:
127
- span.text = alt
128
- return span
129
-
130
-
131
- def math_formatter(
132
- source: str,
133
- language: str,
134
- css_class: str,
135
- options: dict[str, Any],
136
- md: markdown.Markdown,
137
- classes: Optional[list[str]] = None,
138
- id_value: str = "",
139
- attrs: Optional[dict[str, str]] = None,
140
- **kwargs: Any,
141
- ) -> str:
142
- """
143
- Custom formatter for language `math` in `pymdownx.superfences`.
144
- """
145
-
146
- if classes is None:
147
- classes = [css_class]
148
- else:
149
- classes.insert(0, css_class)
150
-
151
- html_id = f' id="{id_value}"' if id_value else ""
152
- html_class = ' class="{}"'.format(" ".join(classes))
153
- html_attrs = " " + " ".join(f'{k}="{v}"' for k, v in attrs.items()) if attrs else ""
154
-
155
- return f"<div{html_id}{html_class}{html_attrs}>{source}</div>"
156
-
157
-
158
- def markdown_to_html(content: str) -> str:
159
- return markdown.markdown(
160
- content,
161
- extensions=[
162
- "admonition",
163
- "footnotes",
164
- "markdown.extensions.tables",
165
- "md_in_html",
166
- "pymdownx.arithmatex",
167
- "pymdownx.emoji",
168
- "pymdownx.highlight", # required by `pymdownx.superfences`
169
- "pymdownx.magiclink",
170
- "pymdownx.superfences",
171
- "pymdownx.tilde",
172
- "sane_lists",
173
- ],
174
- extension_configs={
175
- "footnotes": {"BACKLINK_TITLE": ""},
176
- "pymdownx.arithmatex": {"generic": True, "preview": False, "tex_inline_wrap": ["", ""], "tex_block_wrap": ["", ""]},
177
- "pymdownx.emoji": {
178
- "emoji_generator": emoji_generator,
179
- },
180
- "pymdownx.highlight": {
181
- "use_pygments": False,
182
- },
183
- "pymdownx.superfences": {"custom_fences": [{"name": "math", "class": "arithmatex", "format": math_formatter}]},
184
- },
185
- )
186
-
187
-
188
- def _elements_from_strings(dtd_path: Path, items: list[str]) -> ET._Element:
189
- """
190
- Creates a fragment of several XML nodes from their string representation wrapped in a root element.
191
-
192
- :param dtd_path: Path to a DTD document that defines entities like &cent; or &copy;.
193
- :param items: Strings to parse into XML fragments.
194
- :returns: An XML document as an element tree.
195
- """
196
-
197
- parser = ET.XMLParser(
198
- remove_blank_text=True,
199
- remove_comments=True,
200
- strip_cdata=False,
201
- load_dtd=True,
202
- )
203
-
204
- ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in namespaces.items())
205
-
206
- data = [
207
- '<?xml version="1.0"?>',
208
- f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path.as_posix()}"><root{ns_attr_list}>',
209
- ]
210
- data.extend(items)
211
- data.append("</root>")
212
-
213
- try:
214
- return ET.fromstringlist(data, parser=parser)
215
- except ET.XMLSyntaxError as ex:
216
- raise ParseError() from ex
217
-
218
-
219
- def elements_from_strings(items: list[str]) -> ET._Element:
220
- "Creates a fragment of several XML nodes from their string representation wrapped in a root element."
221
-
222
- resource_path = resources.files(__package__).joinpath("entities.dtd")
223
- with resources.as_file(resource_path) as dtd_path:
224
- return _elements_from_strings(dtd_path, items)
225
-
226
-
227
- def elements_from_string(content: str) -> ET._Element:
228
- return elements_from_strings([content])
229
-
230
-
231
- _languages = [
90
+ # supported code block languages, for which syntax highlighting is available
91
+ _LANGUAGES = [
232
92
  "abap",
233
93
  "actionscript3",
234
94
  "ada",
@@ -310,7 +170,7 @@ _languages = [
310
170
  ]
311
171
 
312
172
 
313
- class NodeVisitor:
173
+ class NodeVisitor(ABC):
314
174
  def visit(self, node: ET._Element) -> None:
315
175
  "Recursively visits all descendants of this node."
316
176
 
@@ -325,73 +185,42 @@ class NodeVisitor:
325
185
  else:
326
186
  self.visit(source)
327
187
 
328
- def transform(self, child: ET._Element) -> Optional[ET._Element]:
329
- pass
188
+ @abstractmethod
189
+ def transform(self, child: ET._Element) -> Optional[ET._Element]: ...
330
190
 
331
191
 
332
192
  def title_to_identifier(title: str) -> str:
333
193
  "Converts a section heading title to a GitHub-style Markdown same-page anchor."
334
194
 
335
195
  s = title.strip().lower()
336
- s = re.sub("[^ A-Za-z0-9]", "", s)
337
- s = s.replace(" ", "-")
196
+ s = re.sub(r"[^\sA-Za-z0-9_\-]", "", s)
197
+ s = re.sub(r"\s+", "-", s)
338
198
  return s
339
199
 
340
200
 
341
- def element_to_text(node: ET._Element) -> str:
342
- "Returns all text contained in an element as a concatenated string."
201
+ def element_text_starts_with_any(node: ET._Element, prefixes: list[str]) -> bool:
202
+ "True if the text contained in an element starts with any of the specified prefix strings."
343
203
 
344
- return "".join(node.itertext()).strip()
204
+ if node.text is None:
205
+ return False
206
+ return starts_with_any(node.text, prefixes)
345
207
 
346
208
 
347
209
  @dataclass
348
210
  class ImageAttributes:
211
+ """
212
+ Attributes applied to an `<img>` element.
213
+
214
+ :param caption: Caption text (`alt` attribute).
215
+ :param width: Natural image width in pixels.
216
+ :param height: Natural image height in pixels.
217
+ """
218
+
349
219
  caption: Optional[str]
350
220
  width: Optional[str]
351
221
  height: Optional[str]
352
222
 
353
223
 
354
- @dataclass
355
- class TableOfContentsEntry:
356
- level: int
357
- text: str
358
-
359
-
360
- class TableOfContents:
361
- "Builds a table of contents from Markdown headings."
362
-
363
- headings: list[TableOfContentsEntry]
364
-
365
- def __init__(self) -> None:
366
- self.headings = []
367
-
368
- def add(self, level: int, text: str) -> None:
369
- """
370
- Adds a heading to the table of contents.
371
-
372
- :param level: Markdown heading level (e.g. `1` for first-level heading).
373
- :param text: Markdown heading text.
374
- """
375
-
376
- self.headings.append(TableOfContentsEntry(level, text))
377
-
378
- def get_title(self) -> Optional[str]:
379
- """
380
- Returns a proposed document title (if unique).
381
-
382
- :returns: Title text, or `None` if no unique title can be inferred.
383
- """
384
-
385
- for level in range(1, 7):
386
- try:
387
- (title,) = (item.text for item in self.headings if item.level == level)
388
- return title
389
- except ValueError:
390
- pass
391
-
392
- return None
393
-
394
-
395
224
  @dataclass
396
225
  class ConfluenceConverterOptions:
397
226
  """
@@ -424,10 +253,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
424
253
  path: Path
425
254
  base_dir: Path
426
255
  root_dir: Path
427
- toc: TableOfContents
256
+ toc: TableOfContentsBuilder
428
257
  links: list[str]
429
258
  images: list[Path]
430
- embedded_images: dict[str, bytes]
259
+ embedded_files: dict[str, bytes]
431
260
  site_metadata: ConfluenceSiteMetadata
432
261
  page_metadata: ConfluencePageCollection
433
262
 
@@ -448,10 +277,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
448
277
  self.path = path
449
278
  self.base_dir = path.parent
450
279
  self.root_dir = root_dir
451
- self.toc = TableOfContents()
280
+ self.toc = TableOfContentsBuilder()
452
281
  self.links = []
453
282
  self.images = []
454
- self.embedded_images = {}
283
+ self.embedded_files = {}
455
284
  self.site_metadata = site_metadata
456
285
  self.page_metadata = page_metadata
457
286
 
@@ -461,15 +290,15 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
461
290
  for e in heading:
462
291
  self.visit(e)
463
292
 
464
- anchor = AC(
293
+ anchor = AC_ELEM(
465
294
  "structured-macro",
466
295
  {
467
- ET.QName(namespaces["ac"], "name"): "anchor",
468
- ET.QName(namespaces["ac"], "schema-version"): "1",
296
+ AC_ATTR("name"): "anchor",
297
+ AC_ATTR("schema-version"): "1",
469
298
  },
470
- AC(
299
+ AC_ELEM(
471
300
  "parameter",
472
- {ET.QName(namespaces["ac"], "name"): ""},
301
+ {AC_ATTR("name"): ""},
473
302
  title_to_identifier(element_to_text(heading)),
474
303
  ),
475
304
  )
@@ -480,7 +309,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
480
309
  heading.text = None
481
310
 
482
311
  def _warn_or_raise(self, msg: str) -> None:
483
- "Emit a warning or raise an exception when a path points to a resource that doesn't exist."
312
+ "Emit a warning or raise an exception when a path points to a resource that doesn't exist or is outside of the permitted hierarchy."
484
313
 
485
314
  if self.options.ignore_invalid_url:
486
315
  LOGGER.warning(msg)
@@ -508,12 +337,12 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
508
337
  if self.options.heading_anchors:
509
338
  # <ac:link ac:anchor="anchor"><ac:link-body>...</ac:link-body></ac:link>
510
339
  target = relative_url.fragment.lstrip("#")
511
- link_body = AC("link-body", {}, *list(anchor))
340
+ link_body = AC_ELEM("link-body", {}, *list(anchor))
512
341
  link_body.text = anchor.text
513
- link_wrapper = AC(
342
+ link_wrapper = AC_ELEM(
514
343
  "link",
515
344
  {
516
- ET.QName(namespaces["ac"], "anchor"): target,
345
+ AC_ATTR("anchor"): target,
517
346
  },
518
347
  link_body,
519
348
  )
@@ -569,15 +398,38 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
569
398
  anchor.attrib["href"] = transformed_url
570
399
  return None
571
400
 
401
+ def _transform_status(self, color: str, caption: str) -> ET._Element:
402
+ macro_id = str(uuid.uuid4())
403
+ return AC_ELEM(
404
+ "structured-macro",
405
+ {
406
+ AC_ATTR("name"): "status",
407
+ AC_ATTR("schema-version"): "1",
408
+ AC_ATTR("macro-id"): macro_id,
409
+ },
410
+ AC_ELEM(
411
+ "parameter",
412
+ {AC_ATTR("name"): "colour"},
413
+ color.title(),
414
+ ),
415
+ AC_ELEM(
416
+ "parameter",
417
+ {AC_ATTR("name"): "title"},
418
+ caption,
419
+ ),
420
+ )
421
+
572
422
  def _transform_image(self, image: ET._Element) -> ET._Element:
573
423
  "Inserts an attached or external image."
574
424
 
575
425
  src = image.attrib.get("src")
576
-
577
426
  if not src:
578
427
  raise DocumentError("image lacks `src` attribute")
579
428
 
580
429
  caption = image.attrib.get("alt")
430
+ if caption is not None and src.startswith("urn:uuid:") and (color := status_images.get(src)) is not None:
431
+ return self._transform_status(color, caption)
432
+
581
433
  width = image.attrib.get("width")
582
434
  height = image.attrib.get("height")
583
435
  attrs = ImageAttributes(caption, width, height)
@@ -594,9 +446,9 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
594
446
  if absolute_path.name.endswith(".drawio.png") or absolute_path.name.endswith(".drawio.svg"):
595
447
  return self._transform_drawio_image(absolute_path, attrs)
596
448
  elif absolute_path.name.endswith(".drawio.xml") or absolute_path.name.endswith(".drawio"):
597
- self.images.append(absolute_path)
598
- image_filename = attachment_name(path_relative_to(absolute_path, self.base_dir))
599
- return self._create_drawio(image_filename, attrs)
449
+ return self._transform_drawio(absolute_path, attrs)
450
+ elif absolute_path.name.endswith(".mmd") or absolute_path.name.endswith(".mermaid"):
451
+ return self._transform_external_mermaid(absolute_path, attrs)
600
452
  else:
601
453
  return self._transform_attached_image(absolute_path, attrs)
602
454
 
@@ -604,26 +456,26 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
604
456
  "Emits Confluence Storage Format XHTML for an external image."
605
457
 
606
458
  attributes: dict[str, Any] = {
607
- ET.QName(namespaces["ac"], "align"): "center",
608
- ET.QName(namespaces["ac"], "layout"): "center",
459
+ AC_ATTR("align"): "center",
460
+ AC_ATTR("layout"): "center",
609
461
  }
610
462
  if attrs.width is not None:
611
- attributes.update({ET.QName(namespaces["ac"], "width"): attrs.width})
463
+ attributes.update({AC_ATTR("width"): attrs.width})
612
464
  if attrs.height is not None:
613
- attributes.update({ET.QName(namespaces["ac"], "height"): attrs.height})
465
+ attributes.update({AC_ATTR("height"): attrs.height})
614
466
 
615
467
  elements: list[ET._Element] = []
616
468
  elements.append(
617
- RI(
469
+ RI_ELEM(
618
470
  "url",
619
471
  # refers to an external image
620
- {ET.QName(namespaces["ri"], "value"): url},
472
+ {RI_ATTR("value"): url},
621
473
  )
622
474
  )
623
475
  if attrs.caption is not None:
624
- elements.append(AC("caption", HTML.p(attrs.caption)))
476
+ elements.append(AC_ELEM("caption", HTML.p(attrs.caption)))
625
477
 
626
- return AC("image", attributes, *elements)
478
+ return AC_ELEM("image", attributes, *elements)
627
479
 
628
480
  def _verify_image_path(self, path: Path) -> Optional[Path]:
629
481
  "Checks whether an image path is safe to use."
@@ -651,10 +503,28 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
651
503
  absolute_path = png_file
652
504
 
653
505
  self.images.append(absolute_path)
654
- return self._create_image(absolute_path, attrs)
506
+ image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
507
+ return self._create_attached_image(image_name, attrs)
508
+
509
+ def _transform_drawio(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
510
+ "Emits Confluence Storage Format XHTML for a draw.io diagram."
511
+
512
+ if not absolute_path.name.endswith(".drawio.xml") and not absolute_path.name.endswith(".drawio"):
513
+ raise DocumentError("invalid image format; expected: `*.drawio.xml` or `*.drawio`")
514
+
515
+ if self.options.render_drawio:
516
+ image_data = drawio.render_diagram(absolute_path, self.options.diagram_output_format)
517
+ image_hash = hashlib.md5(image_data).hexdigest()
518
+ image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
519
+ self.embedded_files[image_filename] = image_data
520
+ return self._create_attached_image(image_filename, attrs)
521
+ else:
522
+ self.images.append(absolute_path)
523
+ image_filename = attachment_name(path_relative_to(absolute_path, self.base_dir))
524
+ return self._create_drawio(image_filename, attrs)
655
525
 
656
526
  def _transform_drawio_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
657
- "Emits Confluence Storage Format XHTML for a draw.io image."
527
+ "Emits Confluence Storage Format XHTML for a draw.io diagram embedded in a PNG or SVG image."
658
528
 
659
529
  if not absolute_path.name.endswith(".drawio.png") and not absolute_path.name.endswith(".drawio.svg"):
660
530
  raise DocumentError("invalid image format; expected: `*.drawio.png` or `*.drawio.svg`")
@@ -663,76 +533,74 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
663
533
  return self._transform_attached_image(absolute_path, attrs)
664
534
  else:
665
535
  # extract embedded editable diagram and upload as *.drawio
666
- image_data = extract_diagram(absolute_path)
536
+ image_data = drawio.extract_diagram(absolute_path)
667
537
  image_filename = attachment_name(path_relative_to(absolute_path.with_suffix(".xml"), self.base_dir))
668
- self.embedded_images[image_filename] = image_data
538
+ self.embedded_files[image_filename] = image_data
669
539
 
670
540
  return self._create_drawio(image_filename, attrs)
671
541
 
672
- def _create_image(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
542
+ def _create_attached_image(self, image_name: str, attrs: ImageAttributes) -> ET._Element:
673
543
  "An image embedded into the page, linking to an attachment."
674
544
 
675
- image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
676
-
677
545
  attributes: dict[str, Any] = {
678
- ET.QName(namespaces["ac"], "align"): "center",
679
- ET.QName(namespaces["ac"], "layout"): "center",
546
+ AC_ATTR("align"): "center",
547
+ AC_ATTR("layout"): "center",
680
548
  }
681
549
  if attrs.width is not None:
682
- attributes.update({ET.QName(namespaces["ac"], "width"): attrs.width})
550
+ attributes.update({AC_ATTR("width"): attrs.width})
683
551
  if attrs.height is not None:
684
- attributes.update({ET.QName(namespaces["ac"], "height"): attrs.height})
552
+ attributes.update({AC_ATTR("height"): attrs.height})
685
553
 
686
554
  elements: list[ET._Element] = []
687
555
  elements.append(
688
- RI(
556
+ RI_ELEM(
689
557
  "attachment",
690
558
  # refers to an attachment uploaded alongside the page
691
- {ET.QName(namespaces["ri"], "filename"): image_name},
559
+ {RI_ATTR("filename"): image_name},
692
560
  )
693
561
  )
694
562
  if attrs.caption is not None:
695
- elements.append(AC("caption", HTML.p(attrs.caption)))
563
+ elements.append(AC_ELEM("caption", HTML.p(attrs.caption)))
696
564
 
697
- return AC("image", attributes, *elements)
565
+ return AC_ELEM("image", attributes, *elements)
698
566
 
699
567
  def _create_drawio(self, filename: str, attrs: ImageAttributes) -> ET._Element:
700
568
  "A draw.io diagram embedded into the page, linking to an attachment."
701
569
 
702
570
  parameters: list[ET._Element] = [
703
- AC(
571
+ AC_ELEM(
704
572
  "parameter",
705
- {ET.QName(namespaces["ac"], "name"): "diagramName"},
573
+ {AC_ATTR("name"): "diagramName"},
706
574
  filename,
707
575
  ),
708
576
  ]
709
577
  if attrs.width is not None:
710
578
  parameters.append(
711
- AC(
579
+ AC_ELEM(
712
580
  "parameter",
713
- {ET.QName(namespaces["ac"], "name"): "width"},
581
+ {AC_ATTR("name"): "width"},
714
582
  attrs.width,
715
583
  ),
716
584
  )
717
585
  if attrs.height is not None:
718
586
  parameters.append(
719
- AC(
587
+ AC_ELEM(
720
588
  "parameter",
721
- {ET.QName(namespaces["ac"], "name"): "height"},
589
+ {AC_ATTR("name"): "height"},
722
590
  attrs.height,
723
591
  ),
724
592
  )
725
593
 
726
594
  local_id = str(uuid.uuid4())
727
595
  macro_id = str(uuid.uuid4())
728
- return AC(
596
+ return AC_ELEM(
729
597
  "structured-macro",
730
598
  {
731
- ET.QName(namespaces["ac"], "name"): "drawio",
732
- ET.QName(namespaces["ac"], "schema-version"): "1",
599
+ AC_ATTR("name"): "drawio",
600
+ AC_ATTR("schema-version"): "1",
733
601
  "data-layout": "default",
734
- ET.QName(namespaces["ac"], "local-id"): local_id,
735
- ET.QName(namespaces["ac"], "macro-id"): macro_id,
602
+ AC_ATTR("local-id"): local_id,
603
+ AC_ATTR("macro-id"): macro_id,
736
604
  },
737
605
  *parameters,
738
606
  )
@@ -743,21 +611,21 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
743
611
  message = HTML.p("Missing image: ", HTML.code(path.as_posix()))
744
612
  if caption is not None:
745
613
  content = [
746
- AC(
614
+ AC_ELEM(
747
615
  "parameter",
748
- {ET.QName(namespaces["ac"], "name"): "title"},
616
+ {AC_ATTR("name"): "title"},
749
617
  caption,
750
618
  ),
751
- AC("rich-text-body", {}, message),
619
+ AC_ELEM("rich-text-body", {}, message),
752
620
  ]
753
621
  else:
754
- content = [AC("rich-text-body", {}, message)]
622
+ content = [AC_ELEM("rich-text-body", {}, message)]
755
623
 
756
- return AC(
624
+ return AC_ELEM(
757
625
  "structured-macro",
758
626
  {
759
- ET.QName(namespaces["ac"], "name"): "warning",
760
- ET.QName(namespaces["ac"], "schema-version"): "1",
627
+ AC_ATTR("name"): "warning",
628
+ AC_ATTR("schema-version"): "1",
761
629
  },
762
630
  *content,
763
631
  )
@@ -772,110 +640,125 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
772
640
  language = m.group(1)
773
641
  else:
774
642
  language = "none"
775
- if language not in _languages:
643
+ if language not in _LANGUAGES:
776
644
  language = "none"
777
645
  content: str = code.text or ""
778
646
  content = content.rstrip()
779
647
 
780
648
  if language == "mermaid":
781
- return self._transform_mermaid(content)
649
+ return self._transform_inline_mermaid(content)
782
650
 
783
- return AC(
651
+ return AC_ELEM(
784
652
  "structured-macro",
785
653
  {
786
- ET.QName(namespaces["ac"], "name"): "code",
787
- ET.QName(namespaces["ac"], "schema-version"): "1",
654
+ AC_ATTR("name"): "code",
655
+ AC_ATTR("schema-version"): "1",
788
656
  },
789
- AC(
657
+ AC_ELEM(
790
658
  "parameter",
791
- {ET.QName(namespaces["ac"], "name"): "theme"},
659
+ {AC_ATTR("name"): "theme"},
792
660
  "Default",
793
661
  ),
794
- AC(
662
+ AC_ELEM(
795
663
  "parameter",
796
- {ET.QName(namespaces["ac"], "name"): "language"},
664
+ {AC_ATTR("name"): "language"},
797
665
  language,
798
666
  ),
799
- AC("plain-text-body", ET.CDATA(content)),
667
+ AC_ELEM("plain-text-body", ET.CDATA(content)),
800
668
  )
801
669
 
802
- def _transform_mermaid(self, content: str) -> ET._Element:
803
- "Transforms a Mermaid diagram code block."
670
+ def _transform_external_mermaid(self, absolute_path: Path, attrs: ImageAttributes) -> ET._Element:
671
+ "Emits Confluence Storage Format XHTML for a Mermaid diagram read from an external file."
672
+
673
+ if not absolute_path.name.endswith(".mmd") and not absolute_path.name.endswith(".mermaid"):
674
+ raise DocumentError("invalid image format; expected: `*.mmd` or `*.mermaid`")
804
675
 
805
676
  if self.options.render_mermaid:
806
- image_data = render_diagram(content, self.options.diagram_output_format)
807
- image_hash = hashlib.md5(image_data).hexdigest()
808
- image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
809
- self.embedded_images[image_filename] = image_data
810
- return AC(
811
- "image",
812
- {
813
- ET.QName(namespaces["ac"], "align"): "center",
814
- ET.QName(namespaces["ac"], "layout"): "center",
815
- },
816
- RI(
817
- "attachment",
818
- {ET.QName(namespaces["ri"], "filename"): image_filename},
819
- ),
820
- )
677
+ with open(absolute_path, "r", encoding="utf-8") as f:
678
+ content = f.read()
679
+ return self._create_mermaid_image(content, attrs)
821
680
  else:
822
- local_id = str(uuid.uuid4())
823
- macro_id = str(uuid.uuid4())
824
- return AC(
825
- "structured-macro",
826
- {
827
- ET.QName(namespaces["ac"], "name"): "macro-diagram",
828
- ET.QName(namespaces["ac"], "schema-version"): "1",
829
- "data-layout": "default",
830
- ET.QName(namespaces["ac"], "local-id"): local_id,
831
- ET.QName(namespaces["ac"], "macro-id"): macro_id,
832
- },
833
- AC(
834
- "parameter",
835
- {ET.QName(namespaces["ac"], "name"): "sourceType"},
836
- "MacroBody",
837
- ),
838
- AC(
839
- "parameter",
840
- {ET.QName(namespaces["ac"], "name"): "attachmentPageId"},
841
- ),
842
- AC(
843
- "parameter",
844
- {ET.QName(namespaces["ac"], "name"): "syntax"},
845
- "Mermaid",
846
- ),
847
- AC(
848
- "parameter",
849
- {ET.QName(namespaces["ac"], "name"): "attachmentId"},
850
- ),
851
- AC("parameter", {ET.QName(namespaces["ac"], "name"): "url"}),
852
- AC("plain-text-body", ET.CDATA(content)),
853
- )
681
+ self.images.append(absolute_path)
682
+ mermaid_filename = attachment_name(path_relative_to(absolute_path, self.base_dir))
683
+ return self._create_mermaid_embed(mermaid_filename)
684
+
685
+ def _transform_inline_mermaid(self, content: str) -> ET._Element:
686
+ "Emits Confluence Storage Format XHTML for a Mermaid diagram defined in a code block."
687
+
688
+ if self.options.render_mermaid:
689
+ return self._create_mermaid_image(content, ImageAttributes(None, None, None))
690
+ else:
691
+ mermaid_data = content.encode("utf-8")
692
+ mermaid_hash = hashlib.md5(mermaid_data).hexdigest()
693
+ mermaid_filename = attachment_name(f"embedded_{mermaid_hash}.mmd")
694
+ self.embedded_files[mermaid_filename] = mermaid_data
695
+ return self._create_mermaid_embed(mermaid_filename)
696
+
697
+ def _create_mermaid_image(self, content: str, attrs: ImageAttributes) -> ET._Element:
698
+ "A rendered Mermaid diagram, linking to an attachment uploaded as an image."
699
+
700
+ image_data = mermaid.render_diagram(content, self.options.diagram_output_format)
701
+ image_hash = hashlib.md5(image_data).hexdigest()
702
+ image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
703
+ self.embedded_files[image_filename] = image_data
704
+ return self._create_attached_image(image_filename, attrs)
705
+
706
+ def _create_mermaid_embed(self, filename: str) -> ET._Element:
707
+ "A Mermaid diagram, linking to an attachment that captures the Mermaid source."
708
+
709
+ local_id = str(uuid.uuid4())
710
+ macro_id = str(uuid.uuid4())
711
+ return AC_ELEM(
712
+ "structured-macro",
713
+ {
714
+ AC_ATTR("name"): "mermaid-cloud",
715
+ AC_ATTR("schema-version"): "1",
716
+ "data-layout": "default",
717
+ AC_ATTR("local-id"): local_id,
718
+ AC_ATTR("macro-id"): macro_id,
719
+ },
720
+ AC_ELEM(
721
+ "parameter",
722
+ {AC_ATTR("name"): "filename"},
723
+ filename,
724
+ ),
725
+ AC_ELEM(
726
+ "parameter",
727
+ {AC_ATTR("name"): "toolbar"},
728
+ "bottom",
729
+ ),
730
+ AC_ELEM(
731
+ "parameter",
732
+ {AC_ATTR("name"): "zoom"},
733
+ "fit",
734
+ ),
735
+ AC_ELEM("parameter", {AC_ATTR("name"): "revision"}, "1"),
736
+ )
854
737
 
855
738
  def _transform_toc(self, code: ET._Element) -> ET._Element:
856
739
  "Creates a table of contents, constructed from headings in the document."
857
740
 
858
- return AC(
741
+ return AC_ELEM(
859
742
  "structured-macro",
860
743
  {
861
- ET.QName(namespaces["ac"], "name"): "toc",
862
- ET.QName(namespaces["ac"], "schema-version"): "1",
744
+ AC_ATTR("name"): "toc",
745
+ AC_ATTR("schema-version"): "1",
863
746
  },
864
- AC("parameter", {ET.QName(namespaces["ac"], "name"): "outline"}, "clear"),
865
- AC("parameter", {ET.QName(namespaces["ac"], "name"): "style"}, "default"),
747
+ AC_ELEM("parameter", {AC_ATTR("name"): "outline"}, "clear"),
748
+ AC_ELEM("parameter", {AC_ATTR("name"): "style"}, "default"),
866
749
  )
867
750
 
868
751
  def _transform_listing(self, code: ET._Element) -> ET._Element:
869
752
  "Creates a list of child pages."
870
753
 
871
- return AC(
754
+ return AC_ELEM(
872
755
  "structured-macro",
873
756
  {
874
- ET.QName(namespaces["ac"], "name"): "children",
875
- ET.QName(namespaces["ac"], "schema-version"): "2",
757
+ AC_ATTR("name"): "children",
758
+ AC_ATTR("schema-version"): "2",
876
759
  "data-layout": "default",
877
760
  },
878
- AC("parameter", {ET.QName(namespaces["ac"], "name"): "allChildren"}, "true"),
761
+ AC_ELEM("parameter", {AC_ATTR("name"): "allChildren"}, "true"),
879
762
  )
880
763
 
881
764
  def _transform_admonition(self, elem: ET._Element) -> ET._Element:
@@ -907,21 +790,21 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
907
790
  # <p class="admonition-title">Note</p>
908
791
  if "admonition-title" in elem[0].attrib.get("class", "").split(" "):
909
792
  content = [
910
- AC(
793
+ AC_ELEM(
911
794
  "parameter",
912
- {ET.QName(namespaces["ac"], "name"): "title"},
795
+ {AC_ATTR("name"): "title"},
913
796
  elem[0].text or "",
914
797
  ),
915
- AC("rich-text-body", {}, *list(elem[1:])),
798
+ AC_ELEM("rich-text-body", {}, *list(elem[1:])),
916
799
  ]
917
800
  else:
918
- content = [AC("rich-text-body", {}, *list(elem))]
801
+ content = [AC_ELEM("rich-text-body", {}, *list(elem))]
919
802
 
920
- return AC(
803
+ return AC_ELEM(
921
804
  "structured-macro",
922
805
  {
923
- ET.QName(namespaces["ac"], "name"): class_name,
924
- ET.QName(namespaces["ac"], "schema-version"): "1",
806
+ AC_ATTR("name"): class_name,
807
+ AC_ATTR("schema-version"): "1",
925
808
  },
926
809
  *content,
927
810
  )
@@ -995,10 +878,10 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
995
878
  """
996
879
  Creates an info, tip, note or warning panel from a GitHub or GitLab alert.
997
880
 
998
- Transforms
999
- [GitHub alert](https://docs.github.com/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts)
1000
- or [GitLab alert](https://docs.gitlab.com/ee/development/documentation/styleguide/#alert-boxes)
1001
- syntax into one of the Confluence structured macros *info*, *tip*, *note*, or *warning*.
881
+ Transforms GitHub alert or GitLab alert syntax into one of the Confluence structured macros *info*, *tip*, *note*, or *warning*.
882
+
883
+ :see: https://docs.github.com/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts
884
+ :see: https://docs.gitlab.com/ee/development/documentation/styleguide/#alert-boxes
1002
885
  """
1003
886
 
1004
887
  content = elem[0]
@@ -1012,22 +895,22 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1012
895
  self.visit(e)
1013
896
 
1014
897
  content.text = content.text[skip:]
1015
- return AC(
898
+ return AC_ELEM(
1016
899
  "structured-macro",
1017
900
  {
1018
- ET.QName(namespaces["ac"], "name"): class_name,
1019
- ET.QName(namespaces["ac"], "schema-version"): "1",
901
+ AC_ATTR("name"): class_name,
902
+ AC_ATTR("schema-version"): "1",
1020
903
  },
1021
- AC("rich-text-body", {}, *list(elem)),
904
+ AC_ELEM("rich-text-body", {}, *list(elem)),
1022
905
  )
1023
906
 
1024
907
  def _transform_section(self, elem: ET._Element) -> ET._Element:
1025
908
  """
1026
909
  Creates a collapsed section.
1027
910
 
1028
- Transforms
1029
- [GitHub collapsed section](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-collapsed-sections)
1030
- syntax into the Confluence structured macro *expand*.
911
+ Transforms a GitHub collapsed section syntax into the Confluence structured macro *expand*.
912
+
913
+ :see: https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/organizing-information-with-collapsed-sections
1031
914
  """
1032
915
 
1033
916
  if elem[0].tag != "summary":
@@ -1035,23 +918,24 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1035
918
  if elem[0].tail is not None:
1036
919
  raise DocumentError('expected: attribute `markdown="1"` on `<details>`')
1037
920
 
1038
- summary = "".join(elem[0].itertext()).strip()
921
+ summary = element_to_text(elem[0])
1039
922
  elem.remove(elem[0])
1040
923
 
924
+ # transform Markdown to Confluence within collapsed section content
1041
925
  self.visit(elem)
1042
926
 
1043
- return AC(
927
+ return AC_ELEM(
1044
928
  "structured-macro",
1045
929
  {
1046
- ET.QName(namespaces["ac"], "name"): "expand",
1047
- ET.QName(namespaces["ac"], "schema-version"): "1",
930
+ AC_ATTR("name"): "expand",
931
+ AC_ATTR("schema-version"): "1",
1048
932
  },
1049
- AC(
933
+ AC_ELEM(
1050
934
  "parameter",
1051
- {ET.QName(namespaces["ac"], "name"): "title"},
935
+ {AC_ATTR("name"): "title"},
1052
936
  summary,
1053
937
  ),
1054
- AC("rich-text-body", {}, *list(elem)),
938
+ AC_ELEM("rich-text-body", {}, *list(elem)),
1055
939
  )
1056
940
 
1057
941
  def _transform_emoji(self, elem: ET._Element) -> ET._Element:
@@ -1059,20 +943,18 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1059
943
  Inserts an inline emoji character.
1060
944
  """
1061
945
 
1062
- shortname = elem.attrib.get("data-emoji-shortname", "")
1063
- unicode = elem.attrib.get("data-emoji-unicode", None)
946
+ shortname = elem.attrib.get("data-shortname", "")
947
+ unicode = elem.attrib.get("data-unicode", None)
1064
948
  alt = elem.text or ""
1065
949
 
1066
950
  # <ac:emoticon ac:name="wink" ac:emoji-shortname=":wink:" ac:emoji-id="1f609" ac:emoji-fallback="&#128521;"/>
1067
- # <ac:emoticon ac:name="blue-star" ac:emoji-shortname=":heavy_plus_sign:" ac:emoji-id="2795" ac:emoji-fallback="&#10133;"/>
1068
- # <ac:emoticon ac:name="blue-star" ac:emoji-shortname=":heavy_minus_sign:" ac:emoji-id="2796" ac:emoji-fallback="&#10134;"/>
1069
- return AC(
951
+ return AC_ELEM(
1070
952
  "emoticon",
1071
953
  {
1072
- ET.QName(namespaces["ac"], "name"): shortname,
1073
- ET.QName(namespaces["ac"], "emoji-shortname"): f":{shortname}:",
1074
- ET.QName(namespaces["ac"], "emoji-id"): unicode,
1075
- ET.QName(namespaces["ac"], "emoji-fallback"): alt,
954
+ AC_ATTR("name"): shortname,
955
+ AC_ATTR("emoji-shortname"): f":{shortname}:",
956
+ AC_ATTR("emoji-id"): unicode,
957
+ AC_ATTR("emoji-fallback"): alt,
1076
958
  },
1077
959
  )
1078
960
 
@@ -1091,20 +973,20 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1091
973
 
1092
974
  local_id = str(uuid.uuid4())
1093
975
  macro_id = str(uuid.uuid4())
1094
- macro = AC(
976
+ macro = AC_ELEM(
1095
977
  "structured-macro",
1096
978
  {
1097
- ET.QName(namespaces["ac"], "name"): "eazy-math-inline",
1098
- ET.QName(namespaces["ac"], "schema-version"): "1",
1099
- ET.QName(namespaces["ac"], "local-id"): local_id,
1100
- ET.QName(namespaces["ac"], "macro-id"): macro_id,
979
+ AC_ATTR("name"): "eazy-math-inline",
980
+ AC_ATTR("schema-version"): "1",
981
+ AC_ATTR("local-id"): local_id,
982
+ AC_ATTR("macro-id"): macro_id,
1101
983
  },
1102
- AC(
984
+ AC_ELEM(
1103
985
  "parameter",
1104
- {ET.QName(namespaces["ac"], "name"): "body"},
986
+ {AC_ATTR("name"): "body"},
1105
987
  content,
1106
988
  ),
1107
- AC("parameter", {ET.QName(namespaces["ac"], "name"): "align"}, "center"),
989
+ AC_ELEM("parameter", {AC_ATTR("name"): "align"}, "center"),
1108
990
  )
1109
991
  macro.tail = elem.tail # chain sibling text node that immediately follows original element
1110
992
  return macro
@@ -1125,21 +1007,21 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1125
1007
  local_id = str(uuid.uuid4())
1126
1008
  macro_id = str(uuid.uuid4())
1127
1009
 
1128
- return AC(
1010
+ return AC_ELEM(
1129
1011
  "structured-macro",
1130
1012
  {
1131
- ET.QName(namespaces["ac"], "name"): "easy-math-block",
1132
- ET.QName(namespaces["ac"], "schema-version"): "1",
1013
+ AC_ATTR("name"): "easy-math-block",
1014
+ AC_ATTR("schema-version"): "1",
1133
1015
  "data-layout": "default",
1134
- ET.QName(namespaces["ac"], "local-id"): local_id,
1135
- ET.QName(namespaces["ac"], "macro-id"): macro_id,
1016
+ AC_ATTR("local-id"): local_id,
1017
+ AC_ATTR("macro-id"): macro_id,
1136
1018
  },
1137
- AC(
1019
+ AC_ELEM(
1138
1020
  "parameter",
1139
- {ET.QName(namespaces["ac"], "name"): "body"},
1021
+ {AC_ATTR("name"): "body"},
1140
1022
  content,
1141
1023
  ),
1142
- AC("parameter", {ET.QName(namespaces["ac"], "name"): "align"}, "center"),
1024
+ AC_ELEM("parameter", {AC_ATTR("name"): "align"}, "center"),
1143
1025
  )
1144
1026
 
1145
1027
  def _transform_footnote_ref(self, elem: ET._Element) -> None:
@@ -1171,26 +1053,26 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1171
1053
  elem.remove(link)
1172
1054
 
1173
1055
  # build new anchor for footnote reference
1174
- ref_anchor = AC(
1056
+ ref_anchor = AC_ELEM(
1175
1057
  "structured-macro",
1176
1058
  {
1177
- ET.QName(namespaces["ac"], "name"): "anchor",
1178
- ET.QName(namespaces["ac"], "schema-version"): "1",
1059
+ AC_ATTR("name"): "anchor",
1060
+ AC_ATTR("schema-version"): "1",
1179
1061
  },
1180
- AC(
1062
+ AC_ELEM(
1181
1063
  "parameter",
1182
- {ET.QName(namespaces["ac"], "name"): ""},
1064
+ {AC_ATTR("name"): ""},
1183
1065
  f"footnote-ref-{footnote_ref}",
1184
1066
  ),
1185
1067
  )
1186
1068
 
1187
1069
  # build new link to footnote definition at the end of page
1188
- def_link = AC(
1070
+ def_link = AC_ELEM(
1189
1071
  "link",
1190
1072
  {
1191
- ET.QName(namespaces["ac"], "anchor"): f"footnote-def-{footnote_def}",
1073
+ AC_ATTR("anchor"): f"footnote-def-{footnote_def}",
1192
1074
  },
1193
- AC("link-body", ET.CDATA(text)),
1075
+ AC_ELEM("link-body", ET.CDATA(text)),
1194
1076
  )
1195
1077
 
1196
1078
  # append children synthesized for Confluence
@@ -1233,26 +1115,26 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1233
1115
  paragraph.remove(ref_anchor)
1234
1116
 
1235
1117
  # build new anchor for footnote definition
1236
- def_anchor = AC(
1118
+ def_anchor = AC_ELEM(
1237
1119
  "structured-macro",
1238
1120
  {
1239
- ET.QName(namespaces["ac"], "name"): "anchor",
1240
- ET.QName(namespaces["ac"], "schema-version"): "1",
1121
+ AC_ATTR("name"): "anchor",
1122
+ AC_ATTR("schema-version"): "1",
1241
1123
  },
1242
- AC(
1124
+ AC_ELEM(
1243
1125
  "parameter",
1244
- {ET.QName(namespaces["ac"], "name"): ""},
1126
+ {AC_ATTR("name"): ""},
1245
1127
  f"footnote-def-{footnote_def}",
1246
1128
  ),
1247
1129
  )
1248
1130
 
1249
1131
  # build new link to footnote reference in page body
1250
- ref_link = AC(
1132
+ ref_link = AC_ELEM(
1251
1133
  "link",
1252
1134
  {
1253
- ET.QName(namespaces["ac"], "anchor"): f"footnote-ref-{footnote_ref}",
1135
+ AC_ATTR("anchor"): f"footnote-ref-{footnote_ref}",
1254
1136
  },
1255
- AC("link-body", ET.CDATA("↩")),
1137
+ AC_ELEM("link-body", ET.CDATA("↩")),
1256
1138
  )
1257
1139
 
1258
1140
  # append children synthesized for Confluence
@@ -1261,6 +1143,52 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1261
1143
  paragraph.text = None
1262
1144
  paragraph.append(ref_link)
1263
1145
 
1146
+ def _transform_tasklist(self, elem: ET._Element) -> ET._Element:
1147
+ """
1148
+ Transforms a list of tasks into an action widget.
1149
+
1150
+ :see: https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-tasklists
1151
+ """
1152
+
1153
+ if elem.tag != "ul":
1154
+ raise DocumentError("expected: `<ul>` as the HTML element for a tasklist")
1155
+
1156
+ for item in elem:
1157
+ if item.tag != "li":
1158
+ raise DocumentError("expected: `<li>` as the HTML element for a task")
1159
+ if not element_text_starts_with_any(item, ["[ ]", "[x]", "[X]"]):
1160
+ raise DocumentError("expected: each `<li>` in a task list starting with [ ] or [x]")
1161
+
1162
+ # transform Markdown to Confluence within tasklist content
1163
+ self.visit(elem)
1164
+
1165
+ tasks: list[ET._Element] = []
1166
+ for index, item in enumerate(elem, start=1):
1167
+ if item.text is None:
1168
+ raise NotImplementedError("pre-condition check not exhaustive")
1169
+ match = re.match(r"^\[([x X])\]", item.text)
1170
+ if match is None:
1171
+ raise NotImplementedError("pre-condition check not exhaustive")
1172
+
1173
+ status = "incomplete" if match.group(1).isspace() else "complete"
1174
+
1175
+ body = AC_ELEM("task-body")
1176
+ body.text = item.text[3:]
1177
+ for child in item:
1178
+ body.append(child)
1179
+ tasks.append(
1180
+ AC_ELEM(
1181
+ "task",
1182
+ {},
1183
+ AC_ELEM("task-id", str(index)),
1184
+ AC_ELEM("task-uuid", str(uuid.uuid4())),
1185
+ AC_ELEM("task-status", status),
1186
+ body,
1187
+ ),
1188
+ )
1189
+ return AC_ELEM("task-list", {}, *tasks)
1190
+
1191
+ @override
1264
1192
  def transform(self, child: ET._Element) -> Optional[ET._Element]:
1265
1193
  """
1266
1194
  Transforms an HTML element tree obtained from a Markdown document into a Confluence Storage Format element tree.
@@ -1289,51 +1217,74 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1289
1217
  self._transform_heading(child)
1290
1218
  return None
1291
1219
 
1292
- # <p><img src="..." /></p>
1293
- if child.tag == "p" and len(child) == 1 and child[0].tag == "img":
1294
- return self._transform_image(child[0])
1295
-
1296
- # <p>[[_TOC_]]</p>
1297
- # <p>[TOC]</p>
1298
- elif child.tag == "p" and "".join(child.itertext()) in ["[[TOC]]", "[TOC]"]:
1299
- return self._transform_toc(child)
1300
-
1301
- # <p>[[_LISTING_]]</p>
1302
- elif child.tag == "p" and "".join(child.itertext()) in ["[[LISTING]]", "[LISTING]"]:
1303
- return self._transform_listing(child)
1304
-
1305
- # <div class="admonition note">
1306
- # <p class="admonition-title">Note</p>
1307
- # <p>...</p>
1308
- # </div>
1309
- #
1310
- # --- OR ---
1311
- #
1312
- # <div class="admonition note">
1313
1220
  # <p>...</p>
1314
- # </div>
1315
- elif child.tag == "div" and "admonition" in child.attrib.get("class", ""):
1316
- return self._transform_admonition(child)
1317
-
1318
- # Alerts in GitHub
1319
- # <blockquote>
1320
- # <p>[!TIP] ...</p>
1321
- # </blockquote>
1322
- elif child.tag == "blockquote" and len(child) > 0 and child[0].tag == "p" and child[0].text is not None and child[0].text.startswith("[!"):
1323
- return self._transform_github_alert(child)
1324
-
1325
- # Alerts in GitLab
1326
- # <blockquote>
1327
- # <p>DISCLAIMER: ...</p>
1328
- # </blockquote>
1329
- elif (
1330
- child.tag == "blockquote"
1331
- and len(child) > 0
1332
- and child[0].tag == "p"
1333
- and child[0].text is not None
1334
- and starts_with_any(child[0].text, ["FLAG:", "NOTE:", "WARNING:", "DISCLAIMER:"])
1335
- ):
1336
- return self._transform_gitlab_alert(child)
1221
+ if child.tag == "p":
1222
+ # <p><img src="..." /></p>
1223
+ if len(child) == 1 and child[0].tag == "img":
1224
+ return self._transform_image(child[0])
1225
+
1226
+ # <p>[[_TOC_]]</p> (represented as <p>[[<em>TOC</em>]]</p>)
1227
+ # <p>[TOC]</p>
1228
+ elif element_to_text(child) in ["[[TOC]]", "[TOC]"]:
1229
+ return self._transform_toc(child)
1230
+
1231
+ # <p>[[_LISTING_]]</p> (represented as <p>[[<em>LISTING</em>]]</p>)
1232
+ elif element_to_text(child) in ["[[LISTING]]", "[LISTING]"]:
1233
+ return self._transform_listing(child)
1234
+
1235
+ # <div>...</div>
1236
+ elif child.tag == "div":
1237
+ classes = child.attrib.get("class", "").split(" ")
1238
+
1239
+ # <div class="arithmatex">...</div>
1240
+ if "arithmatex" in classes:
1241
+ return self._transform_block_math(child)
1242
+
1243
+ # <div><ac:structured-macro ...>...</ac:structured-macro></div>
1244
+ elif "csf" in classes:
1245
+ if len(child) != 1:
1246
+ raise DocumentError("expected: single child in Confluence Storage Format block")
1247
+
1248
+ return child[0]
1249
+
1250
+ # <div class="footnote">
1251
+ # <hr/>
1252
+ # <ol>
1253
+ # <li id="fn:NAME"><p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p></li>
1254
+ # </ol>
1255
+ # </div>
1256
+ elif "footnote" in classes:
1257
+ self._transform_footnote_def(child)
1258
+ return None
1259
+
1260
+ # <div class="admonition note">
1261
+ # <p class="admonition-title">Note</p>
1262
+ # <p>...</p>
1263
+ # </div>
1264
+ #
1265
+ # --- OR ---
1266
+ #
1267
+ # <div class="admonition note">
1268
+ # <p>...</p>
1269
+ # </div>
1270
+ elif "admonition" in classes:
1271
+ return self._transform_admonition(child)
1272
+
1273
+ # <blockquote>...</blockquote>
1274
+ elif child.tag == "blockquote":
1275
+ # Alerts in GitHub
1276
+ # <blockquote>
1277
+ # <p>[!TIP] ...</p>
1278
+ # </blockquote>
1279
+ if len(child) > 0 and child[0].tag == "p" and child[0].text is not None and child[0].text.startswith("[!"):
1280
+ return self._transform_github_alert(child)
1281
+
1282
+ # Alerts in GitLab
1283
+ # <blockquote>
1284
+ # <p>DISCLAIMER: ...</p>
1285
+ # </blockquote>
1286
+ elif len(child) > 0 and child[0].tag == "p" and element_text_starts_with_any(child[0], ["FLAG:", "NOTE:", "WARNING:", "DISCLAIMER:"]):
1287
+ return self._transform_gitlab_alert(child)
1337
1288
 
1338
1289
  # <details markdown="1">
1339
1290
  # <summary>...</summary>
@@ -1342,6 +1293,17 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1342
1293
  elif child.tag == "details" and len(child) > 1 and child[0].tag == "summary":
1343
1294
  return self._transform_section(child)
1344
1295
 
1296
+ # <ul>
1297
+ # <li>[ ] ...</li>
1298
+ # <li>[x] ...</li>
1299
+ # </ul>
1300
+ elif child.tag == "ul" and len(child) > 0 and element_text_starts_with_any(child[0], ["[ ]", "[x]", "[X]"]):
1301
+ return self._transform_tasklist(child)
1302
+
1303
+ # <pre><code class="language-java"> ... </code></pre>
1304
+ elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code":
1305
+ return self._transform_code_block(child[0])
1306
+
1345
1307
  # <img src="..." alt="..." />
1346
1308
  elif child.tag == "img":
1347
1309
  return self._transform_image(child)
@@ -1350,36 +1312,26 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1350
1312
  elif child.tag == "a":
1351
1313
  return self._transform_link(child)
1352
1314
 
1353
- # <pre><code class="language-java"> ... </code></pre>
1354
- elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code":
1355
- return self._transform_code_block(child[0])
1315
+ # <span>...</span>
1316
+ elif child.tag == "span":
1317
+ classes = child.attrib.get("class", "").split(" ")
1356
1318
 
1357
- # <span data-emoji-shortname="..." data-emoji-unicode="...">...</span>
1358
- elif child.tag == "span" and child.attrib.has_key("data-emoji-shortname"):
1359
- return self._transform_emoji(child)
1360
-
1361
- # <div class="arithmatex">...</div>
1362
- elif child.tag == "div" and "arithmatex" in child.attrib.get("class", "").split(" "):
1363
- return self._transform_block_math(child)
1364
-
1365
- # <span class="arithmatex">...</span>
1366
- elif child.tag == "span" and "arithmatex" in child.attrib.get("class", "").split(" "):
1367
- return self._transform_inline_math(child)
1319
+ # <span class="arithmatex">...</span>
1320
+ if "arithmatex" in classes:
1321
+ return self._transform_inline_math(child)
1368
1322
 
1369
1323
  # <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
1370
1324
  elif child.tag == "sup" and child.attrib.get("id", "").startswith("fnref:"):
1371
1325
  self._transform_footnote_ref(child)
1372
1326
  return None
1373
1327
 
1374
- # <div class="footnote">
1375
- # <hr/>
1376
- # <ol>
1377
- # <li id="fn:NAME"><p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p></li>
1378
- # </ol>
1379
- # </div>
1380
- elif child.tag == "div" and "footnote" in child.attrib.get("class", "").split(" "):
1381
- self._transform_footnote_def(child)
1382
- return None
1328
+ # <input type="date" value="1984-01-01" />
1329
+ elif child.tag == "input" and child.attrib.get("type", "") == "date":
1330
+ return HTML("time", {"datetime": child.attrib.get("value", "")})
1331
+
1332
+ # <x-emoji data-shortname="wink" data-unicode="1f609">😉</x-emoji>
1333
+ elif child.tag == "x-emoji":
1334
+ return self._transform_emoji(child)
1383
1335
 
1384
1336
  return None
1385
1337
 
@@ -1388,48 +1340,6 @@ class DocumentError(RuntimeError):
1388
1340
  "Raised when a converted Markdown document has an unexpected element or attribute."
1389
1341
 
1390
1342
 
1391
- @dataclass
1392
- class ConfluencePageID:
1393
- page_id: str
1394
-
1395
-
1396
- @dataclass
1397
- class ConfluenceQualifiedID:
1398
- page_id: str
1399
- space_key: str
1400
-
1401
-
1402
- @dataclass
1403
- class ConfluenceDocumentOptions:
1404
- """
1405
- Options that control the generated page content.
1406
-
1407
- :param ignore_invalid_url: When true, ignore invalid URLs in input, emit a warning and replace the anchor with
1408
- plain text; when false, raise an exception.
1409
- :param heading_anchors: When true, emit a structured macro *anchor* for each section heading using GitHub
1410
- conversion rules for the identifier.
1411
- :param generated_by: Text to use as the generated-by prompt (or `None` to omit a prompt).
1412
- :param root_page_id: Confluence page to assume root page role for publishing a directory of Markdown files.
1413
- :param keep_hierarchy: Whether to maintain source directory structure when exporting to Confluence.
1414
- :param prefer_raster: Whether to choose PNG files over SVG files when available.
1415
- :param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
1416
- :param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
1417
- :param diagram_output_format: Target image format for diagrams.
1418
- :param webui_links: When true, convert relative URLs to Confluence Web UI links.
1419
- """
1420
-
1421
- ignore_invalid_url: bool = False
1422
- heading_anchors: bool = False
1423
- generated_by: Optional[str] = "This page has been generated with a tool."
1424
- root_page_id: Optional[ConfluencePageID] = None
1425
- keep_hierarchy: bool = False
1426
- prefer_raster: bool = True
1427
- render_drawio: bool = False
1428
- render_mermaid: bool = False
1429
- diagram_output_format: Literal["png", "svg"] = "png"
1430
- webui_links: bool = False
1431
-
1432
-
1433
1343
  class ConversionError(RuntimeError):
1434
1344
  "Raised when a Markdown document cannot be converted to Confluence Storage Format."
1435
1345
 
@@ -1478,10 +1388,18 @@ class ConfluenceDocument:
1478
1388
  site_metadata: ConfluenceSiteMetadata,
1479
1389
  page_metadata: ConfluencePageCollection,
1480
1390
  ) -> None:
1391
+ "Converts a single Markdown document to Confluence Storage Format."
1392
+
1481
1393
  self.options = options
1482
1394
 
1395
+ # register auxiliary URL substitutions
1396
+ lines: list[str] = []
1397
+ for data_uri, color in status_images.items():
1398
+ lines.append(f"[STATUS-{color.upper()}]: {data_uri}")
1399
+ lines.append(document.text)
1400
+
1483
1401
  # convert to HTML
1484
- html = markdown_to_html(document.text)
1402
+ html = markdown_to_html("\n".join(lines))
1485
1403
 
1486
1404
  # parse Markdown document
1487
1405
  if self.options.generated_by is not None:
@@ -1507,24 +1425,19 @@ class ConfluenceDocument:
1507
1425
  raise ConversionError(path) from ex
1508
1426
 
1509
1427
  converter = ConfluenceStorageFormatConverter(
1510
- ConfluenceConverterOptions(
1511
- ignore_invalid_url=self.options.ignore_invalid_url,
1512
- heading_anchors=self.options.heading_anchors,
1513
- prefer_raster=self.options.prefer_raster,
1514
- render_drawio=self.options.render_drawio,
1515
- render_mermaid=self.options.render_mermaid,
1516
- diagram_output_format=self.options.diagram_output_format,
1517
- webui_links=self.options.webui_links,
1518
- ),
1428
+ ConfluenceConverterOptions(**{field.name: getattr(self.options, field.name) for field in dataclasses.fields(ConfluenceConverterOptions)}),
1519
1429
  path,
1520
1430
  root_dir,
1521
1431
  site_metadata,
1522
1432
  page_metadata,
1523
1433
  )
1524
- converter.visit(self.root)
1434
+ try:
1435
+ converter.visit(self.root)
1436
+ except DocumentError as ex:
1437
+ raise ConversionError(path) from ex
1525
1438
  self.links = converter.links
1526
1439
  self.images = converter.images
1527
- self.embedded_images = converter.embedded_images
1440
+ self.embedded_files = converter.embedded_files
1528
1441
 
1529
1442
  self.title = document.title or converter.toc.get_title()
1530
1443
  self.labels = document.tags
@@ -1564,41 +1477,3 @@ def attachment_name(ref: Union[Path, str]) -> str:
1564
1477
 
1565
1478
  parts = [replace_part(p) for p in path.parts]
1566
1479
  return Path(*parts).as_posix().replace("/", "_")
1567
-
1568
-
1569
- def elements_to_string(root: ET._Element) -> str:
1570
- xml = ET.tostring(root, encoding="utf8", method="xml").decode("utf8")
1571
- m = re.match(r"^<root\s+[^>]*>(.*)</root>\s*$", xml, re.DOTALL)
1572
- if m:
1573
- return m.group(1)
1574
- else:
1575
- raise ValueError("expected: Confluence content")
1576
-
1577
-
1578
- def _content_to_string(dtd_path: Path, content: str) -> str:
1579
- parser = ET.XMLParser(
1580
- remove_blank_text=True,
1581
- remove_comments=True,
1582
- strip_cdata=False,
1583
- load_dtd=True,
1584
- )
1585
-
1586
- ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in namespaces.items())
1587
-
1588
- data = [
1589
- '<?xml version="1.0"?>',
1590
- f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path.as_posix()}"><root{ns_attr_list}>',
1591
- ]
1592
- data.append(content)
1593
- data.append("</root>")
1594
-
1595
- tree = ET.fromstringlist(data, parser=parser)
1596
- return ET.tostring(tree, pretty_print=True).decode("utf-8")
1597
-
1598
-
1599
- def content_to_string(content: str) -> str:
1600
- "Converts a Confluence Storage Format document returned by the API into a readable XML document."
1601
-
1602
- resource_path = resources.files(__package__).joinpath("entities.dtd")
1603
- with resources.as_file(resource_path) as dtd_path:
1604
- return _content_to_string(dtd_path, content)