markdown-to-confluence 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {markdown_to_confluence-0.5.1.dist-info → markdown_to_confluence-0.5.3.dist-info}/METADATA +160 -11
  2. markdown_to_confluence-0.5.3.dist-info/RECORD +55 -0
  3. {markdown_to_confluence-0.5.1.dist-info → markdown_to_confluence-0.5.3.dist-info}/licenses/LICENSE +1 -1
  4. md2conf/__init__.py +2 -2
  5. md2conf/__main__.py +94 -29
  6. md2conf/api.py +55 -10
  7. md2conf/attachment.py +72 -0
  8. md2conf/coalesce.py +43 -0
  9. md2conf/collection.py +1 -1
  10. md2conf/{extra.py → compatibility.py} +1 -1
  11. md2conf/converter.py +417 -590
  12. md2conf/csf.py +13 -11
  13. md2conf/drawio/__init__.py +0 -0
  14. md2conf/drawio/extension.py +116 -0
  15. md2conf/{drawio.py → drawio/render.py} +1 -1
  16. md2conf/emoticon.py +3 -3
  17. md2conf/environment.py +2 -2
  18. md2conf/extension.py +78 -0
  19. md2conf/external.py +49 -0
  20. md2conf/formatting.py +135 -0
  21. md2conf/frontmatter.py +70 -0
  22. md2conf/image.py +127 -0
  23. md2conf/latex.py +7 -186
  24. md2conf/local.py +8 -8
  25. md2conf/markdown.py +1 -1
  26. md2conf/matcher.py +1 -1
  27. md2conf/mermaid/__init__.py +0 -0
  28. md2conf/mermaid/config.py +20 -0
  29. md2conf/mermaid/extension.py +109 -0
  30. md2conf/{mermaid.py → mermaid/render.py} +10 -38
  31. md2conf/mermaid/scanner.py +55 -0
  32. md2conf/metadata.py +1 -1
  33. md2conf/options.py +116 -0
  34. md2conf/plantuml/__init__.py +0 -0
  35. md2conf/plantuml/config.py +20 -0
  36. md2conf/plantuml/extension.py +158 -0
  37. md2conf/plantuml/render.py +139 -0
  38. md2conf/plantuml/scanner.py +56 -0
  39. md2conf/png.py +202 -0
  40. md2conf/processor.py +32 -11
  41. md2conf/publisher.py +17 -18
  42. md2conf/scanner.py +31 -128
  43. md2conf/serializer.py +2 -2
  44. md2conf/svg.py +341 -0
  45. md2conf/text.py +1 -1
  46. md2conf/toc.py +1 -1
  47. md2conf/uri.py +1 -1
  48. md2conf/xml.py +1 -1
  49. markdown_to_confluence-0.5.1.dist-info/RECORD +0 -35
  50. md2conf/domain.py +0 -52
  51. {markdown_to_confluence-0.5.1.dist-info → markdown_to_confluence-0.5.3.dist-info}/WHEEL +0 -0
  52. {markdown_to_confluence-0.5.1.dist-info → markdown_to_confluence-0.5.3.dist-info}/entry_points.txt +0 -0
  53. {markdown_to_confluence-0.5.1.dist-info → markdown_to_confluence-0.5.3.dist-info}/top_level.txt +0 -0
  54. {markdown_to_confluence-0.5.1.dist-info → markdown_to_confluence-0.5.3.dist-info}/zip-safe +0 -0
md2conf/converter.py CHANGED
@@ -1,13 +1,12 @@
1
1
  """
2
2
  Publish Markdown files to Confluence wiki.
3
3
 
4
- Copyright 2022-2025, Levente Hunyadi
4
+ Copyright 2022-2026, Levente Hunyadi
5
5
 
6
6
  :see: https://github.com/hunyadi/md2conf
7
7
  """
8
8
 
9
- import dataclasses
10
- import enum
9
+ import copy
11
10
  import hashlib
12
11
  import logging
13
12
  import os.path
@@ -16,24 +15,30 @@ import uuid
16
15
  from abc import ABC, abstractmethod
17
16
  from dataclasses import dataclass
18
17
  from pathlib import Path
19
- from typing import ClassVar, Literal
18
+ from typing import ClassVar
20
19
  from urllib.parse import ParseResult, quote_plus, urlparse
21
20
 
22
21
  import lxml.etree as ET
23
- from cattrs import BaseValidationError
24
22
 
25
- from . import drawio, mermaid
23
+ from .attachment import AttachmentCatalog, EmbeddedFileData, ImageData, attachment_name
24
+ from .coalesce import coalesce
26
25
  from .collection import ConfluencePageCollection
26
+ from .compatibility import override, path_relative_to
27
27
  from .csf import AC_ATTR, AC_ELEM, HTML, RI_ATTR, RI_ELEM, ParseError, elements_from_strings, elements_to_string, normalize_inline
28
- from .domain import ConfluenceDocumentOptions, ConfluencePageID
28
+ from .drawio.extension import DrawioExtension
29
29
  from .emoticon import emoji_to_emoticon
30
30
  from .environment import PageError
31
- from .extra import override, path_relative_to
32
- from .latex import get_png_dimensions, remove_png_chunks, render_latex
31
+ from .extension import ExtensionOptions, MarketplaceExtension
32
+ from .formatting import FormattingContext, ImageAlignment, ImageAttributes
33
+ from .image import ImageGenerator, ImageGeneratorOptions
34
+ from .latex import render_latex
33
35
  from .markdown import markdown_to_html
34
- from .mermaid import MermaidConfigProperties
36
+ from .mermaid.extension import MermaidExtension
35
37
  from .metadata import ConfluenceSiteMetadata
36
- from .scanner import MermaidScanner, ScannedDocument, Scanner
38
+ from .options import ConfluencePageID, ConverterOptions, DocumentOptions
39
+ from .plantuml.extension import PlantUMLExtension
40
+ from .png import extract_png_dimensions, remove_png_chunks
41
+ from .scanner import ScannedDocument, Scanner
37
42
  from .serializer import JsonType
38
43
  from .toc import TableOfContentsBuilder
39
44
  from .uri import is_absolute_url, to_uuid_urn
@@ -42,6 +47,28 @@ from .xml import element_to_text
42
47
  ElementType = ET._Element # pyright: ignore [reportPrivateUsage]
43
48
 
44
49
 
50
+ def apply_generated_by_template(template: str, path: Path) -> str:
51
+ """Apply template substitution to the generated_by string.
52
+
53
+ Supported placeholders:
54
+ - %{filepath}: Full path to the file (relative to the source directory)
55
+ - %{filename}: Just the filename
56
+ - %{filedir}: Dirname of the full path to the file (relative to the source directory)
57
+ - %{filestem}: Just the filename without the extension
58
+
59
+ :param template: The template string with placeholders
60
+ :param path: The path to the file being converted
61
+ :returns: The template string with placeholders replaced
62
+ """
63
+
64
+ return (
65
+ template.replace("%{filepath}", path.as_posix())
66
+ .replace("%{filename}", path.name)
67
+ .replace("%{filedir}", path.parent.as_posix())
68
+ .replace("%{filestem}", path.stem)
69
+ )
70
+
71
+
45
72
  def get_volatile_attributes() -> list[str]:
46
73
  "Returns a list of volatile attributes that frequently change as a Confluence storage format XHTML document is updated."
47
74
 
@@ -81,6 +108,12 @@ def is_directory_within(absolute_path: Path, base_path: Path) -> bool:
81
108
  return absolute_path.as_posix().startswith(base_path.as_posix())
82
109
 
83
110
 
111
+ def fix_absolute_path(path: Path, root_path: Path) -> Path:
112
+ "Make absolute path relative to another root path."
113
+
114
+ return root_path / path.relative_to(path.root)
115
+
116
+
84
117
  def encode_title(text: str) -> str:
85
118
  "Converts a title string such that it is safe to embed into a Confluence URL."
86
119
 
@@ -95,6 +128,7 @@ def encode_title(text: str) -> str:
95
128
 
96
129
 
97
130
  # supported code block languages, for which syntax highlighting is available
131
+ # spellchecker: disable
98
132
  _LANGUAGES = {
99
133
  "abap": "abap",
100
134
  "actionscript3": "actionscript3",
@@ -137,7 +171,6 @@ _LANGUAGES = {
137
171
  "kotlin": "kotlin",
138
172
  "livescript": "livescript",
139
173
  "lua": "lua",
140
- "mermaid": "mermaid",
141
174
  "mathematica": "mathematica",
142
175
  "matlab": "matlab",
143
176
  "objectivec": "objectivec",
@@ -179,6 +212,7 @@ _LANGUAGES = {
179
212
  "xquery": "xquery",
180
213
  "yaml": "yaml",
181
214
  }
215
+ # spellchecker: enable
182
216
 
183
217
 
184
218
  class NodeVisitor(ABC):
@@ -241,148 +275,6 @@ def is_placeholder_for(node: ElementType, name: str) -> bool:
241
275
  return True
242
276
 
243
277
 
244
- @enum.unique
245
- class FormattingContext(enum.Enum):
246
- "Identifies the formatting context for the element."
247
-
248
- BLOCK = "block"
249
- INLINE = "inline"
250
-
251
-
252
- @enum.unique
253
- class ImageAlignment(enum.Enum):
254
- "Determines whether to align block-level images to center, left or right."
255
-
256
- CENTER = "center"
257
- LEFT = "left"
258
- RIGHT = "right"
259
-
260
-
261
- @dataclass
262
- class ImageAttributes:
263
- """
264
- Attributes applied to an `<img>` element.
265
-
266
- :param context: Identifies the formatting context for the element (block or inline).
267
- :param width: Natural image width in pixels.
268
- :param height: Natural image height in pixels.
269
- :param alt: Alternate text.
270
- :param title: Title text (a.k.a. image tooltip).
271
- :param caption: Caption text (shown below figure).
272
- :param alignment: Alignment for block-level images.
273
- """
274
-
275
- context: FormattingContext
276
- width: int | None
277
- height: int | None
278
- alt: str | None
279
- title: str | None
280
- caption: str | None
281
- alignment: ImageAlignment = ImageAlignment.CENTER
282
-
283
- def __post_init__(self) -> None:
284
- if self.caption is None and self.context is FormattingContext.BLOCK:
285
- self.caption = self.title or self.alt
286
-
287
- def as_dict(self) -> dict[str, str]:
288
- attributes: dict[str, str] = {}
289
- if self.context is FormattingContext.BLOCK:
290
- if self.alignment is ImageAlignment.LEFT:
291
- attributes[AC_ATTR("align")] = "left"
292
- attributes[AC_ATTR("layout")] = "align-start"
293
- elif self.alignment is ImageAlignment.RIGHT:
294
- attributes[AC_ATTR("align")] = "right"
295
- attributes[AC_ATTR("layout")] = "align-end"
296
- else:
297
- attributes[AC_ATTR("align")] = "center"
298
- attributes[AC_ATTR("layout")] = "center"
299
-
300
- if self.width is not None:
301
- attributes[AC_ATTR("original-width")] = str(self.width)
302
- if self.height is not None:
303
- attributes[AC_ATTR("original-height")] = str(self.height)
304
- if self.width is not None:
305
- attributes[AC_ATTR("custom-width")] = "true"
306
- attributes[AC_ATTR("width")] = str(self.width)
307
-
308
- elif self.context is FormattingContext.INLINE:
309
- if self.width is not None:
310
- attributes[AC_ATTR("width")] = str(self.width)
311
- if self.height is not None:
312
- attributes[AC_ATTR("height")] = str(self.height)
313
- else:
314
- raise NotImplementedError("match not exhaustive for enumeration")
315
-
316
- if self.alt is not None:
317
- attributes.update({AC_ATTR("alt"): self.alt})
318
- if self.title is not None:
319
- attributes.update({AC_ATTR("title"): self.title})
320
- return attributes
321
-
322
- EMPTY_BLOCK: ClassVar["ImageAttributes"]
323
- EMPTY_INLINE: ClassVar["ImageAttributes"]
324
-
325
- @classmethod
326
- def empty(cls, context: FormattingContext) -> "ImageAttributes":
327
- if context is FormattingContext.BLOCK:
328
- return cls.EMPTY_BLOCK
329
- elif context is FormattingContext.INLINE:
330
- return cls.EMPTY_INLINE
331
- else:
332
- raise NotImplementedError("match not exhaustive for enumeration")
333
-
334
-
335
- ImageAttributes.EMPTY_BLOCK = ImageAttributes(
336
- FormattingContext.BLOCK, width=None, height=None, alt=None, title=None, caption=None, alignment=ImageAlignment.CENTER
337
- )
338
- ImageAttributes.EMPTY_INLINE = ImageAttributes(
339
- FormattingContext.INLINE, width=None, height=None, alt=None, title=None, caption=None, alignment=ImageAlignment.CENTER
340
- )
341
-
342
-
343
- @dataclass
344
- class ConfluenceConverterOptions:
345
- """
346
- Options for converting an HTML tree into Confluence storage format.
347
-
348
- :param ignore_invalid_url: When true, ignore invalid URLs in input, emit a warning and replace the anchor with
349
- plain text; when false, raise an exception.
350
- :param heading_anchors: When true, emit a structured macro *anchor* for each section heading using GitHub
351
- conversion rules for the identifier.
352
- :param prefer_raster: Whether to choose PNG files over SVG files when available.
353
- :param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
354
- :param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
355
- :param render_latex: Whether to pre-render LaTeX formulas into PNG/SVG images.
356
- :param diagram_output_format: Target image format for diagrams.
357
- :param webui_links: When true, convert relative URLs to Confluence Web UI links.
358
- :param alignment: Alignment for block-level images and formulas.
359
- :param use_panel: Whether to transform admonitions and alerts into a Confluence custom panel.
360
- """
361
-
362
- ignore_invalid_url: bool = False
363
- heading_anchors: bool = False
364
- prefer_raster: bool = True
365
- render_drawio: bool = False
366
- render_mermaid: bool = False
367
- render_latex: bool = False
368
- diagram_output_format: Literal["png", "svg"] = "png"
369
- webui_links: bool = False
370
- alignment: Literal["center", "left", "right"] = "center"
371
- use_panel: bool = False
372
-
373
-
374
- @dataclass
375
- class ImageData:
376
- path: Path
377
- description: str | None = None
378
-
379
-
380
- @dataclass
381
- class EmbeddedFileData:
382
- data: bytes
383
- description: str | None = None
384
-
385
-
386
278
  @dataclass
387
279
  class ConfluencePanel:
388
280
  emoji: str
@@ -423,20 +315,22 @@ ConfluencePanel.from_class = {
423
315
  class ConfluenceStorageFormatConverter(NodeVisitor):
424
316
  "Transforms a plain HTML tree into Confluence Storage Format."
425
317
 
426
- options: ConfluenceConverterOptions
318
+ options: ConverterOptions
427
319
  path: Path
428
320
  base_dir: Path
429
321
  root_dir: Path
430
322
  toc: TableOfContentsBuilder
431
323
  links: list[str]
432
- images: list[ImageData]
433
- embedded_files: dict[str, EmbeddedFileData]
324
+ attachments: AttachmentCatalog
434
325
  site_metadata: ConfluenceSiteMetadata
435
326
  page_metadata: ConfluencePageCollection
436
327
 
328
+ image_generator: ImageGenerator
329
+ extensions: list[MarketplaceExtension]
330
+
437
331
  def __init__(
438
332
  self,
439
- options: ConfluenceConverterOptions,
333
+ options: ConverterOptions,
440
334
  path: Path,
441
335
  root_dir: Path,
442
336
  site_metadata: ConfluenceSiteMetadata,
@@ -453,11 +347,22 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
453
347
  self.root_dir = root_dir
454
348
  self.toc = TableOfContentsBuilder()
455
349
  self.links = []
456
- self.images = []
457
- self.embedded_files = {}
350
+ self.attachments = AttachmentCatalog()
458
351
  self.site_metadata = site_metadata
459
352
  self.page_metadata = page_metadata
460
353
 
354
+ self.image_generator = ImageGenerator(
355
+ self.base_dir,
356
+ self.attachments,
357
+ ImageGeneratorOptions(self.options.diagram_output_format, self.options.prefer_raster, self.options.layout.image.max_width),
358
+ )
359
+
360
+ self.extensions = [
361
+ DrawioExtension(self.image_generator, ExtensionOptions(render=self.options.render_drawio)),
362
+ MermaidExtension(self.image_generator, ExtensionOptions(render=self.options.render_mermaid)),
363
+ PlantUMLExtension(self.image_generator, ExtensionOptions(render=self.options.render_plantuml)),
364
+ ]
365
+
461
366
  def _transform_heading(self, heading: ElementType) -> None:
462
367
  """
463
368
  Adds anchors to headings in the same document (if *heading anchors* is enabled).
@@ -545,9 +450,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
545
450
 
546
451
  # discard original value: relative links always require transformation
547
452
  anchor.attrib.pop("href")
548
-
549
- # convert the relative URL to absolute path based on the base path value
550
- absolute_path = (self.base_dir / relative_url.path).resolve()
453
+ if relative_url.path.startswith("/"):
454
+ absolute_path = fix_absolute_path(path=Path(relative_url.path), root_path=self.root_dir).resolve()
455
+ else:
456
+ # convert the relative URL to absolute path based on the base path value
457
+ absolute_path = (self.base_dir / relative_url.path).resolve()
551
458
 
552
459
  # look up the absolute path in the page metadata dictionary to discover the relative path within Confluence that should be used
553
460
  if not is_directory_within(absolute_path, self.root_dir):
@@ -606,7 +513,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
606
513
  return None
607
514
 
608
515
  file_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
609
- self.images.append(ImageData(absolute_path))
516
+ self.attachments.add_image(ImageData(absolute_path))
610
517
 
611
518
  link_body = AC_ELEM("link-body", {}, *list(anchor))
612
519
  link_body.text = anchor.text
@@ -668,7 +575,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
668
575
  pixel_width = int(width) if width is not None and width.isdecimal() else None
669
576
  pixel_height = int(height) if height is not None and height.isdecimal() else None
670
577
  attrs = ImageAttributes(
671
- context, width=pixel_width, height=pixel_height, alt=alt, title=title, caption=None, alignment=ImageAlignment(self.options.alignment)
578
+ context,
579
+ width=pixel_width,
580
+ height=pixel_height,
581
+ alt=alt,
582
+ title=title,
583
+ caption=None,
584
+ alignment=ImageAlignment(self.options.layout.get_image_alignment()),
672
585
  )
673
586
 
674
587
  if is_absolute_url(src):
@@ -680,14 +593,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
680
593
  if absolute_path is None:
681
594
  return self._create_missing(path, attrs)
682
595
 
683
- if absolute_path.name.endswith(".drawio.png") or absolute_path.name.endswith(".drawio.svg"):
684
- return self._transform_drawio_image(absolute_path, attrs)
685
- elif absolute_path.name.endswith(".drawio.xml") or absolute_path.name.endswith(".drawio"):
686
- return self._transform_drawio(absolute_path, attrs)
687
- elif absolute_path.name.endswith(".mmd") or absolute_path.name.endswith(".mermaid"):
688
- return self._transform_external_mermaid(absolute_path, attrs)
689
- else:
690
- return self._transform_attached_image(absolute_path, attrs)
596
+ for extension in self.extensions:
597
+ if extension.matches_image(absolute_path):
598
+ return extension.transform_image(absolute_path, attrs)
599
+
600
+ return self.image_generator.transform_attached_image(absolute_path, attrs)
691
601
 
692
602
  def _transform_external_image(self, url: str, attrs: ImageAttributes) -> ElementType:
693
603
  "Emits Confluence Storage Format XHTML for an external image."
@@ -703,7 +613,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
703
613
  if attrs.caption:
704
614
  elements.append(AC_ELEM("caption", attrs.caption))
705
615
 
706
- return AC_ELEM("image", attrs.as_dict(), *elements)
616
+ return AC_ELEM("image", attrs.as_dict(max_width=self.options.layout.image.max_width), *elements)
707
617
 
708
618
  def _warn_or_raise(self, msg: str) -> None:
709
619
  "Emit a warning or raise an exception when a path points to a resource that doesn't exist or is outside of the permitted hierarchy."
@@ -716,8 +626,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
716
626
  def _verify_image_path(self, path: Path) -> Path | None:
717
627
  "Checks whether an image path is safe to use."
718
628
 
719
- # resolve relative path into absolute path w.r.t. base dir
720
- absolute_path = (self.base_dir / path).resolve()
629
+ if path.is_absolute():
630
+ absolute_path = fix_absolute_path(path=path, root_path=self.root_dir).resolve()
631
+ else:
632
+ # resolve relative path into absolute path w.r.t. base dir
633
+ absolute_path = (self.base_dir / path).resolve()
721
634
 
722
635
  if not absolute_path.exists():
723
636
  self._warn_or_raise(f"path to image {path} does not exist")
@@ -729,117 +642,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
729
642
 
730
643
  return absolute_path
731
644
 
732
- def _transform_attached_image(self, absolute_path: Path, attrs: ImageAttributes) -> ElementType:
733
- "Emits Confluence Storage Format XHTML for an attached raster or vector image."
734
-
735
- if self.options.prefer_raster and absolute_path.suffix == ".svg":
736
- # prefer PNG over SVG; Confluence displays SVG in wrong size, and text labels are truncated
737
- png_file = absolute_path.with_suffix(".png")
738
- if png_file.exists():
739
- absolute_path = png_file
740
-
741
- self.images.append(ImageData(absolute_path, attrs.alt))
742
- image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
743
- return self._create_attached_image(image_name, attrs)
744
-
745
- def _transform_drawio(self, absolute_path: Path, attrs: ImageAttributes) -> ElementType:
746
- "Emits Confluence Storage Format XHTML for a draw.io diagram."
747
-
748
- if not absolute_path.name.endswith(".drawio.xml") and not absolute_path.name.endswith(".drawio"):
749
- raise DocumentError("invalid image format; expected: `*.drawio.xml` or `*.drawio`")
750
-
751
- relative_path = path_relative_to(absolute_path, self.base_dir)
752
- if self.options.render_drawio:
753
- image_data = drawio.render_diagram(absolute_path, self.options.diagram_output_format)
754
- image_filename = attachment_name(relative_path.with_suffix(f".{self.options.diagram_output_format}"))
755
- self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
756
- return self._create_attached_image(image_filename, attrs)
757
- else:
758
- self.images.append(ImageData(absolute_path, attrs.alt))
759
- image_filename = attachment_name(relative_path)
760
- return self._create_drawio(image_filename, attrs)
761
-
762
- def _transform_drawio_image(self, absolute_path: Path, attrs: ImageAttributes) -> ElementType:
763
- "Emits Confluence Storage Format XHTML for a draw.io diagram embedded in a PNG or SVG image."
764
-
765
- if not absolute_path.name.endswith(".drawio.png") and not absolute_path.name.endswith(".drawio.svg"):
766
- raise DocumentError("invalid image format; expected: `*.drawio.png` or `*.drawio.svg`")
767
-
768
- if self.options.render_drawio:
769
- return self._transform_attached_image(absolute_path, attrs)
770
- else:
771
- # extract embedded editable diagram and upload as *.drawio
772
- image_data = drawio.extract_diagram(absolute_path)
773
- image_filename = attachment_name(path_relative_to(absolute_path.with_suffix(".xml"), self.base_dir))
774
- self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
775
-
776
- return self._create_drawio(image_filename, attrs)
777
-
778
- def _create_attached_image(self, image_name: str, attrs: ImageAttributes) -> ElementType:
779
- "An image embedded into the page, linking to an attachment."
780
-
781
- elements: list[ElementType] = []
782
- elements.append(
783
- RI_ELEM(
784
- "attachment",
785
- # refers to an attachment uploaded alongside the page
786
- {RI_ATTR("filename"): image_name},
787
- )
788
- )
789
- if attrs.caption:
790
- elements.append(AC_ELEM("caption", attrs.caption))
791
-
792
- return AC_ELEM("image", attrs.as_dict(), *elements)
793
-
794
- def _create_drawio(self, filename: str, attrs: ImageAttributes) -> ElementType:
795
- "A draw.io diagram embedded into the page, linking to an attachment."
796
-
797
- parameters: list[ElementType] = [
798
- AC_ELEM(
799
- "parameter",
800
- {AC_ATTR("name"): "diagramName"},
801
- filename,
802
- ),
803
- ]
804
- if attrs.width is not None:
805
- parameters.append(
806
- AC_ELEM(
807
- "parameter",
808
- {AC_ATTR("name"): "width"},
809
- str(attrs.width),
810
- ),
811
- )
812
- if attrs.height is not None:
813
- parameters.append(
814
- AC_ELEM(
815
- "parameter",
816
- {AC_ATTR("name"): "height"},
817
- str(attrs.height),
818
- ),
819
- )
820
- if attrs.alignment is ImageAlignment.CENTER:
821
- parameters.append(
822
- AC_ELEM(
823
- "parameter",
824
- {AC_ATTR("name"): "pCenter"},
825
- str(1),
826
- ),
827
- )
828
-
829
- local_id = str(uuid.uuid4())
830
- macro_id = str(uuid.uuid4())
831
- return AC_ELEM(
832
- "structured-macro",
833
- {
834
- AC_ATTR("name"): "drawio",
835
- AC_ATTR("schema-version"): "1",
836
- "data-layout": "default",
837
- AC_ATTR("local-id"): local_id,
838
- AC_ATTR("macro-id"): macro_id,
839
- },
840
- *parameters,
841
- )
842
-
843
645
  def _create_missing(self, path: Path, attrs: ImageAttributes) -> ElementType:
844
646
  "A warning panel for a missing image."
845
647
 
@@ -871,6 +673,9 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
871
673
  def _transform_code_block(self, code: ElementType) -> ElementType:
872
674
  "Transforms a code block."
873
675
 
676
+ content: str = code.text or ""
677
+ content = content.rstrip()
678
+
874
679
  if language_class := code.get("class"):
875
680
  if m := re.match("^language-(.*)$", language_class):
876
681
  language_name = m.group(1)
@@ -881,16 +686,14 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
881
686
 
882
687
  # translate name to standard name for (programming) language
883
688
  if language_name is not None:
689
+ for extension in self.extensions:
690
+ if extension.matches_fenced(language_name, content):
691
+ return extension.transform_fenced(content)
692
+
884
693
  language_id = _LANGUAGES.get(language_name)
885
694
  else:
886
695
  language_id = None
887
696
 
888
- content: str = code.text or ""
889
- content = content.rstrip()
890
-
891
- if language_id == "mermaid":
892
- return self._transform_fenced_mermaid(content)
893
-
894
697
  return AC_ELEM(
895
698
  "structured-macro",
896
699
  {
@@ -905,84 +708,6 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
905
708
  AC_ELEM("plain-text-body", ET.CDATA(content)),
906
709
  )
907
710
 
908
- def _extract_mermaid_config(self, content: str) -> MermaidConfigProperties | None:
909
- """Extract scale from Mermaid YAML front matter configuration."""
910
- try:
911
- properties = MermaidScanner().read(content)
912
- return properties.config
913
- except BaseValidationError as ex:
914
- LOGGER.warning("Failed to extract Mermaid properties: %s", ex)
915
- return None
916
-
917
- def _transform_external_mermaid(self, absolute_path: Path, attrs: ImageAttributes) -> ElementType:
918
- "Emits Confluence Storage Format XHTML for a Mermaid diagram read from an external file."
919
-
920
- if not absolute_path.name.endswith(".mmd") and not absolute_path.name.endswith(".mermaid"):
921
- raise DocumentError("invalid image format; expected: `*.mmd` or `*.mermaid`")
922
-
923
- relative_path = path_relative_to(absolute_path, self.base_dir)
924
- if self.options.render_mermaid:
925
- with open(absolute_path, "r", encoding="utf-8") as f:
926
- content = f.read()
927
- config = self._extract_mermaid_config(content)
928
- image_data = mermaid.render_diagram(content, self.options.diagram_output_format, config=config)
929
- image_filename = attachment_name(relative_path.with_suffix(f".{self.options.diagram_output_format}"))
930
- self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
931
- return self._create_attached_image(image_filename, attrs)
932
- else:
933
- self.images.append(ImageData(absolute_path, attrs.alt))
934
- mermaid_filename = attachment_name(relative_path)
935
- return self._create_mermaid_embed(mermaid_filename)
936
-
937
- def _transform_fenced_mermaid(self, content: str) -> ElementType:
938
- "Emits Confluence Storage Format XHTML for a Mermaid diagram defined in a fenced code block."
939
-
940
- if self.options.render_mermaid:
941
- config = self._extract_mermaid_config(content)
942
- image_data = mermaid.render_diagram(content, self.options.diagram_output_format, config=config)
943
- image_hash = hashlib.md5(image_data).hexdigest()
944
- image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
945
- self.embedded_files[image_filename] = EmbeddedFileData(image_data)
946
- return self._create_attached_image(image_filename, ImageAttributes.EMPTY_BLOCK)
947
- else:
948
- mermaid_data = content.encode("utf-8")
949
- mermaid_hash = hashlib.md5(mermaid_data).hexdigest()
950
- mermaid_filename = attachment_name(f"embedded_{mermaid_hash}.mmd")
951
- self.embedded_files[mermaid_filename] = EmbeddedFileData(mermaid_data)
952
- return self._create_mermaid_embed(mermaid_filename)
953
-
954
- def _create_mermaid_embed(self, filename: str) -> ElementType:
955
- "A Mermaid diagram, linking to an attachment that captures the Mermaid source."
956
-
957
- local_id = str(uuid.uuid4())
958
- macro_id = str(uuid.uuid4())
959
- return AC_ELEM(
960
- "structured-macro",
961
- {
962
- AC_ATTR("name"): "mermaid-cloud",
963
- AC_ATTR("schema-version"): "1",
964
- "data-layout": "default",
965
- AC_ATTR("local-id"): local_id,
966
- AC_ATTR("macro-id"): macro_id,
967
- },
968
- AC_ELEM(
969
- "parameter",
970
- {AC_ATTR("name"): "filename"},
971
- filename,
972
- ),
973
- AC_ELEM(
974
- "parameter",
975
- {AC_ATTR("name"): "toolbar"},
976
- "bottom",
977
- ),
978
- AC_ELEM(
979
- "parameter",
980
- {AC_ATTR("name"): "zoom"},
981
- "fit",
982
- ),
983
- AC_ELEM("parameter", {AC_ATTR("name"): "revision"}, "1"),
984
- )
985
-
986
711
  def _transform_toc(self, code: ElementType) -> ElementType:
987
712
  "Creates a table of contents, constructed from headings in the document."
988
713
 
@@ -1299,16 +1024,24 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1299
1024
 
1300
1025
  image_data = render_latex(content, format=self.options.diagram_output_format)
1301
1026
  if self.options.diagram_output_format == "png":
1302
- width, height = get_png_dimensions(data=image_data)
1027
+ width, height = extract_png_dimensions(data=image_data)
1303
1028
  image_data = remove_png_chunks(["pHYs"], source_data=image_data)
1304
- attrs = ImageAttributes(context, width=width, height=height, alt=content, title=None, caption="", alignment=ImageAlignment(self.options.alignment))
1029
+ attrs = ImageAttributes(
1030
+ context,
1031
+ width=width,
1032
+ height=height,
1033
+ alt=content,
1034
+ title=None,
1035
+ caption="",
1036
+ alignment=ImageAlignment(self.options.layout.get_image_alignment()),
1037
+ )
1305
1038
  else:
1306
1039
  attrs = ImageAttributes.empty(context)
1307
1040
 
1308
1041
  image_hash = hashlib.md5(image_data).hexdigest()
1309
1042
  image_filename = attachment_name(f"formula_{image_hash}.{self.options.diagram_output_format}")
1310
- self.embedded_files[image_filename] = EmbeddedFileData(image_data, content)
1311
- image = self._create_attached_image(image_filename, attrs)
1043
+ self.attachments.add_embed(image_filename, EmbeddedFileData(image_data, content))
1044
+ image = self.image_generator.create_attached_image(image_filename, attrs)
1312
1045
  return image
1313
1046
 
1314
1047
  def _transform_inline_math(self, elem: ElementType) -> ElementType:
@@ -1342,7 +1075,7 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1342
1075
  {AC_ATTR("name"): "body"},
1343
1076
  content,
1344
1077
  ),
1345
- AC_ELEM("parameter", {AC_ATTR("name"): "align"}, self.options.alignment),
1078
+ AC_ELEM("parameter", {AC_ATTR("name"): "align"}, self.options.layout.get_image_alignment()),
1346
1079
  )
1347
1080
  return macro
1348
1081
 
@@ -1379,15 +1112,23 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1379
1112
  {AC_ATTR("name"): "body"},
1380
1113
  content,
1381
1114
  ),
1382
- AC_ELEM("parameter", {AC_ATTR("name"): "align"}, self.options.alignment),
1115
+ AC_ELEM("parameter", {AC_ATTR("name"): "align"}, self.options.layout.get_image_alignment()),
1383
1116
  )
1384
1117
 
1385
1118
  def _transform_footnote_ref(self, elem: ElementType) -> None:
1386
1119
  """
1387
1120
  Transforms a footnote reference.
1388
1121
 
1122
+ When a footnote is referenced multiple times, Python-Markdown generates
1123
+ different `id` attributes for each reference:
1124
+ - First reference: `fnref:NAME`
1125
+ - Second reference: `fnref2:NAME`
1126
+ - Third reference: `fnref3:NAME`
1127
+ - etc.
1128
+
1389
1129
  ```
1390
1130
  <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">REF</a></sup>
1131
+ <sup id="fnref2:NAME"><a class="footnote-ref" href="#fn:NAME">REF</a></sup>
1391
1132
  ```
1392
1133
  """
1393
1134
 
@@ -1395,9 +1136,14 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1395
1136
  raise DocumentError("expected: `<sup>` as the HTML element for a footnote reference")
1396
1137
 
1397
1138
  ref_id = elem.attrib.pop("id", "")
1398
- if not ref_id.startswith("fnref:"):
1399
- raise DocumentError("expected: attribute `id` of format `fnref:NAME` applied on `<sup>` for a footnote reference")
1400
- footnote_ref = ref_id.removeprefix("fnref:")
1139
+ # Match fnref:NAME, fnref2:NAME, fnref3:NAME, etc.
1140
+ match = re.match(r"^fnref(\d*):(.+)$", ref_id)
1141
+ if match is None:
1142
+ raise DocumentError("expected: attribute `id` of format `fnref:NAME` or `fnrefN:NAME` applied on `<sup>` for a footnote reference")
1143
+ numeric_suffix = match.group(1)
1144
+ footnote_name = match.group(2)
1145
+ # Build anchor name: first reference uses NAME, subsequent references use NAME-N
1146
+ footnote_ref = f"{footnote_name}-{numeric_suffix}" if numeric_suffix else footnote_name
1401
1147
 
1402
1148
  link = next((elem.iterchildren(tag="a")), None)
1403
1149
  if link is None:
@@ -1443,6 +1189,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1443
1189
  """
1444
1190
  Transforms the footnote definition block.
1445
1191
 
1192
+ When a footnote is referenced multiple times, Python-Markdown generates
1193
+ multiple back-reference links in the footnote definition:
1194
+ - First reference: `#fnref:NAME`
1195
+ - Second reference: `#fnref2:NAME`
1196
+ - Third reference: `#fnref3:NAME`
1197
+ - etc.
1198
+
1446
1199
  ```
1447
1200
  <div class="footnote">
1448
1201
  <hr/>
@@ -1453,6 +1206,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1453
1206
  </ol>
1454
1207
  </div>
1455
1208
  ```
1209
+
1210
+ With multiple references to the same footnote:
1211
+ ```
1212
+ <li id="fn:NAME">
1213
+ <p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a><a class="footnote-backref" href="#fnref2:NAME">↩</a></p>
1214
+ </li>
1215
+ ```
1456
1216
  """
1457
1217
 
1458
1218
  ordered_list = next((elem.iterchildren(tag="ol")), None)
@@ -1468,21 +1228,33 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1468
1228
  raise DocumentError("expected: attribute `id` of format `fn:NAME` applied on `<li>` for a footnote definition")
1469
1229
  footnote_def = def_id.removeprefix("fn:")
1470
1230
 
1471
- paragraph = next((list_item.iterchildren(tag="p")), None)
1472
- if paragraph is None:
1231
+ # find the last paragraph, which is where the backref links are placed
1232
+ paragraphs = list(list_item.iterchildren(tag="p"))
1233
+ if not paragraphs:
1473
1234
  raise DocumentError("expected: `<p>` as a child of `<li>` in a footnote definition")
1235
+ last_paragraph = paragraphs[-1]
1236
+
1237
+ # collect all backref anchors (there may be multiple when a footnote is referenced multiple times)
1238
+ # pattern matches #fnref:NAME, #fnref2:NAME, #fnref3:NAME, etc.
1239
+ # store tuples of (anchor_element, number, footnote_name)
1240
+ backref_info: list[tuple[ElementType, int | None, str]] = []
1241
+ for anchor in list(last_paragraph.iterchildren(tag="a")):
1242
+ href = anchor.get("href", "")
1243
+ match = re.match(r"^#fnref(\d*):(.+)$", href)
1244
+ if match is not None:
1245
+ backref_info.append((anchor, int(match.group(1), base=10) if match.group(1) else None, match.group(2)))
1246
+
1247
+ if not backref_info:
1248
+ raise DocumentError(
1249
+ "expected: at least one `<a>` element with `href` attribute of format `#fnref:NAME` or `#fnrefN:NAME` in a footnote definition"
1250
+ )
1474
1251
 
1475
- ref_anchor = next((paragraph.iterchildren(tag="a", reversed=True)), None)
1476
- if ref_anchor is None:
1477
- raise DocumentError("expected: `<a>` as the last HTML element in a footnote definition")
1478
-
1479
- ref_href = ref_anchor.get("href", "")
1480
- if not ref_href.startswith("#fnref:"):
1481
- raise DocumentError("expected: attribute `href` of format `#fnref:NAME` applied on last element `<a>` for a footnote definition")
1482
- footnote_ref = ref_href.removeprefix("#fnref:")
1252
+ # remove all back-links generated by Python-Markdown
1253
+ for anchor, _, _ in backref_info:
1254
+ last_paragraph.remove(anchor)
1483
1255
 
1484
- # remove back-link generated by Python-Markdown
1485
- paragraph.remove(ref_anchor)
1256
+ # use the first paragraph for the anchor placement
1257
+ first_paragraph = paragraphs[0]
1486
1258
 
1487
1259
  # build new anchor for footnote definition
1488
1260
  def_anchor = AC_ELEM(
@@ -1498,20 +1270,40 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1498
1270
  ),
1499
1271
  )
1500
1272
 
1501
- # build new link to footnote reference in page body
1502
- ref_link = AC_ELEM(
1503
- "link",
1504
- {
1505
- AC_ATTR("anchor"): f"footnote-ref-{footnote_ref}",
1506
- },
1507
- AC_ELEM("link-body", ET.CDATA("↩")),
1508
- )
1273
+ # build back-links to each footnote reference in page body:
1274
+ # * for single reference: ↩
1275
+ # * for multiple references: ↩¹ ↩² ↩³ ...
1276
+ for _, number, footnote_name in backref_info:
1277
+ # build anchor name matching the reference anchor:
1278
+ # * first reference: footnote-ref-NAME
1279
+ # * subsequent references: footnote-ref-NAME-N
1280
+ if number is None:
1281
+ anchor_name = f"footnote-ref-{footnote_name}"
1282
+ if len(backref_info) > 1:
1283
+ link_text = "↩¹"
1284
+ else:
1285
+ link_text = "↩"
1286
+ else:
1287
+ anchor_name = f"footnote-ref-{footnote_name}-{number}"
1288
+
1289
+ # use superscript numbers for references
1290
+ superscript_digits = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
1291
+ link_text = f"↩{str(number).translate(superscript_digits)}"
1292
+
1293
+ ref_link = AC_ELEM(
1294
+ "link",
1295
+ {
1296
+ AC_ATTR("anchor"): anchor_name,
1297
+ },
1298
+ AC_ELEM("link-body", ET.CDATA(link_text)),
1299
+ )
1300
+
1301
+ last_paragraph.append(ref_link)
1509
1302
 
1510
- # append children synthesized for Confluence
1511
- paragraph.insert(0, def_anchor)
1512
- def_anchor.tail = paragraph.text
1513
- paragraph.text = None
1514
- paragraph.append(ref_link)
1303
+ # append anchor to first paragraph
1304
+ first_paragraph.insert(0, def_anchor)
1305
+ def_anchor.tail = first_paragraph.text
1306
+ first_paragraph.text = None
1515
1307
 
1516
1308
  def _transform_tasklist(self, elem: ElementType) -> ElementType:
1517
1309
  """
@@ -1572,161 +1364,174 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1572
1364
  if not isinstance(child.tag, str):
1573
1365
  return None
1574
1366
 
1575
- # <p>...</p>
1576
- if child.tag == "p":
1577
- # <p><img src="..." /></p>
1578
- if len(child) == 1 and not child.text and child[0].tag == "img" and not child[0].tail:
1579
- return self._transform_image(FormattingContext.BLOCK, child[0])
1580
-
1581
- # <p>[[<em>TOC</em>]]</p> (represented in Markdown as `[[_TOC_]]`)
1582
- elif is_placeholder_for(child, "TOC"):
1583
- return self._transform_toc(child)
1584
-
1585
- # <p>[[<em>LISTING</em>]]</p> (represented in Markdown as `[[_LISTING_]]`)
1586
- elif is_placeholder_for(child, "LISTING"):
1587
- return self._transform_listing(child)
1588
-
1589
- # <div>...</div>
1590
- elif child.tag == "div":
1591
- classes = child.get("class", "").split(" ")
1592
-
1593
- # <div class="arithmatex">...</div>
1594
- if "arithmatex" in classes:
1595
- return self._transform_block_math(child)
1596
-
1597
- # <div><ac:structured-macro ...>...</ac:structured-macro></div>
1598
- elif "csf" in classes:
1599
- if len(child) != 1:
1600
- raise DocumentError("expected: single child in Confluence Storage Format block")
1601
-
1602
- return child[0]
1603
-
1604
- # <div class="footnote">
1605
- # <hr/>
1606
- # <ol>
1607
- # <li id="fn:NAME"><p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p></li>
1608
- # </ol>
1609
- # </div>
1610
- elif "footnote" in classes:
1611
- self._transform_footnote_def(child)
1612
- return None
1613
-
1614
- # <div class="admonition note">
1615
- # <p class="admonition-title">Note</p>
1616
- # <p>...</p>
1617
- # </div>
1618
- #
1619
- # --- OR ---
1620
- #
1621
- # <div class="admonition note">
1367
+ match child.tag:
1622
1368
  # <p>...</p>
1623
- # </div>
1624
- elif "admonition" in classes:
1625
- return self._transform_admonition(child)
1626
-
1627
- # <blockquote>...</blockquote>
1628
- elif child.tag == "blockquote":
1629
- # Alerts in GitHub
1630
- # <blockquote>
1631
- # <p>[!TIP] ...</p>
1632
- # </blockquote>
1633
- if len(child) > 0 and child[0].tag == "p" and child[0].text is not None and child[0].text.startswith("[!"):
1634
- return self._transform_github_alert(child)
1635
-
1636
- # Alerts in GitLab
1637
- # <blockquote>
1638
- # <p>DISCLAIMER: ...</p>
1639
- # </blockquote>
1640
- elif len(child) > 0 and child[0].tag == "p" and element_text_starts_with_any(child[0], ["FLAG:", "NOTE:", "WARNING:", "DISCLAIMER:"]):
1641
- return self._transform_gitlab_alert(child)
1642
-
1643
- # <details markdown="1">
1644
- # <summary>...</summary>
1645
- # ...
1646
- # </details>
1647
- elif child.tag == "details" and len(child) > 1 and child[0].tag == "summary":
1648
- return self._transform_collapsed(child)
1649
-
1650
- # <ol>...</ol>
1651
- elif child.tag == "ol":
1652
- # Confluence adds the attribute `start` for every ordered list
1653
- child.set("start", "1")
1654
- return None
1369
+ case "p":
1370
+ # <p><img src="..." /></p>
1371
+ if len(child) == 1 and not child.text and child[0].tag == "img" and not child[0].tail:
1372
+ return self._transform_image(FormattingContext.BLOCK, child[0])
1373
+
1374
+ # <p>[[<em>TOC</em>]]</p> (represented in Markdown as `[[_TOC_]]`)
1375
+ elif is_placeholder_for(child, "TOC"):
1376
+ return self._transform_toc(child)
1377
+
1378
+ # <p>[[<em>LISTING</em>]]</p> (represented in Markdown as `[[_LISTING_]]`)
1379
+ elif is_placeholder_for(child, "LISTING"):
1380
+ return self._transform_listing(child)
1381
+
1382
+ # <div>...</div>
1383
+ case "div":
1384
+ classes = child.get("class", "").split(" ")
1385
+
1386
+ # <div class="arithmatex">...</div>
1387
+ if "arithmatex" in classes:
1388
+ return self._transform_block_math(child)
1389
+
1390
+ # <div><ac:structured-macro ...>...</ac:structured-macro></div>
1391
+ elif "csf" in classes:
1392
+ if len(child) != 1:
1393
+ raise DocumentError("expected: single child in Confluence Storage Format block")
1394
+
1395
+ return child[0]
1396
+
1397
+ # <div class="footnote">
1398
+ # <hr/>
1399
+ # <ol>
1400
+ # <li id="fn:NAME"><p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a></p></li>
1401
+ # </ol>
1402
+ # </div>
1403
+ elif "footnote" in classes:
1404
+ self._transform_footnote_def(child)
1405
+ return None
1406
+
1407
+ # <div class="admonition note">
1408
+ # <p class="admonition-title">Note</p>
1409
+ # <p>...</p>
1410
+ # </div>
1411
+ #
1412
+ # --- OR ---
1413
+ #
1414
+ # <div class="admonition note">
1415
+ # <p>...</p>
1416
+ # </div>
1417
+ elif "admonition" in classes:
1418
+ return self._transform_admonition(child)
1419
+
1420
+ # <blockquote>...</blockquote>
1421
+ case "blockquote":
1422
+ # Alerts in GitHub
1423
+ # <blockquote>
1424
+ # <p>[!TIP] ...</p>
1425
+ # </blockquote>
1426
+ if len(child) > 0 and child[0].tag == "p" and child[0].text is not None and child[0].text.startswith("[!"):
1427
+ return self._transform_github_alert(child)
1428
+
1429
+ # Alerts in GitLab
1430
+ # <blockquote>
1431
+ # <p>DISCLAIMER: ...</p>
1432
+ # </blockquote>
1433
+ elif len(child) > 0 and child[0].tag == "p" and element_text_starts_with_any(child[0], ["FLAG:", "NOTE:", "WARNING:", "DISCLAIMER:"]):
1434
+ return self._transform_gitlab_alert(child)
1435
+
1436
+ # <details markdown="1">
1437
+ # <summary>...</summary>
1438
+ # ...
1439
+ # </details>
1440
+ case "details" if len(child) > 1 and child[0].tag == "summary":
1441
+ return self._transform_collapsed(child)
1655
1442
 
1656
- # <ul>
1657
- # <li>[ ] ...</li>
1658
- # <li>[x] ...</li>
1659
- # </ul>
1660
- elif child.tag == "ul":
1661
- if len(child) > 0 and all(element_text_starts_with_any(item, ["[ ]", "[x]", "[X]"]) for item in child):
1662
- return self._transform_tasklist(child)
1443
+ # <ol>...</ol>
1444
+ case "ol":
1445
+ # Confluence adds the attribute `start` for every ordered list
1446
+ child.set("start", "1")
1447
+ return None
1663
1448
 
1664
- return None
1449
+ # <ul>
1450
+ # <li>[ ] ...</li>
1451
+ # <li>[x] ...</li>
1452
+ # </ul>
1453
+ case "ul":
1454
+ if len(child) > 0 and all(element_text_starts_with_any(item, ["[ ]", "[x]", "[X]"]) for item in child):
1455
+ return self._transform_tasklist(child)
1665
1456
 
1666
- elif child.tag == "li":
1667
- normalize_inline(child)
1668
- return None
1457
+ return None
1669
1458
 
1670
- # <pre><code class="language-java"> ... </code></pre>
1671
- elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code":
1672
- return self._transform_code_block(child[0])
1459
+ case "li":
1460
+ normalize_inline(child)
1461
+ return None
1673
1462
 
1674
- # <table>...</table>
1675
- elif child.tag == "table":
1676
- for td in child.iterdescendants("td", "th"):
1677
- normalize_inline(td)
1678
- child.set("data-layout", "default")
1679
- return None
1463
+ # <pre><code class="language-java"> ... </code></pre>
1464
+ case "pre" if len(child) == 1 and child[0].tag == "code":
1465
+ return self._transform_code_block(child[0])
1466
+
1467
+ # <table>...</table>
1468
+ case "table":
1469
+ for td in child.iterdescendants("td", "th"):
1470
+ normalize_inline(td)
1471
+ match self.options.layout.alignment:
1472
+ case "left":
1473
+ layout = "align-start"
1474
+ case _:
1475
+ layout = "default"
1476
+ child.set("data-layout", layout)
1477
+ if self.options.layout.table.display_mode == "fixed":
1478
+ child.set("data-table-display-mode", "fixed")
1479
+ if self.options.layout.table.width:
1480
+ child.set("data-table-width", str(self.options.layout.table.width))
1680
1481
 
1681
- # <img src="..." alt="..." />
1682
- elif child.tag == "img":
1683
- return self._transform_image(FormattingContext.INLINE, child)
1482
+ return None
1684
1483
 
1685
- # <a href="..."> ... </a>
1686
- elif child.tag == "a":
1687
- return self._transform_link(child)
1484
+ # <img src="..." alt="..." />
1485
+ case "img":
1486
+ return self._transform_image(FormattingContext.INLINE, child)
1688
1487
 
1689
- # <mark>...</mark>
1690
- elif child.tag == "mark":
1691
- return self._transform_mark(child)
1488
+ # <a href="..."> ... </a>
1489
+ case "a":
1490
+ return self._transform_link(child)
1692
1491
 
1693
- # <span>...</span>
1694
- elif child.tag == "span":
1695
- classes = child.get("class", "").split(" ")
1492
+ # <mark>...</mark>
1493
+ case "mark":
1494
+ return self._transform_mark(child)
1696
1495
 
1697
- # <span class="arithmatex">...</span>
1698
- if "arithmatex" in classes:
1699
- return self._transform_inline_math(child)
1496
+ # <span>...</span>
1497
+ case "span":
1498
+ classes = child.get("class", "").split(" ")
1700
1499
 
1701
- # <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
1702
- elif child.tag == "sup" and child.get("id", "").startswith("fnref:"):
1703
- self._transform_footnote_ref(child)
1704
- return None
1500
+ # <span class="arithmatex">...</span>
1501
+ if "arithmatex" in classes:
1502
+ return self._transform_inline_math(child)
1705
1503
 
1706
- # <input type="date" value="1984-01-01" />
1707
- elif child.tag == "input" and child.get("type", "") == "date":
1708
- return HTML("time", {"datetime": child.get("value", "")})
1504
+ # <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
1505
+ # Multiple references: <sup id="fnref2:NAME">...</sup>, <sup id="fnref3:NAME">...</sup>
1506
+ case "sup" if re.match(r"^fnref\d*:", child.get("id", "")):
1507
+ self._transform_footnote_ref(child)
1508
+ return None
1709
1509
 
1710
- # <ins>...</ins>
1711
- elif child.tag == "ins":
1712
- # Confluence prefers <u> over <ins> for underline, and replaces <ins> with <u>
1713
- child.tag = "u"
1510
+ # <input type="date" value="1984-01-01" />
1511
+ case "input" if child.get("type", "") == "date":
1512
+ return HTML("time", {"datetime": child.get("value", "")})
1714
1513
 
1715
- # <x-emoji data-shortname="wink" data-unicode="1f609">😉</x-emoji>
1716
- elif child.tag == "x-emoji":
1717
- return self._transform_emoji(child)
1514
+ # <ins>...</ins>
1515
+ case "ins":
1516
+ # Confluence prefers <u> over <ins> for underline, and replaces <ins> with <u>
1517
+ child.tag = "u"
1718
1518
 
1719
- # <h1>...</h1>
1720
- # <h2>...</h2> ...
1721
- m = re.match(r"^h([1-6])$", child.tag, flags=re.IGNORECASE)
1722
- if m is not None:
1723
- level = int(m.group(1))
1724
- title = element_to_text(child)
1725
- self.toc.add(level, title)
1519
+ # <x-emoji data-shortname="wink" data-unicode="1f609">😉</x-emoji>
1520
+ case "x-emoji":
1521
+ return self._transform_emoji(child)
1726
1522
 
1727
- if self.options.heading_anchors:
1728
- self._transform_heading(child)
1729
- return None
1523
+ # <h1>...</h1>
1524
+ # <h2>...</h2> ...
1525
+ case "h1" | "h2" | "h3" | "h4" | "h5" | "h6":
1526
+ level = int(child.tag[1:])
1527
+ title = element_to_text(child)
1528
+ self.toc.add(level, title)
1529
+
1530
+ if self.options.heading_anchors:
1531
+ self._transform_heading(child)
1532
+ return None
1533
+ case _:
1534
+ pass
1730
1535
 
1731
1536
  return None
1732
1537
 
@@ -1750,14 +1555,14 @@ class ConfluenceDocument:
1750
1555
  images: list[ImageData]
1751
1556
  embedded_files: dict[str, EmbeddedFileData]
1752
1557
 
1753
- options: ConfluenceDocumentOptions
1558
+ options: DocumentOptions
1754
1559
  root: ElementType
1755
1560
 
1756
1561
  @classmethod
1757
1562
  def create(
1758
1563
  cls,
1759
1564
  path: Path,
1760
- options: ConfluenceDocumentOptions,
1565
+ options: DocumentOptions,
1761
1566
  root_dir: Path,
1762
1567
  site_metadata: ConfluenceSiteMetadata,
1763
1568
  page_metadata: ConfluencePageCollection,
@@ -1765,9 +1570,10 @@ class ConfluenceDocument:
1765
1570
  path = path.resolve(True)
1766
1571
 
1767
1572
  document = Scanner().read(path)
1573
+ props = document.properties
1768
1574
 
1769
- if document.page_id is not None:
1770
- page_id = ConfluencePageID(document.page_id)
1575
+ if props.page_id is not None:
1576
+ page_id = ConfluencePageID(props.page_id)
1771
1577
  else:
1772
1578
  # look up Confluence page ID in metadata
1773
1579
  metadata = page_metadata.get(path)
@@ -1782,13 +1588,14 @@ class ConfluenceDocument:
1782
1588
  self,
1783
1589
  path: Path,
1784
1590
  document: ScannedDocument,
1785
- options: ConfluenceDocumentOptions,
1591
+ options: DocumentOptions,
1786
1592
  root_dir: Path,
1787
1593
  site_metadata: ConfluenceSiteMetadata,
1788
1594
  page_metadata: ConfluencePageCollection,
1789
1595
  ) -> None:
1790
1596
  "Converts a single Markdown document to Confluence Storage Format."
1791
1597
 
1598
+ props = document.properties
1792
1599
  self.options = options
1793
1600
 
1794
1601
  # register auxiliary URL substitutions
@@ -1802,11 +1609,12 @@ class ConfluenceDocument:
1802
1609
 
1803
1610
  # modify HTML as necessary
1804
1611
  if self.options.generated_by is not None:
1805
- generated_by = document.generated_by or self.options.generated_by
1612
+ generated_by = props.generated_by or self.options.generated_by
1806
1613
  else:
1807
1614
  generated_by = None
1808
1615
 
1809
1616
  if generated_by is not None:
1617
+ generated_by = apply_generated_by_template(generated_by, path.relative_to(root_dir))
1810
1618
  generated_by_html = markdown_to_html(generated_by)
1811
1619
 
1812
1620
  content = [
@@ -1825,11 +1633,9 @@ class ConfluenceDocument:
1825
1633
  raise ConversionError(path) from ex
1826
1634
 
1827
1635
  # configure HTML-to-Confluence converter
1828
- converter_options = ConfluenceConverterOptions(
1829
- **{field.name: getattr(self.options, field.name) for field in dataclasses.fields(ConfluenceConverterOptions)}
1830
- )
1831
- if document.alignment is not None:
1832
- converter_options.alignment = document.alignment
1636
+ converter_options = copy.deepcopy(self.options.converter)
1637
+ if props.layout is not None:
1638
+ converter_options.layout = coalesce(props.layout, converter_options.layout)
1833
1639
  converter = ConfluenceStorageFormatConverter(converter_options, path, root_dir, site_metadata, page_metadata)
1834
1640
 
1835
1641
  # execute HTML-to-Confluence converter
@@ -1840,45 +1646,66 @@ class ConfluenceDocument:
1840
1646
 
1841
1647
  # extract information discovered by converter
1842
1648
  self.links = converter.links
1843
- self.images = converter.images
1844
- self.embedded_files = converter.embedded_files
1649
+ self.images = converter.attachments.images
1650
+ self.embedded_files = converter.attachments.embedded_files
1845
1651
 
1846
1652
  # assign global properties for document
1847
- self.title = document.title or converter.toc.get_title()
1848
- self.labels = document.tags
1849
- self.properties = document.properties
1850
-
1851
- def xhtml(self) -> str:
1852
- return elements_to_string(self.root)
1853
-
1854
-
1855
- def attachment_name(ref: Path | str) -> str:
1856
- """
1857
- Safe name for use with attachment uploads.
1858
-
1859
- Mutates a relative path such that it meets Confluence's attachment naming requirements.
1860
-
1861
- Allowed characters:
1862
-
1863
- * Alphanumeric characters: 0-9, a-z, A-Z
1864
- * Special characters: hyphen (-), underscore (_), period (.)
1865
- """
1866
-
1867
- if isinstance(ref, Path):
1868
- path = ref
1869
- else:
1870
- path = Path(ref)
1653
+ self.title = props.title or converter.toc.get_title()
1654
+ self.labels = props.tags
1655
+ self.properties = props.properties
1656
+
1657
+ # Remove the first heading if:
1658
+ # 1. The option is enabled
1659
+ # 2. Title was NOT from front-matter (document.title is None)
1660
+ # 3. A title was successfully extracted from heading (self.title is not None)
1661
+ if converter_options.skip_title_heading and props.title is None and self.title is not None:
1662
+ self._remove_first_heading()
1663
+
1664
+ def _remove_first_heading(self) -> None:
1665
+ """
1666
+ Removes the first heading element from the document root.
1871
1667
 
1872
- if path.drive or path.root:
1873
- raise ValueError(f"required: relative path; got: {ref}")
1668
+ This is used when the title was extracted from the first unique top-level heading
1669
+ and the user has requested to skip it from the body to avoid duplication.
1874
1670
 
1875
- regexp = re.compile(r"[^\-0-9A-Za-z_.]", re.UNICODE)
1671
+ Handles the case where a generated-by info panel may be present as the first child.
1672
+ """
1876
1673
 
1877
- def replace_part(part: str) -> str:
1878
- if part == "..":
1879
- return "PAR"
1880
- else:
1881
- return regexp.sub("_", part)
1674
+ # Find the first heading element (h1-h6) in the root
1675
+ heading_pattern = re.compile(r"^h[1-6]$", re.IGNORECASE)
1676
+
1677
+ for idx, child in enumerate(self.root):
1678
+ if not isinstance(child.tag, str):
1679
+ continue
1680
+
1681
+ if heading_pattern.match(child.tag) is None:
1682
+ continue
1683
+
1684
+ # Preserve any text that comes after the heading (tail text)
1685
+ tail = child.tail
1686
+
1687
+ # Remove the heading
1688
+ self.root.remove(child)
1689
+
1690
+ # If there was tail text, attach it to the previous sibling's tail
1691
+ # or to the parent's text if this was the first child
1692
+ if tail:
1693
+ if idx > 0:
1694
+ # Append to previous sibling's tail
1695
+ prev_sibling = self.root[idx - 1]
1696
+ if prev_sibling.tail:
1697
+ prev_sibling.tail += tail
1698
+ else:
1699
+ prev_sibling.tail = tail
1700
+ else:
1701
+ # No previous sibling, append to parent's text
1702
+ if self.root.text:
1703
+ self.root.text += tail
1704
+ else:
1705
+ self.root.text = tail
1706
+
1707
+ # Only remove the FIRST heading, then stop
1708
+ break
1882
1709
 
1883
- parts = [replace_part(p) for p in path.parts]
1884
- return Path(*parts).as_posix().replace("/", "_")
1710
+ def xhtml(self) -> str:
1711
+ return elements_to_string(self.root)