markdown-to-confluence 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2conf/converter.py CHANGED
@@ -35,6 +35,7 @@ from .mermaid import MermaidConfigProperties
35
35
  from .metadata import ConfluenceSiteMetadata
36
36
  from .scanner import MermaidScanner, ScannedDocument, Scanner
37
37
  from .serializer import JsonType
38
+ from .svg import fix_svg_dimensions, get_svg_dimensions, get_svg_dimensions_from_bytes
38
39
  from .toc import TableOfContentsBuilder
39
40
  from .uri import is_absolute_url, to_uuid_urn
40
41
  from .xml import element_to_text
@@ -42,6 +43,27 @@ from .xml import element_to_text
42
43
  ElementType = ET._Element # pyright: ignore [reportPrivateUsage]
43
44
 
44
45
 
46
def apply_generated_by_template(template: str, path: Path) -> str:
    """Apply template substitution to the generated_by string.

    Supported placeholders:
    - %{filepath}: Full path to the file (relative to the root directory), in POSIX notation
    - %{filename}: Just the filename

    All placeholders are substituted in a single pass over the template. Text that has just been inserted is never
    re-scanned, so a placeholder-like sequence occurring inside a substituted value (e.g. a directory literally named
    `%{filename}`) is emitted verbatim rather than being expanded a second time.

    :param template: The template string with placeholders
    :param path: The path to the file being converted
    :returns: The template string with placeholders replaced
    """

    import re  # function-scope import keeps the helper self-contained

    substitutions = {
        "%{filepath}": path.as_posix(),
        "%{filename}": path.name,
    }

    # a callable replacement is used so that backslashes in a substituted value are never treated as escape sequences
    return re.sub(
        r"%\{(?:filepath|filename)\}",
        lambda match: substitutions[match.group(0)],
        template,
    )
65
+
66
+
45
67
  def get_volatile_attributes() -> list[str]:
46
68
  "Returns a list of volatile attributes that frequently change as a Confluence storage format XHTML document is updated."
47
69
 
@@ -81,6 +103,12 @@ def is_directory_within(absolute_path: Path, base_path: Path) -> bool:
81
103
  return absolute_path.as_posix().startswith(base_path.as_posix())
82
104
 
83
105
 
106
def fix_absolute_path(path: Path, root_path: Path) -> Path:
    """
    Re-anchors an absolute path below another root path.

    The anchor of `path` (drive plus root, e.g. `/` or `C:/`) is stripped and the remainder is appended to
    `root_path`. Stripping the anchor rather than only the root keeps the function working for drive-qualified
    Windows paths, for which `relative_to(path.root)` raises `ValueError`; on POSIX the anchor and the root coincide.

    :param path: Absolute path whose leading anchor is to be replaced.
    :param root_path: Directory that takes the place of the original anchor.
    :returns: Location of `path` interpreted relative to `root_path`.
    """

    return root_path / path.relative_to(path.anchor)
110
+
111
+
84
112
  def encode_title(text: str) -> str:
85
113
  "Converts a title string such that it is safe to embed into a Confluence URL."
86
114
 
@@ -95,6 +123,7 @@ def encode_title(text: str) -> str:
95
123
 
96
124
 
97
125
  # supported code block languages, for which syntax highlighting is available
126
+ # spellchecker: disable
98
127
  _LANGUAGES = {
99
128
  "abap": "abap",
100
129
  "actionscript3": "actionscript3",
@@ -179,6 +208,7 @@ _LANGUAGES = {
179
208
  "xquery": "xquery",
180
209
  "yaml": "yaml",
181
210
  }
211
+ # spellchecker: enable
182
212
 
183
213
 
184
214
  class NodeVisitor(ABC):
@@ -270,6 +300,7 @@ class ImageAttributes:
270
300
  :param title: Title text (a.k.a. image tooltip).
271
301
  :param caption: Caption text (shown below figure).
272
302
  :param alignment: Alignment for block-level images.
303
+ :param display_width: Constrained display width in pixels (if different from natural width).
273
304
  """
274
305
 
275
306
  context: FormattingContext
@@ -279,6 +310,7 @@ class ImageAttributes:
279
310
  title: str | None
280
311
  caption: str | None
281
312
  alignment: ImageAlignment = ImageAlignment.CENTER
313
+ display_width: int | None = None
282
314
 
283
315
  def __post_init__(self) -> None:
284
316
  if self.caption is None and self.context is FormattingContext.BLOCK:
@@ -303,7 +335,9 @@ class ImageAttributes:
303
335
  attributes[AC_ATTR("original-height")] = str(self.height)
304
336
  if self.width is not None:
305
337
  attributes[AC_ATTR("custom-width")] = "true"
306
- attributes[AC_ATTR("width")] = str(self.width)
338
+ # Use display_width if set, otherwise use natural width
339
+ effective_width = self.display_width or self.width
340
+ attributes[AC_ATTR("width")] = str(effective_width)
307
341
 
308
342
  elif self.context is FormattingContext.INLINE:
309
343
  if self.width is not None:
@@ -349,6 +383,7 @@ class ConfluenceConverterOptions:
349
383
  plain text; when false, raise an exception.
350
384
  :param heading_anchors: When true, emit a structured macro *anchor* for each section heading using GitHub
351
385
  conversion rules for the identifier.
386
+ :param skip_title_heading: Whether to remove the first heading from document body when used as page title.
352
387
  :param prefer_raster: Whether to choose PNG files over SVG files when available.
353
388
  :param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
354
389
  :param render_mermaid: Whether to pre-render Mermaid diagrams into PNG/SVG images.
@@ -356,11 +391,13 @@ class ConfluenceConverterOptions:
356
391
  :param diagram_output_format: Target image format for diagrams.
357
392
  :param webui_links: When true, convert relative URLs to Confluence Web UI links.
358
393
  :param alignment: Alignment for block-level images and formulas.
394
+ :param max_image_width: Maximum display width for images in pixels.
359
395
  :param use_panel: Whether to transform admonitions and alerts into a Confluence custom panel.
360
396
  """
361
397
 
362
398
  ignore_invalid_url: bool = False
363
399
  heading_anchors: bool = False
400
+ skip_title_heading: bool = False
364
401
  prefer_raster: bool = True
365
402
  render_drawio: bool = False
366
403
  render_mermaid: bool = False
@@ -368,8 +405,23 @@ class ConfluenceConverterOptions:
368
405
  diagram_output_format: Literal["png", "svg"] = "png"
369
406
  webui_links: bool = False
370
407
  alignment: Literal["center", "left", "right"] = "center"
408
+ max_image_width: int | None = None
371
409
  use_panel: bool = False
372
410
 
411
+ def calculate_display_width(self, natural_width: int | None) -> int | None:
412
+ """
413
+ Calculate the display width for an image, applying max_image_width constraint if set.
414
+
415
+ :param natural_width: The natural width of the image in pixels.
416
+ :returns: The constrained display width, or None if no constraint is needed.
417
+ """
418
+
419
+ if natural_width is None or self.max_image_width is None:
420
+ return None
421
+ if natural_width <= self.max_image_width:
422
+ return None # no constraint needed, image is already within limits
423
+ return self.max_image_width
424
+
373
425
 
374
426
  @dataclass
375
427
  class ImageData:
@@ -545,9 +597,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
545
597
 
546
598
  # discard original value: relative links always require transformation
547
599
  anchor.attrib.pop("href")
548
-
549
- # convert the relative URL to absolute path based on the base path value
550
- absolute_path = (self.base_dir / relative_url.path).resolve()
600
+ if relative_url.path.startswith("/"):
601
+ absolute_path = fix_absolute_path(path=Path(relative_url.path), root_path=self.root_dir).resolve()
602
+ else:
603
+ # convert the relative URL to absolute path based on the base path value
604
+ absolute_path = (self.base_dir / relative_url.path).resolve()
551
605
 
552
606
  # look up the absolute path in the page metadata dictionary to discover the relative path within Confluence that should be used
553
607
  if not is_directory_within(absolute_path, self.root_dir):
@@ -668,7 +722,14 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
668
722
  pixel_width = int(width) if width is not None and width.isdecimal() else None
669
723
  pixel_height = int(height) if height is not None and height.isdecimal() else None
670
724
  attrs = ImageAttributes(
671
- context, width=pixel_width, height=pixel_height, alt=alt, title=title, caption=None, alignment=ImageAlignment(self.options.alignment)
725
+ context,
726
+ width=pixel_width,
727
+ height=pixel_height,
728
+ alt=alt,
729
+ title=title,
730
+ caption=None,
731
+ alignment=ImageAlignment(self.options.alignment),
732
+ display_width=self.options.calculate_display_width(pixel_width),
672
733
  )
673
734
 
674
735
  if is_absolute_url(src):
@@ -716,8 +777,11 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
716
777
  def _verify_image_path(self, path: Path) -> Path | None:
717
778
  "Checks whether an image path is safe to use."
718
779
 
719
- # resolve relative path into absolute path w.r.t. base dir
720
- absolute_path = (self.base_dir / path).resolve()
780
+ if path.is_absolute():
781
+ absolute_path = fix_absolute_path(path=path, root_path=self.root_dir).resolve()
782
+ else:
783
+ # resolve relative path into absolute path w.r.t. base dir
784
+ absolute_path = (self.base_dir / path).resolve()
721
785
 
722
786
  if not absolute_path.exists():
723
787
  self._warn_or_raise(f"path to image {path} does not exist")
@@ -738,6 +802,21 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
738
802
  if png_file.exists():
739
803
  absolute_path = png_file
740
804
 
805
+ # infer SVG dimensions if not already specified
806
+ if absolute_path.suffix == ".svg" and attrs.width is None and attrs.height is None:
807
+ svg_width, svg_height = get_svg_dimensions(absolute_path)
808
+ if svg_width is not None:
809
+ attrs = ImageAttributes(
810
+ context=attrs.context,
811
+ width=svg_width,
812
+ height=svg_height,
813
+ alt=attrs.alt,
814
+ title=attrs.title,
815
+ caption=attrs.caption,
816
+ alignment=attrs.alignment,
817
+ display_width=self.options.calculate_display_width(svg_width),
818
+ )
819
+
741
820
  self.images.append(ImageData(absolute_path, attrs.alt))
742
821
  image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
743
822
  return self._create_attached_image(image_name, attrs)
@@ -926,8 +1005,29 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
926
1005
  content = f.read()
927
1006
  config = self._extract_mermaid_config(content)
928
1007
  image_data = mermaid.render_diagram(content, self.options.diagram_output_format, config=config)
1008
+
1009
+ # Extract dimensions and fix SVG if that's the output format
1010
+ if self.options.diagram_output_format == "svg":
1011
+ # Fix SVG to have explicit width/height instead of percentages
1012
+ image_data = fix_svg_dimensions(image_data)
1013
+
1014
+ if attrs.width is None and attrs.height is None:
1015
+ svg_width, svg_height = get_svg_dimensions_from_bytes(image_data)
1016
+ if svg_width is not None or svg_height is not None:
1017
+ attrs = ImageAttributes(
1018
+ context=attrs.context,
1019
+ width=svg_width,
1020
+ height=svg_height,
1021
+ alt=attrs.alt,
1022
+ title=attrs.title,
1023
+ caption=attrs.caption,
1024
+ alignment=attrs.alignment,
1025
+ display_width=self.options.calculate_display_width(svg_width),
1026
+ )
1027
+
929
1028
  image_filename = attachment_name(relative_path.with_suffix(f".{self.options.diagram_output_format}"))
930
1029
  self.embedded_files[image_filename] = EmbeddedFileData(image_data, attrs.alt)
1030
+
931
1031
  return self._create_attached_image(image_filename, attrs)
932
1032
  else:
933
1033
  self.images.append(ImageData(absolute_path, attrs.alt))
@@ -940,10 +1040,31 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
940
1040
  if self.options.render_mermaid:
941
1041
  config = self._extract_mermaid_config(content)
942
1042
  image_data = mermaid.render_diagram(content, self.options.diagram_output_format, config=config)
1043
+
1044
+ # Extract dimensions and fix SVG if that's the output format
1045
+ attrs = ImageAttributes.EMPTY_BLOCK
1046
+ if self.options.diagram_output_format == "svg":
1047
+ # Fix SVG to have explicit width/height instead of percentages
1048
+ image_data = fix_svg_dimensions(image_data)
1049
+
1050
+ svg_width, svg_height = get_svg_dimensions_from_bytes(image_data)
1051
+ if svg_width is not None or svg_height is not None:
1052
+ attrs = ImageAttributes(
1053
+ context=FormattingContext.BLOCK,
1054
+ width=svg_width,
1055
+ height=svg_height,
1056
+ alt=None,
1057
+ title=None,
1058
+ caption=None,
1059
+ alignment=ImageAlignment(self.options.alignment),
1060
+ display_width=self.options.calculate_display_width(svg_width),
1061
+ )
1062
+
943
1063
  image_hash = hashlib.md5(image_data).hexdigest()
944
1064
  image_filename = attachment_name(f"embedded_{image_hash}.{self.options.diagram_output_format}")
945
1065
  self.embedded_files[image_filename] = EmbeddedFileData(image_data)
946
- return self._create_attached_image(image_filename, ImageAttributes.EMPTY_BLOCK)
1066
+
1067
+ return self._create_attached_image(image_filename, attrs)
947
1068
  else:
948
1069
  mermaid_data = content.encode("utf-8")
949
1070
  mermaid_hash = hashlib.md5(mermaid_data).hexdigest()
@@ -1301,7 +1422,16 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1301
1422
  if self.options.diagram_output_format == "png":
1302
1423
  width, height = get_png_dimensions(data=image_data)
1303
1424
  image_data = remove_png_chunks(["pHYs"], source_data=image_data)
1304
- attrs = ImageAttributes(context, width=width, height=height, alt=content, title=None, caption="", alignment=ImageAlignment(self.options.alignment))
1425
+ attrs = ImageAttributes(
1426
+ context,
1427
+ width=width,
1428
+ height=height,
1429
+ alt=content,
1430
+ title=None,
1431
+ caption="",
1432
+ alignment=ImageAlignment(self.options.alignment),
1433
+ display_width=self.options.calculate_display_width(width),
1434
+ )
1305
1435
  else:
1306
1436
  attrs = ImageAttributes.empty(context)
1307
1437
 
@@ -1386,8 +1516,16 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1386
1516
  """
1387
1517
  Transforms a footnote reference.
1388
1518
 
1519
+ When a footnote is referenced multiple times, Python-Markdown generates
1520
+ different `id` attributes for each reference:
1521
+ - First reference: `fnref:NAME`
1522
+ - Second reference: `fnref2:NAME`
1523
+ - Third reference: `fnref3:NAME`
1524
+ - etc.
1525
+
1389
1526
  ```
1390
1527
  <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">REF</a></sup>
1528
+ <sup id="fnref2:NAME"><a class="footnote-ref" href="#fn:NAME">REF</a></sup>
1391
1529
  ```
1392
1530
  """
1393
1531
 
@@ -1395,9 +1533,14 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1395
1533
  raise DocumentError("expected: `<sup>` as the HTML element for a footnote reference")
1396
1534
 
1397
1535
  ref_id = elem.attrib.pop("id", "")
1398
- if not ref_id.startswith("fnref:"):
1399
- raise DocumentError("expected: attribute `id` of format `fnref:NAME` applied on `<sup>` for a footnote reference")
1400
- footnote_ref = ref_id.removeprefix("fnref:")
1536
+ # Match fnref:NAME, fnref2:NAME, fnref3:NAME, etc.
1537
+ match = re.match(r"^fnref(\d*):(.+)$", ref_id)
1538
+ if match is None:
1539
+ raise DocumentError("expected: attribute `id` of format `fnref:NAME` or `fnrefN:NAME` applied on `<sup>` for a footnote reference")
1540
+ numeric_suffix = match.group(1)
1541
+ footnote_name = match.group(2)
1542
+ # Build anchor name: first reference uses NAME, subsequent references use NAME-N
1543
+ footnote_ref = f"{footnote_name}-{numeric_suffix}" if numeric_suffix else footnote_name
1401
1544
 
1402
1545
  link = next((elem.iterchildren(tag="a")), None)
1403
1546
  if link is None:
@@ -1443,6 +1586,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1443
1586
  """
1444
1587
  Transforms the footnote definition block.
1445
1588
 
1589
+ When a footnote is referenced multiple times, Python-Markdown generates
1590
+ multiple back-reference links in the footnote definition:
1591
+ - First reference: `#fnref:NAME`
1592
+ - Second reference: `#fnref2:NAME`
1593
+ - Third reference: `#fnref3:NAME`
1594
+ - etc.
1595
+
1446
1596
  ```
1447
1597
  <div class="footnote">
1448
1598
  <hr/>
@@ -1453,6 +1603,13 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1453
1603
  </ol>
1454
1604
  </div>
1455
1605
  ```
1606
+
1607
+ With multiple references to the same footnote:
1608
+ ```
1609
+ <li id="fn:NAME">
1610
+ <p>TEXT <a class="footnote-backref" href="#fnref:NAME">↩</a><a class="footnote-backref" href="#fnref2:NAME">↩</a></p>
1611
+ </li>
1612
+ ```
1456
1613
  """
1457
1614
 
1458
1615
  ordered_list = next((elem.iterchildren(tag="ol")), None)
@@ -1468,21 +1625,33 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1468
1625
  raise DocumentError("expected: attribute `id` of format `fn:NAME` applied on `<li>` for a footnote definition")
1469
1626
  footnote_def = def_id.removeprefix("fn:")
1470
1627
 
1471
- paragraph = next((list_item.iterchildren(tag="p")), None)
1472
- if paragraph is None:
1628
+ # find the last paragraph, which is where the backref links are placed
1629
+ paragraphs = list(list_item.iterchildren(tag="p"))
1630
+ if not paragraphs:
1473
1631
  raise DocumentError("expected: `<p>` as a child of `<li>` in a footnote definition")
1632
+ last_paragraph = paragraphs[-1]
1633
+
1634
+ # collect all backref anchors (there may be multiple when a footnote is referenced multiple times)
1635
+ # pattern matches #fnref:NAME, #fnref2:NAME, #fnref3:NAME, etc.
1636
+ # store tuples of (anchor_element, number, footnote_name)
1637
+ backref_info: list[tuple[ElementType, int | None, str]] = []
1638
+ for anchor in list(last_paragraph.iterchildren(tag="a")):
1639
+ href = anchor.get("href", "")
1640
+ match = re.match(r"^#fnref(\d*):(.+)$", href)
1641
+ if match is not None:
1642
+ backref_info.append((anchor, int(match.group(1), base=10) if match.group(1) else None, match.group(2)))
1643
+
1644
+ if not backref_info:
1645
+ raise DocumentError(
1646
+ "expected: at least one `<a>` element with `href` attribute of format `#fnref:NAME` or `#fnrefN:NAME` in a footnote definition"
1647
+ )
1474
1648
 
1475
- ref_anchor = next((paragraph.iterchildren(tag="a", reversed=True)), None)
1476
- if ref_anchor is None:
1477
- raise DocumentError("expected: `<a>` as the last HTML element in a footnote definition")
1478
-
1479
- ref_href = ref_anchor.get("href", "")
1480
- if not ref_href.startswith("#fnref:"):
1481
- raise DocumentError("expected: attribute `href` of format `#fnref:NAME` applied on last element `<a>` for a footnote definition")
1482
- footnote_ref = ref_href.removeprefix("#fnref:")
1649
+ # remove all back-links generated by Python-Markdown
1650
+ for anchor, _, _ in backref_info:
1651
+ last_paragraph.remove(anchor)
1483
1652
 
1484
- # remove back-link generated by Python-Markdown
1485
- paragraph.remove(ref_anchor)
1653
+ # use the first paragraph for the anchor placement
1654
+ first_paragraph = paragraphs[0]
1486
1655
 
1487
1656
  # build new anchor for footnote definition
1488
1657
  def_anchor = AC_ELEM(
@@ -1498,20 +1667,40 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1498
1667
  ),
1499
1668
  )
1500
1669
 
1501
- # build new link to footnote reference in page body
1502
- ref_link = AC_ELEM(
1503
- "link",
1504
- {
1505
- AC_ATTR("anchor"): f"footnote-ref-{footnote_ref}",
1506
- },
1507
- AC_ELEM("link-body", ET.CDATA("↩")),
1508
- )
1670
+ # build back-links to each footnote reference in page body:
1671
+ # * for single reference: ↩
1672
+ # * for multiple references: ↩¹ ↩² ↩³ ...
1673
+ for _, number, footnote_name in backref_info:
1674
+ # build anchor name matching the reference anchor:
1675
+ # * first reference: footnote-ref-NAME
1676
+ # * subsequent references: footnote-ref-NAME-N
1677
+ if number is None:
1678
+ anchor_name = f"footnote-ref-{footnote_name}"
1679
+ if len(backref_info) > 1:
1680
+ link_text = "↩¹"
1681
+ else:
1682
+ link_text = "↩"
1683
+ else:
1684
+ anchor_name = f"footnote-ref-{footnote_name}-{number}"
1685
+
1686
+ # use superscript numbers for references
1687
+ superscript_digits = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
1688
+ link_text = f"↩{str(number).translate(superscript_digits)}"
1689
+
1690
+ ref_link = AC_ELEM(
1691
+ "link",
1692
+ {
1693
+ AC_ATTR("anchor"): anchor_name,
1694
+ },
1695
+ AC_ELEM("link-body", ET.CDATA(link_text)),
1696
+ )
1697
+
1698
+ last_paragraph.append(ref_link)
1509
1699
 
1510
- # append children synthesized for Confluence
1511
- paragraph.insert(0, def_anchor)
1512
- def_anchor.tail = paragraph.text
1513
- paragraph.text = None
1514
- paragraph.append(ref_link)
1700
+ # append anchor to first paragraph
1701
+ first_paragraph.insert(0, def_anchor)
1702
+ def_anchor.tail = first_paragraph.text
1703
+ first_paragraph.text = None
1515
1704
 
1516
1705
  def _transform_tasklist(self, elem: ElementType) -> ElementType:
1517
1706
  """
@@ -1699,7 +1888,8 @@ class ConfluenceStorageFormatConverter(NodeVisitor):
1699
1888
  return self._transform_inline_math(child)
1700
1889
 
1701
1890
  # <sup id="fnref:NAME"><a class="footnote-ref" href="#fn:NAME">1</a></sup>
1702
- elif child.tag == "sup" and child.get("id", "").startswith("fnref:"):
1891
+ # Multiple references: <sup id="fnref2:NAME">...</sup>, <sup id="fnref3:NAME">...</sup>
1892
+ elif child.tag == "sup" and re.match(r"^fnref\d*:", child.get("id", "")):
1703
1893
  self._transform_footnote_ref(child)
1704
1894
  return None
1705
1895
 
@@ -1807,6 +1997,7 @@ class ConfluenceDocument:
1807
1997
  generated_by = None
1808
1998
 
1809
1999
  if generated_by is not None:
2000
+ generated_by = apply_generated_by_template(generated_by, path.relative_to(root_dir))
1810
2001
  generated_by_html = markdown_to_html(generated_by)
1811
2002
 
1812
2003
  content = [
@@ -1848,6 +2039,59 @@ class ConfluenceDocument:
1848
2039
  self.labels = document.tags
1849
2040
  self.properties = document.properties
1850
2041
 
2042
+ # Remove the first heading if:
2043
+ # 1. The option is enabled
2044
+ # 2. Title was NOT from front-matter (document.title is None)
2045
+ # 3. A title was successfully extracted from heading (self.title is not None)
2046
+ if converter_options.skip_title_heading and document.title is None and self.title is not None:
2047
+ self._remove_first_heading()
2048
+
2049
def _remove_first_heading(self) -> None:
    """
    Drops the first heading element (`h1` to `h6`) found among the direct children of the document root.

    Called when the page title has been taken from the document's heading and the heading should not be repeated
    in the page body. Children that precede the heading (such as a generated-by info panel) are left in place.
    Text trailing the heading is kept by moving it to the preceding sibling, or to the root if there is none.
    """

    is_heading = re.compile(r"^h[1-6]$", re.IGNORECASE).match

    # locate the first child whose tag is a heading name; nodes with a non-string tag are skipped
    located = next(
        ((position, node) for position, node in enumerate(self.root) if isinstance(node.tag, str) and is_heading(node.tag) is not None),
        None,
    )
    if located is None:
        return

    position, heading = located

    # capture the tail before removal so that the text following the heading can be re-attached
    trailing_text = heading.tail
    self.root.remove(heading)

    if not trailing_text:
        return

    if position > 0:
        # the element that used to precede the heading inherits the text
        predecessor = self.root[position - 1]
        predecessor.tail = (predecessor.tail or "") + trailing_text
    else:
        # the heading was the first child: the text becomes part of the root's leading text
        self.root.text = (self.root.text or "") + trailing_text
2094
+
1851
2095
  def xhtml(self) -> str:
1852
2096
  return elements_to_string(self.root)
1853
2097
 
md2conf/domain.py CHANGED
@@ -20,10 +20,12 @@ class ConfluenceDocumentOptions:
20
20
  """
21
21
  Options that control the generated page content.
22
22
 
23
- :param ignore_invalid_url: When true, ignore invalid URLs in input, emit a warning and replace the anchor with
24
- plain text; when false, raise an exception.
25
23
  :param heading_anchors: When true, emit a structured macro *anchor* for each section heading using GitHub
26
24
  conversion rules for the identifier.
25
+ :param ignore_invalid_url: When true, ignore invalid URLs in input, emit a warning and replace the anchor with
26
+ plain text; when false, raise an exception.
27
+ :param skip_title_heading: Whether to remove the first heading from document body when used as page title.
28
+ :param title_prefix: String to prepend to Confluence page title for each published page.
27
29
  :param generated_by: Text to use as the generated-by prompt (or `None` to omit a prompt).
28
30
  :param root_page_id: Confluence page to assume root page role for publishing a directory of Markdown files.
29
31
  :param keep_hierarchy: Whether to maintain source directory structure when exporting to Confluence.
@@ -34,11 +36,15 @@ class ConfluenceDocumentOptions:
34
36
  :param diagram_output_format: Target image format for diagrams.
35
37
  :param webui_links: When true, convert relative URLs to Confluence Web UI links.
36
38
  :param alignment: Alignment for block-level images and formulas.
39
+ :param max_image_width: Maximum display width for images [px]. Wider images are scaled down for page display.
40
+ Original size kept for full-size viewing.
37
41
  :param use_panel: Whether to transform admonitions and alerts into a Confluence custom panel.
38
42
  """
39
43
 
40
- ignore_invalid_url: bool = False
41
44
  heading_anchors: bool = False
45
+ ignore_invalid_url: bool = False
46
+ skip_title_heading: bool = False
47
+ title_prefix: str | None = None
42
48
  generated_by: str | None = "This page has been generated with a tool."
43
49
  root_page_id: ConfluencePageID | None = None
44
50
  keep_hierarchy: bool = False
@@ -49,4 +55,5 @@ class ConfluenceDocumentOptions:
49
55
  diagram_output_format: Literal["png", "svg"] = "png"
50
56
  webui_links: bool = False
51
57
  alignment: Literal["center", "left", "right"] = "center"
58
+ max_image_width: int | None = None
52
59
  use_panel: bool = False
md2conf/latex.py CHANGED
@@ -44,16 +44,16 @@ else:
44
44
 
45
45
  def _render_latex(expression: str, f: BinaryIO, *, format: Literal["png", "svg"], dpi: int, font_size: int) -> None:
46
46
  # create a figure with no axis
47
- fig = plt.figure(dpi=dpi)
47
+ fig = plt.figure(dpi=dpi) # pyright: ignore[reportUnknownMemberType]
48
48
 
49
49
  # transparent background
50
50
  fig.patch.set_alpha(0)
51
51
 
52
52
  # add LaTeX text
53
- fig.text(x=0, y=0, s=f"${expression}$", fontsize=font_size)
53
+ fig.text(x=0, y=0, s=f"${expression}$", fontsize=font_size) # pyright: ignore[reportUnknownMemberType]
54
54
 
55
55
  # save the image
56
- fig.savefig(
56
+ fig.savefig( # pyright: ignore[reportUnknownMemberType]
57
57
  f,
58
58
  transparent=True,
59
59
  format=format,
@@ -209,7 +209,7 @@ def _get_png_dimensions(source_file: BinaryIO) -> tuple[int, int]:
209
209
 
210
210
  _read_signature(source_file)
211
211
 
212
- # validate IHDR chunk
212
+ # validate IHDR (Image Header) chunk
213
213
  ihdr = _read_chunk(source_file)
214
214
  if ihdr is None:
215
215
  raise ValueError("missing IHDR chunk")
md2conf/publisher.py CHANGED
@@ -81,6 +81,9 @@ class SynchronizingProcessor(Processor):
81
81
  digest = self._generate_hash(node.absolute_path)
82
82
  title = f"{node.absolute_path.stem} [{digest}]"
83
83
 
84
+ if self.options.title_prefix is not None:
85
+ title = f"{self.options.title_prefix} {title}"
86
+
84
87
  # look up page by (possibly auto-generated) title
85
88
  page = self.api.get_or_create_page(title, parent_id.page_id)
86
89
 
md2conf/scanner.py CHANGED
@@ -23,8 +23,8 @@ T = TypeVar("T")
23
23
  def extract_value(pattern: str, text: str) -> tuple[str | None, str]:
24
24
  values: list[str] = []
25
25
 
26
- def _repl_func(matchobj: re.Match[str]) -> str:
27
- values.append(matchobj.group(1))
26
+ def _repl_func(match: re.Match[str]) -> str:
27
+ values.append(match.group(1))
28
28
  return ""
29
29
 
30
30
  text = re.sub(pattern, _repl_func, text, count=1, flags=re.ASCII)
md2conf/serializer.py CHANGED
@@ -6,9 +6,11 @@ Copyright 2022-2025, Levente Hunyadi
6
6
  :see: https://github.com/hunyadi/md2conf
7
7
  """
8
8
 
9
+ import sys
10
+ from datetime import datetime
9
11
  from typing import TypeVar
10
12
 
11
- from cattrs.preconf.json import make_converter
13
+ from cattrs.preconf.orjson import make_converter
12
14
 
13
15
  JsonType = None | bool | int | float | str | dict[str, "JsonType"] | list["JsonType"]
14
16
  JsonComposite = dict[str, "JsonType"] | list["JsonType"]
@@ -19,6 +21,16 @@ T = TypeVar("T")
19
21
  _converter = make_converter(forbid_extra_keys=False)
20
22
 
21
23
 
24
if sys.version_info < (3, 11):

    @_converter.register_structure_hook
    def datetime_structure_hook(value: str, cls: type[datetime]) -> datetime:
        "Parses an ISO 8601 timestamp, accepting a trailing `Z` as the designator for UTC."

        # fromisoformat() prior to Python version 3.11 does not support military time zones like "Zulu" for UTC,
        # hence the suffix is rewritten as an explicit zero offset
        normalized = f"{value[:-1]}+00:00" if value.endswith("Z") else value
        return datetime.fromisoformat(normalized)
32
+
33
+
22
34
  @_converter.register_structure_hook
23
35
  def json_type_structure_hook(value: JsonType, cls: type[JsonType]) -> JsonType:
24
36
  return value
@@ -49,4 +61,4 @@ def object_to_json_payload(data: object) -> bytes:
49
61
  :returns: JSON string encoded in UTF-8.
50
62
  """
51
63
 
52
- return _converter.dumps(data).encode("utf-8")
64
+ return _converter.dumps(data)