PyPI - markdown-to-confluence - Versions diffs - 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl - Mend

markdown-to-confluence 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/METADATA +95 -53
{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/RECORD +29 -27
{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/WHEEL +1 -1
md2conf/__init__.py +1 -1
md2conf/__main__.py +23 -172
md2conf/api.py +32 -67
md2conf/attachment.py +4 -3
md2conf/clio.py +226 -0
md2conf/compatibility.py +5 -0
md2conf/converter.py +235 -143
md2conf/csf.py +89 -9
md2conf/drawio/render.py +2 -0
md2conf/frontmatter.py +18 -6
md2conf/image.py +7 -5
md2conf/latex.py +8 -1
md2conf/markdown.py +68 -1
md2conf/options.py +93 -24
md2conf/plantuml/extension.py +1 -1
md2conf/publisher.py +81 -16
md2conf/reflection.py +74 -0
md2conf/scanner.py +9 -5
md2conf/serializer.py +12 -1
md2conf/svg.py +5 -2
md2conf/toc.py +1 -1
md2conf/xml.py +45 -0
{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/entry_points.txt +0 -0
{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/licenses/LICENSE +0 -0
{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/top_level.txt +0 -0
{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/zip-safe +0 -0

md2conf/csf.py CHANGED Viewed

@@ -18,16 +18,16 @@ from lxml.builder import ElementMaker
 ElementType = ET._Element  # pyright: ignore [reportPrivateUsage]
 # XML namespaces typically associated with Confluence Storage Format documents
-_namespaces = {
+_NAMESPACES = {
     "ac": "http://atlassian.com/content",
     "ri": "http://atlassian.com/resource/identifier",
 }
-for key, value in _namespaces.items():
+for key, value in _NAMESPACES.items():
     ET.register_namespace(key, value)
 HTML = ElementMaker()
-AC_ELEM = ElementMaker(namespace=_namespaces["ac"])
-RI_ELEM = ElementMaker(namespace=_namespaces["ri"])
+AC_ELEM = ElementMaker(namespace=_NAMESPACES["ac"])
+RI_ELEM = ElementMaker(namespace=_NAMESPACES["ri"])
 class ParseError(RuntimeError):
@@ -39,11 +39,11 @@ def _qname(namespace_uri: str, name: str) -> str:
 def AC_ATTR(name: str) -> str:
-    return _qname(_namespaces["ac"], name)
+    return _qname(_NAMESPACES["ac"], name)
 def RI_ATTR(name: str) -> str:
-    return _qname(_namespaces["ri"], name)
+    return _qname(_NAMESPACES["ri"], name)
 @contextmanager
@@ -77,7 +77,7 @@ def _elements_from_strings(dtd_path: Path, items: list[str]) -> ElementType:
         load_dtd=True,
     )
-    ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in _namespaces.items())
+    ns_attr_list = "".join(f' xmlns:{key}="{value}"' for key, value in _NAMESPACES.items())
     data = [
         '<?xml version="1.0"?>',
@@ -139,6 +139,9 @@ def content_to_string(content: str) -> str:
         return _content_to_string(dtd_path, content)
+_ROOT_REGEXP = re.compile(r"^<root\s+[^>]*>(.*)</root>\s*$", re.DOTALL)
 def elements_to_string(root: ElementType) -> str:
     """
     Converts a Confluence Storage Format element tree into an XML string to push to Confluence REST API.
@@ -148,8 +151,7 @@ def elements_to_string(root: ElementType) -> str:
     """
     xml = ET.tostring(root, encoding="utf8", method="xml").decode("utf8")
-    m = re.match(r"^<root\s+[^>]*>(.*)</root>\s*$", xml, re.DOTALL)
-    if m:
+    if m := _ROOT_REGEXP.match(xml):
         return m.group(1)
     else:
         raise ValueError("expected: Confluence content")
@@ -219,3 +221,81 @@ def normalize_inline(elem: ElementType) -> None:
         # ignore empty elements
         if item.tag != "p" or len(item) > 0 or item.text:
             elem.append(item)
+# elements in which whitespace is normalized
+_NORMALIZED_ELEMENTS = [
+    "a",
+    "b",
+    "blockquote",
+    "code",
+    "del",
+    "details",
+    "div",
+    "em",
+    "h1",
+    "h2",
+    "h3",
+    "h4",
+    "h5",
+    "h6",
+    "i",
+    "li",
+    "p",
+    "span",
+    "strong",
+    "sub",
+    "summary",
+    "sup",
+    "td",
+    "th",
+    "u",
+    "{" + _NAMESPACES["ac"] + "}link-body",
+    "{" + _NAMESPACES["ac"] + "}rich-text-body",
+    "{" + _NAMESPACES["ac"] + "}task-body",
+]
+# elements that are recursed into for whitespace normalization
+_PASSTHROUGH_ELEMENTS = _NORMALIZED_ELEMENTS + [
+    "ol",
+    "table",
+    "tbody",
+    "tfoot",
+    "thead",
+    "tr",
+    "ul",
+    "{" + _NAMESPACES["ac"] + "}link",
+    "{" + _NAMESPACES["ac"] + "}structured-macro",
+    "{" + _NAMESPACES["ac"] + "}task",
+    "{" + _NAMESPACES["ac"] + "}task-list",
+]
+def normalize_whitespace(elem: ElementType) -> None:
+    "Replaces linefeed with space in contexts where whitespace normalization is permitted."
+    if not elem.text and len(elem) < 1:
+        # empty element
+        return
+    if elem.tag not in _PASSTHROUGH_ELEMENTS:
+        # element whose descendants are to be skipped
+        return
+    if elem.tag in _NORMALIZED_ELEMENTS:
+        if elem.text:
+            elem.text = elem.text.replace("\n", " ")
+        for child in elem:
+            if child.tail:
+                child.tail = child.tail.replace("\n", " ")
+    for child in elem:
+        normalize_whitespace(child)
+def canonicalize(content: str) -> str:
+    "Converts a Confluence Storage Format (CSF) document to the normalized format."
+    root = elements_from_string(content)
+    for child in root:
+        normalize_whitespace(child)
+    return elements_to_string(root)

md2conf/drawio/render.py CHANGED Viewed

@@ -47,8 +47,10 @@ def inflate(data: bytes) -> bytes:
     :returns: Uncompressed data.
     """
+    # spellchecker: disable
     # -zlib.MAX_WBITS indicates raw DEFLATE stream (no zlib/gzip headers)
     return zlib.decompress(data, -zlib.MAX_WBITS)
+    # spellchecker: enable
 def decompress_diagram(xml_data: bytes | str) -> ElementType:

md2conf/frontmatter.py CHANGED Viewed

@@ -8,7 +8,8 @@ Copyright 2022-2026, Levente Hunyadi
 import re
 import typing
-from typing import Any, TypeVar
+from dataclasses import dataclass
+from typing import TypeVar
 import yaml
@@ -43,19 +44,30 @@ def extract_value(pattern: str, text: str) -> tuple[str | None, str]:
 def extract_frontmatter_block(text: str) -> tuple[str | None, str]:
     "Extracts the front-matter from a Markdown document as a blob of unparsed text."
-    return extract_value(r"(?ms)\A---$(.+?)^---$", text)
+    return extract_value(r"(?ms)\A---\n(.+?)^---\n", text)
-def extract_frontmatter_json(text: str) -> tuple[dict[str, JsonType] | None, str]:
+@dataclass
+class FrontMatterProperties:
+    data: dict[str, JsonType] | None
+    inner_line_count: int
+    @property
+    def outer_line_count(self) -> int:
+        return self.inner_line_count + 2  # account for enclosing `--` (double dash)
+def extract_frontmatter_json(text: str) -> tuple[FrontMatterProperties | None, str]:
     "Extracts the front-matter from a Markdown document as a dictionary."
     block, text = extract_frontmatter_block(text)
-    properties: dict[str, Any] | None = None
+    properties: FrontMatterProperties | None = None
     if block is not None:
+        inner_line_count = block.count("\n")
         data = yaml.safe_load(block)
         if isinstance(data, dict):
-            properties = typing.cast(dict[str, JsonType], data)
+            properties = FrontMatterProperties(typing.cast(dict[str, JsonType], data), inner_line_count)
     return properties, text
@@ -65,6 +77,6 @@ def extract_frontmatter_object(tp: type[D], text: str) -> tuple[D | None, str]:
     value_object: D | None = None
     if properties is not None:
-        value_object = json_to_object(tp, properties)
+        value_object = json_to_object(tp, properties.data)
     return value_object, text

md2conf/image.py CHANGED Viewed

@@ -75,7 +75,9 @@ class ImageGenerator:
         image_name = attachment_name(path_relative_to(absolute_path, self.base_dir))
         return self.create_attached_image(image_name, attrs)
-    def transform_attached_data(self, image_data: bytes, attrs: ImageAttributes, relative_path: Path | None = None) -> ElementType:
+    def transform_attached_data(
+        self, image_data: bytes, attrs: ImageAttributes, relative_path: Path | None = None, *, image_type: str = "embedded"
+    ) -> ElementType:
         "Emits Confluence Storage Format XHTML for an attached raster or vector image."
         # extract dimensions and update attributes based on format
@@ -100,15 +102,15 @@ class ImageGenerator:
                 alignment=attrs.alignment,
             )
-        # generate filename and add as attachment
+        # generate filename
         if relative_path is not None:
             image_filename = attachment_name(relative_path.with_suffix(f".{self.options.output_format}"))
-            self.attachments.add_embed(image_filename, EmbeddedFileData(image_data, attrs.alt))
         else:
             image_hash = hashlib.md5(image_data).hexdigest()
-            image_filename = attachment_name(f"embedded_{image_hash}.{self.options.output_format}")
-            self.attachments.add_embed(image_filename, EmbeddedFileData(image_data))
+            image_filename = attachment_name(f"{image_type}_{image_hash}.{self.options.output_format}")
+        # add as attachment
+        self.attachments.add_embed(image_filename, EmbeddedFileData(image_data, attrs.alt))
         return self.create_attached_image(image_filename, attrs)
     def create_attached_image(self, image_name: str, attrs: ImageAttributes) -> ElementType:

md2conf/latex.py CHANGED Viewed

@@ -52,6 +52,13 @@ else:
         # spellchecker:disable-next-line
         fig.text(x=0, y=0, s=f"${expression}$", fontsize=font_size)  # pyright: ignore[reportUnknownMemberType]
+        metadata: dict[str, str | None] = {"Title": expression}
+        match format:
+            case "png":
+                metadata.update({"Software": None})
+            case "svg":
+                metadata.update({"Creator": None, "Date": None, "Format": None, "Type": None})
         # save the image
         fig.savefig(  # pyright: ignore[reportUnknownMemberType]
             f,
@@ -59,7 +66,7 @@ else:
             format=format,
             bbox_inches="tight",
             pad_inches=0.0,
-            metadata={"Title": expression} if format == "png" else None,
+            metadata=metadata,
         )
         # close the figure to free memory

md2conf/markdown.py CHANGED Viewed

@@ -6,6 +6,7 @@ Copyright 2022-2026, Levente Hunyadi
 :see: https://github.com/hunyadi/md2conf
 """
+import re
 import xml.etree.ElementTree
 from typing import Any
@@ -86,7 +87,7 @@ _CONVERTER = markdown.Markdown(
         "sane_lists",
     ],
     extension_configs={
-        "footnotes": {"BACKLINK_TITLE": ""},
+        "footnotes": {"BACKLINK_TITLE": ""},  # spellchecker:disable-line
         "pymdownx.arithmatex": {"generic": True, "preview": False, "tex_inline_wrap": ["", ""], "tex_block_wrap": ["", ""]},
         "pymdownx.emoji": {"emoji_generator": _emoji_generator},
         "pymdownx.highlight": {
@@ -114,3 +115,69 @@ def markdown_to_html(content: str) -> str:
     _CONVERTER.reset()
     html = _CONVERTER.convert(content)
     return html
+# matches the start and end marker of a fenced code block
+_FENCED_CODE_REGEXP = re.compile(r"^\s*(?:`{3,}|~{3,})", re.MULTILINE)
+# matches a regular table row (but not the column alignment row)
+_TABLE_ROW_REGEXP = re.compile(r"^\|\s*([^\s:-]+.*?)\s*\|$", re.MULTILINE)
+def markdown_with_line_numbers(input_lines: list[str], start_line_number: int) -> list[str]:
+    """
+    Injects XHTML line number markers in Markdown text.
+    Unfortunately, Python-Markdown doesn't propagate line numbers to downstream processors, making it challenging to
+    display helpful error messages to end users. This function injects XHTML self-closing tags into the Markdown source:
+    ```
+    <line-number value="#" />
+    ```
+    When tree visitors process the XHTML content generated by Python-Markdown and an error is triggered, the exception
+    handler can use these placeholder elements to guide end users in which part of the Markdown file they should look
+    by translating a tree node in the intermediate output into a line number in the source.
+    :param input_lines: Markdown source file split into lines.
+    :param start_line_number: The first line of the Markdown document excluding front-matter, or 1 if there is no front-matter.
+    """
+    output_lines: list[str] = []
+    fence_marker: str | None = None
+    for number, line in enumerate(input_lines, start=start_line_number):
+        if not line:
+            output_lines.append("")
+            continue
+        # fenced code blocks
+        if fence_match := _FENCED_CODE_REGEXP.match(line):
+            marker = fence_match.group()
+            if fence_marker is None:
+                fence_marker = marker
+            elif marker == fence_marker:
+                fence_marker = None
+        elif fence_marker is None:
+            # not inside a fenced code block
+            if (
+                # not an admonition
+                not line.startswith("!!! ")
+                # not a Setext heading
+                and not (line.startswith("===") or line.startswith("---"))
+                # not a decorated ATX heading
+                and not line.endswith("#")
+                # not a math block formula
+                and not (line.startswith("$$") and line.endswith("$$"))
+                # not a Markdown table
+                and not (line.startswith("|") or line.endswith("|"))
+                # not a block-level HTML tag
+                and not (line.startswith("<") or line.endswith(">"))
+            ):
+                line = f'{line}<line-number value="{number}" />'
+            elif row_match := _TABLE_ROW_REGEXP.match(line):
+                line = f'| {row_match.group(1)}<line-number value="{number}" /> |'
+        output_lines.append(line)
+    return output_lines

md2conf/options.py CHANGED Viewed

@@ -6,10 +6,11 @@ Copyright 2022-2026, Levente Hunyadi
 :see: https://github.com/hunyadi/md2conf
 """
-import dataclasses
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Literal
+from .clio import boolean_option, composite_option, value_option
 @dataclass
 class ConfluencePageID:
@@ -25,8 +26,11 @@ class ImageLayoutOptions:
     :param max_width: Maximum display width for images [px]. Wider images are scaled down for page display. Original size kept for full-size viewing.
     """
-    alignment: Literal["center", "left", "right"] | None = None
-    max_width: int | None = None
+    alignment: Literal["center", "left", "right", None] = field(default=None, metadata=value_option("Alignment for block-level images and formulas."))
+    max_width: int | None = field(
+        default=None,
+        metadata=value_option("Maximum display width for images [px]. Wider images are scaled down for page display."),
+    )
 @dataclass
@@ -38,8 +42,8 @@ class TableLayoutOptions:
     :param display_mode: Whether to use fixed or responsive column widths.
     """
-    width: int | None = None
-    display_mode: Literal["fixed", "responsive"] | None = None
+    width: int | None = field(default=None, metadata=value_option("Maximum table width in pixels."))
+    display_mode: Literal["responsive", "fixed"] = field(default="responsive", metadata=value_option("Set table display mode."))
 @dataclass
@@ -54,9 +58,9 @@ class LayoutOptions:
     :param alignment: Default alignment (unless overridden with more specific setting).
     """
-    image: ImageLayoutOptions = dataclasses.field(default_factory=ImageLayoutOptions)
-    table: TableLayoutOptions = dataclasses.field(default_factory=TableLayoutOptions)
-    alignment: Literal["center", "left", "right"] | None = None
+    image: ImageLayoutOptions = field(default_factory=ImageLayoutOptions, metadata=composite_option())
+    table: TableLayoutOptions = field(default_factory=TableLayoutOptions, metadata=composite_option())
+    alignment: Literal["center", "left", "right", None] = field(default=None, metadata=value_option("Default alignment for block-level content."))
     def get_image_alignment(self) -> Literal["center", "left", "right"]:
         return self.image.alignment or self.alignment or "center"
@@ -69,8 +73,8 @@ class ConverterOptions:
     :param heading_anchors: When true, emit a structured macro *anchor* for each section heading using GitHub
         conversion rules for the identifier.
-    :param ignore_invalid_url: When true, ignore invalid URLs in input, emit a warning and replace the anchor with
-        plain text; when false, raise an exception.
+    :param force_valid_url: If enabled, raise an exception when relative URLs point to an invalid location. If disabled,
+        ignore invalid URLs, emit a warning and replace the anchor with plain text.
     :param skip_title_heading: Whether to remove the first heading from document body when used as page title.
     :param prefer_raster: Whether to choose PNG files over SVG files when available.
     :param render_drawio: Whether to pre-render (or use the pre-rendered version of) draw.io diagrams.
@@ -83,18 +87,81 @@ class ConverterOptions:
     :param layout: Layout options for content on a Confluence page.
     """
-    heading_anchors: bool = False
-    ignore_invalid_url: bool = False
-    skip_title_heading: bool = False
-    prefer_raster: bool = True
-    render_drawio: bool = False
-    render_mermaid: bool = False
-    render_plantuml: bool = False
-    render_latex: bool = False
-    diagram_output_format: Literal["png", "svg"] = "png"
-    webui_links: bool = False
-    use_panel: bool = False
-    layout: LayoutOptions = dataclasses.field(default_factory=LayoutOptions)
+    heading_anchors: bool = field(
+        default=False,
+        metadata=boolean_option(
+            "Place an anchor at each section heading with GitHub-style same-page identifiers.",
+            "Omit the extra anchor from section headings. (May break manually placed same-page references.)",
+        ),
+    )
+    force_valid_url: bool = field(
+        default=True,
+        metadata=boolean_option(
+            "Raise an error when relative URLs point to an invalid location.",
+            "Emit a warning but otherwise ignore relative URLs that point to an invalid location.",
+        ),
+    )
+    skip_title_heading: bool = field(
+        default=False,
+        metadata=boolean_option(
+            "Remove the first heading from document body when it is used as the page title (does not apply if title comes from front-matter).",
+            "Keep the first heading in document body even when used as page title.",
+        ),
+    )
+    prefer_raster: bool = field(
+        default=True,
+        metadata=boolean_option(
+            "Prefer PNG over SVG when both exist.",
+            "Use SVG files directly instead of preferring PNG equivalents.",
+        ),
+    )
+    render_drawio: bool = field(
+        default=True,
+        metadata=boolean_option(
+            "Render draw.io diagrams as image files. (Installed utility required to covert.)",
+            "Upload draw.io diagram sources as Confluence page attachments. (Marketplace app required to display.)",
+        ),
+    )
+    render_mermaid: bool = field(
+        default=True,
+        metadata=boolean_option(
+            "Render Mermaid diagrams as image files. (Installed utility required to convert.)",
+            "Upload Mermaid diagram sources as Confluence page attachments. (Marketplace app required to display.)",
+        ),
+    )
+    render_plantuml: bool = field(
+        default=True,
+        metadata=boolean_option(
+            "Render PlantUML diagrams as image files. (Installed utility required to convert.)",
+            "Upload PlantUML diagram sources as Confluence page attachments. (Marketplace app required to display.)",
+        ),
+    )
+    render_latex: bool = field(
+        default=True,
+        metadata=boolean_option(
+            "Render LaTeX formulas as image files. (Matplotlib required to convert.)",
+            "Inline LaTeX formulas in Confluence page. (Marketplace app required to display.)",
+        ),
+    )
+    diagram_output_format: Literal["png", "svg"] = field(
+        default="png",
+        metadata=value_option("Format for rendering Mermaid and draw.io diagrams."),
+    )
+    webui_links: bool = field(
+        default=False,
+        metadata=boolean_option(
+            "Enable Confluence Web UI links. (Typically required for on-prem versions of Confluence.)",
+            "Use hierarchical links including space and page ID.",
+        ),
+    )
+    use_panel: bool = field(
+        default=False,
+        metadata=boolean_option(
+            "Transform admonitions and alerts into a Confluence custom panel.",
+            "Use standard Confluence macro types for admonitions and alerts (info, tip, note and warning).",
+        ),
+    )
+    layout: LayoutOptions = field(default_factory=LayoutOptions, metadata=composite_option())
 @dataclass
@@ -108,6 +175,7 @@ class DocumentOptions:
     :param generated_by: Text to use as the generated-by prompt (or `None` to omit a prompt).
     :param skip_update: Whether to skip saving Confluence page ID in Markdown files.
     :param converter: Options for converting an HTML tree into Confluence Storage Format.
+    :param line_numbers: Inject line numbers in Markdown source to help localize conversion errors.
     """
     root_page_id: ConfluencePageID | None = None
@@ -115,4 +183,5 @@ class DocumentOptions:
     title_prefix: str | None = None
     generated_by: str | None = "This page has been generated with a tool."
     skip_update: bool = False
-    converter: ConverterOptions = dataclasses.field(default_factory=ConverterOptions)
+    converter: ConverterOptions = field(default_factory=ConverterOptions)
+    line_numbers: bool = False

md2conf/plantuml/extension.py CHANGED Viewed

@@ -148,7 +148,7 @@ class PlantUMLExtension(MarketplaceExtension):
         return AC_ELEM(
             "structured-macro",
             {
-                AC_ATTR("name"): "plantumlcloud",
+                AC_ATTR("name"): "plantumlcloud",  # spellchecker:disable-line
                 AC_ATTR("schema-version"): "1",
                 "data-layout": "default",
                 AC_ATTR("local-id"): local_id,

markdown-to-confluence 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

markdown-to-confluence 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl