PyPI - markdown-to-confluence - Versions diffs - 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl - Mend

markdown-to-confluence 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/METADATA +95 -53
{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/RECORD +29 -27
{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/WHEEL +1 -1
md2conf/__init__.py +1 -1
md2conf/__main__.py +23 -172
md2conf/api.py +32 -67
md2conf/attachment.py +4 -3
md2conf/clio.py +226 -0
md2conf/compatibility.py +5 -0
md2conf/converter.py +235 -143
md2conf/csf.py +89 -9
md2conf/drawio/render.py +2 -0
md2conf/frontmatter.py +18 -6
md2conf/image.py +7 -5
md2conf/latex.py +8 -1
md2conf/markdown.py +68 -1
md2conf/options.py +93 -24
md2conf/plantuml/extension.py +1 -1
md2conf/publisher.py +81 -16
md2conf/reflection.py +74 -0
md2conf/scanner.py +9 -5
md2conf/serializer.py +12 -1
md2conf/svg.py +5 -2
md2conf/toc.py +1 -1
md2conf/xml.py +45 -0
{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/entry_points.txt +0 -0
{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/licenses/LICENSE +0 -0
{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/top_level.txt +0 -0
{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/zip-safe +0 -0

md2conf/publisher.py CHANGED Viewed

@@ -6,23 +6,29 @@ Copyright 2022-2026, Levente Hunyadi
 :see: https://github.com/hunyadi/md2conf
 """
+import hashlib
 import logging
+from dataclasses import dataclass
 from pathlib import Path
-from .api import ConfluenceContentProperty, ConfluenceLabel, ConfluenceSession, ConfluenceStatus
+from .api import ConfluenceContentProperty, ConfluenceLabel, ConfluencePage, ConfluenceSession, ConfluenceStatus
 from .attachment import attachment_name
 from .compatibility import override, path_relative_to
-from .converter import ConfluenceDocument, get_volatile_attributes, get_volatile_elements
+from .converter import ConfluenceDocument, ElementType, get_volatile_attributes, get_volatile_elements
 from .csf import AC_ATTR, elements_from_string
 from .environment import PageError
 from .metadata import ConfluencePageMetadata
 from .options import ConfluencePageID, DocumentOptions
 from .processor import Converter, DocumentNode, Processor, ProcessorFactory
+from .serializer import json_to_object, object_to_json
 from .xml import is_xml_equal, unwrap_substitute
 LOGGER = logging.getLogger(__name__)
+CONTENT_PROPERTY_TAG = "md2conf"
 class _MissingType:
     pass
@@ -82,6 +88,19 @@ class ParentCatalog:
         return self.is_traceable(parent_id)
+@dataclass
+class ConfluenceMarkdownTag:
+    """
+    Captures information used to synchronize the Markdown source file with the Confluence target page.
+    :param page_version: Confluence page version number when the page was last synchronized.
+    :param source_digest: MD5 hash computed from the Markdown source file.
+    """
+    page_version: int
+    source_digest: str
 class SynchronizingProcessor(Processor):
     """
     Synchronizes a single Markdown page or a directory of Markdown pages with Confluence.
@@ -205,36 +224,82 @@ class SynchronizingProcessor(Processor):
         content = document.xhtml()
         LOGGER.debug("Generated Confluence Storage Format document:\n%s", content)
+        # compute content hash to help detect if document has changed
+        m = hashlib.md5()
+        with open(path, "rb") as f:
+            m.update(f.read())
+        source_digest = m.hexdigest()
+        # set Confluence title based on Markdown content
         title = self._get_unique_title(document, path)
         # fetch existing page
         page = self.api.get_page(page_id.page_id)
+        prop = self.api.get_content_property_for_page(page_id.page_id, CONTENT_PROPERTY_TAG)
+        tag: ConfluenceMarkdownTag | None = None
+        if prop is not None:
+            try:
+                tag = json_to_object(ConfluenceMarkdownTag, prop.value)
+                LOGGER.debug("Page with ID %s has last synchronized version of %d and hash of %s", page.id, tag.page_version, tag.source_digest)
+            except Exception:
+                pass
+        # keep existing Confluence title if cannot infer meaningful title from Markdown source
         if not title:  # empty or `None`
             title = page.title
-        # discard comments
-        tree = elements_from_string(page.content)
-        unwrap_substitute(AC_ATTR("inline-comment-marker"), tree)
+        # synchronize page if page has any changes
+        if self._has_changes(page, tag, title, document.root, source_digest):
+            if tag is not None and page.version.number != tag.page_version:
+                LOGGER.warning("Page with ID %s has been edited since last synchronized: %s", page.id, page.title)
-        # check if page has any changes
-        if page.title != title or not is_xml_equal(
-            document.root,
-            tree,
-            skip_attributes=get_volatile_attributes(),
-            skip_elements=get_volatile_elements(),
-        ):
-            self.api.update_page(page_id.page_id, content, title=title, version=page.version.number + 1)
+            relative_path = path_relative_to(path, self.root_dir)
+            version = page.version.number + 1
+            self.api.update_page(page.id, content, title=title, version=version, message=f"Synchronized by md2conf from Markdown file: {relative_path}")
         else:
-            LOGGER.info("Up-to-date page: %s", page_id.page_id)
+            version = page.version.number
         if document.labels is not None:
             self.api.update_labels(
-                page_id.page_id,
+                page.id,
                 [ConfluenceLabel(name=label, prefix="global") for label in document.labels],
             )
+        props = [ConfluenceContentProperty(CONTENT_PROPERTY_TAG, object_to_json(ConfluenceMarkdownTag(version, source_digest)))]
         if document.properties is not None:
-            self.api.update_content_properties_for_page(page_id.page_id, [ConfluenceContentProperty(key, value) for key, value in document.properties.items()])
+            props.extend(ConfluenceContentProperty(key, value) for key, value in document.properties.items())
+            self.api.update_content_properties_for_page(page.id, props)
+        else:
+            if tag is None or tag.page_version != version:
+                self.api.update_content_properties_for_page(page.id, props, keep_existing=True)
+    def _has_changes(self, page: ConfluencePage, tag: ConfluenceMarkdownTag | None, title: str, root: ElementType, source_digest: str) -> bool:
+        "True if the page title or the recorded Markdown source digest has changed, or if the Confluence Storage Format content generated from the Markdown source file differs from the Confluence target page content."
+        if page.title != title:
+            LOGGER.info("Detected page with new title: %s", page.id)
+            return True
+        if tag is not None and tag.source_digest != source_digest:
+            LOGGER.info("Detected page with updated Markdown source: %s", page.id)
+            return True
+        # discard comments
+        tree = elements_from_string(page.content)
+        unwrap_substitute(AC_ATTR("inline-comment-marker"), tree)
+        # visit XML nodes recursively
+        if not is_xml_equal(
+            root,
+            tree,
+            skip_attributes=get_volatile_attributes(),
+            skip_elements=get_volatile_elements(),
+        ):
+            LOGGER.info("Detected page with updated Markdown content: %s", page.id)
+            return True
+        LOGGER.info("Up-to-date page: %s", page.id)
+        return False
     def _get_extended_title(self, title: str) -> str:
         """

md2conf/reflection.py ADDED Viewed

@@ -0,0 +1,74 @@
+"""
+Publish Markdown files to Confluence wiki.
+Copyright 2022-2026, Levente Hunyadi
+:see: https://github.com/hunyadi/md2conf
+"""
+from collections.abc import Sequence
+from dataclasses import fields, is_dataclass
+from types import NoneType, UnionType
+from typing import Any, Literal, Union, get_args, get_origin, get_type_hints
+def get_nested_types(items: Sequence[Any]) -> set[type[Any]]:
+    "Returns a set of types that are directly or indirectly referenced by any of the specified items."
+    tps: set[type[Any]] = set()
+    for item in items:
+        tps.update(_get_nested_types(item))
+    return tps
+def _get_nested_types(tp: Any) -> set[type[Any]]:
+    tps: set[type[Any]] = set()
+    if tp is not None and tp is not NoneType:
+        origin = get_origin(tp)
+        if origin is list:
+            (item_type,) = get_args(tp)
+            tps.update(_get_nested_types(item_type))
+        elif origin is dict:
+            key_type, value_type = get_args(tp)
+            tps.update(_get_nested_types(key_type))
+            tps.update(_get_nested_types(value_type))
+        elif origin is set:
+            (elem_type,) = get_args(tp)
+            tps.update(_get_nested_types(elem_type))
+        elif origin is UnionType or origin is Union:
+            for union_arg in get_args(tp):
+                tps.update(_get_nested_types(union_arg))
+        elif isinstance(tp, type):
+            tps.add(tp)
+            if is_dataclass(tp):
+                for field in fields(tp):
+                    tps.update(_get_nested_types(field.type))
+            elif isinstance(tp, type):  # required to please static type checkers
+                for field_type in get_type_hints(tp).values():
+                    tps.update(_get_nested_types(field_type))
+    return tps
+def format_initializer(tp: Any) -> str:
+    "Returns a string that illustrates an initialization value for a type."
+    origin = get_origin(tp)
+    if tp is None or tp is NoneType:
+        return "None"
+    elif origin is list:
+        (item_type,) = get_args(tp)
+        return f"[{format_initializer(item_type)}]"
+    elif origin is dict:
+        key_type, value_type = get_args(tp)
+        return f"{{{format_initializer(key_type)}: {format_initializer(value_type)}}}"
+    elif origin is set:
+        (elem_type,) = get_args(tp)
+        return f"[{format_initializer(elem_type)}]"
+    elif origin is Literal:
+        return " or ".join(repr(arg) for arg in get_args(tp))
+    elif origin is UnionType or origin is Union:
+        return " or ".join(format_initializer(arg) for arg in get_args(tp))
+    elif isinstance(tp, type):
+        return f"{tp.__name__}()"
+    else:
+        return "..."

md2conf/scanner.py CHANGED Viewed

@@ -63,10 +63,12 @@ class ScannedDocument:
     :param properties: Properties extracted from the front-matter of a Markdown document.
     :param text: Text that remains after front-matter and inline properties have been extracted.
+    :param start_line_number: Line number of the first line of the Markdown document that follows the front-matter, or 1 if there is no front-matter.
     """
     properties: DocumentProperties
     text: str
+    start_line_number: int
 class Scanner:
@@ -97,16 +99,18 @@ class Scanner:
         body_props = DocumentProperties(page_id=page_id, space_key=space_key, generated_by=generated_by)
         # extract front-matter
-        data, text = extract_frontmatter_json(text)
-        if data is not None:
-            frontmatter_props = json_to_object(DocumentProperties, data)
-            alias_props = json_to_object(AliasProperties, data)
+        frontmatter, text = extract_frontmatter_json(text)
+        if frontmatter is not None:
+            frontmatter_props = json_to_object(DocumentProperties, frontmatter.data)
+            alias_props = json_to_object(AliasProperties, frontmatter.data)
             if alias_props.confluence_page_id is not None:
                 frontmatter_props.page_id = alias_props.confluence_page_id
             if alias_props.confluence_space_key is not None:
                 frontmatter_props.space_key = alias_props.confluence_space_key
             props = coalesce(body_props, frontmatter_props)
+            start_line_number = frontmatter.outer_line_count + 1
         else:
             props = body_props
+            start_line_number = 1
-        return ScannedDocument(properties=props, text=text)
+        return ScannedDocument(properties=props, text=text, start_line_number=start_line_number)

md2conf/serializer.py CHANGED Viewed

@@ -8,7 +8,7 @@ Copyright 2022-2026, Levente Hunyadi
 import sys
 from datetime import datetime
-from typing import TypeVar
+from typing import TypeVar, cast
 from cattrs.preconf.orjson import make_converter  # spellchecker:disable-line
@@ -53,6 +53,17 @@ def json_to_object(typ: type[T], data: JsonType) -> T:
     return _converter.structure(data, typ)
+def object_to_json(data: object) -> JsonType:
+    """
+    Converts a structured object to a JSON object, ready to be serialized to a JSON string.
+    :param data: Python object to convert to a JSON object.
+    :returns: JSON object, ready to be serialized to a JSON string encoded in UTF-8.
+    """
+    return cast(JsonType, _converter.unstructure(data))
 def object_to_json_payload(data: object) -> bytes:
     """
     Converts a structured object to a JSON string encoded in UTF-8.

md2conf/svg.py CHANGED Viewed

@@ -254,6 +254,9 @@ def fix_svg_dimensions(data: bytes) -> bytes:
     return data.replace(original_tag, new_tag, 1)
+_MEASURE_REGEXP = re.compile(r"^([+-]?(?:\d+\.?\d*|\.\d+))(%|px|pt|em|ex|in|cm|mm|pc)?$", re.IGNORECASE)
 def _parse_svg_length(value: str) -> int | None:
     """
     Parses an SVG length value and converts it to pixels.
@@ -271,7 +274,7 @@ def _parse_svg_length(value: str) -> int | None:
     value = value.strip()
     # Match number with optional unit
-    match = re.match(r"^([+-]?(?:\d+\.?\d*|\.\d+))(%|px|pt|em|ex|in|cm|mm|pc)?$", value, re.IGNORECASE)
+    match = _MEASURE_REGEXP.match(value)
     if not match:
         return None
@@ -321,7 +324,7 @@ def _parse_viewbox(viewbox: str) -> tuple[int, int] | None:
     # viewBox format: "min-x min-y width height"
     # Values can be separated by whitespace and/or commas
-    parts = re.split(r"[\s,]+", viewbox.strip())
+    parts = re.split(r"\s*,\s*|\s+", viewbox.strip())
     if len(parts) != 4:
         return None

md2conf/toc.py CHANGED Viewed

@@ -154,7 +154,7 @@ def unique_title(content: str) -> str | None:
     """
     builder = TableOfContentsBuilder()
-    for heading in headings(content.splitlines(keepends=True)):
+    for heading in headings(content.splitlines(keepends=True)):  # spellchecker:disable-line
         level, text = heading
         builder.add(level, text)
     return builder.get_title()

md2conf/xml.py CHANGED Viewed

@@ -106,6 +106,51 @@ def element_to_text(node: ElementType) -> str:
     return "".join(node.itertext()).strip()
+def remove_element(child: ElementType) -> None:
+    """
+    Removes a child element, taking care of its tail text.
+    This function may be unsafe when called in the body of a loop that iterates over the live children of an element; iterate over a copy instead, i.e. use
+    ```
+    for child in list(node): ...
+    ```
+    instead of
+    ```
+    for child in node: ...
+    ```
+    """
+    parent = child.getparent()
+    if parent is None:
+        return
+    # preserve any text that comes after the removed element (tail text)
+    tail = child.tail
+    # if there was tail text, attach it to the previous sibling's tail or to the parent's text if this was the first child
+    if tail:
+        index = parent.index(child)
+        if index > 0:
+            # append to previous sibling's tail
+            prev_sibling = parent[index - 1]
+            if prev_sibling.tail:
+                prev_sibling.tail += tail
+            else:
+                prev_sibling.tail = tail
+        else:
+            # no previous sibling, append to parent's text
+            if parent.text:
+                parent.text += tail
+            else:
+                parent.text = tail
+    # remove the element
+    parent.remove(child)
 def unwrap_substitute(name: str, root: ElementType) -> None:
     """
     Substitutes all occurrences of an element with its contents.

{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

{markdown_to_confluence-0.5.4.dist-info → markdown_to_confluence-0.5.5.dist-info}/zip-safe RENAMED Viewed

File without changes

markdown-to-confluence 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

markdown-to-confluence 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl