markdown-to-confluence 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markdown_to_confluence-0.4.0.dist-info → markdown_to_confluence-0.4.2.dist-info}/METADATA +133 -43
- markdown_to_confluence-0.4.2.dist-info/RECORD +27 -0
- md2conf/__init__.py +1 -1
- md2conf/__main__.py +57 -18
- md2conf/api.py +242 -125
- md2conf/application.py +40 -48
- md2conf/collection.py +17 -11
- md2conf/converter.py +540 -107
- md2conf/drawio.py +222 -0
- md2conf/extra.py +13 -0
- md2conf/local.py +5 -12
- md2conf/matcher.py +64 -7
- md2conf/mermaid.py +2 -7
- md2conf/metadata.py +2 -0
- md2conf/processor.py +48 -57
- md2conf/properties.py +45 -12
- md2conf/scanner.py +17 -9
- md2conf/xml.py +70 -0
- markdown_to_confluence-0.4.0.dist-info/RECORD +0 -25
- {markdown_to_confluence-0.4.0.dist-info → markdown_to_confluence-0.4.2.dist-info}/WHEEL +0 -0
- {markdown_to_confluence-0.4.0.dist-info → markdown_to_confluence-0.4.2.dist-info}/entry_points.txt +0 -0
- {markdown_to_confluence-0.4.0.dist-info → markdown_to_confluence-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {markdown_to_confluence-0.4.0.dist-info → markdown_to_confluence-0.4.2.dist-info}/top_level.txt +0 -0
- {markdown_to_confluence-0.4.0.dist-info → markdown_to_confluence-0.4.2.dist-info}/zip-safe +0 -0
md2conf/drawio.py
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Publish Markdown files to Confluence wiki.
|
|
3
|
+
|
|
4
|
+
Copyright 2022-2025, Levente Hunyadi
|
|
5
|
+
|
|
6
|
+
:see: https://github.com/hunyadi/md2conf
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import base64
|
|
10
|
+
import typing
|
|
11
|
+
import zlib
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from struct import unpack
|
|
14
|
+
from urllib.parse import unquote_to_bytes
|
|
15
|
+
|
|
16
|
+
import lxml.etree as ET
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DrawioError(ValueError):
    """
    Signals that the input does not adhere to the draw.io document format, or that turning the input into a draw.io diagram has failed.

    Typical causes include:

    * invalid or corrupt PNG file
    * PNG chunk with embedded diagram data not found
    * the structure of the outer XML does not match the expected format
    * URL decoding error
    * decompression error during INFLATE
    """
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def inflate(data: bytes) -> bytes:
    """
    Expands data that was compressed with the raw DEFLATE algorithm.

    :param data: Compressed data using raw DEFLATE format.
    :returns: Uncompressed data.
    """

    # a negative window size selects a raw DEFLATE stream (no zlib/gzip headers)
    return zlib.decompress(data, -zlib.MAX_WBITS)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def decompress_diagram(xml_data: typing.Union[bytes, str]) -> ET._Element:
    """
    Expands the compressed text payload of the `<diagram>` element in a draw.io XML document.

    The serialized input has the shape:
    ```
    <mxfile>
    <diagram>... ENCODED_COMPRESSED_DATA ...</diagram>
    </mxfile>
    ```
    where the payload is Base64-encoded, raw-DEFLATE-compressed, URL-encoded XML.
    If the `<diagram>` element already contains child elements, the input is taken
    to be uncompressed and the parsed tree is returned as-is.

    :param xml_data: The serialized XML document.
    :returns: XML element tree with the text contained within the `<diagram>` element expanded into a sub-tree.
    :raises DrawioError: If the document structure or the embedded payload is invalid.
    """

    try:
        document = ET.fromstring(xml_data)
    except ET.ParseError as e:
        raise DrawioError("invalid outer XML") from e

    if document.tag != "mxfile":
        raise DrawioError("root element is not `<mxfile>`")

    diagram = document.find("diagram")
    if diagram is None:
        raise DrawioError("`<diagram>` element not found")

    if len(diagram) > 0:
        # child elements present; data is already decompressed
        return document

    payload = diagram.text
    if payload is None:
        raise DrawioError("`<diagram>` element has no data")

    # step 1: undo Base64 encoding of the inner data
    try:
        compressed = base64.b64decode(payload, validate=True)
    except ValueError as e:
        raise DrawioError("raw text data in `<diagram>` element is not properly Base64-encoded") from e

    # step 2: undo raw DEFLATE compression
    try:
        quoted = inflate(compressed)
    except zlib.error as e:
        raise DrawioError("`<diagram>` element text data cannot be decompressed using INFLATE") from e

    # step 3: undo URL (percent) encoding
    try:
        inner_xml = unquote_to_bytes(quoted)
    except ValueError as e:
        raise DrawioError("decompressed data in `<diagram>` element is not properly URL-encoded") from e

    # step 4: parse the recovered data into an XML sub-tree
    try:
        subtree = ET.fromstring(inner_xml)
    except ET.ParseError as e:
        raise DrawioError("invalid inner XML extracted from `<diagram>` element") from e

    # graft the sub-tree into the document in place of the encoded text
    diagram.text = None
    diagram.append(subtree)

    return document
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def extract_xml_from_png(png_data: bytes) -> ET._Element:
    """
    Extracts an editable draw.io diagram embedded in a PNG image.

    draw.io stores the diagram source in a `tEXt` chunk whose keyword is `mxfile`;
    the chunk text is a URL-encoded draw.io XML document with compressed payload.

    :param png_data: PNG binary data, with an embedded draw.io diagram.
    :returns: XML element tree of a draw.io diagram.
    :raises DrawioError: If the PNG is malformed or carries no diagram data.
    """

    # every PNG starts with the same fixed 8-byte signature
    signature = b"\x89PNG\r\n\x1a\n"
    if not png_data.startswith(signature):
        raise DrawioError("not a valid PNG file")

    size = len(png_data)
    pos = len(signature)
    while pos < size:
        # chunk layout: 4-byte big-endian length, 4-byte type, data, 4-byte CRC
        if pos + 8 > size:
            raise DrawioError("corrupted PNG: incomplete chunk header")

        (length,) = unpack(">I", png_data[pos : pos + 4])
        chunk_type = png_data[pos + 4 : pos + 8]
        pos += 8

        if pos + length + 4 > size:
            raise DrawioError(f"corrupted PNG: incomplete data for chunk {chunk_type.decode('ascii')}")

        chunk_data = png_data[pos : pos + length]
        pos += length + 4  # step over chunk data and trailing CRC

        # only a `tEXt` chunk with the keyword `mxfile` holds draw.io diagram data
        if chunk_type != b"tEXt":
            continue

        # `tEXt` payload layout: keyword, NUL separator, text
        separator = chunk_data.find(b"\x00")
        if separator < 0:
            raise DrawioError("corrupted PNG: tEXt chunk missing keyword")

        if chunk_data[:separator].decode("latin1") != "mxfile":
            continue

        text = chunk_data[separator + 1 :]

        # reverse URL (percent) encoding applied to the embedded document
        try:
            quoted = unquote_to_bytes(text)
        except ValueError as e:
            raise DrawioError("data in `tEXt` chunk is not properly URL-encoded") from e

        # expand the compressed payload inside the outer XML wrapper
        return decompress_diagram(quoted)

    # matching `tEXt` chunk not found
    raise DrawioError("not a PNG file made with draw.io")
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def extract_xml_from_svg(svg_data: bytes) -> ET._Element:
    """
    Extracts an editable draw.io diagram embedded in an SVG image.

    draw.io stores the diagram source in the `content` attribute of the SVG root element.

    :param svg_data: SVG XML data, with an embedded draw.io diagram.
    :returns: XML element tree of a draw.io diagram.
    :raises DrawioError: If the SVG is malformed or has no embedded diagram.
    """

    try:
        svg_root = ET.fromstring(svg_data)
    except ET.ParseError as e:
        raise DrawioError("invalid SVG XML") from e

    embedded = svg_root.attrib.get("content")
    if embedded is None:
        raise DrawioError("SVG root element has no attribute `content`")

    return decompress_diagram(embedded)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def extract_diagram(path: Path) -> bytes:
    """
    Extracts an editable draw.io diagram from a PNG or SVG file.

    :param path: Path to a PNG or SVG file with an embedded draw.io diagram.
    :returns: XML data of a draw.io diagram as bytes.
    :raises DrawioError: If the file type is unrecognized, or the file has no valid embedded diagram.
    """

    # dispatch on the double extension used by draw.io exports
    if path.name.endswith(".drawio.png"):
        root = extract_xml_from_png(path.read_bytes())
    elif path.name.endswith(".drawio.svg"):
        root = extract_xml_from_svg(path.read_bytes())
    else:
        raise DrawioError(f"unrecognized file type for {path.name}")

    return ET.tostring(root, encoding="utf8", method="xml")
|
md2conf/extra.py
CHANGED
|
@@ -12,3 +12,16 @@ if sys.version_info >= (3, 12):
|
|
|
12
12
|
from typing import override as override # noqa: F401
|
|
13
13
|
else:
|
|
14
14
|
from typing_extensions import override as override # noqa: F401
|
|
15
|
+
|
|
16
|
+
if sys.version_info >= (3, 12):
    from pathlib import Path

    def path_relative_to(destination: Path, origin: Path) -> Path:
        """
        Computes the path of *destination* relative to *origin*, permitting `..` segments.
        """

        # Python 3.12+ can walk up the directory tree natively
        return destination.relative_to(origin, walk_up=True)

else:
    import os.path
    from pathlib import Path

    def path_relative_to(destination: Path, origin: Path) -> Path:
        """
        Computes the path of *destination* relative to *origin*, permitting `..` segments.
        """

        # older interpreters fall back to os.path.relpath, which also walks up
        return Path(os.path.relpath(destination, start=origin))
|
md2conf/local.py
CHANGED
|
@@ -45,9 +45,7 @@ class LocalProcessor(Processor):
|
|
|
45
45
|
self.out_dir = out_dir or root_dir
|
|
46
46
|
|
|
47
47
|
@override
|
|
48
|
-
def _synchronize_tree(
|
|
49
|
-
self, root: DocumentNode, root_id: Optional[ConfluencePageID]
|
|
50
|
-
) -> None:
|
|
48
|
+
def _synchronize_tree(self, root: DocumentNode, root_id: Optional[ConfluencePageID]) -> None:
|
|
51
49
|
"""
|
|
52
50
|
Creates the cross-reference index.
|
|
53
51
|
|
|
@@ -59,9 +57,7 @@ class LocalProcessor(Processor):
|
|
|
59
57
|
page_id = node.page_id
|
|
60
58
|
else:
|
|
61
59
|
digest = self._generate_hash(node.absolute_path)
|
|
62
|
-
LOGGER.info(
|
|
63
|
-
"Identifier %s assigned to page: %s", digest, node.absolute_path
|
|
64
|
-
)
|
|
60
|
+
LOGGER.info("Identifier %s assigned to page: %s", digest, node.absolute_path)
|
|
65
61
|
page_id = digest
|
|
66
62
|
|
|
67
63
|
self.page_metadata.add(
|
|
@@ -70,13 +66,12 @@ class LocalProcessor(Processor):
|
|
|
70
66
|
page_id=page_id,
|
|
71
67
|
space_key=node.space_key or self.site.space_key or "HOME",
|
|
72
68
|
title=node.title or "",
|
|
69
|
+
synchronized=node.synchronized,
|
|
73
70
|
),
|
|
74
71
|
)
|
|
75
72
|
|
|
76
73
|
@override
|
|
77
|
-
def _update_page(
|
|
78
|
-
self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
|
|
79
|
-
) -> None:
|
|
74
|
+
def _update_page(self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path) -> None:
|
|
80
75
|
"""
|
|
81
76
|
Saves the document as Confluence Storage Format XHTML to the local disk.
|
|
82
77
|
"""
|
|
@@ -101,9 +96,7 @@ class LocalProcessorFactory(ProcessorFactory):
|
|
|
101
96
|
self.out_dir = out_dir
|
|
102
97
|
|
|
103
98
|
def create(self, root_dir: Path) -> Processor:
|
|
104
|
-
return LocalProcessor(
|
|
105
|
-
self.options, self.site, out_dir=self.out_dir, root_dir=root_dir
|
|
106
|
-
)
|
|
99
|
+
return LocalProcessor(self.options, self.site, out_dir=self.out_dir, root_dir=root_dir)
|
|
107
100
|
|
|
108
101
|
|
|
109
102
|
class LocalConverter(Converter):
|
md2conf/matcher.py
CHANGED
|
@@ -10,14 +10,57 @@ import os.path
|
|
|
10
10
|
from dataclasses import dataclass
|
|
11
11
|
from fnmatch import fnmatch
|
|
12
12
|
from pathlib import Path
|
|
13
|
-
from typing import Iterable, Optional, Union, overload
|
|
13
|
+
from typing import Iterable, Optional, Union, final, overload
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
@dataclass(frozen=True)
|
|
16
|
+
@dataclass(frozen=True, eq=True)
class _BaseEntry:
    """
    Represents a file or directory entry.

    Entries are primarily sorted alphabetically case-insensitive.
    When two items are equal case-insensitive, conflicting items are put in case-sensitive order.

    :param name: Name of the file-system entry.
    """

    name: str

    @property
    def lower_name(self) -> str:
        return self.name.lower()

    def _order_key(self) -> "tuple[str, str]":
        # case-insensitive primary key with a case-sensitive tie-breaker
        return (self.name.lower(), self.name)

    def __lt__(self, other: "_BaseEntry") -> bool:
        return self._order_key() < other._order_key()

    def __le__(self, other: "_BaseEntry") -> bool:
        return self._order_key() <= other._order_key()

    def __ge__(self, other: "_BaseEntry") -> bool:
        return self._order_key() >= other._order_key()

    def __gt__(self, other: "_BaseEntry") -> bool:
        return self._order_key() > other._order_key()


@final
class FileEntry(_BaseEntry):
    "A file entry in a directory listing."


@final
class DirectoryEntry(_BaseEntry):
    "A directory entry in a directory listing."
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass(frozen=True, eq=True)
class Entry:
    """
    Represents a file or directory entry.

    When sorted, directories come before files and items are primarily arranged in alphabetical order case-insensitive.
    When two items are equal case-insensitive, conflicting items are put in case-sensitive order.

    :param name: Name of the file-system entry to match against the rule-set.
    :param is_dir: True if the entry is a directory.
    """

    name: str
    is_dir: bool

    @property
    def lower_name(self) -> str:
        return self.name.lower()

    def _order_key(self) -> "tuple[bool, str, str]":
        # directories (False) sort ahead of files (True); then case-insensitive
        # name, with case-sensitive name as the final tie-breaker
        return (not self.is_dir, self.name.lower(), self.name)

    def __lt__(self, other: "Entry") -> bool:
        return self._order_key() < other._order_key()

    def __le__(self, other: "Entry") -> bool:
        return self._order_key() <= other._order_key()

    def __ge__(self, other: "Entry") -> bool:
        return self._order_key() >= other._order_key()

    def __gt__(self, other: "Entry") -> bool:
        return self._order_key() > other._order_key()
|
|
86
|
+
|
|
28
87
|
|
|
29
88
|
@dataclass
|
|
30
89
|
class MatcherOptions:
|
|
@@ -146,9 +205,9 @@ class Matcher:
|
|
|
146
205
|
:returns: A filtered list of names that didn't match any of the exclusion rules.
|
|
147
206
|
"""
|
|
148
207
|
|
|
149
|
-
return
|
|
208
|
+
return sorted(entry for entry in entries if self.is_included(entry))
|
|
150
209
|
|
|
151
|
-
def
|
|
210
|
+
def listing(self, path: Path) -> list[Entry]:
|
|
152
211
|
"""
|
|
153
212
|
Returns only those entries in a directory whose name doesn't match any of the exclusion rules.
|
|
154
213
|
|
|
@@ -156,6 +215,4 @@ class Matcher:
|
|
|
156
215
|
:returns: A filtered list of entries whose name didn't match any of the exclusion rules.
|
|
157
216
|
"""
|
|
158
217
|
|
|
159
|
-
return self.filter(
|
|
160
|
-
Entry(entry.name, entry.is_dir()) for entry in os.scandir(path)
|
|
161
|
-
)
|
|
218
|
+
return self.filter(Entry(entry.name, entry.is_dir()) for entry in os.scandir(path))
|
md2conf/mermaid.py
CHANGED
|
@@ -19,10 +19,7 @@ LOGGER = logging.getLogger(__name__)
|
|
|
19
19
|
def is_docker() -> bool:
    "True if the application is running in a Docker container."

    # both environment variables are checked against the values the
    # containerized setup is expected to define
    chrome_binary = os.environ.get("CHROME_BIN")
    skip_download = os.environ.get("PUPPETEER_SKIP_DOWNLOAD")
    return chrome_binary == "/usr/bin/chromium-browser" and skip_download == "true"
|
|
26
23
|
|
|
27
24
|
|
|
28
25
|
def get_mmdc() -> str:
|
|
@@ -79,9 +76,7 @@ def render_diagram(source: str, output_format: Literal["png", "svg"] = "png") ->
|
|
|
79
76
|
)
|
|
80
77
|
stdout, stderr = proc.communicate(input=source.encode("utf-8"))
|
|
81
78
|
if proc.returncode:
|
|
82
|
-
messages = [
|
|
83
|
-
f"failed to convert Mermaid diagram; exit code: {proc.returncode}"
|
|
84
|
-
]
|
|
79
|
+
messages = [f"failed to convert Mermaid diagram; exit code: {proc.returncode}"]
|
|
85
80
|
console_output = stdout.decode("utf-8")
|
|
86
81
|
if console_output:
|
|
87
82
|
messages.append(f"output:\n{console_output}")
|
md2conf/metadata.py
CHANGED
|
@@ -33,8 +33,10 @@ class ConfluencePageMetadata:
|
|
|
33
33
|
:param page_id: Confluence page ID.
|
|
34
34
|
:param space_key: Confluence space key.
|
|
35
35
|
:param title: Document title.
|
|
36
|
+
:param synchronized: True if the document content is parsed and synchronized with Confluence.
|
|
36
37
|
"""
|
|
37
38
|
|
|
38
39
|
page_id: str
|
|
39
40
|
space_key: str
|
|
40
41
|
title: str
|
|
42
|
+
synchronized: bool
|
md2conf/processor.py
CHANGED
|
@@ -15,7 +15,7 @@ from typing import Iterable, Optional
|
|
|
15
15
|
|
|
16
16
|
from .collection import ConfluencePageCollection
|
|
17
17
|
from .converter import ConfluenceDocument, ConfluenceDocumentOptions, ConfluencePageID
|
|
18
|
-
from .matcher import Matcher, MatcherOptions
|
|
18
|
+
from .matcher import DirectoryEntry, FileEntry, Matcher, MatcherOptions
|
|
19
19
|
from .metadata import ConfluenceSiteMetadata
|
|
20
20
|
from .properties import ArgumentError
|
|
21
21
|
from .scanner import Scanner
|
|
@@ -28,6 +28,7 @@ class DocumentNode:
|
|
|
28
28
|
page_id: Optional[str]
|
|
29
29
|
space_key: Optional[str]
|
|
30
30
|
title: Optional[str]
|
|
31
|
+
synchronized: bool
|
|
31
32
|
|
|
32
33
|
_children: list["DocumentNode"]
|
|
33
34
|
|
|
@@ -35,13 +36,15 @@ class DocumentNode:
|
|
|
35
36
|
self,
|
|
36
37
|
absolute_path: Path,
|
|
37
38
|
page_id: Optional[str],
|
|
38
|
-
space_key: Optional[str]
|
|
39
|
-
title: Optional[str]
|
|
39
|
+
space_key: Optional[str],
|
|
40
|
+
title: Optional[str],
|
|
41
|
+
synchronized: bool,
|
|
40
42
|
):
|
|
41
43
|
self.absolute_path = absolute_path
|
|
42
44
|
self.page_id = page_id
|
|
43
45
|
self.space_key = space_key
|
|
44
46
|
self.title = title
|
|
47
|
+
self.synchronized = synchronized
|
|
45
48
|
self._children = []
|
|
46
49
|
|
|
47
50
|
def count(self) -> int:
|
|
@@ -98,16 +101,11 @@ class Processor:
|
|
|
98
101
|
local_dir = local_dir.resolve(True)
|
|
99
102
|
LOGGER.info("Processing directory: %s", local_dir)
|
|
100
103
|
|
|
101
|
-
#
|
|
104
|
+
# build index of all Markdown files in directory hierarchy
|
|
102
105
|
root = self._index_directory(local_dir, None)
|
|
103
106
|
LOGGER.info("Indexed %d document(s)", root.count())
|
|
104
107
|
|
|
105
|
-
|
|
106
|
-
self._synchronize_tree(root, self.options.root_page_id)
|
|
107
|
-
|
|
108
|
-
# Step 3: synchronize files in directory hierarchy with pages in space
|
|
109
|
-
for path, metadata in self.page_metadata.items():
|
|
110
|
-
self._synchronize_page(path, ConfluencePageID(metadata.page_id))
|
|
108
|
+
self._process_items(root)
|
|
111
109
|
|
|
112
110
|
def process_page(self, path: Path) -> None:
|
|
113
111
|
"""
|
|
@@ -115,31 +113,33 @@ class Processor:
|
|
|
115
113
|
"""
|
|
116
114
|
|
|
117
115
|
LOGGER.info("Processing page: %s", path)
|
|
118
|
-
|
|
119
|
-
# Step 1: parse Markdown file
|
|
120
116
|
root = self._index_file(path)
|
|
121
117
|
|
|
122
|
-
|
|
118
|
+
self._process_items(root)
|
|
119
|
+
|
|
120
|
+
def _process_items(self, root: DocumentNode) -> None:
|
|
121
|
+
"""
|
|
122
|
+
Processes a sub-tree rooted at an ancestor node.
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
# synchronize directory tree structure with page hierarchy in space (find matching pages in Confluence)
|
|
123
126
|
self._synchronize_tree(root, self.options.root_page_id)
|
|
124
127
|
|
|
125
|
-
#
|
|
128
|
+
# synchronize files in directory hierarchy with pages in space
|
|
126
129
|
for path, metadata in self.page_metadata.items():
|
|
127
|
-
|
|
130
|
+
if metadata.synchronized:
|
|
131
|
+
self._synchronize_page(path, ConfluencePageID(metadata.page_id))
|
|
128
132
|
|
|
129
133
|
def _synchronize_page(self, path: Path, page_id: ConfluencePageID) -> None:
|
|
130
134
|
"""
|
|
131
135
|
Synchronizes a single Markdown document with its corresponding Confluence page.
|
|
132
136
|
"""
|
|
133
137
|
|
|
134
|
-
page_id, document = ConfluenceDocument.create(
|
|
135
|
-
path, self.options, self.root_dir, self.site, self.page_metadata
|
|
136
|
-
)
|
|
138
|
+
page_id, document = ConfluenceDocument.create(path, self.options, self.root_dir, self.site, self.page_metadata)
|
|
137
139
|
self._update_page(page_id, document, path)
|
|
138
140
|
|
|
139
141
|
@abstractmethod
|
|
140
|
-
def _synchronize_tree(
|
|
141
|
-
self, node: DocumentNode, page_id: Optional[ConfluencePageID]
|
|
142
|
-
) -> None:
|
|
142
|
+
def _synchronize_tree(self, node: DocumentNode, page_id: Optional[ConfluencePageID]) -> None:
|
|
143
143
|
"""
|
|
144
144
|
Creates the cross-reference index and synchronizes the directory tree structure with the Confluence page hierarchy.
|
|
145
145
|
|
|
@@ -150,17 +150,13 @@ class Processor:
|
|
|
150
150
|
...
|
|
151
151
|
|
|
152
152
|
@abstractmethod
|
|
153
|
-
def _update_page(
|
|
154
|
-
self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path
|
|
155
|
-
) -> None:
|
|
153
|
+
def _update_page(self, page_id: ConfluencePageID, document: ConfluenceDocument, path: Path) -> None:
|
|
156
154
|
"""
|
|
157
155
|
Saves the document as Confluence Storage Format XHTML.
|
|
158
156
|
"""
|
|
159
157
|
...
|
|
160
158
|
|
|
161
|
-
def _index_directory(
|
|
162
|
-
self, local_dir: Path, parent: Optional[DocumentNode]
|
|
163
|
-
) -> DocumentNode:
|
|
159
|
+
def _index_directory(self, local_dir: Path, parent: Optional[DocumentNode]) -> DocumentNode:
|
|
164
160
|
"""
|
|
165
161
|
Indexes Markdown files in a directory hierarchy recursively.
|
|
166
162
|
"""
|
|
@@ -169,36 +165,40 @@ class Processor:
|
|
|
169
165
|
|
|
170
166
|
matcher = Matcher(MatcherOptions(source=".mdignore", extension="md"), local_dir)
|
|
171
167
|
|
|
172
|
-
files: list[
|
|
173
|
-
directories: list[
|
|
168
|
+
files: list[FileEntry] = []
|
|
169
|
+
directories: list[DirectoryEntry] = []
|
|
174
170
|
for entry in os.scandir(local_dir):
|
|
175
171
|
if matcher.is_excluded(entry):
|
|
176
172
|
continue
|
|
177
173
|
|
|
178
174
|
if entry.is_file():
|
|
179
|
-
files.append(
|
|
175
|
+
files.append(FileEntry(entry.name))
|
|
180
176
|
elif entry.is_dir():
|
|
181
|
-
directories.append(
|
|
177
|
+
directories.append(DirectoryEntry(entry.name))
|
|
178
|
+
|
|
179
|
+
files.sort()
|
|
180
|
+
directories.sort()
|
|
182
181
|
|
|
183
182
|
# make page act as parent node
|
|
184
183
|
parent_doc: Optional[Path] = None
|
|
185
|
-
if (
|
|
186
|
-
parent_doc =
|
|
187
|
-
elif (
|
|
188
|
-
parent_doc =
|
|
189
|
-
elif (
|
|
190
|
-
parent_doc =
|
|
184
|
+
if FileEntry("index.md") in files:
|
|
185
|
+
parent_doc = local_dir / "index.md"
|
|
186
|
+
elif FileEntry("README.md") in files:
|
|
187
|
+
parent_doc = local_dir / "README.md"
|
|
188
|
+
elif FileEntry(f"{local_dir.name}.md") in files:
|
|
189
|
+
parent_doc = local_dir / f"{local_dir.name}.md"
|
|
191
190
|
|
|
192
191
|
if parent_doc is None and self.options.keep_hierarchy:
|
|
193
|
-
parent_doc =
|
|
192
|
+
parent_doc = local_dir / "index.md"
|
|
194
193
|
|
|
195
194
|
# create a blank page for directory entry
|
|
196
|
-
with open(parent_doc, "w"):
|
|
197
|
-
|
|
195
|
+
with open(parent_doc, "w") as f:
|
|
196
|
+
print("[[_LISTING_]]", file=f)
|
|
198
197
|
|
|
199
198
|
if parent_doc is not None:
|
|
200
|
-
|
|
201
|
-
|
|
199
|
+
parent_entry = FileEntry(parent_doc.name)
|
|
200
|
+
if parent_entry in files:
|
|
201
|
+
files.remove(parent_entry)
|
|
202
202
|
|
|
203
203
|
# promote Markdown document in directory as parent page in Confluence
|
|
204
204
|
node = self._index_file(parent_doc)
|
|
@@ -206,20 +206,14 @@ class Processor:
|
|
|
206
206
|
parent.add_child(node)
|
|
207
207
|
parent = node
|
|
208
208
|
elif parent is None:
|
|
209
|
-
|
|
210
|
-
if self.options.root_page_id is not None:
|
|
211
|
-
page_id = self.options.root_page_id.page_id
|
|
212
|
-
parent = DocumentNode(local_dir, page_id=page_id)
|
|
213
|
-
else:
|
|
214
|
-
# local use only, raises error with remote synchronization
|
|
215
|
-
parent = DocumentNode(local_dir, page_id=None)
|
|
209
|
+
raise ArgumentError(f"root page requires corresponding top-level Markdown document in {local_dir}")
|
|
216
210
|
|
|
217
211
|
for file in files:
|
|
218
|
-
node = self._index_file(file)
|
|
212
|
+
node = self._index_file(local_dir / Path(file.name))
|
|
219
213
|
parent.add_child(node)
|
|
220
214
|
|
|
221
215
|
for directory in directories:
|
|
222
|
-
self._index_directory(directory, parent)
|
|
216
|
+
self._index_directory(local_dir / Path(directory.name), parent)
|
|
223
217
|
|
|
224
218
|
return parent
|
|
225
219
|
|
|
@@ -238,6 +232,7 @@ class Processor:
|
|
|
238
232
|
page_id=document.page_id,
|
|
239
233
|
space_key=document.space_key,
|
|
240
234
|
title=document.title,
|
|
235
|
+
synchronized=document.synchronized if document.synchronized is not None else True,
|
|
241
236
|
)
|
|
242
237
|
|
|
243
238
|
def _generate_hash(self, absolute_path: Path) -> str:
|
|
@@ -254,9 +249,7 @@ class ProcessorFactory:
|
|
|
254
249
|
options: ConfluenceDocumentOptions
|
|
255
250
|
site: ConfluenceSiteMetadata
|
|
256
251
|
|
|
257
|
-
def __init__(
|
|
258
|
-
self, options: ConfluenceDocumentOptions, site: ConfluenceSiteMetadata
|
|
259
|
-
) -> None:
|
|
252
|
+
def __init__(self, options: ConfluenceDocumentOptions, site: ConfluenceSiteMetadata) -> None:
|
|
260
253
|
self.options = options
|
|
261
254
|
self.site = site
|
|
262
255
|
|
|
@@ -283,9 +276,7 @@ class Converter:
|
|
|
283
276
|
else:
|
|
284
277
|
raise ArgumentError(f"expected: valid file or directory path; got: {path}")
|
|
285
278
|
|
|
286
|
-
def process_directory(
|
|
287
|
-
self, local_dir: Path, root_dir: Optional[Path] = None
|
|
288
|
-
) -> None:
|
|
279
|
+
def process_directory(self, local_dir: Path, root_dir: Optional[Path] = None) -> None:
|
|
289
280
|
"""
|
|
290
281
|
Recursively scans a directory hierarchy for Markdown files, and processes each, resolving cross-references.
|
|
291
282
|
"""
|